nx86add.pas 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Add/compare node shared by the i386 and x86-64 code generators.
    Descends from the generic tcgaddnode and overrides its second_*
    handlers. }
  tx86addnode = class(tcgaddnode)
  protected
    { helpers used by the second_* handlers of this class }
    function getresflags(unsigned : boolean) : tresflags;
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    procedure left_and_right_must_be_fpureg;
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    { called by second_cmpfloat / second_addfloat when use_sse() is true }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;

    { used by second_addordinal for multiplications with an unsigned
      operand; has to be implemented by a descendant }
    procedure second_mul;virtual;abstract;
  public
    procedure second_addfloat;override;
    procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_addordinal;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmxset;override;
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    { Emits the instruction(s) for a two operand integer style operation.

      left.location is expected to be a LOC_REGISTER on entry (callers in
      this unit run left_must_be_reg first).

        op         - instruction to emit
        opsize     - operand size; translated through TCGSize2Opsize
        unsigned   - selects the flag tested by the overflow check
        extra_not  - emit a NOT on one operand before the operation (the
                     left register for a swapped subtraction, otherwise
                     the right operand)
        mboverflow - together with the cs_check_overflow local switch this
                     adds a conditional call to FPC_OVERFLOW }
    var
      shift : longint;
      no_overflow : tasmlabel;
      scratch : Tregister;
      swapped_sub,
      ovcheck : boolean;
    begin
      { both conditions are fixed for the decisions below: the swap flag is
        only toggled after it has been tested }
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      ovcheck:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { the instruction targets right's register; the locations are
                exchanged afterwards and the swap flag is toggled }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the two locations are exchanged before
                the instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not a register: load it into a scratch register,
            operate on the scratch register and copy the result to left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      { special cases for a constant right operand }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=0) then
        { (in)equality test against 0: TEST reg,reg }
        emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register)
      else if (op=A_ADD) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not ovcheck then
        { +1 without overflow checking: INC }
        emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_SUB) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not ovcheck then
        { -1 without overflow checking: DEC }
        emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_IMUL) and
              (right.location.loc=LOC_CONSTANT) and
              (ispowerof2(int64(right.location.value),shift)) and
              not ovcheck then
        { multiplication by a power of two without overflow checking: SHL }
        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shift,left.location.register)
      else if extra_not then
        begin
          { NOT a copy of right in a scratch register, then AND it into left }
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,scratch);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
          emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check is emitted here because the signedness of the
        operation is only known through the unsigned parameter. }
      if mboverflow and ovcheck then
        begin
          current_asmdata.getjumplabel(no_overflow);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,no_overflow)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,no_overflow);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
          cg.a_label(current_asmdata.CurrAsmList,no_overflow);
        end;
    end;
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { Makes sure left.location is a LOC_REGISTER.

        opsize - size passed to location_force_reg when left has to be loaded
        noswap - when true, left and right are never exchanged }
    begin
      { nothing to do when left already is a register }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (not noswap) and
         (right.location.loc=LOC_REGISTER) then
        begin
          { right already is a register: exchange the locations and
            record the exchange in nf_swapped }
          location_swap(left.location,right.location);
          toggleflag(nf_swapped);
        end
      else
        { For comparisons the last argument of location_force_reg is true;
          the original comment explains this as possibly reusing a constant
          register because a comparison does not change the register. }
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
    end;
  procedure tx86addnode.left_and_right_must_be_fpureg;
    { Forces both operands into LOC_FPUREGISTER. Right is handled first,
      then left. When left already was an fpu register, nf_swapped is
      toggled. }
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);

      if left.location.loc<>LOC_FPUREGISTER then
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
      else
        { left was on the fpu stack already, so the operands are in the
          wrong order on the stack => mark them as swapped }
        toggleflag(nf_swapped);
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    { Appends "op right,left" to the current assembler list. left has to be
      a register; right may be a register, a memory reference or a constant
      (subset locations are loaded into a register first). Any other right
      location raises internalerror 200203232. }
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      { subset locations cannot be used as an operand directly }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));

        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;

        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { 64 bit operations only accept constants that fit a signed
              32 bit value; anything else goes through a register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;

        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    { Returns the flags condition that corresponds to this node's
      comparison type.

        unsigned - true selects the A/AE/B/BE conditions, false the
                   G/GE/L/LE conditions

      When nf_swapped is set the mirrored condition is returned
      (ltn <-> gtn, lten <-> gten). Node types other than the six
      comparison types leave the result unassigned. }
    begin
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        else
          if nf_swapped in flags then
            begin
              { operands were exchanged: mirror the condition }
              case nodetype of
                ltn :
                  if unsigned then getresflags:=F_A else getresflags:=F_G;
                lten :
                  if unsigned then getresflags:=F_AE else getresflags:=F_GE;
                gtn :
                  if unsigned then getresflags:=F_B else getresflags:=F_L;
                gten :
                  if unsigned then getresflags:=F_BE else getresflags:=F_LE;
              end;
            end
          else
            begin
              case nodetype of
                ltn :
                  if unsigned then getresflags:=F_B else getresflags:=F_L;
                lten :
                  if unsigned then getresflags:=F_BE else getresflags:=F_LE;
                gtn :
                  if unsigned then getresflags:=F_A else getresflags:=F_G;
                gten :
                  if unsigned then getresflags:=F_AE else getresflags:=F_GE;
              end;
            end;
      end;
    end;
  294. {*****************************************************************************
  295. AddSmallSet
  296. *****************************************************************************}
  procedure tx86addnode.second_addsmallset;
    { Code generation for operators on small sets, handled as OS_32
      values: picks the instruction for the node type, forces left into a
      register and emits the operation through emit_generic_code. }
    var
      cgsize : TCGSize;
      asmop : TAsmOp;
      invert_operand,
      keep_order : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      invert_operand:=false;
      cgsize:=OS_32;

      case nodetype of
        addn :
          begin
            { adding an element is not commutative: if the operands were
              swapped and the element ended up on the left, swap back }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;

            if right.nodetype<>setelementn then
              asmop:=A_OR
            else
              begin
                { element ranges are not supported for small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both operands to be registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,cgsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,cgsize,true);
                asmop:=A_BTS;
                keep_order:=true;
              end;
          end;

        symdifn,
        xorn :
          asmop:=A_XOR;

        muln,
        andn :
          asmop:=A_AND;

        orn :
          asmop:=A_OR;

        subn :
          begin
            { the difference is emitted as an AND with a complemented
              operand: a constant is complemented right here, anything
              else is left to emit_generic_code via extra_not }
            asmop:=A_AND;
            if nf_swapped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;

        else
          internalerror(2003042215);
      end;

      left_must_be_reg(cgsize,keep_order);
      emit_generic_code(asmop,cgsize,true,invert_operand,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);

      set_result_location_reg;
    end;
  procedure tx86addnode.second_cmpsmallset;
    { Code generation for comparisons of small sets (OS_32). The result is
      left in the cpu flags. Only equaln, unequaln, lten and gten are
      accepted; other node types raise internalerror 2003042215. }
    var
      cgsize : TCGSize;
      asmop : TAsmOp;
      need_swap : boolean;
    begin
      pass_left_right;
      cgsize:=OS_32;

      case nodetype of
        equaln,
        unequaln :
          asmop:=A_CMP;

        lten,
        gten :
          begin
            { swap for an unswapped lten or a swapped gten; within this
              branch that is exactly "swapped flag equals (node is gten)" }
            need_swap:=(nf_swapped in flags)=(nodetype=gten);
            if need_swap then
              swapleftright;
            { left := left AND right, which is then compared with right }
            location_force_reg(current_asmdata.CurrAsmList,left.location,cgsize,true);
            emit_op_right_left(A_AND,cgsize);
            asmop:=A_CMP;
            { warning: ugly hack, a JE is needed, so the node is turned
              into an equaln }
            nodetype:=equaln;
          end;

        else
          internalerror(2003042215);
      end;

      left_must_be_reg(cgsize,false);
      emit_generic_code(asmop,cgsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  397. {*****************************************************************************
  398. AddMMX
  399. *****************************************************************************}
  400. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    { Code generation for MMX arithmetic and logical operators.

      The instruction is selected from the node type, the MMX element type
      of the left operand and, for addn/subn, the cs_mmx_saturation local
      switch. Left is then brought into an MMX register (or exchanged with
      right when right already is one), the instruction is emitted and the
      register holding the result is stored in location.register.

      NOTE(review): element types that are not listed in the inner case
      statements leave op unassigned; node types other than
      addn/muln/subn/xorn/orn/andn raise internalerror 2003042214. }
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));

      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16 bit elements need the word instruction
                    (matches A_PADDSW in the addn branch) }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { load left into a fresh MMX register }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  { register variable }
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a new register and
                apply left to that copy }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg)
              else
                begin
                  { right is the operand being loaded, so right is the
                    one that has to be a reference here; left already is
                    an MMX register at this point }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                end;
              emit_reg_reg(op,S_NO,left.location.register,hreg);
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { the instruction targets right's register; exchanging the
                locations makes left describe the result again }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  566. {$endif SUPPORT_MMX}
  567. {*****************************************************************************
  568. addmmxset
  569. *****************************************************************************}
  570. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmxset;
    { Set operators on MMX operands. Only the instruction selection and
      the operand preparation are present; the call that would emit the
      selected instruction is commented out below, so op is assigned but
      never used by this procedure. }
    var
      opsize : TCGSize;
      op : TAsmOp;
      cmpop,
      noswap : boolean;
    begin
      pass_left_right;

      opsize:=OS_32;
      noswap:=false;
      cmpop:=false;

      case nodetype of
        addn :
          begin
            { adding a single set element is disabled; op stays
              unassigned in that case }
            if right.nodetype<>setelementn then
              op:=A_POR;
            { disabled code for the set element case: }
            { adding elements is not commutative }
            { if nf_swapped in flags then
                swapleftright;}
            { bts requires both elements to be registers }
            { location_force_reg(current_asmdata.CurrAsmList,left.location,opsize_2_cgsize[opsize],false);
              location_force_reg(current_asmdata.CurrAsmList,right.location,opsize_2_cgsize[opsize],true);
              op:=A_BTS;
              noswap:=true;}
          end;

        symdifn,
        xorn :
          op:=A_PXOR;

        muln,
        andn :
          op:=A_PAND;

        orn :
          op:=A_POR;

        subn :
          op:=A_PANDN;

        equaln,
        unequaln :
          begin
            op:=A_PCMPEQD;
            cmpop:=true;
          end;

        lten,
        gten :
          begin
            { swap for an unswapped lten or a swapped gten }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,true);
            emit_op_right_left(A_AND,opsize);
            op:=A_PCMPEQD;
            cmpop:=true;
            { warning: ugly hack, a JE is needed, so the node is turned
              into an equaln }
            nodetype:=equaln;
          end;

        else
          internalerror(2003042215);
      end;

      left_must_be_reg(opsize,noswap);
      { emit_generic_code(op,opsize,true,extra_not,false);}
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  639. {$endif SUPPORT_MMX}
  640. {*****************************************************************************
  641. AddFloat
  642. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    { Floating point +, -, * and / through cg.a_opmm_loc_reg with
      mms_movescalar. The result ends up in an mm register, which is either
      right's register (commutative operation with right already in an mm
      register) or left's register. Other node types raise
      internalerror 200312231. }
    var
      cgop : topcg;
      reuse_right : boolean;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn   : cgop:=OP_ADD;
        muln   : cgop:=OP_MUL;
        subn   : cgop:=OP_SUB;
        slashn : cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      { right's register may only serve as destination when the operand
        order does not matter }
      reuse_right:=(right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]);

      if reuse_right then
        begin
          location.register:=right.location.register;
          { An operand held in an fpu register is written to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so this is probably shorter. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if (nf_swapped in flags) then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { same reasoning as above for an operand in an fpu register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_cmpfloatsse;
    { Floating point comparison with COMISS (single) or COMISD (double);
      other types raise internalerror 200402222. The result is left in the
      cpu flags and described by getresflags(true). }
    var
      cmpop : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpop:=A_COMISS
      else if is_double(left.resultdef) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if (right.location.loc<>LOC_MMREGISTER) then
        begin
          { regular order: left in an mm register, right as the other
            operand }
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          { An operand held in an fpu register is written to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so this is probably shorter. }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          case right.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right already is an mm register: compare with the operand
            roles exchanged and flip nf_swapped to account for it }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE :
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          if nf_swapped in flags then
            exclude(flags,nf_swapped)
          else
            include(flags,nf_swapped)
        end;

      location.resflags:=getresflags(true);
    end;
  procedure tx86addnode.second_opvector;
    { Vector +, -, * and / for operands that fit into an mm register
      (fits_in_mm_register); anything else raises internalerror 200610072,
      unsupported node types raise internalerror 200610071. }
    var
      cgop : topcg;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn   : cgop:=OP_ADD;
        muln   : cgop:=OP_MUL;
        subn   : cgop:=OP_SUB;
        slashn : cgop:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if not fits_in_mm_register(left.resultdef) then
        begin
          { not yet supported }
          internalerror(200610072);
        end
      else
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { right's register may only serve as destination when the
            operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              { NOTE(review): this branch casts left.resultdef itself to
                tfloatdef while the branch below goes through
                tarraydef(left.resultdef).elementdef - confirm which one
                is intended }
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,
                tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,
                tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
            end;
        end
    end;
  procedure tx86addnode.second_addfloat;
    { Floating point +, -, * and /. Delegates to second_addfloatsse when
      use_sse(resultdef) is true; otherwise both operands are forced into
      fpu registers and one popping x87 instruction is emitted. The result
      location is NR_ST. Other node types raise internalerror 2003042214. }
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resultdef) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn   : fpuop:=A_FADDP;
        muln   : fpuop:=A_FMULP;
        subn   : fpuop:=A_FSUBP;
        slashn : fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { if the operands were swapped, the non-commutative operations have
        to use the reverse instruction }
      if nf_swapped in flags then
        begin
          if (nodetype=slashn) then
            fpuop:=A_FDIVRP
          else if (nodetype=subn) then
            fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  procedure tx86addnode.second_cmpfloat;
    { Floating point comparison. Delegates to second_cmpfloatsse when
      use_sse() is true for either operand type. Otherwise both operands
      are forced into fpu registers and compared:

        - when not compiling for x86_64 and the selected cpu type is below
          cpu_Pentium2: FCOMPP, then the fpu status word is moved into the
          cpu flags with FNSTSW AX / SAHF;
        - otherwise: FCOMIP followed by an FSTP of ST0.

      In both cases the fpu stack counter is decremented twice and the
      result is left in the cpu flags. The condition to test is the one
      getresflags returns for unsigned comparisons, which also takes
      nf_swapped into account (same helper as second_cmpfloatsse uses). }
    begin
      if use_sse(left.resultdef) or use_sse(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      { E/NE for (in)equality, B/BE/A/AE for the ordering comparisons,
        mirrored when nf_swapped is set }
      location.resflags:=getresflags(true);
    end;
  912. {*****************************************************************************
  913. Add64bit
  914. *****************************************************************************}
  procedure tx86addnode.second_add64bit;
    { 64 bit arithmetic: with cpu64bit defined this simply is
      second_addordinal; without it the procedure raises
      internalerror 200402042 and a separate implementation is required. }
    begin
{$ifdef cpu64bit}
      second_addordinal;
{$else cpu64bit}
      { must be implemented separate }
      internalerror(200402042);
{$endif cpu64bit}
    end;
  procedure tx86addnode.second_cmp64bit;
    { 64 bit comparison: with cpu64bit defined this simply is
      second_cmpordinal; without it the procedure raises
      internalerror 200402043 and a separate implementation is required. }
    begin
{$ifdef cpu64bit}
      second_cmpordinal;
{$else cpu64bit}
      { must be implemented separate }
      internalerror(200402043);
{$endif cpu64bit}
    end;
  933. {*****************************************************************************
  934. AddOrdinal
  935. *****************************************************************************}
  procedure tx86addnode.second_addordinal;
    { Ordinal arithmetic. A multiplication with at least one unsigned
      operand requires special handling and goes to second_mul; everything
      else is handled by the inherited implementation. }
    begin
      if (nodetype<>muln) or
         (is_signed(left.resultdef) and
          is_signed(right.resultdef)) then
        inherited second_addordinal
      else
        { unsigned MUL opcode }
        second_mul;
    end;
  procedure tx86addnode.second_cmpordinal;
    { Ordinal comparison: emits a compare through emit_generic_code and
      leaves the result in the cpu flags. The comparison is treated as
      unsigned as soon as one of the operand types is not signed. }
    var
      cmpsize : tcgsize;
      unsigned_cmp : boolean;
    begin
      { both values are taken from the operand types before the operands
        are processed }
      unsigned_cmp:=not(is_signed(left.resultdef) and
                        is_signed(right.resultdef));
      cmpsize:=def_cgsize(left.resultdef);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,unsigned_cmp,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned_cmp);
    end;
  { unit initialization: store tx86addnode in the caddnode class variable }
  begin
    caddnode:=tx86addnode;
  end.