nx86add.pas 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  24. type
  { Add/compare node shared by the i386 and x86-64 code generators.
    It derives from tcgaddnode and overrides its second_* entry points;
    the protected helpers below are implemented in this unit. }
  tx86addnode = class(tcgaddnode)
  protected
    { returns the CPU flag condition for the current comparison nodetype,
      taking nf_swapped into account; unsigned selects the
      F_B/F_BE/F_A/F_AE family instead of F_L/F_LE/F_G/F_GE }
    function getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location is a register, either by swapping the
      locations with right (unless noswap is set) or by loading it }
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { optionally forces both operands into fpu registers and toggles
      nf_swapped depending on which operands already were fpu registers }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);
    { emits op with right.location as first and left.location.register as
      second operand }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer operation op on left/right, optionally preceded by a
      NOT (extra_not) and followed by an overflow check (mboverflow) }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { floating point comparison and arithmetic using mm registers }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;
    { abstract here: has no implementation in this class }
    procedure second_mul;virtual;abstract;
  public
    procedure second_addfloat;override;
    procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_addordinal;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    { only compiled when SUPPORT_MMX is defined }
    procedure second_opmmxset;override;
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  { Emits the instruction(s) for the two-operand integer operation "op" on
    left and right; left.location is expected to be a LOC_REGISTER on entry
    and holds the result afterwards.

    op         : instruction to emit
    opsize     : operand size of the operation
    unsigned   : selects which flag is tested by the overflow check
    extra_not  : additionally complement one operand first (see body)
    mboverflow : emit an overflow check when overflow checking is enabled }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount  : longint;
      ovf_ok      : tasmlabel;
      tmpreg      : Tregister;
      instrsize   : topsize;
      swapped_sub,
      commutative,
      right_const,
      ovcheck     : boolean;
    begin
      { values that stay the same for the whole routine }
      instrsize:=TCGSize2Opsize[opsize];
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      commutative:=(op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL);
      ovcheck:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { swapped subtraction: the result is computed into the right
                register, afterwards the locations are exchanged so that
                left refers to the result again }
              if extra_not then
                emit_reg(A_NOT,instrsize,left.location.register);
              emit_reg_reg(op,instrsize,left.location.register,right.location.register);
              { the locations got swapped once more, so update the flag too }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,instrsize,right.location.register);
              { for these operations the locations are exchanged before
                the instruction is emitted }
              if commutative then
                location_swap(left.location,right.location);
              emit_reg_reg(op,instrsize,right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not a register: load it into a scratch register,
            operate on the scratch register and copy the result to left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,tmpreg);
          emit_reg_reg(op,instrsize,left.location.register,tmpreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,tmpreg,left.location.register);
        end
      else
        begin
          right_const:=right.location.loc=LOC_CONSTANT;
          { special cases for a constant right operand }
          if (op=A_CMP) and
             (nodetype in [equaln,unequaln]) and
             right_const and
             (right.location.value=0) then
            { (in)equality test against zero: TEST reg,reg }
            emit_reg_reg(A_TEST,instrsize,left.location.register,left.location.register)
          else if (op=A_ADD) and
                  right_const and
                  (right.location.value=1) and
                  not ovcheck then
            { +1 without overflow checking: INC }
            emit_reg(A_INC,instrsize,left.location.register)
          else if (op=A_SUB) and
                  right_const and
                  (right.location.value=1) and
                  not ovcheck then
            { -1 without overflow checking: DEC }
            emit_reg(A_DEC,instrsize,left.location.register)
          else if (op=A_IMUL) and
                  right_const and
                  ispowerof2(int64(right.location.value),shiftcount) and
                  not ovcheck then
            { multiplication by a power of two without overflow checking: SHL }
            emit_const_reg(A_SHL,instrsize,shiftcount,left.location.register)
          else if extra_not then
            begin
              { complement a copy of right, then AND it into left }
              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,tmpreg);
              emit_reg(A_NOT,instrsize,tmpreg);
              emit_reg_reg(A_AND,instrsize,tmpreg,left.location.register);
            end
          else
            emit_op_right_left(op,opsize);
        end;

      { the overflow check has to be generated here, because only this
        routine knows (through "unsigned") which flag must be tested }
      if mboverflow and ovcheck then
        begin
          current_asmdata.getjumplabel(ovf_ok);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,ovf_ok)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,ovf_ok);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW');
          cg.a_label(current_asmdata.CurrAsmList,ovf_ok);
        end;
    end;
  { Ensures that left.location is a LOC_REGISTER of size opsize.

    opsize : operand size the operation is going to use
    noswap : when true, left and right locations may not be exchanged

    If left is not a register and right is, the locations are swapped (and
    nf_swapped toggled) unless noswap forbids it; otherwise left is loaded
    into a register. Afterwards any non-constant operand whose size does not
    match opsize is forced into a register of that size. }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or
             (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not change the value of the register, so
                in that case a constant register may be reused }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,is_comparison);
            end
          else
            begin
              { right already is a register: simply exchange the locations }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;
      { NOTE(review): only the location size is normalised with
        tcgsize2unsigned, opsize is not; for a signed opsize these checks
        therefore look like they are always true - confirm whether that is
        intended }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>opsize) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>opsize) then
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
    end;
  { Inspects whether left and right are LOC_FPUREGISTER, optionally loads
    the operands that are not, and toggles nf_swapped where the operands
    end up in reversed order.

    force_fpureg : when true, operands that are not yet in an fpu register
                   are forced into one; when false nothing is loaded }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    begin
      if right.location.loc=LOC_FPUREGISTER then
        begin
          if left.location.loc=LOC_FPUREGISTER then
            { both operands already are fpu registers: they are always in
              the wrong order on the stack }
            toggleflag(nf_swapped)
          else if force_fpureg then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end
      else if force_fpureg then
        begin
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left.location.loc=LOC_FPUREGISTER then
            { left was on the stack before right got loaded => swapped }
            toggleflag(nf_swapped)
          else
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end;
    end;
  { Appends the instruction "op" with right.location as first operand and
    left.location.register as second operand to the current assembler list.

    op     : instruction to emit
    opsize : operand size of the instruction

    left must already be a register. right may be a register, a reference
    or a constant; any other location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      list      : TAsmList;
      instrsize : topsize;
{$ifdef x86_64}
      constreg  : tregister;
{$endif x86_64}
    begin
      list:=current_asmdata.CurrAsmList;
      instrsize:=TCGSize2Opsize[opsize];
      { the case below has no branch for subset locations, so those are
        loaded into a register first }
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        location_force_reg(list,right.location,def_cgsize(right.resultdef),true);
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          list.concat(taicpu.op_reg_reg(op,instrsize,right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(list,right.location.reference);
            list.concat(taicpu.op_ref_reg(op,instrsize,right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bit constants directly, bigger
              values go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(list,opsize);
                cg.a_load_const_reg(list,opsize,right.location.value,constreg);
                list.concat(taicpu.op_reg_reg(op,instrsize,constreg,left.location.register));
              end
            else
{$endif x86_64}
              list.concat(taicpu.op_const_reg(op,instrsize,right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag condition that represents the comparison of this node.

    unsigned : true selects the F_B/F_BE/F_A/F_AE conditions, false the
               F_L/F_LE/F_G/F_GE conditions for the ordering comparisons

    equaln/unequaln always map to F_E/F_NE. When nf_swapped is set, the
    ordering comparisons are mirrored (ltn<->gtn, lten<->gten) before the
    flag is chosen. Any nodetype that is not a comparison raises an internal
    error instead of leaving the function result undefined. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { fold the swapped state into the comparison, so only one mapping
        per signedness is needed below }
      cmp:=nodetype;
      if nf_swapped in flags then
        begin
          if cmp=ltn then
            cmp:=gtn
          else if cmp=lten then
            cmp:=gten
          else if cmp=gtn then
            cmp:=ltn
          else if cmp=gten then
            cmp:=lten;
        end;
      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          begin
            if unsigned then
              getresflags:=F_B
            else
              getresflags:=F_L;
          end;
        lten :
          begin
            if unsigned then
              getresflags:=F_BE
            else
              getresflags:=F_LE;
          end;
        gtn :
          begin
            if unsigned then
              getresflags:=F_A
            else
              getresflags:=F_G;
          end;
        gten :
          begin
            if unsigned then
              getresflags:=F_AE
            else
              getresflags:=F_GE;
          end;
        else
          { not a comparison node: there is no meaningful flag to return }
          internalerror(2024050601);
      end;
    end;
  306. {*****************************************************************************
  307. AddSmallSet
  308. *****************************************************************************}
  { Generates code for arithmetic on small sets: adding a set element (BTS),
    union (OR), symmetric difference (XOR), intersection (AND) and
    difference (AND with the complemented subtrahend). The result location
    is a register.

    For the difference two shortcuts exist: a constant subtrahend is
    complemented at compile time, and "all members - x" (minuend is the
    constant -1) is implemented as a single NOT of x. The two shortcuts
    are mutually exclusive: applying both to two constant operands would
    complement the subtrahend twice. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap,
      swapped : boolean;
      all_member_optimization:boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opsize:=int_cgsize(resultdef.size);

      if (left.resultdef.typ=setdef) then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype=setelementn then
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  opsize:=OS_32;
                { bts requires both elements to be registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end
            else
              op:=A_OR;
          end;
        symdifn :
          op:=A_XOR;
        muln :
          op:=A_AND;
        subn :
          begin
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if (not(swapped) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
               (swapped and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
              { minuend contains all members: a plain NOT of the subtrahend
                is generated below, so the subtrahend must stay untouched
                here }
              all_member_optimization:=true
            else if not(swapped) and
                    (right.location.loc=LOC_CONSTANT) then
              { constant subtrahend: complement it at compile time }
              right.location.value := not(right.location.value)
            else if swapped and
                    (left.location.loc=LOC_CONSTANT) then
              left.location.value := not(left.location.value)
            else
              { subtrahend must be complemented at run time }
              extra_not:=true;
          end;
        xorn :
          op:=A_XOR;
        orn :
          op:=A_OR;
        andn :
          op:=A_AND;
        else
          internalerror(2003042215);
      end;

      if all_member_optimization then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          { right now refers to x }
          location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);

          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        location_force_reg(current_asmdata.CurrAsmList,location,int_cgsize(resultdef.size),false);
    end;
  { Generates code for comparing two small sets; the result location is
    LOC_FLAGS.

    equaln/unequaln are a plain CMP. lten/gten (subset tests) are
    implemented as "(a and b) = b": the operands are ordered so that the
    potential subset is right, left is AND-ed with right and the result is
    compared with right for equality. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
      op : TAsmOp;
    begin
      pass_left_right;
      opsize:=int_cgsize(left.resultdef.size);
      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,gten:
          begin
            if (not(nf_swapped in flags) and (nodetype = lten)) or
               ((nf_swapped in flags) and (nodetype = gten)) then
              swapleftright;
            { the AND below overwrites the left register, so a constant
              register must not be reused for it }
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(op,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  439. {*****************************************************************************
  440. AddMMX
  441. *****************************************************************************}
  442. {$ifdef SUPPORT_MMX}
  { Generates code for add/sub/mul and the bitwise operations on MMX
    operands; the result location is a LOC_MMXREGISTER.

    The instruction is selected from the node type, the MMX base type of
    left and, for add/sub, the saturation switch. Then left is brought into
    an MMX register (swapping the locations with right if right already is
    one) and the operation is emitted. A swapped subtraction is computed
    into a separate register so that the operand order stays correct. }
  procedure tx86addnode.second_opmmx;
    var
      op         : TAsmOp;
      cmpop      : boolean;
      mmxbase    : tmmxtype;
      hreg,
      hregister  : tregister;
    begin
      pass_left_right;

      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      { NOTE(review): the inner case statements have no else branch, so op
        stays unassigned for base types they do not list - confirm that such
        combinations cannot reach this point }
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                 case mmxbase of
                    mmxs8bit:
                      op:=A_PADDSB;
                    mmxu8bit:
                      op:=A_PADDUSB;
                    mmxs16bit,mmxfixed16:
                      op:=A_PADDSW;
                    mmxu16bit:
                      op:=A_PADDUSW;
                 end;
              end
            else
              begin
                 case mmxbase of
                    mmxs8bit,mmxu8bit:
                      op:=A_PADDB;
                    mmxs16bit,mmxu16bit,mmxfixed16:
                      op:=A_PADDW;
                    mmxs32bit,mmxu32bit:
                      op:=A_PADDD;
                 end;
              end;
          end;
        muln :
          begin
             case mmxbase of
                mmxs16bit,mmxu16bit:
                  op:=A_PMULLW;
                mmxfixed16:
                  op:=A_PMULHW;
             end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                 case mmxbase of
                    mmxs8bit:
                      op:=A_PSUBSB;
                    mmxu8bit:
                      op:=A_PSUBUSB;
                    mmxs16bit,mmxfixed16:
                      { 16 bit operands need the word variant, analogous to
                        A_PADDSW in the addn branch }
                      op:=A_PSUBSW;
                    mmxu16bit:
                      op:=A_PSUBUSW;
                 end;
              end
            else
              begin
                 case mmxbase of
                    mmxs8bit,mmxu8bit:
                      op:=A_PSUBB;
                    mmxs16bit,mmxu16bit,mmxfixed16:
                      op:=A_PSUBW;
                    mmxs32bit,mmxu32bit:
                      op:=A_PSUBD;
                 end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                operate on that one, it becomes the result register }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that gets loaded from memory here; left
                    already is an MMX register at this point, so testing
                    left would always raise the internal error }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in the former right register: exchange the
                locations so that left refers to it }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      { cmpop is never set in this routine, so left is not freed here }
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  611. {$endif SUPPORT_MMX}
  612. {*****************************************************************************
  613. addmmxset
  614. *****************************************************************************}
  615. {$ifdef SUPPORT_MMX}
  { Code generation for set operations using MMX instructions.

    NOTE(review): this routine looks unfinished. An instruction is selected
    into op, but the call that would emit it (emit_generic_code) is
    commented out, the "add set element" case selects no instruction at
    all, and no result location is set. Only the operand handling is
    performed. }
  procedure tx86addnode.second_opmmxset;
    var
      opsize : TCGSize;
      op     : TAsmOp;
      cmpop,
      noswap : boolean;
    begin
      pass_left_right;

      cmpop:=false;
      noswap:=false;
      opsize:=OS_32;
      case nodetype of
        addn:
          begin
            { are we adding set elements ? }
            if right.nodetype=setelementn then
              begin
                { adding elements is not commutative }
{                if nf_swapped in flags then
                  swapleftright;}
                { bts requires both elements to be registers }
{                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize_2_cgsize[opsize],false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize_2_cgsize[opsize],true);
                op:=A_BTS;
                noswap:=true;}
              end
            else
              op:=A_POR;
          end;
        symdifn :
          op:=A_PXOR;
        muln:
          op:=A_PAND;
        subn:
          op:=A_PANDN;
        equaln,
        unequaln :
          begin
            op:=A_PCMPEQD;
            cmpop:=true;
          end;
        lten,gten:
          begin
            if (not(nf_swapped in flags) and (nodetype = lten)) or
               ((nf_swapped in flags) and (nodetype = gten)) then
              swapleftright;
            { the AND below overwrites the left register, so a constant
              register must not be reused for it }
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_PCMPEQD;
            cmpop:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        xorn :
          op:=A_PXOR;
        orn :
          op:=A_POR;
        andn :
          op:=A_PAND;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,noswap);
{     emit_generic_code(op,opsize,true,extra_not,false);}
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  684. {$endif SUPPORT_MMX}
  685. {*****************************************************************************
  686. AddFloat
  687. *****************************************************************************}
  { Generates code for floating point +, -, * and / using scalar mm
    register operations; the result location is a LOC_MMREGISTER.

    For the commutative operations right's register is reused as
    destination when right already is an mm register; otherwise left is
    loaded into an mm register and right is used as source operand. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;
    begin
      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          if (left.location.loc<>LOC_FPUREGISTER) or
             (right.location.loc<>LOC_FPUREGISTER) then
            swapleftright
          else
            { swapleftright would not change anything when both operands
              are on the fpu stack (both are "R_ST"), so exchange the stack
              entries instead }
            emit_none(A_FXCH,S_NO);
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          { operand order does not matter for add/mul, so right's register
            can serve as destination }
          location.register:=right.location.register;
          { an operand living in an fpu register is written to memory
            rather than moved to an mm register: there is no direct
            fpu->mm register copy instruction, so going through memory
            probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { same reasoning as above: fpu register operands go through
            memory }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates code for a floating point comparison with COMISS (single) or
    COMISD (double); the result location is LOC_FLAGS.

    When right already is an mm register it is used as the register operand
    and left as the other operand; since the operands are then exchanged,
    nf_swapped is toggled so that getresflags picks the mirrored condition.
    Otherwise left is loaded into an mm register and compared with right. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=A_COMISS
      else if is_double(left.resultdef) then
        cmpinstr:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          { an operand living in an fpu register is written to memory
            rather than moved to an mm register: there is no direct
            fpu->mm register copy instruction, so going through memory
            probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          case right.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right is used as register operand; same reasoning as above for
            an fpu register operand on the left side }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          { the operands were exchanged: flip the swapped state }
          if nf_swapped in flags then
            exclude(flags,nf_swapped)
          else
            include(flags,nf_swapped)
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for +, -, * and / on vector operands that fit into an mm
    register; the result location is a LOC_MMREGISTER. Vectors that do not
    fit raise internalerror 200610072 (not yet supported).

    The operand size passed to the code generator is derived from the
    element type of the vector in both code paths. The commutative path
    used to cast the vector definition itself to tfloatdef, unlike the
    other path which goes through tarraydef(...).elementdef. }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of a single vector element }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div node.

    If use_sse(resultdef) holds, the work is delegated to
    second_addfloatsse. Otherwise both operands are brought onto the x87
    stack and combined with a popping instruction, leaving the result in
    ST. Raises internalerror(2003042214) for an unsupported node type. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resultdef) then
        second_addfloatsse
      else
        begin
          pass_left_right;

          { choose the popping x87 instruction for this node type }
          case nodetype of
            addn :
              fpuop:=A_FADDP;
            muln :
              fpuop:=A_FMULP;
            subn :
              fpuop:=A_FSUBP;
            slashn :
              fpuop:=A_FDIVP;
            else
              internalerror(2003042214);
          end;

          check_left_and_right_fpureg(true);

          { if the tree nodes were swapped, the non-commutative operations
            need the reversed form of the instruction }
          if nf_swapped in flags then
            case nodetype of
              slashn :
                fpuop:=A_FDIVRP;
              subn :
                fpuop:=A_FSUBRP;
            end;

          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;

          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=NR_ST;
        end;
    end;
  { Generates code for a floating point comparison node.

    If use_sse holds for either operand type, the work is delegated to
    second_cmpfloatsse. Otherwise both operands are brought onto the x87
    stack and compared:
      - on non-x86_64 targets with cputype below cpu_Pentium2, via FCOMPP
        followed by FNSTSW AX / SAHF to transfer the result to the flags;
      - in all other cases via FCOMIP plus an FSTP to pop the second
        operand.
    The result location is LOC_FLAGS; the flag condition depends on the
    node type and on whether the operands were swapped.

    Raises an internalerror if the node type is not one of the six
    comparison node types, instead of using an uninitialised flag value. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
      swapped : boolean;
    begin
      if use_sse(left.resultdef) or use_sse(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { fcompp compares and pops both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Both code paths use the same mapping from node type to flag
        condition. nf_swapped is read only here, after the operands have
        been set up, exactly as the original per-path tables did. With
        swapped operands the ordering conditions are mirrored. }
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          resflags:=F_E;
        unequaln :
          resflags:=F_NE;
        ltn :
          begin
            if swapped then
              resflags:=F_A
            else
              resflags:=F_B;
          end;
        lten :
          begin
            if swapped then
              resflags:=F_AE
            else
              resflags:=F_BE;
          end;
        gtn :
          begin
            if swapped then
              resflags:=F_B
            else
              resflags:=F_A;
          end;
        gten :
          begin
            if swapped then
              resflags:=F_BE
            else
              resflags:=F_AE;
          end;
        else
          { not a comparison node: resflags would be undefined }
          internalerror(2024061701);
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  964. {*****************************************************************************
  965. Add64bit
  966. *****************************************************************************}
  { 64 bit arithmetic: when cpu64bitalu is defined this forwards to
    second_addordinal; otherwise it raises internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: when cpu64bitalu is defined this forwards to
    second_cmpordinal; otherwise it raises internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  985. {*****************************************************************************
  986. AddOrdinal
  987. *****************************************************************************}
  { Generates code for an ordinal arithmetic node.

    A multiplication where at least one operand is not signed is routed
    to second_mul, because the unsigned MUL opcode requires special
    handling; every other case is handled by the inherited
    implementation. }
  procedure tx86addnode.second_addordinal;
    begin
      if (nodetype=muln) and
         not(is_signed(left.resultdef) and is_signed(right.resultdef)) then
        second_mul
      else
        inherited second_addordinal;
    end;
  { Generates code for an ordinal comparison node.

    Emits a CMP on the two operands (left forced into a register), frees
    any temps held by the operands and returns the result as LOC_FLAGS.
    The comparison is treated as unsigned as soon as one of the operand
    types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize : tcgsize;
      is_unsigned_cmp : boolean;
    begin
      { unsigned unless both operands are signed }
      is_unsigned_cmp:=not(is_signed(left.resultdef) and
                           is_signed(right.resultdef));
      { the operation size is taken from the left operand's type }
      cmpsize:=def_cgsize(left.resultdef);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned_cmp,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned_cmp);
    end;
  { Unit initialization: assigns tx86addnode to caddnode.
    NOTE(review): presumably caddnode is the class reference used to
    instantiate add nodes, making this the x86 override -- confirm. }
  begin
    caddnode := tx86addnode;
  end.