{ nx86add.pas (39 KB) }
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. cgbase,
  22. cpubase,
  23. node,nadd,ncgadd;
  type
    { Code generator for add/compare nodes shared by the i386 and x86
      targets (see the unit header); specialises tcgaddnode. }
    tx86addnode = class(tcgaddnode)
    protected
      { Maps the node's comparison type to the CPU flag condition to test;
        the mapping depends on nf_swapped and on the signedness passed in. }
      function getresflags(unsigned : boolean) : tresflags;
      { Makes sure left.location is a LOC_REGISTER, either by swapping the
        locations with right (unless noswap) or by loading it; also forces
        operands whose size differs from opsize into registers. }
      procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
      { Optionally loads the operands into FPU registers and toggles
        nf_swapped depending on which operands are LOC_FPUREGISTER. }
      procedure check_left_and_right_fpureg(force_fpureg: boolean);
      { Emits "op right,left" with left in a register; right may be a
        register, a memory reference or a constant. }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { Emits the integer operation for the node, with an optional NOT of
        one operand and an optional overflow check (mboverflow). }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      { Float compare using COMISS/COMISD. }
      procedure second_cmpfloatsse;
      { Float add/sub/mul/div producing a LOC_MMREGISTER result. }
      procedure second_addfloatsse;
    public
      { Delegates to second_addfloatsse when use_vectorfpu(resultdef) holds,
        otherwise selects FADDP/FMULP/FSUBP/FDIVP. }
      procedure second_addfloat;override;
      { Set operations on sets that fit into an integer register. }
      procedure second_addsmallset;override;
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      { Equality and subset comparisons on small sets; result is in flags. }
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      { Packed integer add/sub/mul and logical operations on MMX registers. }
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      { Element-wise float operations on vectors that fit into an MM register. }
      procedure second_opvector;override;
    end;
  47. implementation
  48. uses
  49. globtype,globals,
  50. verbose,cutils,
  51. cpuinfo,
  52. aasmbase,aasmtai,aasmdata,aasmcpu,
  53. symconst,symdef,
  54. cgobj,cgx86,cga,cgutils,
  55. paramgr,tgobj,ncgutil,
  56. ncon,nset,ninl,
  57. defutil;
  58. {*****************************************************************************
  59. Helpers
  60. *****************************************************************************}
  { Emits the integer instruction "op" for this node.

    op         - x86 opcode to emit
    opsize     - operand size
    unsigned   - selects the overflow condition (carry vs. overflow flag)
    extra_not  - the subtrahend still has to be complemented (set difference)
    mboverflow - the operation may overflow; a check is emitted when overflow
                 checking is enabled in the local switches

    The caller must already have put left.location into a LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount  : longint;
      no_overflow : tasmlabel;
      tmpreg      : Tregister;
      asmsize     : topsize;
      swapped_sub,
      right_const,
      ovcheck     : boolean;
    begin
      asmsize:=TCGSize2Opsize[opsize];
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      right_const:=right.location.loc=LOC_CONSTANT;
      ovcheck:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { swapped subtraction: the result has to end up in the other
                register, so operate into right and exchange afterwards }
              if extra_not then
                emit_reg(A_NOT,asmsize,left.location.register);
              emit_reg_reg(op,asmsize,left.location.register,right.location.register);
              { the locations are exchanged again, so keep the flag in sync }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,asmsize,right.location.register);
              { for these opcodes the locations are exchanged first }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,asmsize,right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate there and copy the result back into left }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,tmpreg);
          emit_reg_reg(op,asmsize,left.location.register,tmpreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,tmpreg,left.location.register);
        end
      { shortcuts for constant right operands }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              right_const and
              (right.location.value=0) then
        { (in)equality against zero: TEST reg,reg }
        emit_reg_reg(A_TEST,asmsize,left.location.register,left.location.register)
      else if (op=A_ADD) and
              right_const and
              (right.location.value=1) and
              not ovcheck then
        emit_reg(A_INC,asmsize,left.location.register)
      else if (op=A_SUB) and
              right_const and
              (right.location.value=1) and
              not ovcheck then
        emit_reg(A_DEC,asmsize,left.location.register)
      else if (op=A_IMUL) and
              right_const and
              ispowerof2(int64(right.location.value),shiftcount) and
              not ovcheck then
        { multiplication by a power of two becomes a shift }
        emit_const_reg(A_SHL,asmsize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { complement a copy of right, then AND it into left }
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,tmpreg);
          emit_reg(A_NOT,asmsize,tmpreg);
          emit_reg_reg(A_AND,asmsize,tmpreg,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check has to be emitted right here because the
        signedness of the operation is only known through "unsigned". }
      if mboverflow and ovcheck then
        begin
          current_asmdata.getjumplabel(no_overflow);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,no_overflow)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,no_overflow);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,no_overflow);
        end;
    end;
  { Guarantees that left.location is a LOC_REGISTER.

    opsize - size the operands must have
    noswap - when true, left and right may not be exchanged

    Afterwards any non-constant operand whose size differs from opsize
    (ignoring signedness) is forced into a register of that size. }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            { Swapping is not possible, so load left.  For comparisons the
              register value is not modified, which is why a constant
              register may be reused in that case. }
            location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,
              (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]))
          else
            begin
              { right already is a register: simply exchange the locations }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { size fix-up for right }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);

      { size fix-up for left }
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
    end;
  { Brings the operands onto the FPU stack when force_fpureg is set and
    records in nf_swapped whether they ended up in reversed order.

    force_fpureg - when false, no operand is loaded; nf_swapped is only
                   toggled if both operands are already LOC_FPUREGISTER }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    begin
      if right.location.loc=LOC_FPUREGISTER then
        begin
          if left.location.loc=LOC_FPUREGISTER then
            { both on the fpu stack: they are always in the wrong order }
            toggleflag(nf_swapped)
          else if force_fpureg then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end
      else if force_fpureg then
        begin
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left.location.loc=LOC_FPUREGISTER then
            { left was on the stack before right got loaded => swapped }
            toggleflag(nf_swapped)
          else
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end;
    end;
  { Emits "op right,left".  left.location must already be a register;
    right.location may be a register, a memory reference or a constant.
    Any other location kind raises internalerror(200203232). }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}
    begin
      { subset locations are loaded into a plain register first }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));

        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;

        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { only signed 32 bit immediates can be encoded directly on
              x86_64; larger 64 bit constants go through a scratch register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;

        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag condition that corresponds to the node's comparison.

    unsigned - selects the unsigned (above/below) instead of the signed
               (greater/less) conditions

    When nf_swapped is set the relational conditions are mirrored.  For
    node types other than the six comparisons no result is assigned. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;

    { picks one of four candidates by signedness and operand order }
    function pick(signed_plain,signed_swapped,unsigned_plain,unsigned_swapped : tresflags) : tresflags;
      begin
        if unsigned then
          begin
            if nf_swapped in flags then
              pick:=unsigned_swapped
            else
              pick:=unsigned_plain;
          end
        else
          begin
            if nf_swapped in flags then
              pick:=signed_swapped
            else
              pick:=signed_plain;
          end;
      end;

    begin
      case nodetype of
        equaln   : getresflags:=F_E;
        unequaln : getresflags:=F_NE;
        ltn      : getresflags:=pick(F_L,F_G,F_B,F_A);
        lten     : getresflags:=pick(F_LE,F_GE,F_BE,F_AE);
        gtn      : getresflags:=pick(F_G,F_L,F_A,F_B);
        gten     : getresflags:=pick(F_GE,F_LE,F_AE,F_BE);
      end;
    end;
  303. {*****************************************************************************
  304. AddSmallSet
  305. *****************************************************************************}
  { Generates code for set operations on sets that fit into an integer
    register: union, element inclusion (BTS), intersection, difference and
    symmetric difference.  The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase   : aint;
      ressize,
      opsize    : TCGSize;
      op        : TAsmOp;
      swapped,
      extra_not,
      noswap,
      complement_only : boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      complement_only:=false;
      ressize:=int_cgsize(resultdef.size);
      opsize:=ressize;

      { take the set base from whichever operand is the set }
      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { including an element is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype=setelementn then
              begin
                { set + element: use BTS; ranges are not supported here }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { there is no byte sized BTS }
                if opsize=OS_8 then
                  opsize:=OS_32;
                { BTS wants both operands in registers }
                location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
                location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end
            else
              op:=A_OR;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            { minuend is the constant with all bits set? }
            if ((not swapped) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
               (swapped and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
              complement_only:=true;
            { complement a constant subtrahend at compile time, otherwise
              ask for a NOT instruction }
            if (not swapped) and (right.location.loc=LOC_CONSTANT) then
              right.location.value:=not(right.location.value)
            else if swapped and (left.location.loc=LOC_CONSTANT) then
              left.location.value:=not(left.location.value)
            else
              extra_not:=true;
          end;
        else
          internalerror(2003042215);
      end;

      if complement_only then
        begin
          { [0..31]-x needs nothing but a NOT of x }
          if nf_swapped in flags then
            begin
              { undo the swap and keep the flag in sync }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          left_must_be_reg(opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is a register now and holds the result }
          location:=left.location;
        end;

      { undo the size change made above for the missing byte sized BTS }
      if opsize<>ressize then
        location_force_reg(current_asmdata.CurrAsmList,location,ressize,false);
    end;
  { Generates code for comparing two small sets.  Supports =, <> and the
    subset tests <= and >=; the result is returned in the CPU flags.  Any
    other node type raises internalerror(2003042215). }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
    begin
      pass_left_right;
      opsize:=int_cgsize(left.resultdef.size);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for "<=" when not yet swapped and for ">="
            when already swapped }
          if (nodetype=lten) xor (nf_swapped in flags) then
            swapleftright;
          { left:=left and right; the CMP emitted below then checks whether
            that intersection equals right }
          location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
          emit_op_right_left(A_AND,opsize);
          { warning: ugly hack, a JE is needed so the node becomes equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { every supported case ends in a CMP with left in a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  436. {*****************************************************************************
  437. AddMMX
  438. *****************************************************************************}
  439. {$ifdef SUPPORT_MMX}
  { Generates code for MMX vector operations: packed add, subtract and
    multiply (optionally saturating, see cs_mmx_saturation) as well as the
    bitwise xor/or/and.  The result is returned in an MMX register.

    Unsupported node types raise internalerror(2003042214).

    NOTE(review): the inner "case mmxbase" statements have no else branch,
    so "op" stays unassigned for element types that are not listed (for
    instance a saturating operation on 32 bit elements) - confirm that
    earlier passes reject those combinations. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      cmpop     : boolean;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { never set to true in this routine; kept for the final freetemp }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { fixed: signed 16 bit elements need the word variant,
                      the byte variant PSUBSB was selected here before }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { neither left nor right in a register? then one has to be loaded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc is LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it; that register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { fixed: this check has to look at right, whose reference
                    is used below.  It tested left before, which is always
                    an MMX register here, so the internal error was raised
                    unconditionally. }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              { the result is in right's register: exchange the locations
                so that left refers to it again }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  608. {$endif SUPPORT_MMX}
  609. {*****************************************************************************
  610. AddFloat
  611. *****************************************************************************}
  { Generates code for float add/sub/mul/div with the result in an MM
    register.

    With fputype >= fpu_sse3, sqr(a)+sqr(b) and sqr(a)-sqr(b) are handled
    specially: both operands are packed into one register, squared with a
    single packed multiply and combined with a horizontal add/subtract.

    Other node types raise internalerror(200312231). }
  procedure tx86addnode.second_addfloatsse;
    var
      op            : topcg;
      sum_of_sqr    : boolean;
      inner         : tnode;
    begin
      sum_of_sqr:=(current_settings.fputype>=fpu_sse3) and
                  use_vectorfpu(resultdef) and
                  (nodetype in [addn,subn]) and
                  (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
                  (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sum_of_sqr then
        begin
          { replace both sqr() nodes by their arguments; the squaring is
            done by the packed multiply below }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used when both operands are on the fpu
            stack: both are "R_ST", so nothing would change.  Exchange them
            on the stack instead. }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn   : op:=OP_ADD;
        muln   : op:=OP_MUL;
        subn   : op:=OP_SUB;
        slashn : op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sum_of_sqr then
        begin
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack left and right into the two halves of one register }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              { square both halves at once }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave left and right in the low half }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { make sure bits 64..127 contain valid values as well }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the low 64 bits are now duplicated in bits 64..127 (the
                original comment said "bits 0..32 and 64..95"; a single
                element occupies bits 0..31) }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(
                    taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          { right's register may only serve as destination when the
            operation is commutative }
          location.register:=right.location.register;
          { An operand in an fpu register is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so memory probably gives shorter
            code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,left.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { same reasoning as above: go through memory for fpu registers }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(current_asmdata.CurrAsmList,right.location);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates a float comparison with COMISS (single) or COMISD (double);
    other float types raise internalerror(200402222).  The result is
    returned in the CPU flags using the unsigned conditions. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      op : tasmop;

    { Emits "op src,reg".  An operand living in an fpu register is written
      to memory first instead of being forced into an mm register: there is
      no direct fpu->mm register copy instruction, so memory probably gives
      shorter code.  errnr is raised for unsupported locations. }
    procedure compare_with(var src : tlocation;reg : tregister;errnr : longint);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(current_asmdata.CurrAsmList,src);
        case src.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,src.reference,reg));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,src.register,reg));
          else
            internalerror(errnr);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        op:=A_COMISS
      else if is_double(left.resultdef) then
        op:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right already is in an mm register: compare against it ... }
          compare_with(left.location,right.location.register,200402221);
          { ... and record that the operands were used in reversed order }
          if nf_swapped in flags then
            exclude(flags,nf_swapped)
          else
            include(flags,nf_swapped)
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          compare_with(right.location,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
    end;
  790. procedure tx86addnode.second_opvector;
  791. var
  792. op : topcg;
  793. begin
  794. pass_left_right;
  795. if (nf_swapped in flags) then
  796. swapleftright;
  797. case nodetype of
  798. addn :
  799. op:=OP_ADD;
  800. muln :
  801. op:=OP_MUL;
  802. subn :
  803. op:=OP_SUB;
  804. slashn :
  805. op:=OP_DIV;
  806. else
  807. internalerror(200610071);
  808. end;
  809. if fits_in_mm_register(left.resultdef) then
  810. begin
  811. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  812. { we can use only right as left operand if the operation is commutative }
  813. if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  814. begin
  815. location.register:=right.location.register;
  816. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
  817. end
  818. else
  819. begin
  820. location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
  821. location.register:=left.location.register;
  822. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
  823. tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
  824. end;
  825. end
  826. else
  827. begin
  828. { not yet supported }
  829. internalerror(200610072);
  830. end
  831. end;
  832. procedure tx86addnode.second_addfloat;
  833. var
  834. op : TAsmOp;
  835. begin
  836. if use_vectorfpu(resultdef) then
  837. begin
  838. second_addfloatsse;
  839. exit;
  840. end;
  841. pass_left_right;
  842. case nodetype of
  843. addn :
  844. op:=A_FADDP;
  845. muln :
  846. op:=A_FMULP;
  847. subn :
  848. op:=A_FSUBP;
  849. slashn :
  850. op:=A_FDIVP;
  851. else
  852. internalerror(2003042214);
  853. end;
  854. check_left_and_right_fpureg(true);
  855. { if we swaped the tree nodes, then use the reverse operator }
  856. if nf_swapped in flags then
  857. begin
  858. if (nodetype=slashn) then
  859. op:=A_FDIVRP
  860. else if (nodetype=subn) then
  861. op:=A_FSUBRP;
  862. end;
  863. emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
  864. tcgx86(cg).dec_fpu_stack;
  865. location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
  866. location.register:=NR_ST;
  867. end;
  868. procedure tx86addnode.second_cmpfloat;
  869. var
  870. resflags : tresflags;
  871. begin
  872. if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
  873. begin
  874. second_cmpfloatsse;
  875. exit;
  876. end;
  877. pass_left_right;
  878. check_left_and_right_fpureg(true);
  879. {$ifndef x86_64}
  880. if current_settings.cputype<cpu_Pentium2 then
  881. begin
  882. emit_none(A_FCOMPP,S_NO);
  883. tcgx86(cg).dec_fpu_stack;
  884. tcgx86(cg).dec_fpu_stack;
  885. { load fpu flags }
  886. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  887. emit_reg(A_FSTSW,S_NO,NR_AX);
  888. emit_none(A_SAHF,S_NO);
  889. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  890. if nf_swapped in flags then
  891. begin
  892. case nodetype of
  893. equaln : resflags:=F_E;
  894. unequaln : resflags:=F_NE;
  895. ltn : resflags:=F_A;
  896. lten : resflags:=F_AE;
  897. gtn : resflags:=F_B;
  898. gten : resflags:=F_BE;
  899. end;
  900. end
  901. else
  902. begin
  903. case nodetype of
  904. equaln : resflags:=F_E;
  905. unequaln : resflags:=F_NE;
  906. ltn : resflags:=F_B;
  907. lten : resflags:=F_BE;
  908. gtn : resflags:=F_A;
  909. gten : resflags:=F_AE;
  910. end;
  911. end;
  912. end
  913. else
  914. {$endif x86_64}
  915. begin
  916. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
  917. { fcomip pops only one fpu register }
  918. current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
  919. tcgx86(cg).dec_fpu_stack;
  920. tcgx86(cg).dec_fpu_stack;
  921. { load fpu flags }
  922. if nf_swapped in flags then
  923. begin
  924. case nodetype of
  925. equaln : resflags:=F_E;
  926. unequaln : resflags:=F_NE;
  927. ltn : resflags:=F_A;
  928. lten : resflags:=F_AE;
  929. gtn : resflags:=F_B;
  930. gten : resflags:=F_BE;
  931. end;
  932. end
  933. else
  934. begin
  935. case nodetype of
  936. equaln : resflags:=F_E;
  937. unequaln : resflags:=F_NE;
  938. ltn : resflags:=F_B;
  939. lten : resflags:=F_BE;
  940. gtn : resflags:=F_A;
  941. gten : resflags:=F_AE;
  942. end;
  943. end;
  944. end;
  945. location_reset(location,LOC_FLAGS,OS_NO);
  946. location.resflags:=resflags;
  947. end;
  948. {*****************************************************************************
  949. Add64bit
  950. *****************************************************************************}
  951. procedure tx86addnode.second_add64bit;
  952. begin
  953. {$ifdef cpu64bitalu}
  954. second_addordinal;
  955. {$else cpu64bitalu}
  956. { must be implemented separate }
  957. internalerror(200402042);
  958. {$endif cpu64bitalu}
  959. end;
  960. procedure tx86addnode.second_cmp64bit;
  961. begin
  962. {$ifdef cpu64bitalu}
  963. second_cmpordinal;
  964. {$else cpu64bitalu}
  965. { must be implemented separate }
  966. internalerror(200402043);
  967. {$endif cpu64bitalu}
  968. end;
  969. {*****************************************************************************
  970. AddOrdinal
  971. *****************************************************************************}
  972. procedure tx86addnode.second_cmpordinal;
  973. var
  974. opsize : tcgsize;
  975. unsigned : boolean;
  976. begin
  977. unsigned:=not(is_signed(left.resultdef)) or
  978. not(is_signed(right.resultdef));
  979. opsize:=def_cgsize(left.resultdef);
  980. pass_left_right;
  981. left_must_be_reg(opsize,false);
  982. emit_generic_code(A_CMP,opsize,unsigned,false,false);
  983. location_freetemp(current_asmdata.CurrAsmList,right.location);
  984. location_freetemp(current_asmdata.CurrAsmList,left.location);
  985. location_reset(location,LOC_FLAGS,OS_NO);
  986. location.resflags:=getresflags(unsigned);
  987. end;
  988. begin
  989. caddnode:=tx86addnode;
  990. end.