nx86add.pas 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add-node code generator shared by the i386 and x86 targets (see the
      unit header); specialises the generic tcgaddnode. }
    tx86addnode = class(tcgaddnode)
    protected
      { Picks the result flag matching the comparison stored in nodetype. }
      function getresflags(unsigned : boolean) : tresflags;
      { Makes sure left.location is a register, swapping the operands when
        that is permitted and right already is one. }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      { Tracks/forces x87 register locations of both operands and updates
        the swapped flag accordingly. }
      procedure check_left_and_right_fpureg(force_fpureg: boolean);
      { Emits "op right,left" for whatever location kind right has. }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { Emits an integer operation plus the optional overflow check. }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      { Scalar floating point compare / arithmetic via COMISS/COMISD and the
        mm-register code generator helpers. }
      procedure second_cmpfloatsse;
      procedure second_addfloatsse;
    public
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmdata,aasmcpu,
  56. symconst,symdef,
  57. cgobj,hlcgobj,cgx86,cga,cgutils,
  58. paramgr,tgobj,ncgutil,
  59. ncon,nset,ninl,
  60. defutil;
  61. {*****************************************************************************
  62. Helpers
  63. *****************************************************************************}
  {
    Emits the integer instruction "op" for this node and, if requested, the
    run-time overflow check that follows it.

      op         - x86 opcode to emit
      opsize     - operand size of the operation
      unsigned   - selects the F_AE based check instead of the F_NO based one
      extra_not  - one operand has to be complemented with NOT before "op"
                   (which one depends on the swap state, see below)
      mboverflow - the operation may overflow; the check is only generated
                   when cs_check_overflow is active as well

    The caller must already have placed left.location in a register.
  }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount : longint;
      overflow_done : tasmlabel;
      scratch : Tregister;
      swapped_sub,
      overflow_checks_off : boolean;
    begin
      { both predicates are evaluated before anything below touches the flags }
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      overflow_checks_off:=not(cs_check_overflow in current_settings.localswitches);

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { swapped subtraction: the result is produced in right's
                register, so exchange the locations afterwards and record
                the new swap state }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate there and copy the result back into left's register }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,hlcg.tcgsize2orddef(opsize),right.location,scratch);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      { the remaining branches special-case constant right operands }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=0) then
        { (in)equality against zero: TEST reg,reg }
        emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register)
      else if (op=A_ADD) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              overflow_checks_off then
        emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_SUB) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              overflow_checks_off then
        emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_IMUL) and
              (right.location.loc=LOC_CONSTANT) and
              (ispowerof2(int64(right.location.value),shiftcount)) and
              overflow_checks_off then
        { multiplication by a power of two becomes a shift }
        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shiftcount,left.location.register)
      else if extra_not then
        begin
          { complement a copy of right, then AND it into left }
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,hlcg.tcgsize2orddef(opsize),right.location,scratch);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
          emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check has to be generated right here because the
        signedness of the operation is only known through "unsigned". }
      if mboverflow and not(overflow_checks_off) then
        begin
          current_asmdata.getjumplabel(overflow_done);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,overflow_done)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,overflow_done);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,overflow_done);
        end;
    end;
  {
    Ensures that left.location is a LOC_REGISTER and that both operands have
    a size compatible with opsize.

      opdef  - type definition the operands are converted to when forced
      opsize - operand size of the upcoming operation
      noswap - when true the operands may not be exchanged
  }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { A comparison does not modify its register operand, so the
                register of a constant may be reused in that case. }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
            end
          else
            begin
              { right already lives in a register: simply trade places }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { Non-constant operands whose size differs from opsize (signedness
        ignored) are forced into a register of the requested type. }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  {
    Inspects whether the operands are in LOC_FPUREGISTER locations, loads
    them there when force_fpureg is set, and toggles nf_swapped whenever the
    operands end up in reversed order on the fpu stack.
  }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        begin
          { nothing happens here unless loading was requested }
          if force_fpureg then
            begin
              location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
              if left.location.loc=LOC_FPUREGISTER then
                { left was on the stack before right got loaded => swapped }
                toggleflag(nf_swapped)
              else
                location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
            end;
        end
      else if left.location.loc=LOC_FPUREGISTER then
        { both operands already on the fpu stack: they are always in the
          wrong order there }
        toggleflag(nf_swapped)
      else if force_fpureg then
        { only right is on the stack; load left on request }
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
    end;
  {
    Emits "op" with right.location as source and left.location.register as
    destination. right may be a register, a memory reference or a constant;
    subset locations are loaded into a register first. Any other location
    kind raises internalerror 200203232.

    left.location must already be a register.
  }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      srcref: treference;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      { subset locations cannot be used as operands directly }
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            { work on a copy so that right.location itself stays untouched }
            srcref:=right.location.reference;
{$ifdef i8086}
            { a segment held in a non-segment register is moved into ES
              through the stack }
            if (srcref.segment<>NR_NO) and (not is_segment_reg(srcref.segment)) then
              begin
                current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_PUSH,S_W,srcref.segment));
                current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_POP,S_W,NR_ES));
                srcref.segment:=NR_ES;
              end;
{$endif i8086}
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],srcref,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only accepts signed 32 bit immediates; anything larger
              goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  {
    Returns the result flag that represents the comparison in nodetype.

      unsigned - true selects the above/below flags (F_A, F_AE, F_B, F_BE),
                 false the greater/less flags (F_G, F_GE, F_L, F_LE)

    When nf_swapped is set the ordering comparisons are mirrored
    (ltn <-> gtn, lten <-> gten). equaln and unequaln do not depend on either
    setting. For any other nodetype no result is assigned.
  }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          begin
            getresflags:=F_E;
          end;
        unequaln :
          begin
            getresflags:=F_NE;
          end;
        ltn :
          begin
            if unsigned then
              begin
                if swapped then
                  getresflags:=F_A
                else
                  getresflags:=F_B;
              end
            else
              begin
                if swapped then
                  getresflags:=F_G
                else
                  getresflags:=F_L;
              end;
          end;
        lten :
          begin
            if unsigned then
              begin
                if swapped then
                  getresflags:=F_AE
                else
                  getresflags:=F_BE;
              end
            else
              begin
                if swapped then
                  getresflags:=F_GE
                else
                  getresflags:=F_LE;
              end;
          end;
        gtn :
          begin
            if unsigned then
              begin
                if swapped then
                  getresflags:=F_B
                else
                  getresflags:=F_A;
              end
            else
              begin
                if swapped then
                  getresflags:=F_L
                else
                  getresflags:=F_G;
              end;
          end;
        gten :
          begin
            if unsigned then
              begin
                if swapped then
                  getresflags:=F_BE
                else
                  getresflags:=F_AE;
              end
            else
              begin
                if swapped then
                  getresflags:=F_LE
                else
                  getresflags:=F_GE;
              end;
          end;
      end;
    end;
  316. {*****************************************************************************
  317. AddSmallSet
  318. *****************************************************************************}
  319. {$ifndef i8086}
  {
    Generates code for operations on small sets: addn (union or inclusion of
    a single element via BTS), symdifn/xorn (XOR), muln/andn (AND),
    orn (OR) and subn (AND with the complemented subtrahend).
    Any other nodetype raises internalerror 2003042215.

    The result is left in a register described by "location".
  }
  procedure tx86addnode.second_addsmallset;
    var
      base : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      need_not,
      keep_order : boolean;
      complement_only : boolean;
      swapped : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      need_not:=false;
      complement_only:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand actually is the set }
      if (left.resultdef.typ=setdef) then
        base:=tsetdef(left.resultdef).setbase
      else
        base:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { including an element is not commutative: the element has to
              be the right operand }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { ranges are not supported for small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { there is no byte sized BTS, widen to 32 bit }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { BTS needs both operands in registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,base);
                op:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            op:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { minuend is in right: "all members - x" needs only a NOT }
                complement_only:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                { subtrahend is in left: complement it at compile time if
                  it is constant, at run time otherwise }
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  need_not:=true;
              end
            else
              begin
                complement_only:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  need_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if complement_only then
        begin
          { a set expression like [0..31]-x is just NOT x; first make sure
            x is the right operand }
          if nf_swapped in flags then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          left_must_be_reg(opdef,opsize,keep_order);
          emit_generic_code(op,opsize,true,need_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is a register now and holds the result }
          location:=left.location;
        end;

      { undo the widening that was needed because of the missing byte BTS }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,hlcg.tcgsize2orddef(int_cgsize(resultdef.size)),false);
    end;
  422. {$endif not i8086}
  {
    Generates code for comparing two small sets (equaln, unequaln, lten,
    gten); the result is returned in the cpu flags. Any other nodetype raises
    internalerror 2003042215.

    Subset tests are reduced to an equality test: one operand is ANDed with
    the other and the outcome is compared with CMP.
  }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          { plain CMP below, nothing to prepare }
          ;
        lten,
        gten :
          begin
            { swap for "lten, not swapped" and for "gten, swapped" }
            if (nodetype=lten) xor (nf_swapped in flags) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack - a JE is required afterwards, so the node
              is turned into an equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { every supported case ends in a CMP with left in a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  458. {*****************************************************************************
  459. AddMMX
  460. *****************************************************************************}
  461. {$ifdef SUPPORT_MMX}
  {
    Generates code for an MMX operation (addn, muln, subn, xorn, orn, andn);
    any other nodetype raises internalerror 2003042214. The result is placed
    in an MMX register described by "location".

    The opcode is selected from the element type of the left operand and,
    for addn/subn, from the cs_mmx_saturation switch.

    Fixes compared to the previous revision:
      * saturating subtraction of signed 16 bit elements used the byte
        opcode PSUBSB; it now uses PSUBSW, matching PADDSW in the addn case
      * the sanity check in the swapped-subtraction memory path tested
        left.location (always an MMX register at that point, so the
        internalerror fired unconditionally); it now tests right.location

    NOTE(review): the inner "case mmxbase" statements have no else branch,
    so "op" stays unassigned for element types that are not listed.
  }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { no comparison operators are handled here, so this stays false }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word opcode }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { if left is not in an MMX register, either take over right's register
        by swapping or load left into a fresh one }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: right has to be the destination, so it
                is copied into a new register which receives the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that gets loaded from memory below, so right
                    is the location that must be a reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { result is produced in right's register; exchange the
                locations so that left describes the result again }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  630. {$endif SUPPORT_MMX}
  631. {*****************************************************************************
  632. AddFloat
  633. *****************************************************************************}
  {
    Generates scalar floating point arithmetic (addn, muln, subn, slashn)
    through the mm-register code generator; any other nodetype raises
    internalerror 200312231.

    With an fpu type of at least fpu_sse3, sqr(a)+sqr(b) and sqr(a)-sqr(b)
    are handled specially: both values are packed into one register,
    squared with a single packed multiply and combined with a horizontal
    add/subtract.
  }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
      fuse_squares : boolean;
      inner : tnode;
    begin
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { drop both sqr() nodes and keep only their arguments }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if (nf_swapped in flags) then
        begin
          { swapleftright is useless when both operands are on the fpu
            stack, since both are "R_ST" and nothing would change; exchange
            them by hand in that case }
          if (left.location.loc = LOC_FPUREGISTER) and
             (right.location.loc = LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in left's register }
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack both doubles into one register, square them, then
                combine the two products horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { make sure bits 64..127 hold valid values as well }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..31 and 64..95 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      { right may only serve as destination when the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=right.location.register;
          { An operand in an fpu register is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so this probably gives shorter code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if (nf_swapped in flags) then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          location.register:=left.location.register;
          { same reasoning as above: go through memory rather than trying
            to move an fpu register into an mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  {
    Generates a scalar floating point comparison with COMISS (single) or
    COMISD (double); other operand types raise internalerror 200402222.
    The result is returned in the cpu flags, using the flag set that
    getresflags selects for unsigned comparisons.
  }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=A_COMISS
      else if is_double(left.resultdef) then
        cmpinstr:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if (right.location.loc<>LOC_MMREGISTER) then
        begin
          { regular order: left in an mm register, right as source operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          { An operand in an fpu register is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so this probably gives shorter code. }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          case right.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right already sits in an mm register: compare with the operands
            exchanged and record that by flipping nf_swapped afterwards }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          case left.location.loc of
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for +, -, * and / on operands that this routine treats
    as arrays of floats (vectors) fitting into an mm register. The result
    is returned in a LOC_MMREGISTER location.

    Raises internalerror 200610071 for any other node type and
    internalerror 200610072 when the operand type does not fit into an
    mm register (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { left.resultdef is an array def here, so the float type has to be
            taken from its element def. Both branches below now share this
            value; previously the commutative branch cast the array def
            itself to tfloatdef and read floattype from the wrong class. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point +, -, * or /.

    When use_vectorfpu(resultdef) holds, the work is delegated to
    second_addfloatsse. Otherwise an x87 "operate and pop" instruction is
    emitted on ST/ST1 and the result is returned as LOC_FPUREGISTER in
    NR_ST. Any other node type raises internalerror 2003042214. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        second_addfloatsse
      else
        begin
          pass_left_right;

          case nodetype of
            addn :
              fpuop:=A_FADDP;
            muln :
              fpuop:=A_FMULP;
            subn :
              fpuop:=A_FSUBP;
            slashn :
              fpuop:=A_FDIVP;
            else
              internalerror(2003042214);
          end;

          check_left_and_right_fpureg(true);

          { if the tree nodes were swapped, use the reverse operator; this
            is tested only after check_left_and_right_fpureg, exactly as in
            the original statement order }
          if nf_swapped in flags then
            case nodetype of
              subn :
                fpuop:=A_FSUBRP;
              slashn :
                fpuop:=A_FDIVRP;
              else
                { addn and muln keep their opcode }
                ;
            end;

          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;

          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=NR_ST;
        end;
    end;
  { Generates code for a floating point comparison and returns the result
    as a LOC_FLAGS location.

    When use_vectorfpu holds for either operand type, the work is
    delegated to second_cmpfloatsse. Otherwise both operands are brought
    onto the fpu stack and compared in one of two ways:
      - non-x86_64 targets with cputype below cpu_Pentium2: FCOMPP, then
        the fpu status word is moved into the cpu flags via AX and SAHF
        (through a temp when cputype is below cpu_286 on i8086);
      - otherwise: FCOMIP followed by FSTP to pop the second register.
    The result condition depends on the node type and on nf_swapped. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
  {$ifdef i8086}
      tmpref: treference;
  {$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

  {$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
  {$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word to a 2 byte temp and load AX from it }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
              emit_ref(A_FNSTSW,S_NO,tmpref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_ref_reg(A_MOV,S_W,tmpref,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
            end
          else
  {$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
  {$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { select the result condition; both compare sequences above use the
        same mapping, with the ordering conditions mirrored when the
        operands were swapped }
      case nodetype of
        equaln :
          resflags:=F_E;
        unequaln :
          resflags:=F_NE;
        ltn :
          begin
            if nf_swapped in flags then
              resflags:=F_A
            else
              resflags:=F_B;
          end;
        lten :
          begin
            if nf_swapped in flags then
              resflags:=F_AE
            else
              resflags:=F_BE;
          end;
        gtn :
          begin
            if nf_swapped in flags then
              resflags:=F_B
            else
              resflags:=F_A;
          end;
        gten :
          begin
            if nf_swapped in flags then
              resflags:=F_BE
            else
              resflags:=F_AE;
          end;
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  988. {*****************************************************************************
  989. Add64bit
  990. *****************************************************************************}
  { 64 bit add-type operation: when cpu64bitalu is defined it is handled
    by second_addordinal; otherwise this generic implementation raises
    internalerror 200402042. }
  procedure tx86addnode.second_add64bit;
    begin
  {$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
  {$else cpu64bitalu}
      second_addordinal;
  {$endif cpu64bitalu}
    end;
  { 64 bit comparison: when cpu64bitalu is defined it is handled by
    second_cmpordinal; otherwise this generic implementation raises
    internalerror 200402043. }
  procedure tx86addnode.second_cmp64bit;
    begin
  {$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
  {$else cpu64bitalu}
      second_cmpordinal;
  {$endif cpu64bitalu}
    end;
  1009. {*****************************************************************************
  1010. AddOrdinal
  1011. *****************************************************************************}
  { Generates an ordinal comparison: emits a CMP through emit_generic_code
    on the operands and returns the result as a LOC_FLAGS location. The
    comparison is treated as unsigned unless both operand types are
    signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      isunsigned : boolean;
    begin
      { type and size of the compare are taken from the left operand,
        before the operands are evaluated }
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);
      isunsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));

      pass_left_right;
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,isunsigned,false,false);

      { release temps held by the operands, right first }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(isunsigned);
    end;
  1030. begin
  1031. caddnode:=tx86addnode;
  1032. end.