nx86add.pas 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Common code generation for add nodes on the i386 and x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. {
  19. Common code generation for add nodes on the i386 and x86-64
  20. }
  21. unit nx86add;
  22. {$i fpcdefs.inc}
  23. interface
  24. uses
  25. cgbase,
  26. cpubase,
  27. node,nadd,ncgadd;
  type
    { Add node for the x86 family, derived from the generic code generator
      add node.  second_mul is declared abstract here, so a descendant has
      to supply the multiplication that second_addordinal delegates to. }
    tx86addnode = class(tcgaddnode)
    protected
      { --- helpers used by the second_* methods below --- }
      function  getresflags(unsigned : boolean) : tresflags;
      procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
      procedure left_and_right_must_be_fpureg;
      procedure emit_op_right_left(op:TAsmOp;opsize:TOpSize);
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

      { --- SSE variants, dispatched to from second_addfloat/second_cmpfloat --- }
      procedure second_cmpfloatsse;
      procedure second_addfloatsse;

      { --- unsigned multiplication, to be implemented by a descendant --- }
      procedure second_mul;virtual;abstract;
    public
      { first pass }
      function  first_addstring : tnode; override;

      { second pass: arithmetic }
      procedure second_addstring;override;
      procedure second_addfloat;override;
      procedure second_addsmallset;override;
      procedure second_add64bit;override;
      procedure second_addordinal;override;

      { second pass: comparisons }
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
    end;
  51. implementation
  52. uses
  53. globtype,globals,
  54. verbose,
  55. cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmcpu,
  58. symconst,symdef,
  59. cgobj,cgx86,cga,
  60. paramgr,
  61. htypechk,
  62. pass_2,ncgutil,
  63. ncon,nset,
  64. defutil;
  65. {*****************************************************************************
  66. Helpers
  67. *****************************************************************************}
  { Emits the instruction for an integer or small-set operation.  The callers
    in this unit run left_must_be_reg first, so left.location is expected to
    be LOC_REGISTER on entry; the result ends up in left.location.

    op         - x86 instruction to emit
    opsize     - operand size; selects the instruction suffix and the size of
                 any scratch register allocated here
    unsigned   - selects the overflow test: F_AE when true, F_NO when false
    extra_not  - the right-hand operand has to be complemented before the
                 operation (set by second_addsmallset for set subtraction)
    mboverflow - the operation may overflow; an FPC_OVERFLOW call is emitted
                 when cs_check_overflow is active as well }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { the result is now in right's register: exchange the
                locations and record the swap }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these operations the operand order does not matter, so
                the locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              { The scratch register must have the size of the operation:
                it is used below in instructions carrying the opsize suffix
                together with left.location.register.  Allocating it as
                OS_INT produced a register of the wrong width whenever
                opsize differs from the native integer size. }
              r:=cg.getintregister(exprasmlist,opsize);
              cg.a_load_loc_reg(exprasmlist,opsize,right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              { move the result back so that it is in left's register }
              emit_reg_reg(A_MOV,TCGSize2Opsize[opsize],r,left.location.register);
              cg.ungetregister(exprasmlist,r);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { (un)equality against 0: TEST reg,reg instead of CMP }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in aktlocalswitches) then
                  begin
                    { +1 without overflow checking: INC }
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in aktlocalswitches) then
                  begin
                    { -1 without overflow checking: DEC }
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(right.location.value,power)) and
                   not(cs_check_overflow in aktlocalswitches) then
                  begin
                    { multiplication by a power of two: shift left }
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right, then AND it into left;
                        the scratch register uses opsize for the same reason
                        as above }
                      r:=cg.getintregister(exprasmlist,opsize);
                      cg.a_load_loc_reg(exprasmlist,opsize,right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                      cg.ungetregister(exprasmlist,r);
                    end
                  else
                    begin
                      emit_op_right_left(op,TCGSize2Opsize[opsize]);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in aktlocalswitches then
            begin
              objectlibrary.getlabel(hl4);
              { skip the FPC_OVERFLOW call when the flags show no overflow }
              if unsigned then
                cg.a_jmp_flags(exprasmlist,F_AE,hl4)
              else
                cg.a_jmp_flags(exprasmlist,F_NO,hl4);
              cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
              cg.a_label(exprasmlist,hl4);
            end;
        end;
    end;
  { Makes sure left.location is LOC_REGISTER.

    opsize - size handed to location_force_reg when left has to be loaded
    noswap - when true, left and right may not simply be exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    begin
      { already in a register: nothing to do }
      if left.location.loc=LOC_REGISTER then
        exit;

      { cheapest way out: right is in a register and swapping is allowed }
      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
          exit;
        end;

      { otherwise load left; for comparisons, which do not change the
        register's value, a constant register may be reused }
      location_force_reg(exprasmlist,left.location,opsize,
        nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
    end;
  { Loads whichever operands are not yet LOC_FPUREGISTER onto the FPU stack
    and toggles nf_swaped in the cases where the operands end up in reversed
    order, so that the caller can pick the matching instruction. }
  procedure tx86addnode.left_and_right_must_be_fpureg;
    begin
      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { free the temp that held RIGHT.  This used to free
            left.location here, which left right's temp allocated and
            freed left's temp before left was loaded below. }
          if (right.location.loc <> LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,right.location);
          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
              if (left.location.loc <> LOC_CFPUREGISTER) then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end
      { the nominator in st0 }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
          if (left.location.loc <> LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;
    end;
  { Appends "op right,left" to exprasmlist, choosing the instruction form
    from the kind of right.location.  left.location.register is always the
    destination, so left must be in a register.  Any location kind other
    than register, reference or constant is an internal error. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TOpsize);
    begin
      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        exprasmlist.concat(taicpu.op_reg_reg(op,opsize,
          right.location.register,left.location.register))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        exprasmlist.concat(taicpu.op_ref_reg(op,opsize,
          right.location.reference,left.location.register))
      else if right.location.loc=LOC_CONSTANT then
        exprasmlist.concat(taicpu.op_const_reg(op,opsize,
          right.location.value,left.location.register))
      else
        internalerror(200203232);
    end;
  { Returns the flag condition that represents this comparison node.

    unsigned - true selects the below/above conditions (F_B, F_BE, F_A, F_AE),
               false the less/greater ones (F_L, F_LE, F_G, F_GE)

    When nf_swaped is set the ordering comparisons are mirrored.  equaln and
    unequaln map to F_E and F_NE in every case.  For any other node type the
    result is not assigned. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        else
          begin
            { with exchanged operands "a < b" has to be tested as "b > a" }
            cmp:=nodetype;
            if nf_swaped in flags then
              case nodetype of
                ltn  : cmp:=gtn;
                lten : cmp:=gten;
                gtn  : cmp:=ltn;
                gten : cmp:=lten;
              end;

            if unsigned then
              case cmp of
                ltn  : getresflags:=F_B;
                lten : getresflags:=F_BE;
                gtn  : getresflags:=F_A;
                gten : getresflags:=F_AE;
              end
            else
              case cmp of
                ltn  : getresflags:=F_L;
                lten : getresflags:=F_LE;
                gtn  : getresflags:=F_G;
                gten : getresflags:=F_GE;
              end;
          end;
      end;
    end;
  292. {*****************************************************************************
  293. AddSmallSet
  294. *****************************************************************************}
  { Second pass for small-set operations.  The set is handled as a 32 bit
    value: the node type is mapped to a bit instruction, left is brought into
    a register and the instruction is emitted through emit_generic_code. }
  procedure tx86addnode.second_addsmallset;
    var
      setsize : TCGSize;
      setop : TAsmOp;
      need_not,
      keep_order : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      need_not:=false;
      setsize:=OS_32;

      case nodetype of
        symdifn,
        xorn :
          setop:=A_XOR;
        muln,
        andn :
          setop:=A_AND;
        orn :
          setop:=A_OR;
        addn :
          begin
            { adding elements is not commutative: if the element ended up
              on the left because of a swap, put it back on the right }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;

            if right.nodetype<>setelementn then
              setop:=A_OR
            else
              begin
                { adding a single element; ranges are not supported for
                  small sets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(exprasmlist,left.location,setsize,false);
                location_force_reg(exprasmlist,right.location,setsize,true);
                setop:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and not(b).  The operand to complement is
              right, or left when the operands were swapped; a constant is
              complemented right here, anything else needs an extra NOT in
              the generated code }
            setop:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  need_not:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  need_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      emit_generic_code(setop,setsize,true,need_not,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);

      set_result_location_reg;
    end;
  { Second pass for small-set comparisons; the result is left in the flags.
    Equality tests are a plain CMP.  For lten/gten the operands are first
    combined with AND and the outcome is then compared with CMP, with the
    node type rewritten to equaln so that an "equal" condition is returned. }
  procedure tx86addnode.second_cmpsmallset;
    var
      setsize : TCGSize;
    begin
      pass_left_right;
      setsize:=OS_32;

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped lten or a swapped gten }
          if (nf_swaped in flags) xor (nodetype=lten) then
            swapleftright;
          location_force_reg(exprasmlist,left.location,setsize,true);
          emit_op_right_left(A_AND,TCGSize2Opsize[setsize]);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(setsize,false);
      emit_generic_code(A_CMP,setsize,true,false,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      location_freetemp(exprasmlist,left.location);
      location_release(exprasmlist,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  398. {*****************************************************************************
  399. AddFloat
  400. *****************************************************************************}
  { Second pass for floating point add/sub/mul/div using mm registers.  The
    result location is an mm register, taken from one of the operands. }
  procedure tx86addnode.second_addfloatsse;
    var
      mmop : topcg;
    begin
      pass_left_right;
      { undo an operand swap so that left and right have their source order }
      if nf_swaped in flags then
        swapleftright;

      case nodetype of
        addn   : mmop:=OP_ADD;
        subn   : mmop:=OP_SUB;
        muln   : mmop:=OP_MUL;
        slashn : mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      if (mmop in [OP_ADD,OP_MUL]) and (right.location.loc=LOC_MMREGISTER) then
        begin
          { commutative operation with right already in an mm register:
            accumulate into right's register }
          location.register:=right.location.register;
          { an operand living in an fpu register is written to memory rather
            than forced into an mm register, because there is no direct
            fpu->mm register copy instruction and the memory operand
            probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,left.location);
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,left.location,location.register,mms_movescalar);
          location_release(exprasmlist,left.location);
        end
      else
        begin
          { general case: left goes into an mm register and receives the
            result }
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          { same treatment of fpu register operands as above }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,right.location);
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,right.location,location.register,mms_movescalar);
          location_release(exprasmlist,right.location);
        end;
    end;
  { Second pass for floating point comparisons using COMISS/COMISD; the
    result is left in the flags.  Only single and double operands are
    accepted, anything else is an internal error. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;
    begin
      { instruction is selected from the type of the left operand }
      if is_single(left.resulttype.def) then
        cmpop:=A_COMISS
      else if is_double(left.resulttype.def) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,def_cgsize(resulttype.def));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right is already in an mm register, so left is used as the other
            operand.  An fpu register operand is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,left.location);

          case left.location.loc of
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              exprasmlist.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            LOC_REFERENCE,
            LOC_CREFERENCE :
              exprasmlist.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
            else
              internalerror(200402221);
          end;

          { the operands were compared the other way round: flip the
            swapped marker so that getresflags mirrors the condition }
          if nf_swaped in flags then
            exclude(flags,nf_swaped)
          else
            include(flags,nf_swaped)
        end
      else
        begin
          { left goes into an mm register, right is used where it is (or
            from memory when it lives in an fpu register) }
          location_force_mmregscalar(exprasmlist,left.location,false);
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,right.location);

          case right.location.loc of
            LOC_MMREGISTER,
            LOC_CMMREGISTER :
              exprasmlist.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            LOC_REFERENCE,
            LOC_CREFERENCE :
              exprasmlist.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      location_release(exprasmlist,right.location);
      location_release(exprasmlist,left.location);

      location.resflags:=getresflags(true);
    end;
  { Second pass for floating point add/sub/mul/div.  Hands over to
    second_addfloatsse when use_sse says so for the result type; otherwise
    both operands are put on the FPU stack and combined with a popping x87
    instruction, leaving the result in ST. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn   : fpuop:=A_FADDP;
        subn   : fpuop:=A_FSUBP;
        muln   : fpuop:=A_FMULP;
        slashn : fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { releases the left reference }
      if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        location_release(exprasmlist,left.location);

      { with swapped operands the non-commutative operations need their
        reverse form }
      if nf_swaped in flags then
        case nodetype of
          slashn : fpuop:=A_FDIVRP;
          subn   : fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      location.register:=NR_ST;
    end;
  { Second pass for floating point comparisons; the result is left in the
    flags.  Hands over to second_cmpfloatsse when use_sse says so for either
    operand type.  Otherwise both operands are put on the FPU stack and
    compared there: with FCOMPP/FNSTSW/SAHF on non-x86_64 targets when the
    selected processor is below ClassPentium2, with FCOMIP otherwise.  Both
    paths yield the below/above style conditions, which is what
    getresflags(true) returns. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_sse(left.resulttype.def) or use_sse(right.resulttype.def) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

      { releases the left reference }
      if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        location_release(exprasmlist,left.location);

{$ifndef x86_64}
      if aktspecificoptprocessor<ClassPentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getexplicitregister(exprasmlist,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetregister(exprasmlist,NR_AX);
        end
      else
{$endif x86_64}
        begin
          exprasmlist.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          exprasmlist.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  630. {*****************************************************************************
  631. Addstring
  632. *****************************************************************************}
  633. { note: if you implemented an fpc_shortstr_concat similar to the }
  634. { one in i386.inc, you have to override first_addstring like in }
  635. { ti386addnode.first_string and implement the shortstring concat }
  636. { manually! The generic routine is different from the i386 one (JM) }
  { First pass for string operations.  Shortstring comparisons in which
    neither operand is an empty string constant are kept for
    second_addstring and get LOC_FLAGS as expected location (nil is
    returned); everything else goes to the inherited implementation. }
  function tx86addnode.first_addstring : tnode;
    begin
      { use the generic code unless this is a shortstring comparison without
        an empty constant operand }
      if not(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) or
         not(is_shortstring(left.resulttype.def)) or
         ((left.nodetype=stringconstn) and (str_length(left)=0)) or
         ((right.nodetype=stringconstn) and (str_length(right)=0)) then
        begin
          result := inherited first_addstring;
          exit;
        end;

      { special case for shortstrings, handled in pass_2 (JM): }
      { fpc_shortstr_compare can't be used as compilerproc because it }
      { returns its results in the flags instead of in eax }
      expectloc:=LOC_FLAGS;
      calcregisters(self,0,0,0);
      result := nil;
    end;
  { Second pass for the shortstring comparisons kept by first_addstring.
    The addresses of both strings are passed to FPC_SHORTSTR_COMPARE (left
    in parameter location 2, right in parameter location 1) and the result
    is taken from the flags.  Any other string type is an internal error,
    as it should have been handled in the first pass. }
  procedure tx86addnode.second_addstring;
    var
      paraloc1,
      paraloc2 : tparalocation;
      hregister1,
      hregister2 : tregister;
    begin
      { string operations are not commutative }
      if nf_swaped in flags then
        swapleftright;
      case tstringdef(left.resulttype.def).string_typ of
        st_shortstring:
          begin
            case nodetype of
              ltn,lten,gtn,gten,equaln,unequaln :
                begin
                  paraloc1:=paramanager.getintparaloc(pocall_default,1);
                  paraloc2:=paramanager.getintparaloc(pocall_default,2);
                  { process parameters }
                  secondpass(left);
                  location_release(exprasmlist,left.location);
                  if paraloc2.loc=LOC_REGISTER then
                    begin
                      { register parameter: keep the address in a scratch
                        register until right has been evaluated as well }
                      hregister2:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,left.location.reference,hregister2);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_paramaddr_ref(exprasmlist,left.location.reference,paraloc2);
                    end;
                  secondpass(right);
                  location_release(exprasmlist,right.location);
                  if paraloc1.loc=LOC_REGISTER then
                    begin
                      hregister1:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,right.location.reference,hregister1);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_paramaddr_ref(exprasmlist,right.location.reference,paraloc1);
                    end;
                  { push parameters }
                  { Each scratch register is moved into its parameter
                    location under the same condition under which it was
                    loaded above.  The two tests used to be exchanged, so
                    that with only one register parameter an unloaded
                    register was passed and the loaded one was dropped. }
                  if paraloc2.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister2);
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister2,paraloc2);
                    end;
                  if paraloc1.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister1);
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister1,paraloc1);
                    end;
                  paramanager.freeparaloc(exprasmlist,paraloc1);
                  paramanager.freeparaloc(exprasmlist,paraloc2);
                  cg.allocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
                  cg.deallocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  location_freetemp(exprasmlist,left.location);
                  location_freetemp(exprasmlist,right.location);
                end;
            end;
            location_reset(location,LOC_FLAGS,OS_NO);
            location.resflags:=getresflags(true);
          end;
        else
          { rest should be handled in first pass (JM) }
          internalerror(200108303);
      end;
    end;
  728. {*****************************************************************************
  729. Add64bit
  730. *****************************************************************************}
  { Second pass for 64 bit arithmetic.  When cpu64bit is defined this is the
    ordinal code; without it this method must not be reached and raises an
    internal error, the operation has to be implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bit}
      second_addordinal;
{$endif cpu64bit}
    end;
  { Second pass for 64 bit comparisons.  When cpu64bit is defined this is the
    ordinal code; without it this method must not be reached and raises an
    internal error, the comparison has to be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bit}
      second_cmpordinal;
{$endif cpu64bit}
    end;
  749. {*****************************************************************************
  750. AddOrdinal
  751. *****************************************************************************}
  { Second pass for ordinal add/sub/mul and the bitwise operations.  The
    result is left in a register.  Unsigned multiplication (A_MUL) is not
    emitted here but handed to second_mul. }
  procedure tx86addnode.second_addordinal;
    var
      { true, if the operation has to be followed by an overflow check }
      may_overflow : boolean;
      instr : tasmop;
      size : tcgsize;
      { true, if at least one operand has an unsigned type }
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def)) or
                   not(is_signed(right.resulttype.def));
      size:=def_cgsize(left.resulttype.def);

      pass_left_right;

      { only the arithmetic operations can overflow }
      may_overflow:=nodetype in [addn,subn,muln];

      case nodetype of
        addn :
          instr:=A_ADD;
        subn :
          instr:=A_SUB;
        muln :
          if is_unsigned then
            instr:=A_MUL
          else
            instr:=A_IMUL;
        andn :
          instr:=A_AND;
        orn :
          instr:=A_OR;
        xorn :
          instr:=A_XOR;
        else
          internalerror(200304229);
      end;

      { filter MUL, which requires special handling }
      if instr=A_MUL then
        begin
          second_mul;
          exit;
        end;

      left_must_be_reg(size,false);
      { ordinal operations never need the extra NOT used for sets }
      emit_generic_code(instr,size,is_unsigned,false,may_overflow);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);

      set_result_location_reg;
    end;
  { Second pass for ordinal comparisons: emits a CMP (through
    emit_generic_code) and leaves the result in the flags. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpsize : tcgsize;
      { true, if at least one operand has an unsigned type }
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def)) or
                   not(is_signed(right.resulttype.def));
      cmpsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      left_must_be_reg(cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);

      { left is only given back when it is not a constant register }
      if left.location.loc<>LOC_CREGISTER then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: make tx86addnode the class stored in caddnode }
  initialization
    caddnode:=tx86addnode;
  end.
  833. {
  834. $Log$
  835. Revision 1.9 2004-02-22 16:30:37 florian
  836. * fixed
  837. + second_cmpfloatsse
  838. Revision 1.8 2004/02/06 16:44:42 florian
  839. + improved floating point compares for x86-64 and Pentium2 and above
  840. Revision 1.7 2004/02/04 19:22:27 peter
  841. *** empty log message ***
  842. Revision 1.6 2004/01/20 12:59:37 florian
  843. * common addnode code for x86-64 and i386
  844. Revision 1.5 2003/12/26 13:19:16 florian
  845. * rtl and compiler compile with -Cfsse2
  846. Revision 1.4 2003/12/26 00:32:22 florian
  847. + fpu<->mm register conversion
  848. Revision 1.3 2003/12/25 01:07:09 florian
  849. + $fputype directive support
  850. + single data type operations with sse unit
  851. * fixed more x86-64 stuff
  852. Revision 1.2 2003/12/23 14:38:07 florian
  853. + second_floataddsse implemented
  854. Revision 1.1 2003/10/13 01:58:04 florian
  855. * some ideas for mm support implemented
  856. }