nx86add.pas 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Common code generation for add nodes on the i386 and x86
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. {
  19. Common code generation for add nodes on the i386 and x86
  20. }
  21. unit nx86add;
  22. {$i fpcdefs.inc}
  23. interface
  24. uses
  25. cgbase,
  26. cpubase,
  27. node,nadd,ncgadd;
  28. type
  { Add/compare node code generator shared by the i386 and x86-64 back ends.
    Descends from the generic tcgaddnode; multiplication via the MUL
    instruction is left to the concrete CPU descendant (second_mul). }
  tx86addnode = class(tcgaddnode)
  protected
    { picks the flag condition that represents the node's comparison,
      taking the nf_swaped flag and signedness into account }
    function  getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location is a LOC_REGISTER, swapping the operands
      instead of loading when that is allowed and possible }
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    { loads both operands on the x87 stack }
    procedure left_and_right_must_be_fpureg;
    { emits "op right,left" where left is already a register }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer operation itself plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { SSE variants of the floating point compare and arithmetic }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;
    { unsigned multiplication, supplied by the descendant }
    procedure second_mul;virtual;abstract;
  public
{$ifdef i386}
    function  first_addstring : tnode; override;
    procedure second_addstring;override;
{$endif i386}
    procedure second_addfloat;override;
    procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_addordinal;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
  end;
  53. implementation
  54. uses
  55. globtype,globals,
  56. verbose,
  57. cutils,
  58. cpuinfo,
  59. aasmbase,aasmtai,aasmcpu,
  60. symconst,symdef,
  61. cgobj,cgx86,cga,
  62. paramgr,
  63. htypechk,
  64. pass_2,ncgutil,
  65. ncon,nset,
  66. defutil;
  67. {*****************************************************************************
  68. Helpers
  69. *****************************************************************************}
  { Emits the machine instruction(s) for an integer/set binary operation.

    op         - assembler opcode to apply
    opsize     - operand size
    unsigned   - selects the carry (true) or overflow (false) flag for the
                 overflow test
    extra_not  - an additional NOT has to be applied to one operand
                 (used by the set difference code)
    mboverflow - the operation may overflow; an overflow test is emitted
                 when overflow checking is switched on

    On entry left.location is expected to be a LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount   : longint;
      skip_label   : tasmlabel;
      scratch      : Tregister;
      asmsize      : topsize;
      reversed_sub,
      ovcheck      : boolean;
    begin
      asmsize:=TCGSize2Opsize[opsize];
      { a subtraction whose operands were exchanged earlier needs its own code path }
      reversed_sub:=(nodetype=subn) and (nf_swaped in flags);
      ovcheck:=cs_check_overflow in aktlocalswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if reversed_sub then
            begin
              { the result ends up in the right register, so the locations
                are exchanged afterwards and the swapped flag is flipped }
              if extra_not then
                emit_reg(A_NOT,asmsize,left.location.register);
              emit_reg_reg(op,asmsize,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,asmsize,right.location.register);
              { for these opcodes the two locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,asmsize,right.location.register,left.location.register);
            end;
        end
      else if reversed_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate there and move the result into left's register }
          if extra_not then
            cg.a_op_reg_reg(exprasmlist,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(exprasmlist,opsize);
          cg.a_load_loc_reg(exprasmlist,opsize,right.location,scratch);
          emit_reg_reg(op,asmsize,left.location.register,scratch);
          cg.a_load_reg_reg(exprasmlist,opsize,opsize,scratch,left.location.register);
          cg.ungetregister(exprasmlist,scratch);
        end
      { the following branches are shortcuts for constant right operands }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=0) then
        { (un)equality against zero: test reg,reg }
        emit_reg_reg(A_TEST,asmsize,left.location.register,left.location.register)
      else if (op=A_ADD) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not(ovcheck) then
        emit_reg(A_INC,asmsize,left.location.register)
      else if (op=A_SUB) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not(ovcheck) then
        emit_reg(A_DEC,asmsize,left.location.register)
      else if (op=A_IMUL) and
              (right.location.loc=LOC_CONSTANT) and
              (ispowerof2(int64(right.location.value),shiftcount)) and
              not(ovcheck) then
        { multiplication by a power of two becomes a left shift }
        emit_const_reg(A_SHL,asmsize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { negate a copy of right and AND it into left }
          scratch:=cg.getintregister(exprasmlist,opsize);
          cg.a_load_loc_reg(exprasmlist,opsize,right.location,scratch);
          emit_reg(A_NOT,asmsize,scratch);
          emit_reg_reg(A_AND,asmsize,scratch,left.location.register);
          cg.ungetregister(exprasmlist,scratch);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow test has to be emitted right here because only this
        routine knows (through the unsigned parameter) which flag to test. }
      if mboverflow and ovcheck then
        begin
          objectlibrary.getlabel(skip_label);
          if unsigned then
            cg.a_jmp_flags(exprasmlist,F_AE,skip_label)
          else
            cg.a_jmp_flags(exprasmlist,F_NO,skip_label);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,skip_label);
        end;
    end;
  { Guarantees that left.location is a LOC_REGISTER afterwards.

    opsize - size used when left has to be loaded into a register
    noswap - when true the operands may not be exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    begin
      { already fine }
      if left.location.loc=LOC_REGISTER then
        exit;

      { cheapest solution: right is in a register and swapping is permitted }
      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
          exit;
        end;

      { Otherwise load left. For comparisons the register is not modified,
        so a constant register may be reused (last argument). }
      location_force_reg(exprasmlist,left.location,opsize,
        (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
    end;
  { Loads both operands on the x87 stack and flips nf_swaped whenever the
    operands end up on the stack in reversed order.

    Fix: after right has been loaded, the temp belonging to RIGHT is freed.
    The previous code freed left's temp at that point, which left right's
    temp allocated and released left's temp before left had been loaded. }
  procedure tx86addnode.left_and_right_must_be_fpureg;
    begin
      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { right now lives on the fpu stack, its temp is no longer needed }
          if (right.location.loc <> LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,right.location);
          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
              if (left.location.loc <> LOC_CFPUREGISTER) then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end
      { the nominator in st0 }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
          if (left.location.loc <> LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;
    end;
  { Emits "op right,left"; left.location must already be a register.

    op     - assembler opcode
    opsize - operand size

    Raises internalerror(200203232) for a right location that is neither
    a register, a reference nor a constant.

    Fix: on x86_64 the scratch register used for 64 bit constants that do
    not fit in a signed 32 bit immediate is now handed back with
    ungetregister, as is done for every other scratch register in this unit. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}
    begin
      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          exprasmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          exprasmlist.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(exprasmlist,opsize);
                cg.a_load_const_reg(exprasmlist,opsize,right.location.value,tmpreg);
                exprasmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
                { the constant has been consumed, give the register back }
                cg.ungetregister(exprasmlist,tmpreg);
              end
            else
{$endif x86_64}
              exprasmlist.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flag condition that holds when the comparison described by
    nodetype is true.

    unsigned - true selects the below/above conditions, false the
               less/greater ones

    When nf_swaped is set the direction of the ordering comparisons is
    mirrored. For node types other than the six comparisons no value is
    assigned. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      mirrored : boolean;
    begin
      mirrored:=nf_swaped in flags;
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn,gtn :
          begin
            { "less" is wanted for an unswapped < or a swapped > }
            if (nodetype=ltn) xor mirrored then
              begin
                if unsigned then
                  getresflags:=F_B
                else
                  getresflags:=F_L;
              end
            else
              begin
                if unsigned then
                  getresflags:=F_A
                else
                  getresflags:=F_G;
              end;
          end;
        lten,gten :
          begin
            { "less or equal" is wanted for an unswapped <= or a swapped >= }
            if (nodetype=lten) xor mirrored then
              begin
                if unsigned then
                  getresflags:=F_BE
                else
                  getresflags:=F_LE;
              end
            else
              begin
                if unsigned then
                  getresflags:=F_AE
                else
                  getresflags:=F_GE;
              end;
          end;
      end;
    end;
  311. {*****************************************************************************
  312. AddSmallSet
  313. *****************************************************************************}
  { Generates code for the arithmetic operations on small sets, which are
    handled here as 32 bit values: union, symmetric difference,
    intersection, difference and the plain and/or/xor nodes.

    Raises internalerror(43244) when a set element with a range is added
    and internalerror(2003042215) for an unsupported node type. }
  procedure tx86addnode.second_addsmallset;
    var
      setsize    : TCGSize;
      asmop      : TAsmOp;
      negate_rhs,
      keep_order : boolean;
    begin
      pass_left_right;
      keep_order:=false;
      negate_rhs:=false;
      setsize:=OS_32;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative, so undo an earlier swap
              that moved the set element to the left }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              asmop:=A_OR
            else
              begin
                { a single element is added: no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(exprasmlist,left.location,setsize,false);
                location_force_reg(exprasmlist,right.location,setsize,true);
                asmop:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          asmop:=A_XOR;
        muln,
        andn :
          asmop:=A_AND;
        orn :
          asmop:=A_OR;
        subn :
          begin
            { a - b is computed as a and (not b); a constant subtrahend is
              negated at compile time, otherwise a NOT is emitted later }
            asmop:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  negate_rhs:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  negate_rhs:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      emit_generic_code(asmop,setsize,true,negate_rhs,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      set_result_location_reg;
    end;
  { Generates code for comparisons of small sets (handled as 32 bit values).
    The result is delivered in the CPU flags.

    equaln/unequaln are a plain CMP. lten/gten (subset tests) are turned
    into "(a and b) = a": the operands are arranged so that right holds the
    candidate subset, left is ANDed with it and the result is compared with
    right for equality.

    Raises internalerror(2003042215) for any other node type.

    Fix: left is overwritten by the AND, so it has to be loaded into a
    register of its own (last argument of location_force_reg = false).
    Allowing reuse of a constant register here could destroy the value of
    the variable living in that register. second_addsmallset does the same
    for the operand that BTS modifies. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
      op     : TAsmOp;
    begin
      pass_left_right;
      opsize:=OS_32;
      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,gten:
          begin
            if (not(nf_swaped in flags) and (nodetype = lten)) or
               ((nf_swaped in flags) and (nodetype = gten)) then
              swapleftright;
            { the register is modified below => must not be a constant register }
            location_force_reg(exprasmlist,left.location,opsize,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(op,opsize,true,false,false);
      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      location_freetemp(exprasmlist,left.location);
      location_release(exprasmlist,left.location);
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  417. {*****************************************************************************
  418. AddFloat
  419. *****************************************************************************}
  { Floating point add/sub/mul/div using scalar SSE instructions.
    The result is returned in an MM register.

    Raises internalerror(200312231) for an unsupported node type. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;

    { Applies cgop with src as source operand to the result register and
      releases src afterwards. A source living in an fpu register is
      written to memory first: there is no direct fpu->mm register copy
      instruction, so going through memory probably gives shorter code. }
    procedure fold_operand(var src : tlocation);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(exprasmlist,src);
        cg.a_opmm_loc_reg(exprasmlist,cgop,location.size,src,location.register,mms_movescalar);
        location_release(exprasmlist,src);
      end;

    begin
      pass_left_right;
      { restore the source order of the operands }
      if (nf_swaped in flags) then
        swapleftright;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      { right's register can only serve as destination when the operation
        is commutative }
      if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=right.location.register;
          fold_operand(left.location);
        end
      else
        begin
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          fold_operand(right.location);
        end;
    end;
  { Floating point comparison using COMISS/COMISD. The result is delivered
    in the CPU flags.

    Raises internalerror(200402222) when the left operand is neither single
    nor double, and internalerror(200402221)/(200402223) when an operand is
    in a location that cannot be used as source of the compare. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;

    { Emits "cmpop src,dstreg". A source in an fpu register is written to
      memory first because there is no direct fpu->mm register copy
      instruction. errcode is raised for unusable source locations. }
    procedure compare_with(var src : tlocation;dstreg : tregister;errcode : longint);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(exprasmlist,src);
        case src.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            exprasmlist.concat(taicpu.op_ref_reg(cmpop,S_NO,src.reference,dstreg));
          LOC_MMREGISTER,LOC_CMMREGISTER:
            exprasmlist.concat(taicpu.op_reg_reg(cmpop,S_NO,src.register,dstreg));
          else
            internalerror(errcode);
        end;
      end;

    begin
      if is_single(left.resulttype.def) then
        cmpop:=A_COMISS
      else if is_double(left.resulttype.def) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resulttype.def));

      if (right.location.loc=LOC_MMREGISTER) then
        begin
          { right is already in an mm register: compare the other way
            round and record that by flipping the swapped flag }
          compare_with(left.location,right.location.register,200402221);
          toggleflag(nf_swaped);
        end
      else
        begin
          location_force_mmregscalar(exprasmlist,left.location,false);
          compare_with(right.location,left.location.register,200402223);
        end;

      location_release(exprasmlist,right.location);
      location_release(exprasmlist,left.location);
      location.resflags:=getresflags(true);
    end;
  { Floating point add/sub/mul/div. Delegates to second_addfloatsse when
    SSE is used for the result type, otherwise x87 code is generated and
    the result is returned in ST.

    Raises internalerror(2003042214) for an unsupported node type. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn :
          fpuop:=A_FADDP;
        subn :
          fpuop:=A_FSUBP;
        muln :
          fpuop:=A_FMULP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { operands in reversed order: the non commutative operations need
        their reverse opcode }
      if nf_swaped in flags then
        case nodetype of
          slashn :
            fpuop:=A_FDIVRP;
          subn :
            fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      location.register:=NR_ST;
    end;
  { Floating point comparison. Delegates to second_cmpfloatsse when SSE is
    used for one of the operand types; otherwise x87 code is generated.
    The result is delivered in the CPU flags.

    Both x87 variants leave the outcome in the carry/zero flags, so the
    condition is the "unsigned" one from getresflags, which also takes
    nf_swaped into account. This replaces four hand-copied tables that
    contained exactly the mapping getresflags(true) returns, and matches
    what second_cmpfloatsse does. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_sse(left.resulttype.def) or use_sse(right.resulttype.def) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

{$ifndef x86_64}
      if aktspecificoptprocessor<ClassPentium2 then
        begin
          { no fcomip available: compare, then copy the fpu status word
            through AX into the cpu flags }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getexplicitregister(exprasmlist,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetregister(exprasmlist,NR_AX);
        end
      else
{$endif x86_64}
        begin
          exprasmlist.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          exprasmlist.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  649. {*****************************************************************************
  650. Addstring
  651. *****************************************************************************}
  652. {$ifdef i386}
  653. { note: if you implemented an fpc_shortstr_concat similar to the }
  654. { one in i386.inc, you have to override first_addstring like in }
  655. { ti386addnode.first_string and implement the shortstring concat }
  656. { manually! The generic routine is different from the i386 one (JM) }
  { First pass for string add/compare nodes.

    Comparisons of shortstrings are generated inline in pass 2 (JM), unless
    one operand is an empty string constant. They cannot go through
    fpc_shortstr_compare as a compilerproc because that routine returns its
    result in the flags instead of in eax. Everything else is left to the
    generic implementation.

    Returns nil when the node is handled here, otherwise whatever the
    inherited method returns. }
  function tx86addnode.first_addstring : tnode;
    var
      inline_compare : boolean;
    begin
      inline_compare:=
        (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) and
        is_shortstring(left.resulttype.def) and
        not((left.nodetype=stringconstn) and (str_length(left)=0)) and
        not((right.nodetype=stringconstn) and (str_length(right)=0));

      if not inline_compare then
        begin
          { otherwise, use the generic code }
          result := inherited first_addstring;
          exit;
        end;

      expectloc:=LOC_FLAGS;
      calcregisters(self,0,0,0);
      result := nil;
    end;
  { Second pass for the shortstring comparisons selected by first_addstring:
    passes the addresses of both strings to FPC_SHORTSTR_COMPARE and
    delivers the result in the CPU flags.

    left is passed as parameter 2 and right as parameter 1. When a
    parameter is passed in a register its address is first loaded into a
    temporary register (hregister2 for left, hregister1 for right) and
    moved into the parameter location only after both operands have been
    evaluated.

    Raises internalerror(200108303) for any string type other than
    shortstring.

    Fix: the final "push parameters" step now tests the location of the
    parameter whose temporary register it uses. Before, hregister2 was used
    when paraloc1 was a register and hregister1 when paraloc2 was one,
    although each register is only allocated under the other condition. }
  procedure tx86addnode.second_addstring;
    var
      paraloc1,
      paraloc2   : tparalocation;
      hregister1,
      hregister2 : tregister;
    begin
      { string operations are not commutative }
      if nf_swaped in flags then
        swapleftright;
      case tstringdef(left.resulttype.def).string_typ of
        st_shortstring:
          begin
            case nodetype of
              ltn,lten,gtn,gten,equaln,unequaln :
                begin
                  paraloc1:=paramanager.getintparaloc(pocall_default,1);
                  paraloc2:=paramanager.getintparaloc(pocall_default,2);
                  { process parameters }
                  secondpass(left);
                  location_release(exprasmlist,left.location);
                  if paraloc2.loc=LOC_REGISTER then
                    begin
                      hregister2:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,left.location.reference,hregister2);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_paramaddr_ref(exprasmlist,left.location.reference,paraloc2);
                    end;
                  secondpass(right);
                  location_release(exprasmlist,right.location);
                  if paraloc1.loc=LOC_REGISTER then
                    begin
                      hregister1:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,right.location.reference,hregister1);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_paramaddr_ref(exprasmlist,right.location.reference,paraloc1);
                    end;
                  { push parameters }
                  { hregister2 only exists when parameter 2 is a register }
                  if paraloc2.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister2);
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister2,paraloc2);
                    end;
                  { hregister1 only exists when parameter 1 is a register }
                  if paraloc1.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister1);
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister1,paraloc1);
                    end;
                  paramanager.freeparaloc(exprasmlist,paraloc1);
                  paramanager.freeparaloc(exprasmlist,paraloc2);
                  cg.allocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
                  cg.deallocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  location_freetemp(exprasmlist,left.location);
                  location_freetemp(exprasmlist,right.location);
                end;
            end;
            location_reset(location,LOC_FLAGS,OS_NO);
            location.resflags:=getresflags(true);
          end;
        else
          { rest should be handled in first pass (JM) }
          internalerror(200108303);
      end;
    end;
  748. {$endif i386}
  749. {*****************************************************************************
  750. Add64bit
  751. *****************************************************************************}
  { 64 bit arithmetic. On a 64 bit cpu this is ordinary ordinal
    arithmetic; on other targets this method must not be reached and
    raises internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bit}
      second_addordinal;
{$endif cpu64bit}
    end;
  { 64 bit comparison. On a 64 bit cpu this is an ordinary ordinal
    comparison; on other targets this method must not be reached and
    raises internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bit}
      second_cmpordinal;
{$endif cpu64bit}
    end;
  770. {*****************************************************************************
  771. AddOrdinal
  772. *****************************************************************************}
  { Generates code for add, sub, mul, and, or and xor on ordinal operands.
    Unsigned multiplication is delegated to second_mul.

    Raises internalerror(200304229) for an unsupported node type. }
  procedure tx86addnode.second_addordinal;
    var
      asmop       : tasmop;
      cgsize      : tcgsize;
      { true if at least one operand is unsigned }
      is_unsigned : boolean;
      { true if the operation can overflow }
      can_overflow : boolean;
    begin
      can_overflow:=false;
      is_unsigned:=not(is_signed(left.resulttype.def) and
                       is_signed(right.resulttype.def));
      cgsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      case nodetype of
        addn :
          begin
            asmop:=A_ADD;
            can_overflow:=true;
          end;
        subn :
          begin
            asmop:=A_SUB;
            can_overflow:=true;
          end;
        muln :
          begin
            { MUL requires special handling }
            if is_unsigned then
              begin
                second_mul;
                exit;
              end;
            asmop:=A_IMUL;
            can_overflow:=true;
          end;
        andn :
          asmop:=A_AND;
        orn :
          asmop:=A_OR;
        xorn :
          asmop:=A_XOR;
        else
          internalerror(200304229);
      end;

      left_must_be_reg(cgsize,false);
      { no extra NOT is ever needed for ordinals }
      emit_generic_code(asmop,cgsize,is_unsigned,false,can_overflow);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      set_result_location_reg;
    end;
  { Generates a CMP for two ordinal operands; the result is delivered in
    the CPU flags. The comparison counts as unsigned as soon as one of the
    operands is unsigned. }
  procedure tx86addnode.second_cmpordinal;
    var
      cgsize      : tcgsize;
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def) and
                       is_signed(right.resulttype.def));
      cgsize:=def_cgsize(left.resulttype.def);

      pass_left_right;
      left_must_be_reg(cgsize,false);
      emit_generic_code(A_CMP,cgsize,is_unsigned,false,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { a constant register is left alone }
      if left.location.loc<>LOC_CREGISTER then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: install tx86addnode as the add node class
    referenced through caddnode }
initialization
  caddnode:=tx86addnode;
end.
  854. {
  855. $Log$
  856. Revision 1.11 2004-06-20 08:55:32 florian
  857. * logs truncated
  858. Revision 1.10 2004/06/16 20:07:11 florian
  859. * dwarf branch merged
  860. Revision 1.9.2.4 2004/05/02 16:46:28 peter
  861. * disable i386 optimized shortstr_compare for x86_64
  862. Revision 1.9.2.3 2004/04/28 18:35:42 peter
  863. * cardinal fixes for x86-64
  864. Revision 1.9.2.2 2004/04/27 18:18:26 peter
  865. * aword -> aint
  866. Revision 1.9.2.1 2004/04/26 15:54:33 peter
  867. * small x86-64 fixes
  868. }