nx86add.pas 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Common code generation for add nodes on the i386 and x86
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit nx86add;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add/compare node shared by the i386 and x86-64 code generators.
    The protected section holds helpers used by the second_* code generation
    methods; the public section overrides the hooks inherited from
    tcgaddnode. }
  tx86addnode = class(tcgaddnode)
  protected
    { --- operand preparation --- }
    procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
    procedure left_and_right_must_be_fpureg;

    { --- instruction emission --- }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    { maps the comparison node type (and the swapped flag) to a flags condition }
    function getresflags(unsigned : boolean) : tresflags;

    { --- SSE variants, called from second_addfloat/second_cmpfloat --- }
    procedure second_cmpfloatsse;
    procedure second_addfloatsse;

    { called by second_addordinal when the operation maps to A_MUL;
      has to be supplied by a descendant }
    procedure second_mul;virtual;abstract;
  public
{$ifdef i386}
    function first_addstring : tnode; override;
    procedure second_addstring;override;
{$endif i386}
    { arithmetic }
    procedure second_addfloat;override;
    procedure second_addsmallset;override;
    procedure second_add64bit;override;
    procedure second_addordinal;override;
    { comparisons }
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
  end;
  50. implementation
  51. uses
  52. globtype,globals,
  53. verbose,cutils,
  54. cpuinfo,
  55. aasmbase,aasmtai,aasmcpu,
  56. symconst,symdef,
  57. cgobj,cgx86,cga,
  58. paramgr,parabase,
  59. htypechk,
  60. pass_2,ncgutil,
  61. ncon,nset,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  { Emits the instruction(s) for a two-operand integer/set operation.

    op         : x86 opcode to emit
    opsize     : operand size
    unsigned   : selects the condition tested by the overflow check
                 (F_AE when true, F_NO otherwise)
    extra_not  : complement one operand first (used for set subtraction)
    mboverflow : the operation may overflow; an FPC_OVERFLOW check is added
                 when overflow checking is switched on

    On entry left.location is expected to be a LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount  : longint;
      skip_ovf    : tasmlabel;
      scratch     : tregister;
      swapped_sub,
      ovfcheck,
      simplified  : boolean;
    begin
      { both predicates are evaluated once; swapped_sub is only consulted
        before the swapped flag is toggled below }
      swapped_sub:=(nodetype=subn) and (nf_swaped in flags);
      ovfcheck:=cs_check_overflow in aktlocalswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { the result ends up in right's register, so afterwards the
                two locations trade places and the swapped flag is toggled }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before emitting }
              case op of
                A_ADD,A_OR,A_AND,A_XOR,A_IMUL :
                  location_swap(left.location,right.location);
              end;
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate there and copy the result back into left's register }
          if extra_not then
            cg.a_op_reg_reg(exprasmlist,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(exprasmlist,opsize);
          cg.a_load_loc_reg(exprasmlist,opsize,right.location,scratch);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
          cg.a_load_reg_reg(exprasmlist,opsize,opsize,scratch,left.location.register);
          cg.ungetregister(exprasmlist,scratch);
        end
      else
        begin
          { shortcuts that apply only when right is a constant }
          simplified:=false;
          if right.location.loc=LOC_CONSTANT then
            case op of
              A_CMP :
                { (in)equality against zero -> test reg,reg }
                if (nodetype in [equaln,unequaln]) and
                   (right.location.value=0) then
                  begin
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                    simplified:=true;
                  end;
              A_ADD :
                { +1 -> inc, only without overflow checking }
                if (right.location.value=1) and
                   not(ovfcheck) then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                    simplified:=true;
                  end;
              A_SUB :
                { -1 -> dec, only without overflow checking }
                if (right.location.value=1) and
                   not(ovfcheck) then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                    simplified:=true;
                  end;
              A_IMUL :
                { multiplication by a power of two -> shl, only without overflow checking }
                if ispowerof2(int64(right.location.value),shiftcount) and
                   not(ovfcheck) then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shiftcount,left.location.register);
                    simplified:=true;
                  end;
            end;

          if not simplified then
            begin
              if extra_not then
                begin
                  { left := left and not(right), using a scratch copy of right }
                  scratch:=cg.getintregister(exprasmlist,opsize);
                  cg.a_load_loc_reg(exprasmlist,opsize,right.location,scratch);
                  emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
                  emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
                  cg.ungetregister(exprasmlist,scratch);
                end
              else
                emit_op_right_left(op,opsize);
            end;
        end;

      { The overflow check has to be generated here because the signedness
        of the operation is only known through the "unsigned" parameter. }
      if mboverflow and ovfcheck then
        begin
          objectlibrary.getlabel(skip_ovf);
          if unsigned then
            cg.a_jmp_flags(exprasmlist,F_AE,skip_ovf)
          else
            cg.a_jmp_flags(exprasmlist,F_NO,skip_ovf);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,skip_ovf);
        end;
    end;
  { Makes sure left.location is a LOC_REGISTER.

    opsize : size used when left has to be loaded into a register
    noswap : when true, left and right may not be exchanged }
  procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
    begin
      { nothing to do when left already is a register }
      if left.location.loc=LOC_REGISTER then
        exit;

      { cheapest way out: right is a register and swapping is allowed }
      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
          exit;
        end;

      { otherwise load left; a comparison does not change the register's
        value, so a constant register may be reused for it }
      location_force_reg(exprasmlist,left.location,opsize,
        nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
    end;
  { Loads whichever of the two operands is not yet a LOC_FPUREGISTER onto
    the FPU stack (NR_ST) and toggles nf_swaped when the resulting stack
    order is reversed with respect to left/right.

    Fix: after loading right, the temp belonging to right is freed. The
    previous code freed left's temp at that point, which left right's temp
    allocated and freed left's temp before it was read (and then a second
    time after left had been loaded). }
  procedure tx86addnode.left_and_right_must_be_fpureg;
    begin
      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { right's value is on the FPU stack now, its temp is no longer needed }
          if (right.location.loc <> LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,right.location);
          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
              if (left.location.loc <> LOC_CFPUREGISTER) then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end
      { right already is on the FPU stack, only left has to be loaded }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
          if (left.location.loc <> LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;
    end;
  { Emits "op right,left" where left.location must already be a register.
    right may be a (constant) register, a (constant) reference or a
    constant; any other location raises internalerror 200203232.

    Fix: on x86_64 the temporary register that holds a constant which does
    not fit into a signed 32 bit immediate is now released again after
    use, the same way emit_generic_code releases its scratch register. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}
    begin
      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          exprasmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          exprasmlist.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(exprasmlist,opsize);
                cg.a_load_const_reg(exprasmlist,opsize,right.location.value,tmpreg);
                exprasmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
                { the constant is consumed, hand the register back }
                cg.ungetregister(exprasmlist,tmpreg);
              end
            else
{$endif x86_64}
              exprasmlist.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flags condition that represents the comparison of this
    node. "unsigned" selects the above/below conditions instead of the
    greater/less ones; when nf_swaped is set the direction of the ordering
    comparisons is mirrored. Equality and inequality depend on neither.
    For node types other than the six comparisons the result is not
    assigned. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;

    { selects between the signed and the unsigned variant of a condition }
    function pick(for_signed,for_unsigned : tresflags) : tresflags;
      begin
        if unsigned then
          pick:=for_unsigned
        else
          pick:=for_signed;
      end;

    begin
      swapped:=nf_swaped in flags;
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if swapped then
            getresflags:=pick(F_G,F_A)
          else
            getresflags:=pick(F_L,F_B);
        lten :
          if swapped then
            getresflags:=pick(F_GE,F_AE)
          else
            getresflags:=pick(F_LE,F_BE);
        gtn :
          if swapped then
            getresflags:=pick(F_L,F_B)
          else
            getresflags:=pick(F_G,F_A);
        gten :
          if swapped then
            getresflags:=pick(F_LE,F_BE)
          else
            getresflags:=pick(F_GE,F_AE);
      end;
    end;
  307. {*****************************************************************************
  308. AddSmallSet
  309. *****************************************************************************}
  { Code generation for operations on small sets, handled as 32 bit
    values (OS_32): union, symmetric difference, intersection and
    difference, plus adding a single element with BTS. }
  procedure tx86addnode.second_addsmallset;
    var
      setsize    : TCGSize;
      asmop      : TAsmOp;
      need_not,
      keep_order : boolean;
    begin
      pass_left_right;

      setsize:=OS_32;
      need_not:=false;
      keep_order:=false;

      case nodetype of
        addn :
          begin
            { this is a really ugly hack: it could be done later using EDI,
              as it is done for subn, instead of two registers.
              Adding elements is not commutative. }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              asmop:=A_OR
            else
              begin
                { adding a set element; no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(exprasmlist,left.location,setsize,false);
                location_force_reg(exprasmlist,right.location,setsize,true);
                asmop:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          asmop:=A_XOR;
        orn :
          asmop:=A_OR;
        muln,
        andn :
          asmop:=A_AND;
        subn :
          begin
            { a-b is computed as a and not(b); when the subtrahend is a
              constant it is complemented right here, otherwise
              emit_generic_code is asked to emit the NOT }
            asmop:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  need_not:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  need_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      emit_generic_code(asmop,setsize,true,need_not,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      set_result_location_reg;
    end;
  { Code generation for comparisons of small sets (handled as OS_32).
    Equality/inequality is a plain CMP. The subset tests (lten/gten) are
    turned into "(left and right) = right" after the operands have been
    put in the required order. The result is returned in the flags.

    Fix: left is overwritten by the AND below, so it must not stay in a
    constant register. It is therefore forced into a register with
    maybeconst=false, as second_addsmallset does for the destination of
    BTS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opsize : TCGSize;
      op     : TAsmOp;
    begin
      pass_left_right;
      opsize:=OS_32;
      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,gten:
          begin
            if (not(nf_swaped in flags) and (nodetype = lten)) or
               ((nf_swaped in flags) and (nodetype = gten)) then
              swapleftright;
            { the AND modifies left's register: never reuse a constant register here }
            location_force_reg(exprasmlist,left.location,opsize,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;
      { left must be a register }
      left_must_be_reg(opsize,false);
      emit_generic_code(op,opsize,true,false,false);
      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      location_freetemp(exprasmlist,left.location);
      location_release(exprasmlist,left.location);
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  413. {*****************************************************************************
  414. AddFloat
  415. *****************************************************************************}
  { Floating point add/sub/mul/div using scalar SSE operations. The result
    is a LOC_MMREGISTER: right's register when right already is an mm
    register and the operation is commutative, otherwise the mm register
    left has been forced into. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;

    { Applies cgop with "src" as source operand to location.register and
      releases src afterwards. An operand living in an fpu register is
      written to memory first; it is not forced into an mm register because
      going through memory probably allows shorter code, there being no
      direct fpu->mm register copy instruction. }
    procedure apply_operand(var src : tlocation);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(exprasmlist,src);
        cg.a_opmm_loc_reg(exprasmlist,cgop,location.size,src,location.register,mms_movescalar);
        location_release(exprasmlist,src);
      end;

    begin
      pass_left_right;
      if (nf_swaped in flags) then
        swapleftright;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      { right can only take the role of the destination operand if the
        operation is commutative }
      if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=right.location.register;
          apply_operand(left.location);
        end
      else
        begin
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          apply_operand(right.location);
        end;
    end;
  { Floating point comparison with COMISS (single) or COMISD (double).
    Any other operand type raises internalerror 200402222. The result is
    returned in the flags. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;

    { Emits "cmpop src,dstreg". An operand living in an fpu register is
      written to memory first; it is not forced into an mm register because
      going through memory probably allows shorter code, there being no
      direct fpu->mm register copy instruction. "errnr" is the internal
      error raised when src ends up in an unsupported location. }
    procedure compare_against(var src : tlocation;dstreg : tregister;errnr : longint);
      begin
        if src.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          location_force_mem(exprasmlist,src);
        case src.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            exprasmlist.concat(taicpu.op_ref_reg(cmpop,S_NO,src.reference,dstreg));
          LOC_MMREGISTER,LOC_CMMREGISTER:
            exprasmlist.concat(taicpu.op_reg_reg(cmpop,S_NO,src.register,dstreg));
          else
            internalerror(errnr);
        end;
      end;

    begin
      if is_single(left.resulttype.def) then
        cmpop:=A_COMISS
      else if is_double(left.resulttype.def) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resulttype.def));

      if (right.location.loc=LOC_MMREGISTER) then
        begin
          { right already sits in an mm register: compare left against it
            and record that the operands were exchanged by flipping nf_swaped }
          compare_against(left.location,right.location.register,200402221);
          if nf_swaped in flags then
            exclude(flags,nf_swaped)
          else
            include(flags,nf_swaped);
        end
      else
        begin
          location_force_mmregscalar(exprasmlist,left.location,false);
          compare_against(right.location,left.location.register,200402223);
        end;

      location_release(exprasmlist,right.location);
      location_release(exprasmlist,left.location);
      location.resflags:=getresflags(true);
    end;
  { Floating point add/sub/mul/div. Delegates to second_addfloatsse when
    use_sse says so for the result type, otherwise generates x87 code that
    leaves the result in NR_ST. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_right;

      { popping x87 instruction for the operation }
      case nodetype of
        addn :
          fpuop:=A_FADDP;
        subn :
          fpuop:=A_FSUBP;
        muln :
          fpuop:=A_FMULP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      left_and_right_must_be_fpureg;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { operands in swapped order: the non-commutative operations need
        their reversed form }
      if nf_swaped in flags then
        case nodetype of
          slashn :
            fpuop:=A_FDIVRP;
          subn :
            fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      location.register:=NR_ST;
    end;
  { Floating point comparison. Delegates to second_cmpfloatsse when
    use_sse says so for either operand type, otherwise generates x87 code.
    The result is returned in the flags.

    On non-x86_64 targets with a processor class below ClassPentium2 the
    comparison is done with FCOMPP and the FPU status word is copied into
    the CPU flags through AX (FNSTSW/SAHF); otherwise FCOMIP is used.

    The flags condition is obtained from getresflags(true): the four
    identical copies of the mapping table that used to live here yielded
    exactly the values getresflags returns for unsigned=true, which is
    also what second_cmpfloatsse uses. }
  procedure tx86addnode.second_cmpfloat;
    begin
      if use_sse(left.resulttype.def) or use_sse(right.resulttype.def) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      left_and_right_must_be_fpureg;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

{$ifndef x86_64}
      if aktspecificoptprocessor<ClassPentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          { both operands leave the fpu stack }
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
          cg.getexplicitregister(exprasmlist,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetregister(exprasmlist,NR_AX);
        end
      else
{$endif x86_64}
        begin
          exprasmlist.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          exprasmlist.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      { the above/below conditions are used, mirrored when the operands were swapped }
      location.resflags:=getresflags(true);
    end;
  645. {*****************************************************************************
  646. Addstring
  647. *****************************************************************************}
  648. {$ifdef i386}
  649. { note: if you implemented an fpc_shortstr_concat similar to the }
  650. { one in i386.inc, you have to override first_addstring like in }
  651. { ti386addnode.first_string and implement the shortstring concat }
  652. { manually! The generic routine is different from the i386 one (JM) }
  { First pass for string operations. Shortstring comparisons in which
    neither side is an empty string constant are handled in pass_2 (JM):
    fpc_shortstr_compare can't be used as a compilerproc because it returns
    its result in the flags instead of in eax. Everything else is left to
    the inherited, generic code. }
  function tx86addnode.first_addstring : tnode;

    { true if n is a string constant of length zero }
    function is_empty_strconst(n : tnode) : boolean;
      begin
        is_empty_strconst:=(n.nodetype=stringconstn) and (str_length(n)=0);
      end;

    begin
      if not(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) or
         not(is_shortstring(left.resulttype.def)) or
         is_empty_strconst(left) or
         is_empty_strconst(right) then
        begin
          { not the special case: use the generic code }
          result := inherited first_addstring;
          exit;
        end;

      { special case: result comes back in the flags, no helper node is created }
      expectloc:=LOC_FLAGS;
      calcregisters(self,0,0,0);
      result := nil;
    end;
  { Code generation for shortstring comparisons: the addresses of both
    strings are passed to FPC_SHORTSTR_COMPARE (left in parameter location
    2, right in parameter location 1) and the result is taken from the
    flags. Other string types raise internalerror 200108303, as they
    should have been handled in the first pass.

    Fix: the conditions guarding the final register moves were crossed.
    hregister2 is only assigned when paraloc2 is a register and hregister1
    only when paraloc1 is a register, but hregister2 was used under the
    paraloc1 test and hregister1 under the paraloc2 test. Each register is
    now used under the condition under which it was loaded. }
  procedure tx86addnode.second_addstring;
    var
      paraloc1,
      paraloc2   : tcgpara;
      hregister1,
      hregister2 : tregister;
    begin
      { string operations are not commutative }
      if nf_swaped in flags then
        swapleftright;
      case tstringdef(left.resulttype.def).string_typ of
        st_shortstring:
          begin
            case nodetype of
              ltn,lten,gtn,gten,equaln,unequaln :
                begin
                  paraloc1.init;
                  paraloc2.init;
                  paramanager.getintparaloc(pocall_default,1,paraloc1);
                  paramanager.getintparaloc(pocall_default,2,paraloc2);
                  { process parameters }
                  secondpass(left);
                  location_release(exprasmlist,left.location);
                  if paraloc2.location^.loc=LOC_REGISTER then
                    begin
                      { keep the address in a scratch register until both
                        operands have been evaluated }
                      hregister2:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,left.location.reference,hregister2);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_paramaddr_ref(exprasmlist,left.location.reference,paraloc2);
                    end;
                  secondpass(right);
                  location_release(exprasmlist,right.location);
                  if paraloc1.location^.loc=LOC_REGISTER then
                    begin
                      hregister1:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,right.location.reference,hregister1);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_paramaddr_ref(exprasmlist,right.location.reference,paraloc1);
                    end;
                  { push parameters: move the scratch registers into the
                    parameter locations they were loaded for }
                  if paraloc2.location^.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister2);
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister2,paraloc2);
                    end;
                  if paraloc1.location^.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister1);
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister1,paraloc1);
                    end;
                  paramanager.freeparaloc(exprasmlist,paraloc1);
                  paramanager.freeparaloc(exprasmlist,paraloc2);
                  cg.allocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
                  cg.deallocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  location_freetemp(exprasmlist,left.location);
                  location_freetemp(exprasmlist,right.location);
                  paraloc1.done;
                  paraloc2.done;
                end;
            end;
            location_reset(location,LOC_FLAGS,OS_NO);
            location.resflags:=getresflags(true);
          end;
        else
          { rest should be handled in first pass (JM) }
          internalerror(200108303);
      end;
    end;
  748. {$endif i386}
  749. {*****************************************************************************
  750. Add64bit
  751. *****************************************************************************}
  { 64 bit arithmetic: with cpu64bit defined this is ordinary ordinal
    arithmetic; without it this method raises internalerror 200402042,
    because the operation must be implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bit}
      second_addordinal;
{$endif cpu64bit}
    end;
  { 64 bit comparison: with cpu64bit defined this is an ordinary ordinal
    comparison; without it this method raises internalerror 200402043,
    because the operation must be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bit}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bit}
      second_cmpordinal;
{$endif cpu64bit}
    end;
  770. {*****************************************************************************
  771. AddOrdinal
  772. *****************************************************************************}
  { Code generation for add/sub/mul/xor/or/and on ordinal operands.
    An unsigned multiplication is handed over to second_mul; everything
    else goes through emit_generic_code. Add, sub and mul are flagged as
    operations that may overflow. }
  procedure tx86addnode.second_addordinal;
    var
      asmop       : tasmop;
      cgsize      : tcgsize;
      { true, if at least one operand has an unsigned type }
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def)) or
                   not(is_signed(right.resulttype.def));
      cgsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      case nodetype of
        addn :
          asmop:=A_ADD;
        subn :
          asmop:=A_SUB;
        muln :
          begin
            { MUL requires special handling, it is filtered out here }
            if is_unsigned then
              begin
                second_mul;
                exit;
              end;
            asmop:=A_IMUL;
          end;
        xorn :
          asmop:=A_XOR;
        orn :
          asmop:=A_OR;
        andn :
          asmop:=A_AND;
        else
          internalerror(200304229);
      end;

      left_must_be_reg(cgsize,false);
      { no extra not is requested here; only the arithmetic operations
        can overflow }
      emit_generic_code(asmop,cgsize,is_unsigned,false,nodetype in [addn,muln,subn]);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      set_result_location_reg;
    end;
  { Code generation for comparisons of ordinal operands: emits a CMP (or
    whatever emit_generic_code substitutes for it) and returns the result
    in the flags. }
  procedure tx86addnode.second_cmpordinal;
    var
      cgsize      : tcgsize;
      { true, if at least one operand has an unsigned type }
      is_unsigned : boolean;
    begin
      is_unsigned:=not(is_signed(left.resulttype.def)) or
                   not(is_signed(right.resulttype.def));
      cgsize:=def_cgsize(left.resulttype.def);

      pass_left_right;

      left_must_be_reg(cgsize,false);
      emit_generic_code(A_CMP,cgsize,is_unsigned,false,false);

      { right is always given back }
      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);

      { left is given back unless it is a constant register }
      if (left.location.loc<>LOC_CREGISTER) then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: assigns tx86addnode to caddnode }
  begin
    caddnode:=tx86addnode;
  end.
  854. {
  855. $Log$
  856. Revision 1.12 2004-09-21 17:25:13 peter
  857. * paraloc branch merged
  858. Revision 1.11.4.1 2004/08/31 20:43:06 peter
  859. * paraloc patch
  860. Revision 1.11 2004/06/20 08:55:32 florian
  861. * logs truncated
  862. Revision 1.10 2004/06/16 20:07:11 florian
  863. * dwarf branch merged
  864. Revision 1.9.2.4 2004/05/02 16:46:28 peter
  865. * disable i386 optimized shortstr_compare for x86_64
  866. Revision 1.9.2.3 2004/04/28 18:35:42 peter
  867. * cardinal fixes for x86-64
  868. Revision 1.9.2.2 2004/04/27 18:18:26 peter
  869. * aword -> aint
  870. Revision 1.9.2.1 2004/04/26 15:54:33 peter
  871. * small x86-64 fixes
  872. }