n386add.pas 65 KB


  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Code generation for add nodes on the i386
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386add;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nadd,cpubase,cginfo;
  23. type
  { i386 code generator for binary operator nodes (taddnode descendant) }
  ti386addnode = class(taddnode)
    { second pass entry point }
    procedure pass_2; override;
  protected
    { first pass hook for string operands; shortstring concatenation and }
    { comparison are kept for this unit, the rest goes to the ancestor   }
    function first_addstring : tnode; override;
  private
    { runs the second pass over left and right; pushedfpu reports whether }
    { maybe_pushfpu was asked to push left                                }
    procedure pass_left_and_right(var pushedfpu : boolean);
    { maps the node type (and the nf_swaped flag) to a result flag }
    function getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location is a LOC_REGISTER }
    procedure left_must_be_reg(opsize : TOpSize; noswap : boolean);
    { emits "op right,left" for a register, reference or constant right }
    procedure emit_op_right_left(op : TAsmOp; opsize : TOpSize);
    { emits the operation itself plus the optional overflow check }
    procedure emit_generic_code(op : TAsmOp; opsize : TOpSize;
      unsigned, extra_not, mboverflow : boolean);
    { sets location to LOC_FLAGS for comparisons, else to left.location }
    procedure set_result_location(cmpop, unsigned : boolean);

    { per operand kind code generators }
    procedure second_addstring;
    procedure second_addboolean;
    procedure second_addfloat;
    procedure second_addsmallset;
    procedure second_mul;
{$ifdef SUPPORT_MMX}
    procedure second_addmmx;
{$endif SUPPORT_MMX}
    procedure second_add64bit;
  end;
  45. implementation
  46. uses
  47. globtype,systems,
  48. cutils,verbose,globals,
  49. symconst,symdef,paramgr,
  50. aasmbase,aasmtai,aasmcpu,defutil,htypechk,
  51. cgbase,pass_2,regvars,
  52. ncon,nset,
  53. cga,ncgutil,tgobj,rgobj,cgobj,cg64f32,rgcpu;
  54. {*****************************************************************************
  55. Helpers
  56. *****************************************************************************}
  const
    { code generator size for each integer operand size in S_B..S_L }
    opsize_2_cgsize : array[S_B..S_L] of tcgsize = (
      OS_8,
      OS_16,
      OS_32
    );
  {
    Runs the second pass over both operands.

    pushedfpu receives the result of maybe_pushfpu when location.loc is
    LOC_FPUREGISTER, and false otherwise.
  }
  procedure ti386addnode.pass_left_and_right(var pushedfpu:boolean);
    var
      savedregs : tmaybesave;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      { an ordinal constant found on the left is moved to the right }
      if left.nodetype=ordconstn then
        swapleftright;

      secondpass(left);

{$ifndef newra}
      { are too few registers free for the right operand? }
      maybe_save(exprasmlist,right.registers32,left.location,savedregs);
{$endif newra}

      { NOTE(review): location is this node's own location and is read   }
      { here before this node has generated any code - confirm this is   }
      { the intended operand to test                                     }
      pushedfpu:=false;
      if location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location);

      secondpass(right);

{$ifndef newra}
      maybe_restore(exprasmlist,left.location,savedregs);
{$endif newra}
    end;
  {
    Returns the result flag matching the comparison in nodetype.

    unsigned selects the below/above flags instead of the less/greater
    ones.  When nf_swaped is set the mirrored comparison is used.  For
    node types other than the six comparisons no value is assigned.
  }
  function ti386addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      case nodetype of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn,lten,gtn,gten :
          begin
            { with swapped operands "a < b" has to be tested as "b > a" }
            cmp:=nodetype;
            if nf_swaped in flags then
              case cmp of
                ltn  : cmp:=gtn;
                lten : cmp:=gten;
                gtn  : cmp:=ltn;
                gten : cmp:=lten;
              end;

            if unsigned then
              case cmp of
                ltn  : result:=F_B;
                lten : result:=F_BE;
                gtn  : result:=F_A;
                gten : result:=F_AE;
              end
            else
              case cmp of
                ltn  : result:=F_L;
                lten : result:=F_LE;
                gtn  : result:=F_G;
                gten : result:=F_GE;
              end;
          end;
      end;
    end;
  {
    Ensures that left.location is a LOC_REGISTER.

    Unless noswap is set, a right operand that already is a LOC_REGISTER
    is exchanged with left (nf_swaped is toggled).  Otherwise left is
    forced into a register of the size given by opsize.
  }
  procedure ti386addnode.left_must_be_reg(opsize:TOpSize;noswap:boolean);
    begin
      { nothing to do when left is a register already }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (not noswap) and (right.location.loc=LOC_REGISTER) then
        begin
          { right is a register, so simply exchange the two locations }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end
      else
        { a comparison does not change the register value, so for those }
        { location_force_reg is allowed to reuse a constant register    }
        location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],
          nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
    end;
  {
    Appends one instruction "op right,left" to exprasmlist.

    left.location.register is always the second operand; the first one
    is right's register, reference or constant value.  Any other right
    location raises internalerror 200203232.
  }
  procedure ti386addnode.emit_op_right_left(op:TAsmOp;opsize:TOpsize);
    var
      dst : tregister;
    begin
      { left must be a register }
      dst:=left.location.register;
      case right.location.loc of
        LOC_REGISTER,LOC_CREGISTER :
          exprasmlist.concat(taicpu.op_reg_reg(op,opsize,right.location.register,dst));
        LOC_REFERENCE,LOC_CREFERENCE :
          exprasmlist.concat(taicpu.op_ref_reg(op,opsize,right.location.reference,dst));
        LOC_CONSTANT :
          exprasmlist.concat(taicpu.op_const_reg(op,opsize,right.location.value,dst));
        else
          internalerror(200203232);
      end;
    end;
  {
    Fills in the location of this node.

    A comparison (cmpop) yields LOC_FLAGS with the flag returned by
    getresflags(unsigned); everything else takes over left.location.
  }
  procedure ti386addnode.set_result_location(cmpop,unsigned:boolean);
    begin
      if not cmpop then
        begin
          location_copy(location,left.location);
          exit;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  {
    Emits the instruction(s) for a generic integer/set operation.

    op, opsize   instruction and operand size to emit
    unsigned     selects the jump used by the overflow check
    extra_not    an additional NOT has to be applied to one operand
    mboverflow   the operation may overflow; an overflow check is
                 emitted when cs_check_overflow is active

    left.location is expected to be a LOC_REGISTER on entry.
  }
  procedure ti386addnode.emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount : longint;
      skipovf : tasmlabel;
      tmpreg : Tregister;
      swappedsub,
      rightconst,
      ovfchecks : boolean;
    begin
      swappedsub:=(nodetype=subn) and (nf_swaped in flags);
      rightconst:=(right.location.loc=LOC_CONSTANT);
      ovfchecks:=(cs_check_overflow in aktlocalswitches);

      if right.location.loc=LOC_REGISTER then
        begin
          if swappedsub then
            begin
              { swapped subtraction: right's register is the target of }
              { the instruction and becomes the new left afterwards    }
              if extra_not then
                emit_reg(A_NOT,S_L,left.location.register);
              emit_reg_reg(op,opsize,left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,S_L,right.location.register);
              emit_reg_reg(op,opsize,right.location.register,left.location.register);
            end;
        end
      else if swappedsub then
        begin
          { right is no register: load it into a scratch register, use }
          { that as target and move the outcome back into left         }
          if extra_not then
            emit_reg(A_NOT,opsize,left.location.register);
{$ifdef newra}
          tmpreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
          tmpreg.enum:=R_INTREGISTER;
          tmpreg.number:=NR_EDI;
          rg.getexplicitregisterint(exprasmlist,NR_EDI);
{$endif}
          cg.a_load_loc_reg(exprasmlist,right.location,tmpreg);
          emit_reg_reg(op,opsize,left.location.register,tmpreg);
          emit_reg_reg(A_MOV,opsize,tmpreg,left.location.register);
          rg.ungetregisterint(exprasmlist,tmpreg);
        end
      { shortcuts for a constant right operand }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              rightconst and
              (right.location.value=0) then
        { (in)equality against 0: TEST reg,reg }
        emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register)
      else if (op=A_ADD) and
              rightconst and
              (right.location.value=1) and
              not ovfchecks then
        emit_reg(A_INC,opsize,left.location.register)
      else if (op=A_SUB) and
              rightconst and
              (right.location.value=1) and
              not ovfchecks then
        emit_reg(A_DEC,opsize,left.location.register)
      else if (op=A_IMUL) and
              rightconst and
              ispowerof2(right.location.value,shiftcount) and
              not ovfchecks then
        { multiplication by a power of two: shift left }
        emit_const_reg(A_SHL,opsize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { load right into a scratch register, invert it, AND into left }
{$ifdef newra}
          tmpreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
          rg.getexplicitregisterint(exprasmlist,NR_EDI);
          tmpreg.enum:=R_INTREGISTER;
          tmpreg.number:=NR_EDI;
{$endif}
          cg.a_load_loc_reg(exprasmlist,right.location,tmpreg);
          emit_reg(A_NOT,S_L,tmpreg);
          emit_reg_reg(A_AND,S_L,tmpreg,left.location.register);
          rg.ungetregisterint(exprasmlist,tmpreg);
        end
      else
        emit_op_right_left(op,opsize);

      { the overflow check has to be produced right here, because the }
      { signedness of the operation is only known via "unsigned"      }
      if mboverflow and ovfchecks then
        begin
          objectlibrary.getlabel(skipovf);
          if unsigned then
            emitjmp(C_NB,skipovf)
          else
            emitjmp(C_NO,skipovf);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,skipovf);
        end;
    end;
  292. {*****************************************************************************
  293. Addstring
  294. *****************************************************************************}
  { note: if you implement an fpc_shortstr_concat similar to the one  }
  { in i386.inc, you have to override first_addstring the way         }
  { ti386addnode.first_addstring does and implement the shortstring   }
  { concat manually! The generic routine is different from the i386   }
  { one (JM)                                                          }
  {
    First pass for string operands.

    Shortstring concatenation and shortstring comparison (unless one
    side is an empty string constant) are handled by this unit in the
    second pass: only expectloc and the register needs are set and nil
    is returned.  Everything else is passed on to the inherited method.
  }
  function ti386addnode.first_addstring : tnode;
    begin
      if (nodetype = addn) and
         is_shortstring(resulttype.def) then
        { special case for shortstrings, handled in pass_2 (JM) }
        expectloc:=LOC_REFERENCE
      else if (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) and
              is_shortstring(left.resulttype.def) and
              not(((left.nodetype=stringconstn) and (str_length(left)=0)) or
                  ((right.nodetype=stringconstn) and (str_length(right)=0))) then
        { fpc_shortstr_compare cannot be used as compilerproc because it }
        { returns its result in the flags instead of in eax              }
        expectloc:=LOC_FLAGS
      else
        begin
          { otherwise, use the generic code }
          result := inherited first_addstring;
          exit;
        end;

      calcregisters(self,0,0,0);
      result := nil;
    end;
  {
    Second pass for shortstring concatenation and comparison.

    Concatenation calls FPC_SHORTSTR_CONCAT, comparisons call
    FPC_SHORTSTR_COMPARE.  Any other string type raises
    internalerror 200108303.
  }
  procedure ti386addnode.second_addstring;
    var
      tmpref : treference;
      cmpop,
      lefttempok : boolean;
      savedregs : Tpushedsavedint;
      pushmask : Tsupregset;
    begin
      { string operations are not commutative }
      if nf_swaped in flags then
        swapleftright;

      if tstringdef(left.resulttype.def).string_typ<>st_shortstring then
        begin
          { rest should be handled in first pass (JM) }
          internalerror(200108303);
          exit;
        end;

      { NOTE(review): cmpop stays unassigned when nodetype is neither }
      { addn nor a comparison - presumably excluded by the first pass }
      case nodetype of
        addn:
          begin
            cmpop:=false;
            secondpass(left);

            { if str_concat is set in expr s:=s+ ... there is no need }
            { to create a temp string (PM).  The temp string can also }
            { come from a typeconversion or a function result, so     }
            { simply check for a temp of 256 bytes (JM)               }
            lefttempok:=tg.istemp(left.location.reference) and
                        (tg.SizeOfTemp(exprasmlist,left.location.reference) = 256);
            if not lefttempok and
               not(nf_use_strconcat in flags) then
              begin
                tg.GetTemp(exprasmlist,256,tt_normal,tmpref);
                cg.g_copyshortstring(exprasmlist,left.location.reference,tmpref,255,true,false);
                { location is released by copyshortstring }
                location_freetemp(exprasmlist,left.location);
                location_reset(left.location,LOC_REFERENCE,def_cgsize(resulttype.def));
                left.location.reference:=tmpref;
              end;

            secondpass(right);

            { the registers of right are not needed afterwards either. }
            { Instead of releasing them already, simply do not push    }
            { them, so the release ends up in the right place (JM)     }
            pushmask := all_intregisters;
            remove_non_regvars_from_loc(right.location,pushmask);
            rg.saveusedintregisters(exprasmlist,savedregs,pushmask);

            { address of left is passed as parameter 2 }
            cg.a_paramaddr_ref(exprasmlist,left.location.reference,paramanager.getintparaloc(2));
            { the optimizer can more easily put the deallocations in }
            { the right place if it happens too early than when it   }
            { happens too late                                       }
            location_release(exprasmlist,right.location);
            { address of right is passed as parameter 1 }
            cg.a_paramaddr_ref(exprasmlist,right.location.reference,paramanager.getintparaloc(1));

            rg.saveintregvars(exprasmlist,pushmask);
            cg.a_call_name(exprasmlist,'FPC_SHORTSTR_CONCAT');
            tg.ungetiftemp(exprasmlist,right.location.reference);
            rg.restoreusedintregisters(exprasmlist,savedregs);
          end;

        ltn,lten,gtn,gten,equaln,unequaln :
          begin
            cmpop := true;
            rg.saveusedintregisters(exprasmlist,savedregs,all_intregisters);

            secondpass(left);
            location_release(exprasmlist,left.location);
            cg.a_paramaddr_ref(exprasmlist,left.location.reference,paramanager.getintparaloc(2));

            secondpass(right);
            location_release(exprasmlist,right.location);
            cg.a_paramaddr_ref(exprasmlist,right.location.reference,paramanager.getintparaloc(1));

            rg.saveintregvars(exprasmlist,all_intregisters);
            cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
            rg.restoreusedintregisters(exprasmlist,savedregs);

            location_freetemp(exprasmlist,left.location);
            location_freetemp(exprasmlist,right.location);
          end;
      end;

      set_result_location(cmpop,true);
    end;
  406. {*****************************************************************************
  407. AddBoolean
  408. *****************************************************************************}
  {
    Second pass for operations on boolean operands.

    With full boolean evaluation, and always for comparisons and xor,
    both operands are evaluated into registers and combined with
    CMP/XOR/OR/AND.  Otherwise "and"/"or" are generated as short-circuit
    jumps and the result location is LOC_JUMP.

    An operand that is expected to end up in LOC_JUMP gets a private
    pair of true/false labels for its evaluation, so that
    location_force_reg can turn its jumps into a register value
    without touching the labels of the surrounding expression.
  }
  procedure ti386addnode.second_addboolean;
    var
      op : TAsmOp;
      opsize : TOpsize;
      cmpop,
      isjump : boolean;
      otl,ofl : tasmlabel;
      pushedregs : tmaybesave;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      cmpop:=false;
      { operate on the smallest boolean size found in either operand }
      if (torddef(left.resulttype.def).typ=bool8bit) or
         (torddef(right.resulttype.def).typ=bool8bit) then
        opsize:=S_B
      else
        if (torddef(left.resulttype.def).typ=bool16bit) or
           (torddef(right.resulttype.def).typ=bool16bit) then
          opsize:=S_W
        else
          opsize:=S_L;

      if (cs_full_boolean_eval in aktlocalswitches) or
         (nodetype in [unequaln,ltn,lten,gtn,gten,equaln,xorn]) then
        begin
          if left.nodetype in [ordconstn,realconstn] then
            swapleftright;

          { left.location is only filled in by secondpass(left) below, }
          { so the location expected by the first pass has to be used  }
          { to decide whether private jump labels are needed           }
          isjump:=(left.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(left);
          if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

{$ifndef newra}
          maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif}

          { same reasoning as for left: right has not been generated yet }
          isjump:=(right.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(right);
{$ifndef newra}
          maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
          if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

          { left must be a register }
          left_must_be_reg(opsize,false);

          { select the instruction }
          case nodetype of
            ltn,lten,gtn,gten,
            equaln,unequaln :
              begin
                op:=A_CMP;
                cmpop:=true;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              internalerror(200203247);
          end;
          emit_op_right_left(op,opsize);

          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { a comparison leaves its result in the flags, so left is }
          { not needed any longer either                            }
          if cmpop then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,true);
        end
      else
        begin
          { short-circuit evaluation of "and" / "or" }
          case nodetype of
            andn,
            orn :
              begin
                location_reset(location,LOC_JUMP,OS_NO);
                case nodetype of
                  andn :
                    begin
                      { a true left falls through to a local label }
                      { placed in front of the code for right      }
                      otl:=truelabel;
                      objectlibrary.getlabel(truelabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,truelabel);
                      truelabel:=otl;
                    end;
                  orn :
                    begin
                      { a false left falls through to a local label }
                      { placed in front of the code for right       }
                      ofl:=falselabel;
                      objectlibrary.getlabel(falselabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,falselabel);
                      falselabel:=ofl;
                    end;
                  else
                    internalerror(2003042212);
                end;
                secondpass(right);
                maketojumpbool(exprasmlist,right,lr_load_regvars);
              end;
            else
              internalerror(2003042213);
          end;
        end;
    end;
  539. {*****************************************************************************
  540. AddFloat
  541. *****************************************************************************}
  {
    Second pass for floating point operands.

    Both operands are brought onto the FPU stack and combined with
    FADDP/FMULP/FSUBP/FDIVP (or the reversed FSUBRP/FDIVRP when
    nf_swaped is set).  Comparisons use FCOMPP and move the FPU status
    word into the CPU flags via FNSTSW/SAHF; their result location is
    LOC_FLAGS, all other results are in LOC_FPUREGISTER (R_ST).
  }
  procedure ti386addnode.second_addfloat;
    var
      op : TAsmOp;
      pushedfpu,
      cmpop,
      loadright,
      eaxbusy : boolean;
      st0,st1,
      intreg,savereg : Tregister;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      case nodetype of
        addn :
          op:=A_FADDP;
        muln :
          op:=A_FMULP;
        subn :
          op:=A_FSUBP;
        slashn :
          op:=A_FDIVP;
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            op:=A_FCOMPP;
            cmpop:=true;
          end;
        else
          internalerror(2003042214);
      end;

      st0.enum:=R_ST;

      { bring right onto the FPU stack when it is not there yet }
      loadright:=(right.location.loc<>LOC_FPUREGISTER);
      if loadright then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,st0);
          if (right.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
        end;

      { the same for left; when left already is on the stack the }
      { operands are in the wrong order, which is recorded by    }
      { toggling nf_swaped                                       }
      if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,st0);
          if (left.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
        end
      else
        toggleflag(nf_swaped);

      { releases the right reference }
      if loadright then
        location_release(exprasmlist,right.location);

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { if we swaped the tree nodes, then use the reverse operator }
      if nf_swaped in flags then
        case nodetype of
          slashn :
            op:=A_FDIVRP;
          subn :
            op:=A_FSUBRP;
        end;

      { to avoid the pentium bug
      if (op=FDIVP) and (opt_processors=pentium) then
        cg.a_call_name(exprasmlist,'EMUL_FDIVP')
      else
      }
      if op=A_FCOMPP then
        begin
          emit_none(op,S_NO);
          dec(trgcpu(rg).fpuvaroffset,2);
        end
      else
        begin
          { the Intel assemblers want operands }
          st0.enum:=R_ST;
          st1.enum:=R_ST1;
          emit_reg_reg(op,S_NO,st0,st1);
          dec(trgcpu(rg).fpuvaroffset);
        end;

      if not cmpop then
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register.enum:=R_ST;
          exit;
        end;

      { on comparison load flags: FPU status word -> AX -> CPU flags }
{$ifdef newra}
      intreg:=rg.getexplicitregisterint(exprasmlist,NR_AX);
{$else}
      { when EAX is in use its value is parked in EDI meanwhile }
      eaxbusy:=not(RS_EAX in rg.unusedregsint);
      if eaxbusy then
        begin
          rg.getexplicitregisterint(exprasmlist,NR_EDI);
          intreg.enum:=R_INTREGISTER;
          intreg.number:=NR_EAX;
          savereg.enum:=R_INTREGISTER;
          savereg.number:=NR_EDI;
          emit_reg_reg(A_MOV,S_L,intreg,savereg);
        end;
      intreg.enum:=R_INTREGISTER;
      intreg.number:=NR_AX;
{$endif}
      emit_reg(A_FNSTSW,S_NO,intreg);
      emit_none(A_SAHF,S_NO);
{$ifdef newra}
      rg.ungetregisterint(exprasmlist,intreg);
{$else}
      if eaxbusy then
        begin
          intreg.enum:=R_INTREGISTER;
          intreg.number:=NR_EAX;
          savereg.enum:=R_INTREGISTER;
          savereg.number:=NR_EDI;
          emit_reg_reg(A_MOV,S_L,savereg,intreg);
          rg.ungetregisterint(exprasmlist,savereg);
        end;
{$endif}

      { the flags after SAHF are tested with the below/above flags, }
      { i.e. the same table getresflags uses for unsigned compares  }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  699. {*****************************************************************************
  700. AddSmallSet
  701. *****************************************************************************}
  {
    Second pass for operations on small sets.

    Selects the instruction for the node type (OR/BTS, XOR, AND, CMP),
    prepares the operands where the operation needs it and leaves the
    emission itself to emit_generic_code.  A set operand that is not a
    smallset raises internalerror 200203301.
  }
  procedure ti386addnode.second_addsmallset;
    var
      opsize : TOpSize;
      op : TAsmOp;
      cmpop,
      pushedfpu,
      extra_not,
      noswap : boolean;
    begin
      pass_left_and_right(pushedfpu);

      { when a setdef is passed, it has to be a smallset }
      if ((left.resulttype.def.deftype=setdef) and
          (tsetdef(left.resulttype.def).settype<>smallset)) or
         ((right.resulttype.def.deftype=setdef) and
          (tsetdef(right.resulttype.def).settype<>smallset)) then
        internalerror(200203301);

      opsize:=S_L;
      cmpop:=false;
      extra_not:=false;
      noswap:=false;

      case nodetype of
        addn :
          begin
            { this is a really ugly hack: it could be done later using }
            { EDI, as it is done for subn, instead of two registers.   }
            { adding elements is not commutative                       }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;

            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { adding a single set element }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
                location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],true);
                op:=A_BTS;
                noswap:=true;
              end;
          end;

        symdifn,
        xorn :
          op:=A_XOR;

        muln,
        andn :
          op:=A_AND;

        orn :
          op:=A_OR;

        subn :
          begin
            { difference = AND with the inverted second operand: a   }
            { constant is inverted right here, everything else needs }
            { an extra NOT instruction                               }
            op:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;

        equaln,
        unequaln :
          begin
            op:=A_CMP;
            cmpop:=true;
          end;

        lten,gten:
          begin
            { unswapped lten and swapped gten need the operands exchanged }
            if (nodetype=lten)<>(nf_swaped in flags) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],true);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            cmpop:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;

        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opsize,noswap);
      emit_generic_code(op,opsize,true,extra_not,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  804. {*****************************************************************************
  805. Add64bit
  806. *****************************************************************************}
  807. procedure ti386addnode.second_add64bit;
  808. var
  809. op : TOpCG;
  810. op1,op2 : TAsmOp;
  811. opsize : TOpSize;
  812. hregister,
  813. hregister2 : tregister;
  814. href : treference;
  815. hl4 : tasmlabel;
  816. pushedfpu,
  817. mboverflow,
  818. cmpop,
  819. unsigned : boolean;
  820. r:Tregister;
  {
    Emits the jumps that follow the first compare of a 64 bit
    comparison: one conditional jump to truelabel and/or falselabel
    depending on the node type.  The "unsigned" variable of the
    enclosing procedure selects the flag family.
  }
  procedure firstjmp64bitcmp;
    var
      savedtype : tnodetype;
    begin
      load_all_regvars(exprasmlist);
      { the jump sequence is a little bit hairy }
      case nodetype of
        ltn,gtn:
          begin
            emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
            { cheat a little bit for the negative test: with the }
            { swapped flag toggled getresflags yields the mirror }
            toggleflag(nf_swaped);
            emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
            toggleflag(nf_swaped);
          end;
        lten,gten:
          begin
            savedtype:=nodetype;

            { positive test with the strict form of the comparison }
            if savedtype=lten then
              nodetype:=ltn
            else
              nodetype:=gtn;
            emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);

            { negative test with the opposite strict comparison }
            if savedtype=lten then
              nodetype:=gtn
            else
              nodetype:=ltn;
            emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);

            nodetype:=savedtype;
          end;
        equaln:
          emitjmp(C_NE,falselabel);
        unequaln:
          emitjmp(C_NE,truelabel);
      end;
    end;
  {
    Emits the jumps that follow the second compare of a 64 bit
    comparison: a conditional jump followed by an unconditional jump
    to the other label.  Node types other than the six comparisons
    emit nothing.
  }
  procedure secondjmp64bitcmp;
    var
      otherwise : tasmlabel;
    begin
      { the jump sequence is a little bit hairy }
      case nodetype of
        ltn,gtn,lten,gten:
          begin
            { the comparison of the low dword has to be always unsigned! }
            emitjmp(flags_to_cond(getresflags(true)),truelabel);
            otherwise:=falselabel;
          end;
        equaln:
          begin
            emitjmp(C_NE,falselabel);
            otherwise:=truelabel;
          end;
        unequaln:
          begin
            emitjmp(C_NE,truelabel);
            otherwise:=falselabel;
          end;
        else
          exit;
      end;
      cg.a_jmp_always(exprasmlist,otherwise);
    end;
  881. begin
  882. firstcomplex(self);
  883. pass_left_and_right(pushedfpu);
  884. op1:=A_NONE;
  885. op2:=A_NONE;
  886. mboverflow:=false;
  887. cmpop:=false;
  888. opsize:=S_L;
  889. unsigned:=((left.resulttype.def.deftype=orddef) and
  890. (torddef(left.resulttype.def).typ=u64bit)) or
  891. ((right.resulttype.def.deftype=orddef) and
  892. (torddef(right.resulttype.def).typ=u64bit));
  893. case nodetype of
  894. addn :
  895. begin
  896. op:=OP_ADD;
  897. mboverflow:=true;
  898. end;
  899. subn :
  900. begin
  901. op:=OP_SUB;
  902. op1:=A_SUB;
  903. op2:=A_SBB;
  904. mboverflow:=true;
  905. end;
  906. ltn,lten,
  907. gtn,gten,
  908. equaln,unequaln:
  909. begin
  910. op:=OP_NONE;
  911. cmpop:=true;
  912. end;
  913. xorn:
  914. op:=OP_XOR;
  915. orn:
  916. op:=OP_OR;
  917. andn:
  918. op:=OP_AND;
  919. else
  920. begin
  921. { everything should be handled in pass_1 (JM) }
  922. internalerror(200109051);
  923. end;
  924. end;
  925. { left and right no register? }
  926. { then one must be demanded }
  927. if (left.location.loc<>LOC_REGISTER) then
  928. begin
  929. if (right.location.loc<>LOC_REGISTER) then
  930. begin
  931. { we can reuse a CREGISTER for comparison }
  932. if not((left.location.loc=LOC_CREGISTER) and cmpop) then
  933. begin
  934. if (left.location.loc<>LOC_CREGISTER) then
  935. begin
  936. location_freetemp(exprasmlist,left.location);
  937. location_release(exprasmlist,left.location);
  938. end;
  939. hregister:=rg.getregisterint(exprasmlist,OS_INT);
  940. hregister2:=rg.getregisterint(exprasmlist,OS_INT);
  941. cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(hregister,hregister2));
  942. location_reset(left.location,LOC_REGISTER,OS_64);
  943. left.location.registerlow:=hregister;
  944. left.location.registerhigh:=hregister2;
  945. end;
  946. end
  947. else
  948. begin
  949. location_swap(left.location,right.location);
  950. toggleflag(nf_swaped);
  951. end;
  952. end;
  953. { at this point, left.location.loc should be LOC_REGISTER }
  954. if right.location.loc=LOC_REGISTER then
  955. begin
  956. { when swapped another result register }
  957. if (nodetype=subn) and (nf_swaped in flags) then
  958. begin
  959. cg64.a_op64_reg_reg(exprasmlist,op,
  960. left.location.register64,
  961. right.location.register64);
  962. location_swap(left.location,right.location);
  963. toggleflag(nf_swaped);
  964. end
  965. else if cmpop then
  966. begin
  967. emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
  968. firstjmp64bitcmp;
  969. emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
  970. secondjmp64bitcmp;
  971. end
  972. else
  973. begin
  974. cg64.a_op64_reg_reg(exprasmlist,op,
  975. right.location.register64,
  976. left.location.register64);
  977. end;
  978. location_release(exprasmlist,right.location);
  979. end
  980. else
  981. begin
  982. { right.location<>LOC_REGISTER }
  983. if (nodetype=subn) and (nf_swaped in flags) then
  984. begin
  985. {$ifdef newra}
  986. r:=rg.getregisterint(exprasmlist,OS_INT);
  987. {$else}
  988. rg.getexplicitregisterint(exprasmlist,NR_EDI);
  989. r.enum:=R_INTREGISTER;
  990. r.number:=NR_EDI;
  991. {$endif}
  992. cg64.a_load64low_loc_reg(exprasmlist,right.location,r);
  993. emit_reg_reg(op1,opsize,left.location.registerlow,r);
  994. emit_reg_reg(A_MOV,opsize,r,left.location.registerlow);
  995. cg64.a_load64high_loc_reg(exprasmlist,right.location,r);
  996. { the carry flag is still ok }
  997. emit_reg_reg(op2,opsize,left.location.registerhigh,r);
  998. emit_reg_reg(A_MOV,opsize,r,left.location.registerhigh);
  999. rg.ungetregisterint(exprasmlist,r);
  1000. if right.location.loc<>LOC_CREGISTER then
  1001. begin
  1002. location_freetemp(exprasmlist,right.location);
  1003. location_release(exprasmlist,right.location);
  1004. end;
  1005. end
  1006. else if cmpop then
  1007. begin
  1008. case right.location.loc of
  1009. LOC_CREGISTER :
  1010. begin
  1011. emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
  1012. firstjmp64bitcmp;
  1013. emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
  1014. secondjmp64bitcmp;
  1015. end;
  1016. LOC_CREFERENCE,
  1017. LOC_REFERENCE :
  1018. begin
  1019. href:=right.location.reference;
  1020. inc(href.offset,4);
  1021. emit_ref_reg(A_CMP,S_L,href,left.location.registerhigh);
  1022. firstjmp64bitcmp;
  1023. emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.registerlow);
  1024. secondjmp64bitcmp;
  1025. cg.a_jmp_always(exprasmlist,falselabel);
  1026. location_freetemp(exprasmlist,right.location);
  1027. location_release(exprasmlist,right.location);
  1028. end;
  1029. LOC_CONSTANT :
  1030. begin
  1031. exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,hi(right.location.valueqword),left.location.registerhigh));
  1032. firstjmp64bitcmp;
  1033. exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,lo(right.location.valueqword),left.location.registerlow));
  1034. secondjmp64bitcmp;
  1035. end;
  1036. else
  1037. internalerror(200203282);
  1038. end;
  1039. end
  1040. else
  1041. begin
  1042. cg64.a_op64_loc_reg(exprasmlist,op,right.location,
  1043. left.location.register64);
  1044. if (right.location.loc<>LOC_CREGISTER) then
  1045. begin
  1046. location_freetemp(exprasmlist,right.location);
  1047. location_release(exprasmlist,right.location);
  1048. end;
  1049. end;
  1050. end;
  1051. if (left.location.loc<>LOC_CREGISTER) and cmpop then
  1052. begin
  1053. location_freetemp(exprasmlist,left.location);
  1054. location_release(exprasmlist,left.location);
  1055. end;
  1056. { only in case of overflow operations }
  1057. { produce overflow code }
  1058. { we must put it here directly, because sign of operation }
  1059. { is in unsigned VAR!! }
  1060. if mboverflow then
  1061. begin
  1062. if cs_check_overflow in aktlocalswitches then
  1063. begin
  1064. objectlibrary.getlabel(hl4);
  1065. if unsigned then
  1066. emitjmp(C_NB,hl4)
  1067. else
  1068. emitjmp(C_NO,hl4);
  1069. cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
  1070. cg.a_label(exprasmlist,hl4);
  1071. end;
  1072. end;
  1073. { we have LOC_JUMP as result }
  1074. if cmpop then
  1075. location_reset(location,LOC_JUMP,OS_NO)
  1076. else
  1077. location_copy(location,left.location);
  1078. end;
  1079. {*****************************************************************************
  1080. AddMMX
  1081. *****************************************************************************}
  1082. {$ifdef SUPPORT_MMX}
  procedure ti386addnode.second_addmmx;
  { Generates code for an add node whose operands are MMX-able arrays.

    The node type (addn, muln, subn, xorn, orn, andn) and the MMX element
    type of the left operand select the packed instruction; any other node
    type raises internalerror(2003042214).  The left operand is then forced
    into an MMX register, the instruction is emitted with the right operand
    as source, and the result location is set via set_result_location.

    Subtraction is not commutative, so when the operands were swapped
    (nf_swaped) the difference is computed in the other register (or in MM7
    when right is not in an MMX register) and moved/swapped back.

    NOTE(review): the inner "case mmxbase of" statements have no else
    branch, so "op" stays unassigned for element types that are not listed
    (e.g. 32 bit elements with saturation enabled) - confirm that earlier
    passes reject those combinations. }
    var
      op          : TAsmOp;
      pushedfpu   : boolean;
      mmxbase     : tmmxtype;
      r,hregister : tregister;
    begin
      pass_left_and_right(pushedfpu);

      mmxbase:=mmx_type(left.resulttype.def);
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  { 16 bit elements need the word form; PADDSB would
                    saturate each byte separately }
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { word form for 16 bit elements, see addn above }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { right already lives in an MMX register: use it as the
                destination and remember that the operands were exchanged }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  { copy the register variable so it is not overwritten }
                  hregister:=rg.getregistermm(exprasmlist);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  location_release(exprasmlist,left.location);
                  hregister:=rg.getregistermm(exprasmlist);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: compute the difference in MM7 and move
                it back into the left (result) register }
              r.enum:=R_MM7;
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,r);
                  emit_reg_reg(op,S_NO,left.location.register,r);
                  emit_reg_reg(A_MOVQ,S_NO,r,left.location.register);
                end
              else
                begin
                  { left is an MMX register here, so it is the right
                    operand that has to be a memory reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,r);
                  emit_reg_reg(op,S_NO,left.location.register,r);
                  emit_reg_reg(A_MOVQ,S_NO,r,left.location.register);
                  location_release(exprasmlist,right.location);
                end;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                begin
                  emit_reg_reg(op,S_NO,right.location.register,left.location.register);
                end
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { the result ends up in right's register: swap the locations
                back so that left describes the result again }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
        end;

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { no node type handled here is a comparison, so left is always kept
        as the result and never released }
      set_result_location(false,true);
    end;
  1256. {$endif SUPPORT_MMX}
  1257. {*****************************************************************************
  1258. MUL
  1259. *****************************************************************************}
  1260. {$ifdef newra}
  procedure ti386addnode.second_mul;
  { New register allocator variant of the multiplication that is emitted
    with the one-operand MUL instruction.

    The left operand is loaded into a freshly allocated register, the right
    operand into EAX, and "MUL <left register>" is emitted with EDX
    allocated around it.  The value in EAX is then copied into a newly
    allocated register that becomes location.register.  The order of the
    allocate/release calls below is kept exactly as it was. }
    var
      factor_reg,
      eax_reg,
      edx_reg : Tregister;
    begin
      { register records describing the two fixed registers }
      eax_reg.enum:=R_INTREGISTER;
      eax_reg.number:=NR_EAX;
      edx_reg.enum:=R_INTREGISTER;
      edx_reg.number:=NR_EDX;

      { location.register is only filled in at the very end }
      location_reset(location,LOC_REGISTER,OS_INT);

      { left operand: temp register, then its location can be released }
      factor_reg:=rg.getregisterint(exprasmlist,OS_INT);
      cg.a_load_loc_reg(exprasmlist,left.location,factor_reg);
      location_release(exprasmlist,left.location);

      { right operand goes into EAX }
      rg.getexplicitregisterint(exprasmlist,NR_EAX);
      cg.a_load_loc_reg(exprasmlist,right.location,eax_reg);
      location_release(exprasmlist,right.location);

      { the factor register is handed back before the MUL is emitted,
        EDX is claimed because the MUL modifies it as well }
      rg.ungetregisterint(exprasmlist,factor_reg);
      rg.getexplicitregisterint(exprasmlist,NR_EDX);
      emit_reg(A_MUL,S_L,factor_reg);

      { release EDX first, then EAX }
      rg.ungetregisterint(exprasmlist,edx_reg);
      rg.ungetregisterint(exprasmlist,eax_reg);

      { move the value held in EAX into a new result register }
      location.register:=rg.getregisterint(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,eax_reg,location.register);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1295. {$else}
  procedure ti386addnode.second_mul;
  { Old register allocator variant of the multiplication that is emitted
    with the one-operand MUL instruction.

    EAX and EDX are pushed first when they are in use by something other
    than the operands.  The left operand is loaded into EDI, the right one
    into EAX, "MUL EDI" is emitted and the value in EAX is copied into a
    newly allocated register that becomes location.register.  Pushed
    registers are popped again in reverse order.

    left.location and right.location are released exactly where the
    operands have been loaded, because the optimizer needs correct
    register allocation information (JM). }
    var
      save_eax,
      save_edx   : boolean;
      regstopush : Tsupregset;
      eax_reg,
      edx_reg,
      edi_reg    : Tregister;
    begin
      { one register record per physical register that is used below }
      eax_reg.enum:=R_INTREGISTER;
      eax_reg.number:=NR_EAX;
      edx_reg.enum:=R_INTREGISTER;
      edx_reg.number:=NR_EDX;
      edi_reg.enum:=R_INTREGISTER;
      edi_reg.number:=NR_EDI;

      { location.register is only filled in near the end (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      { After the two calls below regstopush no longer contains EAX/EDX
        when they are used by the left or right location, unless they are
        regvars.  It still contains them when they are used by another
        location (JM). }
      regstopush:=all_intregisters;
      remove_non_regvars_from_loc(right.location,regstopush);
      remove_non_regvars_from_loc(left.location,regstopush);

      save_eax:=not(RS_EAX in rg.unusedregsint) and
                (RS_EAX in regstopush);
      if save_eax then
        emit_reg(A_PUSH,S_L,eax_reg);

      save_edx:=not(RS_EDX in rg.unusedregsint) and
                (RS_EDX in regstopush);
      if save_edx then
        emit_reg(A_PUSH,S_L,edx_reg);

      { left.location can be EAX, so the left value is loaded into EDI }
      rg.getexplicitregisterint(exprasmlist,NR_EDI);
      cg.a_load_loc_reg(exprasmlist,left.location,edi_reg);
      location_release(exprasmlist,left.location);

      { mark EAX as allocated and load the right value into it }
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(eax_reg));
      cg.a_load_loc_reg(exprasmlist,right.location,eax_reg);
      location_release(exprasmlist,right.location);

      { allocate EAX if it isn't yet allocated (JM) }
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(eax_reg));
      { EDX is modified by the MUL as well (JM) }
      if RS_EDX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(edx_reg));

      emit_reg(A_MUL,S_L,edi_reg);
      rg.ungetregisterint(exprasmlist,edi_reg);

      if RS_EDX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.DeAlloc(edx_reg));
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.DeAlloc(eax_reg));

      { copy the value held in EAX into the result register }
      location.register:=rg.getregisterint(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,eax_reg,location.register);

      { restore in reverse push order }
      if save_edx then
        emit_reg(A_POP,S_L,edx_reg);
      if save_eax then
        emit_reg(A_POP,S_L,eax_reg);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1375. {$endif}
  1376. {*****************************************************************************
  1377. pass_2
  1378. *****************************************************************************}
  procedure ti386addnode.pass_2;
  { Code generation entry point for add nodes.  Besides addition it is
    used for sub, mul, xor, or, and, and the comparison operators.

    Booleans, 64 bit ordinals, strings, small sets, MMX-able arrays and
    floats are handed to their own second_* routine.  What remains is
    split into two groups:
      - pointer/class/classref/procvar operands, 4 byte enums and 32 bit
        ordinals: arithmetic, logical and comparison operators
      - char/widechar and 1 or 2 byte enums: comparison operators only
    Anything else is an internal error. }
    var
      fpu_pushed,
      check_overflow,
      is_compare,
      { true, if unsigned types are involved }
      is_unsigned,
      wide_operands,
      narrow_operands : boolean;
      asmop : tasmop;
      size  : topsize;
    begin
      { to make it more readable, string and set (not smallset!) have
        their own procedures }
      case left.resulttype.def.deftype of
        orddef :
          begin
            if is_boolean(left.resulttype.def) and
               is_boolean(right.resulttype.def) then
              begin
                { boolean expressions }
                second_addboolean;
                exit;
              end
            else
              if is_64bit(left.resulttype.def) then
                begin
                  { 64 bit operations }
                  second_add64bit;
                  exit;
                end;
          end;
        stringdef :
          begin
            second_addstring;
            exit;
          end;
        setdef :
          begin
            { normal sets are already handled in pass 1 }
            if (tsetdef(left.resulttype.def).settype<>smallset) then
              internalerror(200109041);
            second_addsmallset;
            exit;
          end;
        arraydef :
          begin
{$ifdef SUPPORT_MMX}
            if is_mmx_able_array(left.resulttype.def) then
              begin
                second_addmmx;
                exit;
              end;
{$endif SUPPORT_MMX}
          end;
        floatdef :
          begin
            second_addfloat;
            exit;
          end;
      end;

      { defaults; signedness and operand size are taken before the
        operands are processed by pass_left_and_right }
      check_overflow:=false;
      is_compare:=false;
      is_unsigned:=not(is_signed(left.resulttype.def) and
                       is_signed(right.resulttype.def));
      size:=def_opsize(left.resulttype.def);

      pass_left_and_right(fpu_pushed);

      wide_operands:=
        (left.resulttype.def.deftype=pointerdef) or
        (right.resulttype.def.deftype=pointerdef) or
        (is_class_or_interface(right.resulttype.def) and
         is_class_or_interface(left.resulttype.def)) or
        (left.resulttype.def.deftype=classrefdef) or
        (left.resulttype.def.deftype=procvardef) or
        ((left.resulttype.def.deftype=enumdef) and
         (left.resulttype.def.size=4)) or
        ((left.resulttype.def.deftype=orddef) and
         (torddef(left.resulttype.def).typ in [s32bit,u32bit])) or
        ((right.resulttype.def.deftype=orddef) and
         (torddef(right.resulttype.def).typ in [s32bit,u32bit]));

      if wide_operands then
        begin
          case nodetype of
            addn :
              begin
                asmop:=A_ADD;
                check_overflow:=true;
              end;
            muln :
              begin
                if is_unsigned then
                  asmop:=A_MUL
                else
                  asmop:=A_IMUL;
                check_overflow:=true;
              end;
            subn :
              begin
                asmop:=A_SUB;
                check_overflow:=true;
              end;
            ltn,lten,
            gtn,gten,
            equaln,unequaln :
              begin
                asmop:=A_CMP;
                is_compare:=true;
              end;
            xorn :
              asmop:=A_XOR;
            orn :
              asmop:=A_OR;
            andn :
              asmop:=A_AND;
            else
              internalerror(200304229);
          end;

          { MUL requires special handling and has its own routine }
          if asmop=A_MUL then
            begin
              second_mul;
              exit;
            end;

          { operands held in the flags are converted to a register first }
          if (left.location.loc=LOC_FLAGS) then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[size],false);
          if (right.location.loc=LOC_FLAGS) then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[size],false);

          left_must_be_reg(size,false);
          { no extra "not" is requested for these operand types }
          emit_generic_code(asmop,size,is_unsigned,false,check_overflow);

          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { after a comparison left is not needed any more, unless it is
            a register variable }
          if is_compare and
             (left.location.loc<>LOC_CREGISTER) then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(is_compare,is_unsigned);
        end
      else
        begin
          { 8/16 bit enum,char,wchar types }
          narrow_operands:=
            ((left.resulttype.def.deftype=orddef) and
             (torddef(left.resulttype.def).typ in [uchar,uwidechar])) or
            ((left.resulttype.def.deftype=enumdef) and
             ((left.resulttype.def.size=1) or
              (left.resulttype.def.size=2)));

          if narrow_operands then
            begin
              { only comparisons are possible for these types }
              if not(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) then
                internalerror(2003042210);

              left_must_be_reg(size,false);
              emit_op_right_left(A_CMP,size);

              location_freetemp(exprasmlist,right.location);
              location_release(exprasmlist,right.location);
              if left.location.loc<>LOC_CREGISTER then
                begin
                  location_freetemp(exprasmlist,left.location);
                  location_release(exprasmlist,left.location);
                end;
              set_result_location(true,true);
            end
          else
            internalerror(2003042211);
        end;
    end;
  1551. begin
  { Unit initialization: assign the i386 class ti386addnode to the class
    reference caddnode (presumably the hook through which the generic
    compiler code creates add nodes - confirm against the unit that
    declares caddnode). }
  1552. caddnode:=ti386addnode;
  1553. end.
  1554. {
  1555. $Log$
  1556. Revision 1.67 2003-05-22 21:32:29 peter
  1557. * removed some unit dependencies
  1558. Revision 1.66 2003/04/26 09:12:55 peter
  1559. * add string returns in LOC_REFERENCE
  1560. Revision 1.65 2003/04/23 20:16:04 peter
  1561. + added currency support based on int64
  1562. + is_64bit for use in cg units instead of is_64bitint
  1563. * removed cgmessage from n386add, replace with internalerrors
  1564. Revision 1.64 2003/04/23 09:51:16 daniel
  1565. * Removed usage of edi in a lot of places when new register allocator used
  1566. + Added newra versions of g_concatcopy and secondadd_float
  1567. Revision 1.63 2003/04/22 23:50:23 peter
  1568. * firstpass uses expectloc
  1569. * checks if there are differences between the expectloc and
  1570. location.loc from secondpass in EXTDEBUG
  1571. Revision 1.62 2003/04/22 10:09:35 daniel
  1572. + Implemented the actual register allocator
  1573. + Scratch registers unavailable when new register allocator used
  1574. + maybe_save/maybe_restore unavailable when new register allocator used
  1575. Revision 1.61 2003/04/17 10:02:48 daniel
  1576. * Tweaked register allocate/deallocate positition to less interferences
  1577. are generated.
  1578. Revision 1.60 2003/03/28 19:16:57 peter
  1579. * generic constructor working for i386
  1580. * remove fixed self register
  1581. * esi added as address register for i386
  1582. Revision 1.59 2003/03/13 19:52:23 jonas
  1583. * and more new register allocator fixes (in the i386 code generator this
  1584. time). At least now the ppc cross compiler can compile the linux
  1585. system unit again, but I haven't tested it.
  1586. Revision 1.58 2003/03/08 20:36:41 daniel
  1587. + Added newra version of Ti386shlshrnode
  1588. + Added interference graph construction code
  1589. Revision 1.57 2003/03/08 13:59:17 daniel
  1590. * Work to handle new register notation in ag386nsm
  1591. + Added newra version of Ti386moddivnode
  1592. Revision 1.56 2003/03/08 10:53:48 daniel
  1593. * Created newra version of secondmul in n386add.pas
  1594. Revision 1.55 2003/02/19 22:00:15 daniel
  1595. * Code generator converted to new register notation
  1596. - Horribily outdated todo.txt removed
  1597. Revision 1.54 2003/01/13 18:37:44 daniel
  1598. * Work on register conversion
  1599. Revision 1.53 2003/01/08 18:43:57 daniel
  1600. * Tregister changed into a record
  1601. Revision 1.52 2002/11/25 17:43:26 peter
  1602. * splitted defbase in defutil,symutil,defcmp
  1603. * merged isconvertable and is_equal into compare_defs(_ext)
  1604. * made operator search faster by walking the list only once
  1605. Revision 1.51 2002/11/15 01:58:56 peter
  1606. * merged changes from 1.0.7 up to 04-11
  1607. - -V option for generating bug report tracing
  1608. - more tracing for option parsing
  1609. - errors for cdecl and high()
  1610. - win32 import stabs
  1611. - win32 records<=8 are returned in eax:edx (turned off by default)
  1612. - heaptrc update
  1613. - more info for temp management in .s file with EXTDEBUG
  1614. Revision 1.50 2002/10/20 13:11:27 jonas
  1615. * re-enabled optimized version of comparisons with the empty string that
  1616. I accidentally disabled in revision 1.26
  1617. Revision 1.49 2002/08/23 16:14:49 peter
  1618. * tempgen cleanup
  1619. * tt_noreuse temp type added that will be used in genentrycode
  1620. Revision 1.48 2002/08/14 18:41:48 jonas
  1621. - remove valuelow/valuehigh fields from tlocation, because they depend
  1622. on the endianess of the host operating system -> difficult to get
  1623. right. Use lo/hi(location.valueqword) instead (remember to use
  1624. valueqword and not value!!)
  1625. Revision 1.47 2002/08/11 14:32:29 peter
  1626. * renamed current_library to objectlibrary
  1627. Revision 1.46 2002/08/11 13:24:16 peter
  1628. * saving of asmsymbols in ppu supported
  1629. * asmsymbollist global is removed and moved into a new class
  1630. tasmlibrarydata that will hold the info of a .a file which
  1631. corresponds with a single module. Added librarydata to tmodule
  1632. to keep the library info stored for the module. In the future the
  1633. objectfiles will also be stored to the tasmlibrarydata class
  1634. * all getlabel/newasmsymbol and friends are moved to the new class
  1635. Revision 1.45 2002/07/26 11:17:52 jonas
  1636. * the optimization of converting a multiplication with a power of two to
  1637. a shl is moved from n386add/secondpass to nadd/resulttypepass
  1638. Revision 1.44 2002/07/20 11:58:00 florian
  1639. * types.pas renamed to defbase.pas because D6 contains a types
  1640. unit so this would conflicts if D6 programms are compiled
  1641. + Willamette/SSE2 instructions to assembler added
  1642. Revision 1.43 2002/07/11 14:41:32 florian
  1643. * start of the new generic parameter handling
  1644. Revision 1.42 2002/07/07 09:52:33 florian
  1645. * powerpc target fixed, very simple units can be compiled
  1646. * some basic stuff for better callparanode handling, far from being finished
  1647. Revision 1.41 2002/07/01 18:46:31 peter
  1648. * internal linker
  1649. * reorganized aasm layer
  1650. Revision 1.40 2002/07/01 16:23:55 peter
  1651. * cg64 patch
  1652. * basics for currency
  1653. * asnode updates for class and interface (not finished)
  1654. Revision 1.39 2002/05/18 13:34:22 peter
  1655. * readded missing revisions
  1656. Revision 1.38 2002/05/16 19:46:51 carl
  1657. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  1658. + try to fix temp allocation (still in ifdef)
  1659. + generic constructor calls
  1660. + start of tassembler / tmodulebase class cleanup
  1661. Revision 1.36 2002/05/13 19:54:37 peter
  1662. * removed n386ld and n386util units
  1663. * maybe_save/maybe_restore added instead of the old maybe_push
  1664. Revision 1.35 2002/05/12 16:53:17 peter
  1665. * moved entry and exitcode to ncgutil and cgobj
  1666. * foreach gets extra argument for passing local data to the
  1667. iterator function
  1668. * -CR checks also class typecasts at runtime by changing them
  1669. into as
  1670. * fixed compiler to cycle with the -CR option
  1671. * fixed stabs with elf writer, finally the global variables can
  1672. be watched
  1673. * removed a lot of routines from cga unit and replaced them by
  1674. calls to cgobj
  1675. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  1676. u32bit then the other is typecasted also to u32bit without giving
  1677. a rangecheck warning/error.
  1678. * fixed pascal calling method with reversing also the high tree in
  1679. the parast, detected by tcalcst3 test
  1680. Revision 1.34 2002/04/25 20:16:40 peter
  1681. * moved more routines from cga/n386util
  1682. Revision 1.33 2002/04/05 15:09:13 jonas
  1683. * fixed web bug 1915
  1684. Revision 1.32 2002/04/04 19:06:10 peter
  1685. * removed unused units
  1686. * use tlocation.size in cg.a_*loc*() routines
  1687. Revision 1.31 2002/04/02 17:11:35 peter
  1688. * tlocation,treference update
  1689. * LOC_CONSTANT added for better constant handling
  1690. * secondadd splitted in multiple routines
  1691. * location_force_reg added for loading a location to a register
  1692. of a specified size
  1693. * secondassignment parses now first the right and then the left node
  1694. (this is compatible with Kylix). This saves a lot of push/pop especially
  1695. with string operations
  1696. * adapted some routines to use the new cg methods
  1697. Revision 1.29 2002/03/04 19:10:13 peter
  1698. * removed compiler warnings
  1699. }