{ nx86add.pas }
  {
      $Id$
      Copyright (c) 2000-2002 by Florian Klaempfl

      Common code generation for add nodes on the i386 and x86

      This program is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published by
      the Free Software Foundation; either version 2 of the License, or
      (at your option) any later version.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      GNU General Public License for more details.

      You should have received a copy of the GNU General Public License
      along with this program; if not, write to the Free Software
      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

   ****************************************************************************
  }
  {
    Common code generation for add nodes on the i386 and x86
  }
  unit nx86add;

  {$i fpcdefs.inc}

  interface

    uses
      node,nadd,ncgadd,cpubase;

    type
      { Add node with the floating point code generation that the i386 and
        x86 targets have in common; derived from tcgaddnode. }
      tx86addnode = class(tcgaddnode)
        { float operations on the FPU stack; calls second_addfloatsse and
          returns when use_sse accepts the result type }
        procedure second_addfloat;override;
        { float add, sub, mul and div on mm registers }
        procedure second_addfloatsse;
        { runs the second pass over left and right; pushedfpu receives the
          result of maybe_pushfpu for the left operand, or false when
          maybe_pushfpu was not called }
        procedure pass_left_and_right(var pushedfpu:boolean);
      end;
  implementation

    uses
      globals,
      verbose,
      aasmtai,
      cpuinfo,
      cgbase,cgobj,cgx86,cga,
      pass_2,ncgutil,
      defutil;

  {*****************************************************************************
                                    AddFloat
  *****************************************************************************}
  { Runs the second pass over both operands of the node.

    firstcomplex is applied to the node first, and a left operand that is an
    ordinal constant is exchanged with the right one.  Left is then generated
    before right.

    pushedfpu receives the result of maybe_pushfpu for the left operand when
    left ended up in an FPU register, and false otherwise. }
  procedure tx86addnode.pass_left_and_right(var pushedfpu:boolean);
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      { an ordinal constant on the left is swapped over to the right }
      if (left.nodetype=ordconstn) then
        swapleftright;
      secondpass(left);

      { are too few registers free?  maybe_pushfpu works on left.location,
        so the test has to look at the left operand as well: the location
        of the node itself is only set up at the end of second_addfloat,
        after this routine has returned. }
      if left.location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location)
      else
        pushedfpu:=false;
      secondpass(right);
    end;
  { Generates FPU stack code for a floating point add node.

    Supported node types are addn, subn, muln, slashn and the six
    comparisons (ltn, lten, gtn, gten, equaln, unequaln); every other node
    type ends in internalerror(2003042214).  When use_sse accepts the
    result type the whole job is handed to second_addfloatsse instead.

    Result: arithmetic leaves the value in NR_ST (LOC_FPUREGISTER), a
    comparison yields LOC_FLAGS with the condition in location.resflags. }
  procedure tx86addnode.second_addfloat;
    var
      op : TAsmOp;
      resflags : tresflags;
      pushedfpu,
      cmpop : boolean;
    begin
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_and_right(pushedfpu);

      { select the popping form of the instruction for the node type }
      cmpop:=false;
      case nodetype of
        addn :
          op:=A_FADDP;
        muln :
          op:=A_FMULP;
        subn :
          op:=A_FSUBP;
        slashn :
          op:=A_FDIVP;
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            op:=A_FCOMPP;
            cmpop:=true;
          end;
        else
          internalerror(2003042214);
      end;

      { bring both operands onto the FPU stack }
      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { The temp of the saved left operand is not freed at this point:
            it is still read by the load below and is freed right after
            that load.
            NOTE(review): this relies on maybe_pushfpu turning
            left.location into a temp reference whenever it returns true;
            confirm against ncgutil. }
          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
              if (left.location.loc <> LOC_CFPUREGISTER) and
                 pushedfpu then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end
      { the numerator in st0 }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
          if (left.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { if we swapped the tree nodes, then use the reverse operator;
        only division and subtraction have one }
      if nf_swaped in flags then
        begin
          if (nodetype=slashn) then
            op:=A_FDIVRP
          else if (nodetype=subn) then
            op:=A_FSUBRP;
        end;

      { to avoid the pentium bug
      if (op=FDIVP) and (opt_processors=pentium) then
        cg.a_call_name(exprasmlist,'EMUL_FDIVP')
      else
      }
      { the Intel assemblers want operands }
      if op<>A_FCOMPP then
        begin
          emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        begin
          { FCOMPP is emitted without operands and the FPU stack
            bookkeeping is decremented twice }
          emit_none(op,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { on comparison load flags }
      if cmpop then
        begin
          { FNSTSW AX followed by SAHF moves the FPU status into the CPU
            flags; AX is reserved only for these two instructions }
          cg.getexplicitregister(exprasmlist,NR_AX);
          emit_reg(A_FNSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetregister(exprasmlist,NR_AX);
          if nf_swaped in flags then
            begin
              { operands were compared in reverse order, so the
                less/greater conditions are mirrored }
              case nodetype of
                equaln : resflags:=F_E;
                unequaln : resflags:=F_NE;
                ltn : resflags:=F_A;
                lten : resflags:=F_AE;
                gtn : resflags:=F_B;
                gten : resflags:=F_BE;
              end;
            end
          else
            begin
              case nodetype of
                equaln : resflags:=F_E;
                unequaln : resflags:=F_NE;
                ltn : resflags:=F_B;
                lten : resflags:=F_BE;
                gtn : resflags:=F_A;
                gten : resflags:=F_AE;
              end;
            end;
          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=resflags;
        end
      else
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register:=NR_ST;
        end;
    end;
  { Generates code for float addition, subtraction, multiplication and
    division on mm registers.

    Both operands are generated with pass_left_right; a swap recorded in
    nf_swaped is undone first.  Node types other than addn, muln, subn and
    slashn end in internalerror(200312231).  The result location is a
    LOC_MMREGISTER sized after the result type. }
  procedure tx86addnode.second_addfloatsse;
    var
      mmop : topcg;
      reuse_right : boolean;
    begin
      pass_left_right;
      if nf_swaped in flags then
        swapleftright;

      { map the node type onto the generic operation }
      case nodetype of
        addn   : mmop:=OP_ADD;
        muln   : mmop:=OP_MUL;
        subn   : mmop:=OP_SUB;
        slashn : mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      { the register of right may only become the destination when the
        operation is commutative }
      reuse_right:=(mmop in [OP_ADD,OP_MUL]) and
                   (right.location.loc=LOC_MMREGISTER);

      if not reuse_right then
        begin
          { destination is left, forced into an mm register first }
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,right.location,
            location.register,mms_movescalar);
          location_release(exprasmlist,right.location);
        end
      else
        begin
          { destination is the mm register that already holds right }
          location.register:=right.location.register;
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,left.location,
            location.register,mms_movescalar);
          location_release(exprasmlist,left.location);
        end;
    end;
  end.
  {
    $Log$
    Revision 1.4  2003-12-26 00:32:22  florian
      + fpu<->mm register conversion

    Revision 1.3  2003/12/25 01:07:09  florian
      + $fputype directive support
      + single data type operations with sse unit
      * fixed more x86-64 stuff

    Revision 1.2  2003/12/23 14:38:07  florian
      + second_floataddsse implemented

    Revision 1.1  2003/10/13 01:58:04  florian
      * some ideas for mm support implemented
  }