nx86add.pas 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
{
    $Id$
    Copyright (c) 2000-2002 by Florian Klaempfl

    Common code generation for add nodes on the i386 and x86

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
{
  Common code generation for add nodes on the i386 and x86
}
  21. unit nx86add;
  22. {$i fpcdefs.inc}
  23. interface
  24. uses
  25. node,nadd,ncgadd,cpubase;
  type
    { Add node shared by the i386 and x86 code generators; it extends the
      generic code generator add node (tcgaddnode) with floating point
      handling. }
    tx86addnode = class(tcgaddnode)
      { floating point add/sub/mul/div and comparisons; hands over to
        second_addfloatsse when the selected FPU type covers the result type }
      procedure second_addfloat;override;
      { scalar floating point add/sub/mul/div using multimedia registers }
      procedure second_addfloatsse;
      { runs the second pass on both operands; pushedfpu reports the result
        of maybe_pushfpu for the left operand (false if it was not called) }
      procedure pass_left_and_right(var pushedfpu:boolean);
    end;
  32. implementation
  33. uses
  34. globals,
  35. verbose,
  36. aasmtai,
  37. cpuinfo,
  38. cgbase,cgobj,cgx86,cga,
  39. pass_2,ncgutil,
  40. defutil;
{*****************************************************************************
                                  AddFloat
*****************************************************************************}
  { Generates code for both operands of this node (second pass).

    pushedfpu receives the result of maybe_pushfpu for the left operand when
    this node's location is LOC_FPUREGISTER; otherwise it is set to false. }
  procedure tx86addnode.pass_left_and_right(var pushedfpu:boolean);
    begin
      { let firstcomplex decide which operand is the more difficult one }
      firstcomplex(self);

      { an ordinal constant found on the left side is exchanged with the
        right operand before any code is generated }
      if left.nodetype=ordconstn then
        swapleftright;

      secondpass(left);

      { are too few fpu registers free for the right operand?
        NOTE(review): the test reads the node's own location.loc, not
        left.location.loc - confirm this is intended }
      pushedfpu:=false;
      if location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location);

      secondpass(right);
    end;
  { Generates code for a floating point add node using the fpu stack.

    Handles addn, muln, subn, slashn and the six comparison node types; any
    other node type raises internalerror(2003042214).  When the result type
    is single or double and the current fpu type lists it as an sse scalar
    type, the work is delegated to second_addfloatsse instead.

    Arithmetic results are left in NR_ST (LOC_FPUREGISTER); comparisons
    yield LOC_FLAGS with the matching resflags. }
  procedure tx86addnode.second_addfloat;
    var
      op        : TAsmOp;
      resflags  : tresflags;
      pushedfpu,
      cmpop,
      swapped   : boolean;

    { Gets the left operand onto the fpu stack: loads it into NR_ST unless
      it already lives in an fpu register, in which case the node is only
      marked as swapped. }
    procedure left_to_st;
      begin
        if left.location.loc=LOC_FPUREGISTER then
          begin
            { left was already on the stack => operands are in swapped order }
            toggleflag(nf_swaped);
          end
        else
          begin
            cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
            if pushedfpu and
               (left.location.loc<>LOC_CFPUREGISTER) then
              location_freetemp(exprasmlist,left.location);
          end;
      end;

    begin
      { single/double operations covered by the selected fpu type are done
        by the sse variant }
      if (is_single(resulttype.def) and (aktfputype in sse_singlescalar)) or
         (is_double(resulttype.def) and (aktfputype in sse_doublescalar)) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_and_right(pushedfpu);

      { select the popping fpu instruction for this node type }
      case nodetype of
        addn   : op:=A_FADDP;
        muln   : op:=A_FMULP;
        subn   : op:=A_FSUBP;
        slashn : op:=A_FDIVP;
        ltn,lten,gtn,gten,
        equaln,unequaln :
          op:=A_FCOMPP;
        else
          internalerror(2003042214);
      end;
      cmpop:=(op=A_FCOMPP);

      { bring both operands onto the fpu stack }
      if right.location.loc=LOC_FPUREGISTER then
        left_to_st
      else
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { NOTE(review): the condition looks at right.location but the temp
            that gets freed belongs to left.location - confirm this is
            intended }
          if pushedfpu and
             (right.location.loc<>LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
          left_to_st;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end;

      { releases the left reference }
      if left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
        location_release(exprasmlist,left.location);

      { if the operands ended up swapped, the non-commutative operations
        need their reverse instruction }
      swapped:=nf_swaped in flags;
      if swapped then
        case nodetype of
          slashn : op:=A_FDIVRP;
          subn   : op:=A_FSUBRP;
        end;

      { to avoid the pentium bug
      if (op=FDIVP) and (opt_processors=pentium) then
        cg.a_call_name(exprasmlist,'EMUL_FDIVP')
      else
      }

      if not cmpop then
        begin
          { the Intel assemblers want operands }
          emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
          { one operand is popped }
          tcgx86(cg).dec_fpu_stack;
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register:=NR_ST;
          exit;
        end;

      { comparison: the instruction takes no operands and pops both values }
      emit_none(op,S_NO);
      tcgx86(cg).dec_fpu_stack;
      tcgx86(cg).dec_fpu_stack;

      { copy the fpu status word into the cpu flags via AX }
      cg.getexplicitregister(exprasmlist,NR_AX);
      emit_reg(A_FNSTSW,S_NO,NR_AX);
      emit_none(A_SAHF,S_NO);
      cg.ungetregister(exprasmlist,NR_AX);

      { pick the flag condition; the ordering conditions are mirrored when
        the operands are swapped }
      case nodetype of
        equaln   : resflags:=F_E;
        unequaln : resflags:=F_NE;
        ltn :
          if swapped then
            resflags:=F_A
          else
            resflags:=F_B;
        lten :
          if swapped then
            resflags:=F_AE
          else
            resflags:=F_BE;
        gtn :
          if swapped then
            resflags:=F_B
          else
            resflags:=F_A;
        gten :
          if swapped then
            resflags:=F_BE
          else
            resflags:=F_AE;
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  { Generates code for a floating point add node using scalar operations on
    multimedia registers.

    Supports addn, muln, subn and slashn; any other node type raises
    internalerror(200312231).  The result is placed in a LOC_MMREGISTER
    location sized after the result type. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
    begin
      pass_left_right;
      { undo an operand swap recorded on the node }
      if nf_swaped in flags then
        swapleftright;

      { map the node type to the generic operation }
      case nodetype of
        addn   : op:=OP_ADD;
        muln   : op:=OP_MUL;
        subn   : op:=OP_SUB;
        slashn : op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      if (right.location.loc<>LOC_MMREGISTER) or
         not(op in [OP_ADD,OP_MUL]) then
        begin
          { general case: force left into an mm register, which then becomes
            the destination, and apply right to it }
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          cg.a_opmm_loc_reg(exprasmlist,op,location.size,right.location,location.register,mms_movescalar);
          location_release(exprasmlist,right.location);
        end
      else
        begin
          { right may only take the place of the left operand if the
            operation is commutative; its register is reused as destination }
          location.register:=right.location.register;
          cg.a_opmm_loc_reg(exprasmlist,op,location.size,left.location,location.register,mms_movescalar);
          location_release(exprasmlist,left.location);
        end;
    end;
  227. end.
{
  $Log$
  Revision 1.3  2003-12-25 01:07:09  florian
    + $fputype directive support
    + single data type operations with sse unit
    * fixed more x86-64 stuff

  Revision 1.2  2003/12/23 14:38:07  florian
    + second_floataddsse implemented

  Revision 1.1  2003/10/13 01:58:04  florian
    * some ideas for mm support implemented
}