nx86add.pas 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Common code generation for add nodes on the i386 and x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. {
  19. Common code generation for add nodes on the i386 and x86-64
  20. }
  21. unit nx86add;
  22. {$i fpcdefs.inc}
  23. interface
  24. uses
  25. node,nadd,ncgadd,cpubase;
  type
    { Add node for the x86 code generators, derived from the generic
      code generator add node (tcgaddnode). }
    tx86addnode = class(tcgaddnode)
      { floating point operands via the FPU register stack; delegates to
        second_addfloatsse when use_sse() holds for the result type }
      procedure second_addfloat; override;

      { floating point add/sub/mul/div via scalar MM register operations }
      procedure second_addfloatsse;

      { runs the second pass over the left and then the right operand;
        pushedfpu returns the result of maybe_pushfpu, or false when
        maybe_pushfpu was not called }
      procedure pass_left_and_right(var pushedfpu : boolean);
    end;
  32. implementation
  33. uses
  34. globals,
  35. verbose,
  36. aasmtai,
  37. cpuinfo,
  38. cgbase,cgobj,cgx86,cga,
  39. pass_2,ncgutil,
  40. defutil;
  41. {*****************************************************************************
  42. AddFloat
  43. *****************************************************************************}
  {
    Generates code for both operands of this node, left operand first.

    pushedfpu: set to the result of maybe_pushfpu when the left operand
               ended up in an FPU register after its second pass, and to
               false otherwise.
  }
  procedure tx86addnode.pass_left_and_right(var pushedfpu:boolean);
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      { in case of constant put it to the left }
      if (left.nodetype=ordconstn) then
        swapleftright;
      secondpass(left);

      { are too few registers free?
        The test has to look at where the left operand actually is.  The
        location of this node itself is only filled in by second_addfloat
        after both operands were processed (and becomes LOC_FLAGS for
        comparisons), so it says nothing about whether left currently
        occupies an FPU register.
        NOTE(review): maybe_pushfpu is not visible in this unit; it is
        presumably the routine that moves left out of the FPU stack when
        right needs too many FPU registers - confirm. }
      if left.location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location)
      else
        pushedfpu:=false;

      secondpass(right);
    end;
  {
    Floating point add node using the FPU register stack.

    Handles addn, muln, subn, slashn and the six comparison node types;
    any other node type ends in internalerror(2003042214).  Arithmetic
    nodes leave their result in NR_ST (LOC_FPUREGISTER), comparisons
    leave it in the CPU flags (LOC_FLAGS).  When use_sse() holds for the
    result type the whole job is handed to second_addfloatsse.
  }
  procedure tx86addnode.second_addfloat;
    var
      fpuop       : TAsmOp;
      cmpflags    : tresflags;
      leftpushed,
      iscompare,
      rightloaded,
      swapped     : boolean;
    begin
      { SSE capable result types take the MM register path }
      if use_sse(resulttype.def) then
        begin
          second_addfloatsse;
          exit;
        end;

      pass_left_and_right(leftpushed);

      { pick the popping FPU instruction for this node type }
      iscompare:=false;
      case nodetype of
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            fpuop:=A_FCOMPP;
            iscompare:=true;
          end;
        addn :
          fpuop:=A_FADDP;
        subn :
          fpuop:=A_FSUBP;
        muln :
          fpuop:=A_FMULP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      { bring the right operand onto the FPU stack unless it is there already }
      rightloaded:=(right.location.loc<>LOC_FPUREGISTER);
      if rightloaded then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
          { NOTE(review): the guard looks at the right location while the
            temp being freed belongs to left; kept exactly as found }
          if leftpushed and
             (right.location.loc<>LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
        end;

      { same for the left operand (the nominator ends up in st0) }
      if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
          if leftpushed and
             (left.location.loc<>LOC_CFPUREGISTER) then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { left was already on the stack, so the operands sit there in
            the wrong order => remember the swap }
          toggleflag(nf_swaped);
        end;

      { releases the right reference, only needed if right was loaded here }
      if rightloaded then
        location_release(exprasmlist,right.location);

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { swapped operands need the reverse form of the non-commutative operators }
      if nf_swaped in flags then
        case nodetype of
          slashn :
            fpuop:=A_FDIVRP;
          subn :
            fpuop:=A_FSUBRP;
        end;

      { to avoid the pentium bug
      if (op=FDIVP) and (opt_processors=pentium) then
        cg.a_call_name(exprasmlist,'EMUL_FDIVP')
      else
      }
      if fpuop=A_FCOMPP then
        begin
          { the comparison is emitted without operands and is accounted
            for with two stack decrements }
          emit_none(fpuop,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end
      else
        begin
          { the Intel assemblers want operands }
          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end;

      { arithmetic: the result is on top of the FPU stack, nothing else to do }
      if not iscompare then
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register:=NR_ST;
          exit;
        end;

      { comparison: copy the FPU status word into the CPU flags via AX }
      cg.getexplicitregister(exprasmlist,NR_AX);
      emit_reg(A_FNSTSW,S_NO,NR_AX);
      emit_none(A_SAHF,S_NO);
      cg.ungetregister(exprasmlist,NR_AX);

      { with swapped operands the ordering conditions are mirrored,
        (un)equality is symmetric }
      swapped:=nf_swaped in flags;
      case nodetype of
        equaln :
          cmpflags:=F_E;
        unequaln :
          cmpflags:=F_NE;
        ltn :
          begin
            if swapped then
              cmpflags:=F_A
            else
              cmpflags:=F_B;
          end;
        lten :
          begin
            if swapped then
              cmpflags:=F_AE
            else
              cmpflags:=F_BE;
          end;
        gtn :
          begin
            if swapped then
              cmpflags:=F_B
            else
              cmpflags:=F_A;
          end;
        gten :
          begin
            if swapped then
              cmpflags:=F_BE
            else
              cmpflags:=F_AE;
          end;
      end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=cmpflags;
    end;
  {
    Floating point add node using scalar MM register operations.

    Supports addn, muln, subn and slashn; every other node type ends in
    internalerror(200312231).  The result location is an MM register
    (LOC_MMREGISTER) sized after the result type.
  }
  procedure tx86addnode.second_addfloatsse;
    var
      mmop       : topcg;
      reuseright : boolean;
    begin
      pass_left_right;
      { undo an operand swap so left/right are in source order again }
      if (nf_swaped in flags) then
        swapleftright;

      case nodetype of
        addn :
          mmop:=OP_ADD;
        subn :
          mmop:=OP_SUB;
        muln :
          mmop:=OP_MUL;
        slashn :
          mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));

      { the register of right may only serve as destination if the
        operation is commutative }
      reuseright:=(right.location.loc=LOC_MMREGISTER) and
                  (mmop in [OP_ADD,OP_MUL]);

      if not reuseright then
        begin
          { regular case: left becomes the destination, right the source }
          location_force_mmregscalar(exprasmlist,left.location,false);
          location.register:=left.location.register;
          { a source living in an fpu register is written to memory
            instead of being forced into an mm register: there is no
            direct fpu->mm register copy instruction, so going through
            memory probably gives shorter code }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,right.location);
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,right.location,location.register,mms_movescalar);
          location_release(exprasmlist,right.location);
        end
      else
        begin
          { commutative case: right is the destination, left the source }
          location.register:=right.location.register;
          { same reasoning as above: an fpu register source goes through
            memory because there is no direct fpu->mm register copy }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            location_force_mem(exprasmlist,left.location);
          cg.a_opmm_loc_reg(exprasmlist,mmop,location.size,left.location,location.register,mms_movescalar);
          location_release(exprasmlist,left.location);
        end;
    end;
  240. end.
  241. {
  242. $Log$
  243. Revision 1.5 2003-12-26 13:19:16 florian
  244. * rtl and compiler compile with -Cfsse2
  245. Revision 1.4 2003/12/26 00:32:22 florian
  246. + fpu<->mm register conversion
  247. Revision 1.3 2003/12/25 01:07:09 florian
  248. + $fputype directive support
  249. + single data type operations with sse unit
  250. * fixed more x86-64 stuff
  251. Revision 1.2 2003/12/23 14:38:07 florian
  252. + second_floataddsse implemented
  253. Revision 1.1 2003/10/13 01:58:04 florian
  254. * some ideas for mm support implemented
  255. }