nx64mat.pas 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate x86-64 assembler for math nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit nx64mat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,ncgmat,nx86mat;
  type
    { div/mod node for x86-64; supplies its own second pass (pass_2) }
    tx8664moddivnode = class(tmoddivnode)
      procedure pass_2;override;
    end;

    { shl/shr node for x86-64; supplies its own second pass (pass_2) }
    tx8664shlshrnode = class(tshlshrnode)
      procedure pass_2;override;
    end;

    { unary minus: nothing is overridden here, the behaviour of
      tx86unaryminusnode is inherited unchanged }
    tx8664unaryminusnode = class(tx86unaryminusnode)
    end;

    { not: nothing is overridden here, the behaviour of tx86notnode is
      inherited unchanged }
    tx8664notnode = class(tx86notnode)
    end;
  34. implementation
  35. uses
  36. globtype,systems,
  37. cutils,verbose,globals,
  38. symconst,symdef,aasmbase,aasmtai,defutil,
  39. pass_1,pass_2,
  40. ncon,
  41. cpubase,cpuinfo,
  42. cgbase,cgutils,cga,cgobj,cgx86,
  43. ncgutil;
  44. {*****************************************************************************
  45. TX8664MODDIVNODE
  46. *****************************************************************************}
  { Second pass for an integer "div"/"mod" node on x86-64.

    Both operands are evaluated first. The result location is always a
    register of size OS_INT.

    * "div" by a constant power of two is turned into a shift. For a signed
      left operand the numerator is first corrected by (divisor-1) when it is
      negative, using a jump-free sequence.
    * Everything else goes through DIV/IDIV with the numerator in RAX and
      its extension in RDX; the quotient is taken from RAX ("div") or the
      remainder from RDX ("mod"). }
  procedure tx8664moddivnode.pass_2;
    var
      hreg1,hreg2,divisorreg : Tregister;
      power : longint;
      op : Tasmop;
      roundmask : aint;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      { put numerator in register }
      location_reset(location,LOC_REGISTER,OS_INT);
      location_force_reg(exprasmlist,left.location,OS_INT,false);
      hreg1:=left.location.register;

      if (nodetype=divn) and (right.nodetype=ordconstn) and
         ispowerof2(int64(tordconstnode(right).value),power) then
        begin
          { for signed numbers, the numerator must be adjusted before the
            shift instruction, but not with unsigned numbers! Otherwise,
            "Cardinal($ffffffff) div 16" overflows! (JM) }
          if is_signed(left.resulttype.def) then
            begin
              { use a sequence without jumps, saw this in
                comp.compilers (JM) }
              { no jumps, but more operations }
              hreg2:=cg.getintregister(exprasmlist,OS_INT);
              emit_reg_reg(A_MOV,S_Q,hreg1,hreg2);
              { replicate the sign bit: hreg2 has all 64 bits set if the
                left value is negative, otherwise it is 0 }
              emit_const_reg(A_SAR,S_Q,63,hreg2);
              { if negative, hreg2=right value-1, otherwise 0 }
              roundmask:=tordconstnode(right).value-1;
              if roundmask<=high(longint) then
                emit_const_reg(A_AND,S_Q,roundmask,hreg2)
              else
                begin
                  { a 64 bit AND only takes a sign extended 32 bit immediate,
                    so a larger mask (divisor >= 2^32) has to be loaded into
                    a register first }
                  divisorreg:=cg.getintregister(exprasmlist,OS_INT);
                  cg.a_load_const_reg(exprasmlist,OS_INT,roundmask,divisorreg);
                  emit_reg_reg(A_AND,S_Q,divisorreg,hreg2);
                end;
              { add to the left value }
              emit_reg_reg(A_ADD,S_Q,hreg2,hreg1);
              { do the shift }
              emit_const_reg(A_SAR,S_Q,power,hreg1);
            end
          else
            emit_const_reg(A_SHR,S_Q,power,hreg1);
          location.register:=hreg1;
        end
      else
        begin
          { bring the numerator into RAX }
          cg.getcpuregister(exprasmlist,NR_RAX);
          emit_reg_reg(A_MOV,S_Q,hreg1,NR_RAX);
          cg.getcpuregister(exprasmlist,NR_RDX);
          { the extension into RDX depends on the left type: RDX is cleared
            for an unsigned 64 bit numerator, otherwise A_CDO is emitted to
            do the sign extension }
          if torddef(left.resulttype.def).typ=u64bit then
            emit_reg_reg(A_XOR,S_Q,NR_RDX,NR_RDX)
          else
            emit_none(A_CDO,S_NO);
          { the kind of division depends on the right type }
          if Torddef(right.resulttype.def).typ=u64bit then
            op:=A_DIV
          else
            op:=A_IDIV;
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            emit_ref(op,S_Q,right.location.reference)
          else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
            emit_reg(op,S_Q,right.location.register)
          else
            begin
              { any other location is loaded into a temporary register; the
                register is allocated with the same size it is loaded and
                used with }
              divisorreg:=cg.getintregister(exprasmlist,OS_INT);
              cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,divisorreg);
              emit_reg(op,S_Q,divisorreg);
            end;
          { Copy the result into a new register. Release RAX & RDX. }
          cg.ungetcpuregister(exprasmlist,NR_RDX);
          cg.ungetcpuregister(exprasmlist,NR_RAX);
          location.register:=cg.getintregister(exprasmlist,OS_INT);
          if nodetype=divn then
            { quotient }
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,NR_RAX,location.register)
          else
            { remainder }
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,NR_RDX,location.register);
        end;
    end;
  125. {*****************************************************************************
  126. TX8664SHLSHRNODE
  127. *****************************************************************************}
  { Second pass for a "shl"/"shr" node on x86-64.

    The left operand is forced into a register, which also becomes the
    result location. A constant shift count is masked and encoded as an
    immediate; any other count is loaded into RCX and the shift is done
    by CL. }
  procedure tx8664shlshrnode.pass_2;
    var
      shiftop : Tasmop;
      shiftsize : tcgsize;
      countmask : aint;
    begin
      secondpass(left);
      secondpass(right);

      { operands of at most 4 bytes are shifted as 32 bit values with a
        5 bit count (backwards compatibility), everything else as 64 bit
        values with a 6 bit count }
      if left.resulttype.def.size>4 then
        begin
          shiftsize:=OS_64;
          countmask:=63;
        end
      else
        begin
          shiftsize:=OS_32;
          countmask:=31;
        end;

      { pick the instruction }
      if nodetype<>shln then
        shiftop:=A_SHR
      else
        shiftop:=A_SHL;

      { the value to shift lives in the result register }
      location_copy(location,left.location);
      location_force_reg(exprasmlist,location,shiftsize,false);

      if right.nodetype<>ordconstn then
        begin
          { variable count: load it into RCX and shift by CL }
          cg.getcpuregister(exprasmlist,NR_RCX);
          cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,NR_RCX);
          cg.ungetcpuregister(exprasmlist,NR_RCX);
          emit_reg_reg(shiftop,tcgsize2opsize[shiftsize],NR_CL,location.register);
        end
      else
        { constant count: encode it directly, masked to the operand width }
        emit_const_reg(shiftop,tcgsize2opsize[shiftsize],
          tordconstnode(right).value and countmask,location.register);
    end;
  begin
    { unit initialization: point the node class variables at the x86-64
      classes declared in this unit }
    cmoddivnode:=tx8664moddivnode;
    cshlshrnode:=tx8664shlshrnode;
    cunaryminusnode:=tx8664unaryminusnode;
    cnotnode:=tx8664notnode;
  end.
  174. {
  175. $Log$
  176. Revision 1.8 2004-11-01 17:40:29 florian
  177. + added cgutils uses clause
  178. Revision 1.7 2004/09/25 14:23:55 peter
  179. * ungetregister is now only used for cpuregisters, renamed to
  180. ungetcpuregister
  181. * renamed (get|unget)explicitregister(s) to ..cpuregister
  182. * removed location-release/reference_release
  183. Revision 1.6 2004/06/20 08:55:32 florian
  184. * logs truncated
  185. Revision 1.5 2004/06/16 20:07:11 florian
  186. * dwarf branch merged
  187. Revision 1.4.2.3 2004/05/03 16:27:38 peter
  188. * fixed shl for x86-64
  189. Revision 1.4.2.2 2004/04/26 15:54:33 peter
  190. * small x86-64 fixes
  191. Revision 1.4.2.1 2004/04/24 16:02:19 florian
  192. * sign extension for int div int fixed
  193. Revision 1.4 2004/02/05 18:28:37 peter
  194. * x86_64 fixes for opsize
  195. }