nx64mat.pas 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate x86-64 assembler for math nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit nx64mat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,ncgmat,nx86mat;
  23. type
  { div/mod node for x86-64: derives from the generic tmoddivnode and
    replaces its pass_2 with the implementation found in this unit }
  tx8664moddivnode = class(tmoddivnode)
    procedure pass_2; override;
  end;
  { shl/shr node for x86-64: derives from the generic tshlshrnode and
    replaces its pass_2 with the implementation found in this unit }
  tx8664shlshrnode = class(tshlshrnode)
    procedure pass_2; override;
  end;
  { unary minus node for x86-64: adds nothing to tx86unaryminusnode, it only
    provides a distinct class that is registered at unit initialization }
  tx8664unaryminusnode = class(tx86unaryminusnode) end;
  { not node for x86-64: adds nothing to tx86notnode, it only provides a
    distinct class that is registered at unit initialization }
  tx8664notnode = class(tx86notnode) end;
  34. implementation
  35. uses
  36. globtype,systems,
  37. cutils,verbose,globals,
  38. symconst,symdef,aasmbase,aasmtai,defutil,
  39. cgbase,pass_1,pass_2,
  40. ncon,
  41. cpubase,cpuinfo,
  42. cga,ncgutil,cgobj,cgx86;
  43. {*****************************************************************************
  44. TX8664MODDIVNODE
  45. *****************************************************************************}
  procedure tx8664moddivnode.pass_2;
    { Generates the x86-64 code for a div or mod node.

      Both operands are evaluated with secondpass; code generation stops
      as soon as codegenerror is set. The result is always left in an integer
      register of size OS_INT (location.loc = LOC_REGISTER).

      Two strategies are used:
        - "div" by an ordinal constant that is a power of two is turned into
          a shift (SAR with a rounding correction for signed numerators,
          SHR for unsigned ones);
        - everything else uses DIV/IDIV with the numerator in RDX:RAX; the
          quotient is taken from RAX (div) or the remainder from RDX (mod). }
    var
      hreg1,hreg2,hreg3 : Tregister;
      power : longint;
      mask : aint;
      op : Tasmop;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      { put numerator in register }
      location_reset(location,LOC_REGISTER,OS_INT);
      location_force_reg(exprasmlist,left.location,OS_INT,false);
      hreg1:=left.location.register;

      if (nodetype=divn) and (right.nodetype=ordconstn) and
         ispowerof2(int64(tordconstnode(right).value),power) then
        begin
          { for signed numbers, the numerator must be adjusted before the
            shift instruction, but not with unsigned numbers! Otherwise,
            "Cardinal($ffffffff) div 16" overflows! (JM) }
          if is_signed(left.resulttype.def) Then
            begin
              { use a sequence without jumps, saw this in
                comp.compilers (JM) }
              { no jumps, but more operations }
              hreg2:=cg.getintregister(exprasmlist,OS_INT);
              emit_reg_reg(A_MOV,S_Q,hreg1,hreg2);
              { if the numerator is negative, hreg2 becomes all ones,
                otherwise 0 }
              emit_const_reg(A_SAR,S_Q,63,hreg2);
              { if the numerator is negative, hreg2 becomes divisor-1,
                otherwise it stays 0 }
              mask:=tordconstnode(right).value-1;
              if (mask>=low(longint)) and (mask<=high(longint)) then
                emit_const_reg(A_AND,S_Q,mask,hreg2)
              else
                begin
                  { a 64 bit AND only accepts a sign extended 32 bit
                    immediate, so a mask for divisors of 2^32 and above
                    has to be loaded into a register first }
                  hreg3:=cg.getintregister(exprasmlist,OS_INT);
                  cg.a_load_const_reg(exprasmlist,OS_INT,mask,hreg3);
                  emit_reg_reg(A_AND,S_Q,hreg3,hreg2);
                end;
              { add the correction to the numerator }
              emit_reg_reg(A_ADD,S_Q,hreg2,hreg1);
              { do the shift }
              emit_const_reg(A_SAR,S_Q,power,hreg1);
            end
          else
            emit_const_reg(A_SHR,S_Q,power,hreg1);
          location.register:=hreg1;
        end
      else
        begin
          { bring the numerator into RAX }
          cg.getcpuregister(exprasmlist,NR_RAX);
          emit_reg_reg(A_MOV,S_Q,hreg1,NR_RAX);
          cg.getcpuregister(exprasmlist,NR_RDX);
          { the upper half in RDX depends on the left type: cleared for
            u64bit, otherwise produced by the A_CDO instruction
            (presumably the sign extension of RAX into RDX) }
          if torddef(left.resulttype.def).typ=u64bit then
            emit_reg_reg(A_XOR,S_Q,NR_RDX,NR_RDX)
          else
            emit_none(A_CDO,S_NO);
          { the kind of division depends on the right type }
          if Torddef(right.resulttype.def).typ=u64bit then
            op:=A_DIV
          else
            op:=A_IDIV;
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            emit_ref(op,S_Q,right.location.reference)
          else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
            emit_reg(op,S_Q,right.location.register)
          else
            begin
              { any other location: load the divisor into a fresh register;
                it is loaded and used as a 64 bit value, so the register is
                allocated with that size as well }
              hreg2:=cg.getintregister(exprasmlist,OS_64);
              cg.a_load_loc_reg(exprasmlist,OS_64,right.location,hreg2);
              emit_reg(op,S_Q,hreg2);
            end;
          { copy the result into a new register, release RAX & RDX }
          cg.ungetcpuregister(exprasmlist,NR_RDX);
          cg.ungetcpuregister(exprasmlist,NR_RAX);
          location.register:=cg.getintregister(exprasmlist,OS_INT);
          { quotient is taken from RAX, remainder from RDX }
          if nodetype=divn then
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,NR_RAX,location.register)
          else
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,NR_RDX,location.register);
        end;
    end;
  124. {*****************************************************************************
  125. TX8664SHLSHRNODE
  126. *****************************************************************************}
  procedure tx8664shlshrnode.pass_2;
    { Generates the x86-64 code for a shl or shr node.

      The left operand is forced into a register which also becomes the
      location of the result. A constant shift count is encoded as an
      immediate (masked to the operand width), any other count is loaded
      into RCX and the shift is done by CL. }
    var
      shiftop : Tasmop;
      shiftsize : tcgsize;
      countmask : aint;
    begin
      secondpass(left);
      secondpass(right);

      { operands of at most four bytes are shifted as 32 bit values (kept
        for backwards compatibility), everything else as 64 bit values }
      if left.resulttype.def.size>4 then
        begin
          shiftsize:=OS_64;
          countmask:=63;
        end
      else
        begin
          shiftsize:=OS_32;
          countmask:=31;
        end;

      { select the instruction }
      if nodetype<>shln then
        shiftop:=A_SHR
      else
        shiftop:=A_SHL;

      { the result is computed in a register holding the left operand }
      location_copy(location,left.location);
      location_force_reg(exprasmlist,location,shiftsize,false);

      if right.nodetype<>ordconstn then
        begin
          { variable shift count: it has to be passed in RCX }
          cg.getcpuregister(exprasmlist,NR_RCX);
          cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,NR_RCX);
          { the count is in place, the shift itself reads it from CL }
          cg.ungetcpuregister(exprasmlist,NR_RCX);
          emit_reg_reg(shiftop,tcgsize2opsize[shiftsize],NR_CL,location.register);
        end
      else
        { constant shift count: coded directly as an immediate }
        emit_const_reg(shiftop,tcgsize2opsize[shiftsize],tordconstnode(right).value and countmask,location.register);
    end;
  begin
    { unit initialization: assign the x86-64 node classes of this unit to
      the c...node variables, which are declared outside this unit
      (presumably the class references used to create math nodes) }
    cmoddivnode:=tx8664moddivnode;
    cshlshrnode:=tx8664shlshrnode;
    cunaryminusnode:=tx8664unaryminusnode;
    cnotnode:=tx8664notnode;
  end.
  173. {
  174. $Log$
  175. Revision 1.7 2004-09-25 14:23:55 peter
  176. * ungetregister is now only used for cpuregisters, renamed to
  177. ungetcpuregister
  178. * renamed (get|unget)explicitregister(s) to ..cpuregister
  179. * removed location-release/reference_release
  180. Revision 1.6 2004/06/20 08:55:32 florian
  181. * logs truncated
  182. Revision 1.5 2004/06/16 20:07:11 florian
  183. * dwarf branch merged
  184. Revision 1.4.2.3 2004/05/03 16:27:38 peter
  185. * fixed shl for x86-64
  186. Revision 1.4.2.2 2004/04/26 15:54:33 peter
  187. * small x86-64 fixes
  188. Revision 1.4.2.1 2004/04/24 16:02:19 florian
  189. * sign extension for int div int fixed
  190. Revision 1.4 2004/02/05 18:28:37 peter
  191. * x86_64 fixes for opsize
  192. }