n386mat.pas 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate i386 assembler for math nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386mat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,ncgmat,nx86mat;
  23. type
  { i386 flavour of the integer div/mod node: the machine code is produced
    by the overridden pass_2. }
  ti386moddivnode = class(tmoddivnode)
    procedure pass_2; override;
  end;
  { i386 flavour of the shl/shr node. }
  ti386shlshrnode = class(tshlshrnode)
    { 64 bit shifts need no first-pass rewriting on this target: everything
      is handled in pass_2 }
    function first_shlshr64bitint: tnode; override;
    { emits the shift code for both 32 bit and 64 bit operands }
    procedure pass_2; override;
  end;
  { i386 unary minus node: adds nothing to the shared x86 implementation. }
  ti386unaryminusnode = class(tx86unaryminusnode)
  end;
  { i386 "not" node: adds nothing to the shared x86 implementation. }
  ti386notnode = class(tx86notnode)
  end;
  36. implementation
  37. uses
  38. globtype,systems,
  39. cutils,verbose,globals,
  40. symconst,symdef,aasmbase,aasmtai,defutil,
  41. cgbase,pass_2,
  42. ncon,
  43. cpubase,cpuinfo,
  44. cga,ncgutil,cgobj,cgutils;
  45. {*****************************************************************************
  46. TI386MODDIVNODE
  47. *****************************************************************************}
  {
    Generates i386 code for a 32 bit integer "div" or "mod" node.

    Both operands are evaluated with secondpass; the routine returns as soon
    as codegenerror is set. A 64 bit result type triggers internal error
    200109052, because such nodes are expected to be handled in pass_1.
    The result is delivered in location as a LOC_REGISTER of size OS_INT.

    A "div" by a constant power of two is turned into a shift; every other
    case uses DIV/IDIV on EDX:EAX.
  }
  procedure ti386moddivnode.pass_2;
    var
      numreg,signreg,divisorreg,resultsrc : Tregister;
      shiftcount : longint;
      skipadjust : Tasmlabel;
      op : Tasmop;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      { 64 bit operations should have been handled in pass_1 (JM) }
      if is_64bitint(resulttype.def) then
        internalerror(200109052);

      { the numerator always ends up in a register }
      location_reset(location,LOC_REGISTER,OS_INT);
      location_force_reg(exprasmlist,left.location,OS_INT,false);
      numreg:=left.location.register;

      { ---- "div" by a constant power of two: shift instead of divide ---- }
      if (nodetype=divn) and (right.nodetype=ordconstn) and
         ispowerof2(tordconstnode(right).value,shiftcount) then
        begin
          { Only signed numerators have to be adjusted before the shift.
            With unsigned numbers the adjustment must not be done, otherwise
            "Cardinal($ffffffff) div 16" overflows! (JM) }
          if is_signed(left.resulttype.def) then
            begin
              if (aktOptProcessor=class386) or
                 (cs_littlesize in aktglobalswitches) then
                begin
                  { one jump, but fewer operations: skip the adjustment
                    when the numerator is not negative }
                  emit_reg_reg(A_TEST,S_L,numreg,numreg);
                  objectlibrary.getlabel(skipadjust);
                  cg.a_jmp_flags(exprasmlist,F_NS,skipadjust);
                  if shiftcount=1 then
                    emit_reg(A_INC,S_L,numreg)
                  else
                    emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,numreg);
                  cg.a_label(exprasmlist,skipadjust);
                end
              else
                begin
                  { jump-free sequence, seen in comp.compilers (JM):
                    more operations, but no branch }
                  signreg:=cg.getintregister(exprasmlist,OS_INT);
                  emit_reg_reg(A_MOV,S_L,numreg,signreg);
                  { signreg=$ffffffff for a negative numerator, else 0 }
                  emit_const_reg(A_SAR,S_L,31,signreg);
                  { signreg=divisor-1 for a negative numerator, else 0 }
                  emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,signreg);
                  { apply the adjustment to the numerator }
                  emit_reg_reg(A_ADD,S_L,signreg,numreg);
                end;
              op:=A_SAR;
            end
          else
            op:=A_SHR;
          { the actual division: arithmetic shift for signed values,
            logical shift for unsigned ones }
          emit_const_reg(op,S_L,shiftcount,numreg);
          location.register:=numreg;
          exit;
        end;

      { ---- general case: DIV/IDIV with the numerator in EDX:EAX ---- }
      cg.getcpuregister(exprasmlist,NR_EAX);
      emit_reg_reg(A_MOV,S_L,numreg,NR_EAX);
      cg.getcpuregister(exprasmlist,NR_EDX);
      { how EDX is filled depends on the type of the left operand:
        zeroed for u32bit, sign extended from EAX otherwise }
      if torddef(left.resulttype.def).typ=u32bit then
        emit_reg_reg(A_XOR,S_L,NR_EDX,NR_EDX)
      else
        emit_none(A_CDQ,S_NO);

      { the divide instruction depends on the type of the right operand }
      if Torddef(right.resulttype.def).typ=u32bit then
        op:=A_DIV
      else
        op:=A_IDIV;

      { divide directly by memory or register operands; anything else is
        loaded into a fresh register first }
      case right.location.loc of
        LOC_REFERENCE,LOC_CREFERENCE:
          emit_ref(op,S_L,right.location.reference);
        LOC_REGISTER,LOC_CREGISTER:
          emit_reg(op,S_L,right.location.register);
        else
          begin
            divisorreg:=cg.getintregister(exprasmlist,right.location.size);
            cg.a_load_loc_reg(exprasmlist,OS_32,right.location,divisorreg);
            emit_reg(op,S_L,divisorreg);
          end;
      end;

      { release EDX and EAX, then copy the result into a new register:
        the quotient is taken from EAX, the remainder from EDX }
      cg.ungetcpuregister(exprasmlist,NR_EDX);
      cg.ungetcpuregister(exprasmlist,NR_EAX);
      location.register:=cg.getintregister(exprasmlist,OS_INT);
      if nodetype=divn then
        resultsrc:=NR_EAX
      else
        resultsrc:=NR_EDX;
      cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,resultsrc,location.register);
    end;
  145. {*****************************************************************************
  146. TI386SHLSHRNODE
  147. *****************************************************************************}
  { Always returns nil: this node type provides no replacement node for
    64 bit shifts (the class declaration notes they are handled in pass_2). }
  function ti386shlshrnode.first_shlshr64bitint: tnode;
    begin
      first_shlshr64bitint:=nil;
    end;
  {
    Generates i386 code for a "shl" or "shr" node.

    32 bit operands: one SHL/SHR, either with an immediate count (masked to
    0..31) or with the count in CL.

    64 bit operands (register pair hi:lo):
      - constant count (masked to 0..63): at most two instructions; for
        counts above 31 the two halves are swapped in the result location
        instead of being moved;
      - variable count: a run-time test on ECX selects between ">= 64"
        (result zero), ">= 32" and "< 32".

    In the 64 bit code "source" is the half that bits leave (lo for shl,
    hi for shr) and "destination" the half that receives them.
  }
  procedure ti386shlshrnode.pass_2;
    var
      hireg,loreg,srcreg,dstreg : Tregister;
      op,dblop : Tasmop;
      fullcount,count : TConstExprInt;
      lbl_below64,lbl_below32,lbl_done : Tasmlabel;
    begin
      secondpass(left);
      secondpass(right);

      { pick the single and the double precision shift opcode }
      if nodetype=shln then
        begin
          op:=A_SHL;
          dblop:=A_SHLD;
        end
      else
        begin
          op:=A_SHR;
          dblop:=A_SHRD;
        end;

      { ---------------------- 32 bit operand ---------------------- }
      if not is_64bitint(left.resulttype.def) then
        begin
          { the left operand goes into a register, which is also the result }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);
          if right.nodetype=ordconstn then
            { constant count is coded directly; l shl 32 should be 0 imho,
              but neither TP nor Delphi do it in this way (FK) }
            emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
          else
            begin
              { variable count: load it into ECX and shift by CL }
              cg.getcpuregister(exprasmlist,NR_ECX);
              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
              cg.ungetcpuregister(exprasmlist,NR_ECX);
              emit_reg_reg(op,S_L,NR_CL,location.register);
            end;
          exit;
        end;

      { ---------------------- 64 bit operand ---------------------- }
      location_reset(location,LOC_REGISTER,OS_64);
      { the left operand goes into a register pair }
      location_force_reg(exprasmlist,left.location,OS_64,false);
      hireg:=left.location.register64.reghi;
      loreg:=left.location.register64.reglo;
      if nodetype=shln then
        begin
          srcreg:=loreg;
          dstreg:=hireg;
        end
      else
        begin
          srcreg:=hireg;
          dstreg:=loreg;
        end;

      { constant count: coded directly }
      if right.nodetype=ordconstn then
        begin
          fullcount:=Tordconstnode(right).value and 63;
          count:=fullcount and 31;
          if fullcount>31 then
            begin
              { every bit crosses into the other half: clear the
                destination, shift the source by the remaining count and
                exchange the roles of the two registers in the result }
              emit_reg_reg(A_XOR,S_L,dstreg,dstreg);
              if count<>0 then
                emit_const_reg(op,S_L,count,srcreg);
              location.register64.reghi:=loreg;
              location.register64.reglo:=hireg;
            end
          else
            begin
              { double precision shift into the destination half, then a
                plain shift of the source half }
              emit_const_reg_reg(dblop,S_L,count,srcreg,dstreg);
              emit_const_reg(op,S_L,count,srcreg);
              location.register64.reglo:=loreg;
              location.register64.reghi:=hireg;
            end;
          exit;
        end;

      { variable count: load it into ECX }
      cg.getcpuregister(exprasmlist,NR_ECX);
      cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);

      { the x86 shift instructions only handle counts below 32, so the
        count is classified at run time }
      objectlibrary.getlabel(lbl_below64);
      objectlibrary.getlabel(lbl_below32);
      objectlibrary.getlabel(lbl_done);

      { count not below 64: the result is zero }
      emit_const_reg(A_CMP,S_L,64,NR_ECX);
      cg.a_jmp_flags(exprasmlist,F_L,lbl_below64);
      emit_reg_reg(A_XOR,S_L,loreg,loreg);
      emit_reg_reg(A_XOR,S_L,hireg,hireg);
      cg.a_jmp_always(exprasmlist,lbl_done);

      { count not below 32: shift the source half by count-32, move it
        into the destination half and clear the source half }
      cg.a_label(exprasmlist,lbl_below64);
      emit_const_reg(A_CMP,S_L,32,NR_ECX);
      cg.a_jmp_flags(exprasmlist,F_L,lbl_below32);
      emit_const_reg(A_SUB,S_L,32,NR_ECX);
      emit_reg_reg(op,S_L,NR_CL,srcreg);
      emit_reg_reg(A_MOV,S_L,srcreg,dstreg);
      emit_reg_reg(A_XOR,S_L,srcreg,srcreg);
      cg.a_jmp_always(exprasmlist,lbl_done);

      { count below 32: double precision shift followed by a plain shift }
      cg.a_label(exprasmlist,lbl_below32);
      emit_reg_reg_reg(dblop,S_L,NR_CL,srcreg,dstreg);
      emit_reg_reg(op,S_L,NR_CL,srcreg);

      cg.a_label(exprasmlist,lbl_done);
      cg.ungetcpuregister(exprasmlist,NR_ECX);
      location.register64.reglo:=loreg;
      location.register64.reghi:=hireg;
    end;
  277. begin
  278. cunaryminusnode:=ti386unaryminusnode;
  279. cmoddivnode:=ti386moddivnode;
  280. cshlshrnode:=ti386shlshrnode;
  281. cnotnode:=ti386notnode;
  282. end.
  283. {
  284. $Log$
  285. Revision 1.74 2005-02-14 17:13:09 peter
  286. * truncate log
  287. }