ncgmat.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate generic mathematical nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit ncgmat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,cpubase,cgbase,cginfo;
  23. type
  { Generic code generator node for the unary minus operator. }
  tcgunaryminusnode = class(tunaryminusnode)
    { Second pass: generates the code that negates the operand. }
    procedure pass_2;override;
  protected
    { Changes the sign of the floating point value held in the
      floating point register @var(r).

      Targets should override this routine: the generic version is
      not optimal at all, and it assumes that floating point values
      are stored in the register in IEEE-754 format.

      @param(r Floating point register holding the value; it also
               receives the result)
      @param(_size Size of the floating point value) }
    procedure emit_float_sign_change(r: tregister; _size : tcgsize);virtual;
  end;
  { Generic code generator node for the div and mod operators. }
  tcgmoddivnode = class(tmoddivnode)
    { Second pass: generates the code for the division or modulo. }
    procedure pass_2;override;
  protected
    { Must perform an actual 32-bit division, be it signed or
      unsigned. The result must be stored in the @var(num) register.

      The optimizations regarding shifts have already been done and
      emitted, so this routine really has to do a divide.

      @param(signed Indicates if the division must be signed)
      @param(denum Register containing the denominator)
      @param(num Register containing the numerator, will also
                 receive the result) }
    procedure emit_div_reg_reg(signed: boolean;denum,num : tregister);virtual;abstract;

    { Must perform an actual 32-bit modulo, be it signed or
      unsigned. The result must be stored in the @var(num) register.

      The optimizations regarding shifts have already been done and
      emitted, so this routine really has to do a modulo.

      @param(signed Indicates if the modulo must be signed)
      @param(denum Register containing the denominator)
      @param(num Register containing the numerator, will also
                 receive the result) }
    procedure emit_mod_reg_reg(signed: boolean;denum,num : tregister);virtual;abstract;

    { Must perform an actual 64-bit division, be it signed or
      unsigned. The result must be stored in the @var(num) register.

      The optimizations regarding shifts have already been done and
      emitted, so this routine really has to do a divide.

      Currently, this routine should only be implemented on 64-bit
      systems, otherwise a helper is called in the 1st pass. The
      default implementation in this unit raises an internal error.

      @param(signed Indicates if the division must be signed)
      @param(denum Register containing the denominator)
      @param(num Register containing the numerator, will also
                 receive the result) }
    procedure emit64_div_reg_reg(signed: boolean;denum,num : tregister64);virtual;
  end;
  { Generic code generator node for the shl and shr operators. }
  tcgshlshrnode = class(tshlshrnode)
    { Second pass: generates the code for the shift. }
    procedure pass_2;override;
  end;
  77. implementation
  78. uses
  79. globtype,systems,
  80. cutils,verbose,globals,
  81. symconst,symdef,aasmbase,aasmtai,aasmcpu,defutil,
  82. pass_1,pass_2,
  83. ncon,
  84. cpuinfo,
  85. tgobj,ncgutil,cgobj,rgobj,rgcpu,paramgr,cg64f32;
  86. {*****************************************************************************
  87. TCGUNARYMINUSNODE
  88. *****************************************************************************}
  { Generic (unoptimized) sign change of the floating point value held
    in the floating point register @var(r).

    The value is stored into a temporary memory area, the sign bit of
    its IEEE-754 representation is toggled there, and the value is
    loaded back into @var(r). Only IEEE single (OS_F32) and double
    (OS_F64) values are supported; any other size raises
    internalerror(20020814).

    @param(r Floating point register holding the value; it also
             receives the negated value)
    @param(_size Size of the floating point value in @var(r)) }
  procedure tcgunaryminusnode.emit_float_sign_change(r: tregister; _size : tcgsize);
    var
      href,
      signref : treference;
    begin
      { only single and double ieee are supported; reject anything
        else before code is emitted or a temp is allocated }
      if (_size<>OS_F32) and (_size<>OS_F64) then
        internalerror(20020814);
      { get a temporary memory reference and store the floating
        point value in it }
      tg.gettemp(exprasmlist,tcgsize2size[_size],tt_normal,href);
      cg.a_loadfpu_reg_ref(exprasmlist,_size,r,href);
      { signref addresses the 32-bit word that holds the sign bit; href
        itself stays untouched so it can be used for the reload and for
        releasing the temp.
        On a little-endian machine the most significant 32-bit value of
        a double is stored at the highest address }
      signref:=href;
      if (_size=OS_F64) and (target_info.endian=endian_little) then
        inc(signref.offset,4);
      { the sign bit is bit 31/63 of single/double, i.e. the top bit of
        that word. It has to be toggled: or-ing in the complemented bit
        can only ever set it, which would leave a negative value
        negative }
      cg.a_op_const_ref(exprasmlist,OP_XOR,OS_32,$80000000,signref);
      { load the negated value back and give the temp back }
      cg.a_loadfpu_ref_reg(exprasmlist,_size,href,r);
      tg.ungetiftemp(exprasmlist,href);
    end;
  { Second pass of the unary minus node.

    Evaluates the operand and emits the code that negates it:
    64-bit integers go through cg64, other integers are negated with
    OP_NEG in an OS_INT register, and floating point operands are
    brought into an FPU register and passed to emit_float_sign_change.
    Any operand location that is not handled raises
    internalerror(200203225). }
  procedure tcgunaryminusnode.pass_2;

    { negates the integer held in the result register, in place }
    procedure negate_result_int;
      begin
        cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,location.register,
          location.register);
      end;

    { turns the result location into an FPU register location }
    procedure result_is_fpureg;
      begin
        location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      end;

    { changes the sign of the float held in the result register }
    procedure negate_result_float;
      begin
        emit_float_sign_change(location.register,def_cgsize(left.resulttype.def));
      end;

    begin
      secondpass(left);

      if is_64bitint(left.resulttype.def) then
        begin
          { load left operator in a register (pair) and negate it there }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_64,false);
          cg64.a_op64_loc_reg(exprasmlist,OP_NEG,
            location,joinreg64(location.registerlow,location.registerhigh));
          exit;
        end;

      { everything else starts out as an integer register result; the
        floating point cases below reset the location again }
      location_reset(location,LOC_REGISTER,OS_INT);
      case left.location.loc of
        LOC_REGISTER:
          begin
            { the operand register is reused for the result }
            location.register:=left.location.register;
            negate_result_int;
          end;
        LOC_CREGISTER:
          begin
            { work on a copy of the operand register }
            location.register:=rg.getregisterint(exprasmlist,OS_INT);
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,left.location.register,
              location.register);
            negate_result_int;
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            reference_release(exprasmlist,left.location.reference);
            if (left.resulttype.def.deftype<>floatdef) then
              begin
                location.register:=rg.getregisterint(exprasmlist,OS_INT);
                { the size is OS_INT, since in pass_1 everything is
                  converted to a signed natural value anyways }
                cg.a_load_ref_reg(exprasmlist,OS_INT,
                  left.location.reference,location.register);
                negate_result_int;
              end
            else
              begin
                result_is_fpureg;
                location.register:=rg.getregisterfpu(exprasmlist);
                cg.a_loadfpu_ref_reg(exprasmlist,
                  def_cgsize(left.resulttype.def),
                  left.location.reference,location.register);
                negate_result_float;
              end;
          end;
        LOC_FPUREGISTER:
          begin
            { the operand register is reused for the result }
            result_is_fpureg;
            location.register:=left.location.register;
            negate_result_float;
          end;
        LOC_CFPUREGISTER:
          begin
            { work on a copy of the operand register }
            result_is_fpureg;
            location.register:=rg.getregisterfpu(exprasmlist);
            cg.a_loadfpu_reg_reg(exprasmlist,left.location.register,location.register);
            negate_result_float;
          end;
        else
          internalerror(200203225);
      end;
    end;
  206. {*****************************************************************************
  207. TCGMODDIVNODE
  208. *****************************************************************************}
  { Default implementation of the 64-bit division emitter.

    It only raises internalerror(200109052): 64-bit divisions are
    expected to be handled in pass_1 already, unless pass_1 is
    overridden, in which case this routine has to be overridden too.

    @param(signed Indicates if the division must be signed)
    @param(denum Register containing the denominator)
    @param(num Register containing the numerator, will also
               receive the result) }
  procedure tcgmoddivnode.emit64_div_reg_reg(signed: boolean; denum,num:tregister64);
    begin
      { should be handled in pass_1 (JM) }
      internalerror(200109052);
    end;
  { Second pass of the div/mod node.

    Evaluates both operands and emits the code for the operation:
    - 64-bit operands are brought into registers and passed to
      emit64_div_reg_reg (valid for 64-bit cpu's only, otherwise
      helpers are called in pass_1);
    - a div by a constant power of two is emitted as a shift;
    - everything else gets a runtime check for a zero divisor
      (FPC_HANDLERROR with error 200) followed by emit_div_reg_reg or
      emit_mod_reg_reg.
    The result is left in a register and the overflow check is
    generated last. }
  procedure tcgmoddivnode.pass_2;
    var
      hreg1 : tregister;
      hdenom : tregister;
      power : longint;
      hl : tasmlabel;
      pushedregs : tmaybesave;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
      secondpass(right);
      maybe_restore(exprasmlist,left.location,pushedregs);
      if codegenerror then
        exit;

      if is_64bitint(resulttype.def) then
        begin
          { this code valid for 64-bit cpu's only ,
            otherwise helpers are called in pass_1
          }
          { the numerator has to be forced into registers BEFORE it is
            copied to the result location; copying left.location again
            after forcing the result location would throw the forced
            registers away }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);
          location_force_reg(exprasmlist,right.location,OS_64,false);
          { NOTE(review): this routine is called for modn as well;
            confirm that 64-bit targets overriding it handle modulo }
          emit64_div_reg_reg(is_signed(left.resulttype.def),
            joinreg64(right.location.registerlow,right.location.registerhigh),
            joinreg64(location.registerlow,location.registerhigh));
        end
      else
        begin
          { put numerator in register }
          location_force_reg(exprasmlist,left.location,OS_INT,false);
          hreg1:=left.location.register;
          if (nodetype=divn) and
             (right.nodetype=ordconstn) and
             ispowerof2(tordconstnode(right).value,power) then
            begin
              { for signed numbers, the numerator must be adjusted before the
                shift instruction, but not with unsigned numbers! Otherwise,
                "Cardinal($ffffffff) div 16" overflows! (JM) }
              if is_signed(left.resulttype.def) then
                begin
                  { skip the adjustment when the numerator is positive }
                  objectlibrary.getlabel(hl);
                  cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_GT,0,hreg1,hl);
                  { add divisor-1; for a divisor of 2 this is simply 1,
                    so no separate case is needed for power=1 }
                  cg.a_op_const_reg(exprasmlist,OP_ADD,
                    tordconstnode(right).value-1,hreg1);
                  cg.a_label(exprasmlist,hl);
                  cg.a_op_const_reg(exprasmlist,OP_SAR,power,hreg1);
                end
              else { not signed }
                cg.a_op_const_reg(exprasmlist,OP_SHR,power,hreg1);
            end
          else
            begin
              { bring denominator to hdenom, which is only used for
                temporary purposes }
              hdenom:=rg.getregisterint(exprasmlist,OS_INT);
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              cg.a_load_loc_reg(exprasmlist,right.location,hdenom);
              { verify if the divisor is zero, if so return an error
                immediately
              }
              objectlibrary.getlabel(hl);
              cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_NE,0,hdenom,hl);
              cg.a_param_const(exprasmlist,OS_S32,200,paramanager.getintparaloc(1));
              cg.a_call_name(exprasmlist,'FPC_HANDLERROR');
              cg.a_label(exprasmlist,hl);
              if nodetype=modn then
                emit_mod_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1)
              else
                emit_div_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1);
              { the result is in hreg1, so the temporary denominator
                register has to be given back to the allocator }
              rg.ungetregister(exprasmlist,hdenom);
            end;
          location_reset(location,LOC_REGISTER,OS_INT);
          location.register:=hreg1;
        end;
      cg.g_overflowcheck(exprasmlist,self);
    end;
  303. {*****************************************************************************
  304. TCGSHLSHRNODE
  305. *****************************************************************************}
  { Second pass of the shl/shr node.

    Evaluates both operands, brings the left operand into an OS_INT
    register and shifts it there. A constant shift count is masked
    with 31 and coded directly; any other count is brought into a
    register first. 64-bit operands are expected to be handled in the
    1st pass and raise internalerror(2002081501) here. }
  procedure tcgshlshrnode.pass_2;
    var
      hcountreg : tregister;
      op : topcg;
      pushedregs : tmaybesave;
      freescratch : boolean;
    begin
      freescratch:=false;
      secondpass(left);
      { stop emitting code once an error has been reported, as the
        div/mod node in this unit does }
      if codegenerror then
        exit;
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
      secondpass(right);
      maybe_restore(exprasmlist,left.location,pushedregs);
      if codegenerror then
        exit;
      { determine operator }
      case nodetype of
        shln:
          op:=OP_SHL;
        shrn:
          op:=OP_SHR;
        else
          { any other node type would leave op undefined }
          internalerror(200303281);
      end;
      if is_64bitint(left.resulttype.def) then
        begin
          { already handled in 1st pass }
          internalerror(2002081501);
          (* Normally for 64-bit cpu's this here should be here,
             and only pass_1 need to be overriden, but dunno how to
             do that!
          location_reset(location,LOC_REGISTER,OS_64);
          { load left operator in a register }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);
          if (right.nodetype=ordconstn) then
            begin
              cg64.a_op64_const_reg(exprasmlist,op,tordconstnode(right).value,
                joinreg64(location.registerlow,location.registerhigh));
            end
          else
            begin
              { this should be handled in pass_1 }
              internalerror(2002081501);
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist);
                  cg.a_load_loc_reg(exprasmlist,right.location,hcountreg);
                  freescratch := true;
                end
              else
                hcountreg:=right.location.register;
              cg64.a_op64_reg_reg(exprasmlist,op,hcountreg,
                joinreg64(location.registerlow,location.registerhigh));
              if freescratch then
                cg.free_scratch_reg(exprasmlist,hcountreg);
            end;*)
        end
      else
        begin
          { load left operators in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);
          if (right.nodetype=ordconstn) then
            begin
              { shifting by a constant directly coded.
                l shl 32 should be 0 imho, but neither TP nor Delphi do
                it in this way (FK), so the count is simply masked
                instead of clearing the register for counts above 31 }
              cg.a_op_const_reg(exprasmlist,op,tordconstnode(right).value and 31,
                location.register);
            end
          else
            begin
              { load right operators in a register - this
                is done since most target cpu which will use this
                node do not support a shift count in a mem. location (cec)
              }
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist,OS_INT);
                  freescratch:=true;
                  cg.a_load_loc_reg(exprasmlist,right.location,hcountreg);
                end
              else
                hcountreg:=right.location.register;
              cg.a_op_reg_reg(exprasmlist,op,OS_INT,hcountreg,location.register);
              { only a scratch register obtained above is freed here }
              if freescratch then
                cg.free_scratch_reg(exprasmlist,hcountreg);
            end;
        end;
    end;
  { unit initialization: assigns the generic code generator node
    classes declared in this unit to cunaryminusnode, cmoddivnode and
    cshlshrnode (presumably the class references consulted when such
    nodes are created - confirm in nmat) }
begin
  cunaryminusnode:=tcgunaryminusnode;
  cmoddivnode:=tcgmoddivnode;
  cshlshrnode:=tcgshlshrnode;
end.
  405. {
  406. $Log$
  407. Revision 1.7 2003-03-28 19:16:56 peter
  408. * generic constructor working for i386
  409. * remove fixed self register
  410. * esi added as address register for i386
  411. Revision 1.6 2003/02/19 22:00:14 daniel
  412. * Code generator converted to new register notation
  413. - Horribily outdated todo.txt removed
  414. Revision 1.5 2002/11/25 17:43:18 peter
  415. * splitted defbase in defutil,symutil,defcmp
  416. * merged isconvertable and is_equal into compare_defs(_ext)
  417. * made operator search faster by walking the list only once
  418. Revision 1.4 2002/09/17 18:54:02 jonas
  419. * a_load_reg_reg() now has two size parameters: source and dest. This
  420. allows some optimizations on architectures that don't encode the
  421. register size in the register name.
  422. Revision 1.3 2002/08/23 16:14:48 peter
  423. * tempgen cleanup
  424. * tt_noreuse temp type added that will be used in genentrycode
  425. Revision 1.2 2002/08/15 15:15:55 carl
  426. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  427. * more generic nodes for maths
  428. * several fixes for better m68k support
  429. Revision 1.1 2002/08/14 19:26:55 carl
  430. + generic int_to_real type conversion
  431. + generic unaryminus node
  432. }