ncgmat.pas 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate generic mathematical nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit ncgmat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,cpubase,cgbase,cginfo;
  23. type
  { Generic code generator node for the unary minus operator. }
  tcgunaryminusnode = class(tunaryminusnode)
     procedure pass_2; override;
  protected
     { Changes the sign of the floating point value held in the
       floating point register r; _size is the code generator size of
       that value.

       Targets should override this routine: the generic version is
       not optimal at all and it assumes that floating point values
       are kept in the register in IEEE-754 format.
     }
     procedure emit_float_sign_change(r: tregister; _size: tcgsize); virtual;
  end;
  { Generic code generator node for the div and mod operators. The
    target specific work is delegated to the emit_* routines below. }
  tcgmoddivnode = class(tmoddivnode)
     procedure pass_2; override;
  protected
     { Must emit an actual 32-bit division, signed or unsigned. The
       result must be stored in the @var(num) register.

       The optimizations regarding shifts have already been done and
       emitted by the caller, so this really has to do a divide.

       @param(signed Indicates if the division must be signed)
       @param(denum Register containing the denominator)
       @param(num Register containing the numerator, will also receive result)
     }
     procedure emit_div_reg_reg(signed: boolean; denum, num: tregister); virtual; abstract;

     { Must emit an actual 32-bit modulo, signed or unsigned. The
       result must be stored in the @var(num) register.

       The optimizations regarding shifts have already been done and
       emitted by the caller, so this really has to do a modulo.

       @param(signed Indicates if the modulo must be signed)
       @param(denum Register containing the denominator)
       @param(num Register containing the numerator, will also receive result)
     }
     procedure emit_mod_reg_reg(signed: boolean; denum, num: tregister); virtual; abstract;

     { Must emit an actual 64-bit division, signed or unsigned. The
       result must be stored in the @var(num) register.

       The optimizations regarding shifts have already been done and
       emitted by the caller, so this really has to do a divide.

       Currently this routine should only be implemented on 64-bit
       systems, otherwise a helper is called in the first pass.

       @param(signed Indicates if the division must be signed)
       @param(denum Register containing the denominator)
       @param(num Register containing the numerator, will also receive result)
     }
     procedure emit64_div_reg_reg(signed: boolean; denum, num: tregister64); virtual;
  end;
  { Generic code generator node for the shl and shr operators. }
  tcgshlshrnode = class(tshlshrnode)
     procedure pass_2; override;
  end;
  77. implementation
  78. uses
  79. globtype,systems,
  80. cutils,verbose,globals,
  81. symconst,symdef,aasmbase,aasmtai,aasmcpu,defbase,
  82. pass_1,pass_2,
  83. ncon,
  84. cpuinfo,
  85. tgobj,ncgutil,cgobj,rgobj,rgcpu,paramgr,cg64f32;
  86. {*****************************************************************************
  87. TCGUNARYMINUSNODE
  88. *****************************************************************************}
  { Changes the sign of the floating point value held in the floating
    point register r.

    The value is written to a temporary memory location, the 32-bit
    word that contains the sign bit is loaded into an integer register,
    the sign bit is toggled there, the word is written back and the
    complete value is finally reloaded into r.

    r     : floating point register holding the value; receives the result
    _size : size of the value; only OS_F32 and OS_F64 are supported, any
            other size triggers internalerror(20020814)

    The value is assumed to be stored in IEEE-754 single/double format.
  }
  procedure tcgunaryminusnode.emit_float_sign_change(r: tregister; _size : tcgsize);
    var
      href,
      signref : treference;
      hreg : tregister;
    begin
      { only single and double ieee are supported }
      if (_size<>OS_F32) and (_size<>OS_F64) then
        internalerror(20020814);
      { get a temporary memory reference and store the floating point
        value in it }
      tg.gettemp(exprasmlist,tcgsize2size[_size],tt_normal,href);
      cg.a_loadfpu_reg_ref(exprasmlist,_size,r,href);
      { signref addresses the 32-bit word holding the sign bit; href is
        left untouched so it can be used for the reload and the release
        of the temporary }
      signref:=href;
      { on little-endian machines the most significant 32-bit value of
        a double is stored at the highest address }
      if (_size=OS_F64) and (target_info.endian=endian_little) then
        inc(signref.offset,4);
      hreg:=rg.getregisterint(exprasmlist);
      cg.a_load_ref_reg(exprasmlist,OS_32,signref,hreg);
      { the sign bit is bit 31/63 of a single/double, i.e. bit 31 of the
        loaded word. It has to be toggled with XOR: merely OR-ing in the
        complemented bit can set the sign but can never clear it, so
        negative values would stay negative }
      cg.a_op_const_reg(exprasmlist,OP_XOR,$80000000,hreg);
      cg.a_load_reg_ref(exprasmlist,OS_32,hreg,signref);
      rg.ungetregister(exprasmlist,hreg);
      { reload the modified value and give the temporary back }
      cg.a_loadfpu_ref_reg(exprasmlist,_size,href,r);
      tg.ungetiftemp(exprasmlist,href);
    end;
  { Second pass of the unary minus node: generates the code for the
    operand and negates it. 64-bit integers are negated through cg64,
    floating point operands through emit_float_sign_change and all
    other operands with an OP_NEG on an OS_INT register. Any operand
    location that is not handled triggers internalerror(200203225).
  }
  procedure tcgunaryminusnode.pass_2;
    var
      is64bit : boolean;

    { negates the integer held in location.register in place }
    procedure negate_int_result;
      begin
        cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,location.register,
          location.register);
      end;

    { changes the sign of the float held in location.register in place }
    procedure negate_float_result;
      begin
        emit_float_sign_change(location.register,
          def_cgsize(left.resulttype.def));
      end;

    begin
      is64bit:=is_64bitint(left.resulttype.def);
      secondpass(left);
      if is64bit then
        begin
          { bring the operand into a register pair and negate it there }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_64,false);
          cg64.a_op64_loc_reg(exprasmlist,OP_NEG,location,
            joinreg64(location.registerlow,location.registerhigh));
        end
      else
        begin
          location_reset(location,LOC_REGISTER,OS_INT);
          case left.location.loc of
            LOC_REGISTER:
              begin
                { the operand register is reused for the result }
                location.register:=left.location.register;
                negate_int_result;
              end;
            LOC_CREGISTER:
              begin
                { the operand register is copied to a fresh register
                  first and the copy is negated }
                location.register:=rg.getregisterint(exprasmlist);
                cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,
                  left.location.register,location.register);
                negate_int_result;
              end;
            LOC_REFERENCE,
            LOC_CREFERENCE:
              begin
                reference_release(exprasmlist,left.location.reference);
                if left.resulttype.def.deftype=floatdef then
                  begin
                    location_reset(location,LOC_FPUREGISTER,
                      def_cgsize(resulttype.def));
                    location.register:=rg.getregisterfpu(exprasmlist);
                    cg.a_loadfpu_ref_reg(exprasmlist,
                      def_cgsize(left.resulttype.def),
                      left.location.reference,location.register);
                    negate_float_result;
                  end
                else
                  begin
                    location.register:=rg.getregisterint(exprasmlist);
                    { the size is OS_INT, since in pass_1 everything is
                      converted to a signed natural value anyway }
                    cg.a_load_ref_reg(exprasmlist,OS_INT,
                      left.location.reference,location.register);
                    negate_int_result;
                  end;
              end;
            LOC_FPUREGISTER:
              begin
                { the operand register is reused for the result }
                location_reset(location,LOC_FPUREGISTER,
                  def_cgsize(resulttype.def));
                location.register:=left.location.register;
                negate_float_result;
              end;
            LOC_CFPUREGISTER:
              begin
                { the operand register is copied to a fresh fpu register
                  first and the copy is changed }
                location_reset(location,LOC_FPUREGISTER,
                  def_cgsize(resulttype.def));
                location.register:=rg.getregisterfpu(exprasmlist);
                cg.a_loadfpu_reg_reg(exprasmlist,left.location.register,
                  location.register);
                negate_float_result;
              end;
            else
              internalerror(200203225);
          end;
        end;
    end;
  206. {*****************************************************************************
  207. TCGMODDIVNODE
  208. *****************************************************************************}
  { Generic 64-bit division: always raises internalerror(200109052).
    64-bit divisions are already handled in pass_1 unless pass_1 is
    overridden, so this version should never be reached (JM).
  }
  procedure tcgmoddivnode.emit64_div_reg_reg(signed: boolean; denum,num:tregister64);
    begin
      internalerror(200109052);
    end;
  { Second pass of the div/mod node.

    Both operands are generated first (left, then right, saving and
    restoring the left location around the right operand when needed).
    64-bit operations are passed to emit64_div_reg_reg. For OS_INT
    operands a division by a constant power of two is emitted as a
    shift; everything else gets a run time check for a zero divisor
    followed by emit_div_reg_reg or emit_mod_reg_reg. The result ends
    up in the register that held the numerator.
  }
  procedure tcgmoddivnode.pass_2;
    var
      hreg1 : tregister;
      hdenom : tregister;
      power : longint;
      hl : tasmlabel;
      pushedregs : tmaybesave;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
      secondpass(right);
      maybe_restore(exprasmlist,left.location,pushedregs);
      if codegenerror then
        exit;
      location_copy(location,left.location);

      if is_64bitint(resulttype.def) then
        begin
          { this code is valid for 64-bit cpu's only, otherwise helpers
            are called in pass_1 }
          { location already holds a copy of left.location. It must not
            be copied again after it has been forced into registers,
            otherwise the register pair passed below would be taken from
            the left location that was never forced }
          location_force_reg(exprasmlist,location,OS_64,false);
          location_force_reg(exprasmlist,right.location,OS_64,false);
          { NOTE(review): modn nodes end up in emit64_div_reg_reg as well,
            there is no separate 64-bit modulo routine - confirm that
            overriding implementations check nodetype }
          emit64_div_reg_reg(is_signed(left.resulttype.def),
            joinreg64(right.location.registerlow,right.location.registerhigh),
            joinreg64(location.registerlow,location.registerhigh));
        end
      else
        begin
          { put numerator in register }
          location_force_reg(exprasmlist,left.location,OS_INT,false);
          hreg1:=left.location.register;

          if (nodetype=divn) and
             (right.nodetype=ordconstn) and
             ispowerof2(tordconstnode(right).value,power) then
            begin
              { division by a constant power of two: use a shift }
              { for signed numbers, the numerator must be adjusted before the
                shift instruction, but not with unsigned numbers! Otherwise,
                "Cardinal($ffffffff) div 16" overflows! (JM) }
              if is_signed(left.resulttype.def) then
                begin
                  objectlibrary.getlabel(hl);
                  { the adjustment is skipped via hl when the comparison
                    of the numerator with 0 using OC_GT succeeds }
                  cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_GT,0,hreg1,hl);
                  if power=1 then
                    cg.a_op_const_reg(exprasmlist,OP_ADD,1,hreg1)
                  else
                    cg.a_op_const_reg(exprasmlist,OP_ADD,
                      tordconstnode(right).value-1,hreg1);
                  cg.a_label(exprasmlist,hl);
                  cg.a_op_const_reg(exprasmlist,OP_SAR,power,hreg1);
                end
              else
                { not signed }
                cg.a_op_const_reg(exprasmlist,OP_SHR,power,hreg1);
            end
          else
            begin
              { bring denominator to hdenom; hdenom is always free, it
                is only used for temporary purposes }
              hdenom:=rg.getregisterint(exprasmlist);
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              cg.a_load_loc_reg(exprasmlist,right.location,hdenom);
              { verify if the divisor is zero, if so return an error
                immediately: FPC_HANDLERROR is called with error
                number 200 unless hdenom is non-zero }
              objectlibrary.getlabel(hl);
              cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_NE,0,hdenom,hl);
              cg.a_param_const(exprasmlist,OS_S32,200,paramanager.getintparaloc(1));
              cg.a_call_name(exprasmlist,'FPC_HANDLERROR');
              cg.a_label(exprasmlist,hl);
              if nodetype=modn then
                emit_mod_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1)
              else
                emit_div_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1);
            end;
          { the numerator register now holds the result }
          location_reset(location,LOC_REGISTER,OS_INT);
          location.register:=hreg1;
        end;
      cg.g_overflowcheck(exprasmlist,self);
    end;
  306. {*****************************************************************************
  307. TCGSHLSHRNODE
  308. *****************************************************************************}
  { Second pass of the shl/shr node.

    Both operands are generated (left, then right, saving and restoring
    the left location around the right operand when needed). 64-bit
    operands trigger internalerror(2002081501) since they are expected
    to be handled in the first pass. Otherwise the left operand is
    forced into an OS_INT register and shifted in place, either by a
    constant (masked with 31) or by a count held in a register.
  }
  procedure tcgshlshrnode.pass_2;
    var
      hcountreg : tregister;
      op : topcg;
      savedregs : tmaybesave;
    begin
      secondpass(left);
      maybe_save(exprasmlist,right.registers32,left.location,savedregs);
      secondpass(right);
      maybe_restore(exprasmlist,left.location,savedregs);

      { determine operator; op is left unassigned for any other node type }
      if nodetype=shln then
        op:=OP_SHL
      else if nodetype=shrn then
        op:=OP_SHR;

      if is_64bitint(left.resulttype.def) then
        begin
          { already handled in 1st pass }
          internalerror(2002081501);
          (* Normally for 64-bit cpu's this here should be here,
             and only pass_1 need to be overriden, but dunno how to
             do that!
             (the disabled code below uses a freescratch flag that is
              no longer declared in this routine)

          location_reset(location,LOC_REGISTER,OS_64);

          { load left operator in a register }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);

          if (right.nodetype=ordconstn) then
            begin
              cg64.a_op64_const_reg(exprasmlist,op,tordconstnode(right).value,
                joinreg64(location.registerlow,location.registerhigh));
            end
          else
            begin
              { this should be handled in pass_1 }
              internalerror(2002081501);

              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist);
                  cg.a_load_loc_reg(exprasmlist,right.location,hcountreg);
                  freescratch := true;
                end
              else
                hcountreg:=right.location.register;
              cg64.a_op64_reg_reg(exprasmlist,op,hcountreg,
                joinreg64(location.registerlow,location.registerhigh));
              if freescratch then
                cg.free_scratch_reg(exprasmlist,hcountreg);
            end;*)
        end
      else
        begin
          { load left operator in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);

          if right.nodetype=ordconstn then
            begin
              { shifting by a constant is coded directly }
              { l shl 32 should be 0 imho, but neither TP nor Delphi do it
                in this way (FK)
              if right.value<=31 then
              }
              cg.a_op_const_reg(exprasmlist,op,
                tordconstnode(right).value and 31,location.register);
              {
              else
                emit_reg_reg(A_XOR,S_L,hregister1,
                  hregister1);
              }
            end
          else if right.location.loc=LOC_REGISTER then
            begin
              { the shift count is already in a register: use it as is }
              hcountreg:=right.location.register;
              cg.a_op_reg_reg(exprasmlist,op,OS_INT,hcountreg,
                location.register);
            end
          else
            begin
              { load right operator in a scratch register - this is done
                since most target cpu which will use this node do not
                support a shift count in a mem. location (cec) }
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              hcountreg:=cg.get_scratch_reg_int(exprasmlist);
              cg.a_load_loc_reg(exprasmlist,right.location,hcountreg);
              cg.a_op_reg_reg(exprasmlist,op,OS_INT,hcountreg,
                location.register);
              cg.free_scratch_reg(exprasmlist,hcountreg);
            end;
        end;
    end;
  { unit initialization: install the generic code generator classes as
    the node classes for unary minus, div/mod and shl/shr }
  begin
     cunaryminusnode:=tcgunaryminusnode;
     cmoddivnode:=tcgmoddivnode;
     cshlshrnode:=tcgshlshrnode;
  end.
  409. {
  410. $Log$
  411. Revision 1.4 2002-09-17 18:54:02 jonas
  412. * a_load_reg_reg() now has two size parameters: source and dest. This
  413. allows some optimizations on architectures that don't encode the
  414. register size in the register name.
  415. Revision 1.3 2002/08/23 16:14:48 peter
  416. * tempgen cleanup
  417. * tt_noreuse temp type added that will be used in genentrycode
  418. Revision 1.2 2002/08/15 15:15:55 carl
  419. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  420. * more generic nodes for maths
  421. * several fixes for better m68k support
  422. Revision 1.1 2002/08/14 19:26:55 carl
  423. + generic int_to_real type conversion
  424. + generic unaryminus node
  425. }