ncgmat.pas 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate generic mathematical nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit ncgmat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,cpubase,cgbase,cginfo;
  23. type
  { Generic code generator node for the unary minus operator. }
  tcgunaryminusnode = class(tunaryminusnode)
     procedure pass_2;override;
  protected
     { This routine is called to change the sign of the
       floating point value in the floating point
       register r.

       This routine should be overridden, since
       the generic version is not optimal at all: it
       stores the register to a temporary memory location,
       manipulates the sign bit there and reloads the register.
       The generic version assumes that floating
       point values are stored in the register
       in IEEE-754 format, and it only accepts the sizes
       OS_F32 and OS_F64 (anything else raises an internal error).
     }
     procedure emit_float_sign_change(r: tregister; _size : tcgsize);virtual;
  end;
  { Generic code generator node for the div and mod operators. }
  tcgmoddivnode = class(tmoddivnode)
     procedure pass_2;override;
  protected
     { This routine must do an actual 32-bit division, be it
       signed or unsigned. The result must be stored into the
       @var(num) register.

       @param(signed Indicates if the division must be signed)
       @param(denum  Register containing the denominator)
       @param(num    Register containing the numerator, will also receive result)

       The actual optimizations regarding shifts have already
       been done and emitted, so this should really do a divide.
     }
     procedure emit_div_reg_reg(signed: boolean;denum,num : tregister);virtual;abstract;
     { This routine must do an actual 32-bit modulo, be it
       signed or unsigned. The result must be stored into the
       @var(num) register.

       @param(signed Indicates if the modulo must be signed)
       @param(denum  Register containing the denominator)
       @param(num    Register containing the numerator, will also receive result)

       The actual optimizations regarding shifts have already
       been done and emitted, so this should really do a modulo.
     }
     procedure emit_mod_reg_reg(signed: boolean;denum,num : tregister);virtual;abstract;
     { This routine must do an actual 64-bit division, be it
       signed or unsigned. The result must be stored into the
       @var(num) register.

       @param(signed Indicates if the division must be signed)
       @param(denum  Register containing the denominator)
       @param(num    Register containing the numerator, will also receive result)

       The actual optimizations regarding shifts have already
       been done and emitted, so this should really do a divide.

       Currently, this routine should only be implemented on
       64-bit systems, otherwise a helper is called in 1st pass.
       The default implementation in this unit raises an internal error.
     }
     procedure emit64_div_reg_reg(signed: boolean;denum,num : tregister64);virtual;
  end;
  { Generic code generator node for the shl and shr operators.
    Only non-64-bit operands are handled by pass_2; a 64-bit
    operand raises an internal error there. }
  tcgshlshrnode = class(tshlshrnode)
     procedure pass_2;override;
  end;
  77. implementation
  78. uses
  79. globtype,systems,
  80. cutils,verbose,globals,
  81. symconst,symdef,aasmbase,aasmtai,aasmcpu,defutil,
  82. pass_1,pass_2,
  83. ncon,
  84. cpuinfo,
  85. tgobj,ncgutil,cgobj,rgobj,rgcpu,paramgr,cg64f32;
  86. {*****************************************************************************
  87. TCGUNARYMINUSNODE
  88. *****************************************************************************}
  { Generic sign change of the floating point value held in fpu register r.

    The value is spilled to a temporary memory location, the IEEE-754
    sign bit of the 32-bit word that contains it is inverted, and the
    value is loaded back into r.

    @param(r     Floating point register whose value is negated in place)
    @param(_size Size of the value; only OS_F32 and OS_F64 are supported,
                 any other size raises internalerror 20020814)
  }
  procedure tcgunaryminusnode.emit_float_sign_change(r: tregister; _size : tcgsize);
    var
      href : treference;
      hreg : tregister;
      signofs : longint;
    begin
      { get a temporary memory reference to store the floating
        point value
      }
      tg.gettemp(exprasmlist,tcgsize2size[_size],tt_normal,href);
      { store the floating point value in the temporary memory area }
      cg.a_loadfpu_reg_ref(exprasmlist,_size,r,href);
      { only single and double ieee are supported }
      signofs:=0;
      if _size = OS_F64 then
        begin
          { on little-endian machine the most significant
            32-bit value is stored at the highest address
          }
          if target_info.endian = endian_little then
            signofs:=4;
        end
      else
        if _size <> OS_F32 then
          internalerror(20020814);
      { let href address the 32-bit word holding the sign bit }
      inc(href.offset,signofs);
      hreg := rg.getregisterint(exprasmlist,OS_32);
      { load the word containing the sign }
      cg.a_load_ref_reg(exprasmlist,OS_32,href,hreg);
      { sign-bit is bit 31/63 of single/double; it has to be inverted.
        The former "not / and $80000000 / or into memory" sequence could
        only ever set the bit, so negating a negative value left it
        negative. XOR flips the bit in both directions. }
      cg.a_op_const_reg(exprasmlist,OP_XOR,$80000000,hreg);
      { write the modified word back to the temporary }
      cg.a_load_reg_ref(exprasmlist,OS_32,hreg,href);
      rg.ungetregister(exprasmlist,hreg);
      { restore the reference so it addresses the start of the value again }
      dec(href.offset,signofs);
      { reload the negated floating point value into the register }
      cg.a_loadfpu_ref_reg(exprasmlist,_size,href,r);
      { the temporary is not needed anymore, give it back }
      tg.ungetiftemp(exprasmlist,href);
    end;
  { Second pass for unary minus: generates the code for the operand and
    then negates it. 64-bit operands go through cg64, floating point
    operands through emit_float_sign_change and everything else through
    an OP_NEG on a natural-sized integer register. }
  procedure tcgunaryminusnode.pass_2;
    begin
      { the operand is generated first, whatever its type is }
      secondpass(left);

      if is_64bit(left.resulttype.def) then
        begin
          { 64-bit operand: force it into a register pair and negate that }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_64,false);
          cg64.a_op64_loc_reg(exprasmlist,OP_NEG,location,
            joinreg64(location.registerlow,location.registerhigh));
          exit;
        end;

      { default result: integer register of natural size; the floating
        point cases below reset the location again }
      location_reset(location,LOC_REGISTER,OS_INT);
      case left.location.loc of
        LOC_REGISTER:
          begin
            { negate in place, reusing the operand register }
            location.register:=left.location.register;
            cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,
              location.register,location.register);
          end;
        LOC_CREGISTER:
          begin
            { copy to a fresh register first, then negate the copy }
            location.register:=rg.getregisterint(exprasmlist,OS_INT);
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,
              left.location.register,location.register);
            cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,
              location.register,location.register);
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            reference_release(exprasmlist,left.location.reference);
            if (left.resulttype.def.deftype<>floatdef) then
              begin
                location.register:=rg.getregisterint(exprasmlist,OS_INT);
                { the size is OS_INT, since pass_1 converts
                  everything to a signed natural value anyway
                }
                cg.a_load_ref_reg(exprasmlist,OS_INT,
                  left.location.reference,location.register);
                cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,
                  location.register,location.register);
              end
            else
              begin
                { floating point value in memory: load it into an fpu
                  register and change its sign there }
                location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
                location.register:=rg.getregisterfpu(exprasmlist);
                cg.a_loadfpu_ref_reg(exprasmlist,
                  def_cgsize(left.resulttype.def),
                  left.location.reference,location.register);
                emit_float_sign_change(location.register,
                  def_cgsize(left.resulttype.def));
              end;
          end;
        LOC_FPUREGISTER:
          begin
            { change the sign directly in the operand's fpu register }
            location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
            location.register:=left.location.register;
            emit_float_sign_change(location.register,
              def_cgsize(left.resulttype.def));
          end;
        LOC_CFPUREGISTER:
          begin
            { copy to a fresh fpu register, then change the sign of the copy }
            location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
            location.register:=rg.getregisterfpu(exprasmlist);
            cg.a_loadfpu_reg_reg(exprasmlist,left.location.register,
              location.register);
            emit_float_sign_change(location.register,
              def_cgsize(left.resulttype.def));
          end;
        else
          internalerror(200203225);
      end;
    end;
  206. {*****************************************************************************
  207. TCGMODDIVNODE
  208. *****************************************************************************}
  { Default 64-bit division: not implemented in the generic node.
    64-bit div/mod is expected to be handled in pass_1 already (JM),
    so ending up here means pass_1 was overridden without this
    routine being overridden as well. Always raises internalerror
    200109052; none of the parameters are used. }
  procedure tcgmoddivnode.emit64_div_reg_reg(signed: boolean; denum,num:tregister64);
    begin
      internalerror(200109052);
    end;
  { Second pass for div/mod.

    Generates code for both operands, then either
     - calls emit64_div_reg_reg for 64-bit results,
     - replaces a division by a constant power of two with a shift, or
     - emits a zero-divisor check followed by emit_div_reg_reg or
       emit_mod_reg_reg.
    The result ends up in a register; an overflow check is emitted last.
  }
  procedure tcgmoddivnode.pass_2;
    var
      hreg1 : tregister;
      hdenom : tregister;
      power : longint;
      hl : tasmlabel;
      pushedregs : tmaybesave;
    begin
      secondpass(left);
      if codegenerror then
        exit;
{$ifndef newra}
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif newra}
      secondpass(right);
{$ifndef newra}
      maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
      if codegenerror then
        exit;
      location_copy(location,left.location);

      if is_64bit(resulttype.def) then
        begin
          { this code valid for 64-bit cpu's only ,
            otherwise helpers are called in pass_1
          }
          { location already is a copy of left.location (see above); force
            it into a register. It must NOT be overwritten with
            left.location again afterwards, otherwise the register pair
            passed below could refer to a non-register location. }
          location_force_reg(exprasmlist,location,OS_64,false);
          location_force_reg(exprasmlist,right.location,OS_64,false);
          emit64_div_reg_reg(is_signed(left.resulttype.def),
            joinreg64(right.location.registerlow,right.location.registerhigh),
            joinreg64(location.registerlow,location.registerhigh));
        end
      else
        begin
          { put numerator in register }
          location_force_reg(exprasmlist,left.location,OS_INT,false);
          hreg1:=left.location.register;

          if (nodetype=divn) and
             (right.nodetype=ordconstn) and
             ispowerof2(tordconstnode(right).value,power) then
            begin
              { for signed numbers, the numerator must be adjusted before the
                shift instruction, but not with unsigned numbers! Otherwise,
                "Cardinal($ffffffff) div 16" overflows! (JM) }
              if is_signed(left.resulttype.def) then
                begin
                  objectlibrary.getlabel(hl);
                  { positive numerators need no adjustment: skip the add }
                  cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_GT,0,hreg1,hl);
                  if power=1 then
                    cg.a_op_const_reg(exprasmlist,OP_ADD,1,hreg1)
                  else
                    cg.a_op_const_reg(exprasmlist,OP_ADD,
                      tordconstnode(right).value-1,hreg1);
                  cg.a_label(exprasmlist,hl);
                  cg.a_op_const_reg(exprasmlist,OP_SAR,power,hreg1);
                end
              else { not signed }
                begin
                  cg.a_op_const_reg(exprasmlist,OP_SHR,power,hreg1);
                end;
            end
          else
            begin
              { bring denominator to hdenom; hdenom is only used
                for temporary purposes and released again below }
              hdenom := rg.getregisterint(exprasmlist,OS_INT);
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              cg.a_load_loc_reg(exprasmlist,right.location,hdenom);
              { verify if the divisor is zero, if so return an error
                immediately
              }
              objectlibrary.getlabel(hl);
              cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_NE,0,hdenom,hl);
              cg.a_param_const(exprasmlist,OS_S32,200,paramanager.getintparaloc(1));
              cg.a_call_name(exprasmlist,'FPC_HANDLERROR');
              cg.a_label(exprasmlist,hl);
              if nodetype = modn then
                emit_mod_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1)
              else
                emit_div_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1);
              { the result is in hreg1 (the num register), so the
                denominator register can be given back now instead of
                staying allocated }
              rg.ungetregisterint(exprasmlist,hdenom);
            end;

          location_reset(location,LOC_REGISTER,OS_INT);
          location.register:=hreg1;
        end;
      cg.g_overflowcheck(exprasmlist,self);
    end;
{*****************************************************************************
                             TCGSHLSHRNODE
*****************************************************************************}
  { Second pass for shl/shr.

    Generates code for both operands and then shifts the left operand
    (forced into a natural-sized integer register) either by a constant
    (masked with 31) or by a count held in a register. 64-bit operands
    are not handled here and raise internalerror 2002081501.
  }
  procedure tcgshlshrnode.pass_2;
    var
      hcountreg : tregister;
      op : topcg;
      pushedregs : tmaybesave;
      freescratch : boolean;
    begin
      freescratch:=false;
      secondpass(left);
{$ifndef newra}
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif newra}
      secondpass(right);
{$ifndef newra}
      maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
      { determine operator }
      case nodetype of
        shln: op:=OP_SHL;
        shrn: op:=OP_SHR;
        else
          { never leave op uninitialised for an unexpected node type }
          internalerror(2013120102);
      end;

      if is_64bit(left.resulttype.def) then
        begin
          { already handled in 1st pass }
          internalerror(2002081501);
          (* Normally for 64-bit cpu's this here should be here,
             and only pass_1 need to be overridden, but dunno how to
             do that!
          location_reset(location,LOC_REGISTER,OS_64);

          { load left operator in a register }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);

          if (right.nodetype=ordconstn) then
            begin
              cg64.a_op64_const_reg(exprasmlist,op,tordconstnode(right).value,
                joinreg64(location.registerlow,location.registerhigh));
            end
          else
            begin
              { this should be handled in pass_1 }
              internalerror(2002081501);

              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist);
                  cg.a_load_loc_reg(exprasmlist,right.location,hcountreg);
                  freescratch := true;
                end
              else
                hcountreg:=right.location.register;
              cg64.a_op64_reg_reg(exprasmlist,op,hcountreg,
                joinreg64(location.registerlow,location.registerhigh));
              if freescratch then
                cg.free_scratch_reg(exprasmlist,hcountreg);
            end;*)
        end
      else
        begin
          { load left operators in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);

          { shifting by a constant directly coded: }
          if (right.nodetype=ordconstn) then
            begin
              { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)
                if right.value<=31 then
              }
              cg.a_op_const_reg(exprasmlist,op,tordconstnode(right).value and 31,
                location.register);
              {
              else
                emit_reg_reg(A_XOR,S_L,hregister1,
                  hregister1);
              }
            end
          else
            begin
              { load right operators in a register - this
                is done since most target cpu which will use this
                node do not support a shift count in a mem. location (cec)
              }
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
{$ifdef newra}
                  hcountreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist,OS_INT);
{$endif}
                  { remember that the count register was allocated here,
                    so it is given back after the shift }
                  freescratch := true;
                  cg.a_load_loc_reg(exprasmlist,right.location,hcountreg);
                end
              else
                hcountreg:=right.location.register;
              cg.a_op_reg_reg(exprasmlist,op,OS_INT,hcountreg,location.register);
              if freescratch then
{$ifdef newra}
                rg.ungetregisterint(exprasmlist,hcountreg);
{$else}
                cg.free_scratch_reg(exprasmlist,hcountreg);
{$endif}
            end;
        end;
    end;
  417. begin
  418. cmoddivnode:=tcgmoddivnode;
  419. cunaryminusnode:=tcgunaryminusnode;
  420. cshlshrnode:=tcgshlshrnode;
  421. end.
  422. {
  423. $Log$
  424. Revision 1.9 2003-04-23 20:16:04 peter
  425. + added currency support based on int64
  426. + is_64bit for use in cg units instead of is_64bitint
  427. * removed cgmessage from n386add, replace with internalerrors
  428. Revision 1.8 2003/04/22 10:09:35 daniel
  429. + Implemented the actual register allocator
  430. + Scratch registers unavailable when new register allocator used
  431. + maybe_save/maybe_restore unavailable when new register allocator used
  432. Revision 1.7 2003/03/28 19:16:56 peter
  433. * generic constructor working for i386
  434. * remove fixed self register
  435. * esi added as address register for i386
  436. Revision 1.6 2003/02/19 22:00:14 daniel
  437. * Code generator converted to new register notation
  438. - Horribily outdated todo.txt removed
  439. Revision 1.5 2002/11/25 17:43:18 peter
  440. * splitted defbase in defutil,symutil,defcmp
  441. * merged isconvertable and is_equal into compare_defs(_ext)
  442. * made operator search faster by walking the list only once
  443. Revision 1.4 2002/09/17 18:54:02 jonas
  444. * a_load_reg_reg() now has two size parameters: source and dest. This
  445. allows some optimizations on architectures that don't encode the
  446. register size in the register name.
  447. Revision 1.3 2002/08/23 16:14:48 peter
  448. * tempgen cleanup
  449. * tt_noreuse temp type added that will be used in genentrycode
  450. Revision 1.2 2002/08/15 15:15:55 carl
  451. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  452. * more generic nodes for maths
  453. * several fixes for better m68k support
  454. Revision 1.1 2002/08/14 19:26:55 carl
  455. + generic int_to_real type conversion
  456. + generic unaryminus node
  457. }