ncgmat.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate generic mathematical nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit ncgmat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,cpubase,cgbase,cginfo;
  23. type
  { Generic code generator node for the unary minus operator. }
  tcgunaryminusnode = class(tunaryminusnode)
    procedure pass_2; override;
  protected
    { Changes the sign of the floating point value held in the
      floating point register r; _size is the size of that value.

      Descendants should override this routine: the generic version
      is far from optimal, and it assumes that floating point values
      are kept in the register in IEEE-754 format.
    }
    procedure emit_float_sign_change(r: tregister; _size: tcgsize); virtual;
  end;
  { Generic code generator node for the div and mod operators. }
  tcgmoddivnode = class(tmoddivnode)
    procedure pass_2; override;
  protected
    { Must emit an actual 32-bit division, signed or unsigned.
      The result has to be left in the @var(num) register.

      @param(signed Indicates if the division must be signed)
      @param(denum Register containing the denominator)
      @param(num Register containing the numerator, will also receive
                 the result)

      Optimizations based on shifts have already been done and
      emitted by the caller, so this routine really has to divide.
    }
    procedure emit_div_reg_reg(signed: boolean; denum, num: tregister); virtual; abstract;

    { Must emit an actual 32-bit modulo, signed or unsigned.
      The result has to be left in the @var(num) register.

      @param(signed Indicates if the modulo must be signed)
      @param(denum Register containing the denominator)
      @param(num Register containing the numerator, will also receive
                 the result)

      Optimizations based on shifts have already been done and
      emitted by the caller, so this routine really has to compute
      a modulo.
    }
    procedure emit_mod_reg_reg(signed: boolean; denum, num: tregister); virtual; abstract;

    { Must emit an actual 64-bit division, signed or unsigned.
      The result has to be left in the @var(num) register.

      @param(signed Indicates if the division must be signed)
      @param(denum Register containing the denominator)
      @param(num Register containing the numerator, will also receive
                 the result)

      Optimizations based on shifts have already been done and
      emitted by the caller, so this routine really has to divide.

      Currently this routine should only be implemented on 64-bit
      systems; otherwise a helper is called in the first pass.
    }
    procedure emit64_div_reg_reg(signed: boolean; denum, num: tregister64); virtual;
  end;
  { Generic code generator node for the shl and shr operators. }
  tcgshlshrnode = class(tshlshrnode)
    { second pass: emits the shift code }
    procedure pass_2; override;
  end;
  { Generic code generator node for the not operator. }
  tcgnotnode = class(tnotnode)
  protected
    { code generation for a boolean operand; has no generic
      version, so descendants must implement it }
    procedure second_boolean; virtual; abstract;
    { code generation for an integer operand (bitwise not) }
    procedure second_integer; virtual;
  public
    { second pass: calls second_boolean when the result type is
      boolean and second_integer otherwise }
    procedure pass_2; override;
  end;
  84. implementation
  85. uses
  86. globtype,systems,
  87. cutils,verbose,globals,
  88. symconst,symdef,aasmbase,aasmtai,aasmcpu,defutil,
  89. pass_1,pass_2,
  90. ncon,
  91. cpuinfo,
  92. tgobj,ncgutil,cgobj,rgobj,paramgr,cg64f32;
  93. {*****************************************************************************
  94. TCGUNARYMINUSNODE
  95. *****************************************************************************}
  { Changes the sign of the floating point value in FPU register r.

    Generic fallback: the value is stored to a temporary, the sign bit
    in its most significant 32-bit word is flipped with integer
    operations, and the result is loaded back into r. Only IEEE single
    (OS_F32) and double (OS_F64) are supported; any other _size raises
    internal error 20020814.
  }
  procedure tcgunaryminusnode.emit_float_sign_change(r: tregister; _size : tcgsize);
    var
      href    : treference;  { start of the temporary holding the value }
      signref : treference;  { 32-bit word of the temporary that holds the sign bit }
      hreg    : tregister;
    begin
      { get a temporary memory reference to store the floating
        point value
      }
      tg.gettemp(exprasmlist,tcgsize2size[_size],tt_normal,href);
      { store the floating point value in the temporary memory area }
      cg.a_loadfpu_reg_ref(exprasmlist,_size,r,href);
      { only single and double ieee are supported }
      signref:=href;
      if _size = OS_F64 then
        begin
          { on little-endian machine the most significant
            32-bit value is stored at the highest address
          }
          if target_info.endian = endian_little then
            inc(signref.offset,4);
        end
      else
        if _size <> OS_F32 then
          internalerror(20020814);
      hreg := rg.getregisterint(exprasmlist,OS_32);
      { load the word containing the sign bit }
      cg.a_load_ref_reg(exprasmlist,OS_32,signref,hreg);
      { sign-bit is bit 31/63 of single/double: toggle it. The sign
        must be flipped, not merely set, otherwise negating a negative
        value would leave it negative.
      }
      cg.a_op_const_reg(exprasmlist,OP_XOR,OS_32,aword($80000000),hreg);
      { write the modified word back to the temporary }
      cg.a_load_reg_ref(exprasmlist,OS_32,hreg,signref);
      rg.ungetregisterint(exprasmlist,hreg);
      { reload the complete floating point value from the start of
        the temporary
      }
      cg.a_loadfpu_ref_reg(exprasmlist,_size,href,r);
      { the temporary is no longer needed }
      tg.ungetiftemp(exprasmlist,href);
    end;
  { Second pass of the unary minus node: evaluates the operand and
    emits the code that negates it. 64-bit operands are negated through
    cg64, floating point operands through emit_float_sign_change and
    all other operands with an OP_NEG on an OS_INT register.
  }
  procedure tcgunaryminusnode.pass_2;
    var
      operand_is_64bit : boolean;

    { negates, in place, the integer held in location.register }
    procedure negate_int_result;
      begin
        cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,location.register,
          location.register);
      end;

    { makes the node location an FPU register location sized after
      the result type }
    procedure reset_to_fpu_result;
      begin
        location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
      end;

    { changes the sign of the float held in location.register }
    procedure negate_float_result;
      begin
        emit_float_sign_change(location.register,def_cgsize(left.resulttype.def));
      end;

    begin
      operand_is_64bit:=is_64bit(left.resulttype.def);
      secondpass(left);

      if operand_is_64bit then
        begin
          { bring the operand into a register pair and negate it there }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_64,false);
          cg64.a_op64_loc_reg(exprasmlist,OP_NEG,location,
            joinreg64(location.registerlow,location.registerhigh));
          exit;
        end;

      location_reset(location,LOC_REGISTER,OS_INT);
      case left.location.loc of
        LOC_REGISTER:
          begin
            { reuse the operand register for the result }
            location.register:=left.location.register;
            negate_int_result;
          end;
        LOC_CREGISTER:
          begin
            { copy the operand to a fresh register before negating it }
            location.register:=rg.getregisterint(exprasmlist,OS_INT);
            cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,left.location.register,
              location.register);
            negate_int_result;
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            reference_release(exprasmlist,left.location.reference);
            if (left.resulttype.def.deftype=floatdef) then
              begin
                reset_to_fpu_result;
                location.register:=rg.getregisterfpu(exprasmlist,location.size);
                cg.a_loadfpu_ref_reg(exprasmlist,
                  def_cgsize(left.resulttype.def),
                  left.location.reference,location.register);
                negate_float_result;
              end
            else
              begin
                location.register:=rg.getregisterint(exprasmlist,OS_INT);
                { the size is OS_INT, since pass_1 converts everything
                  to a signed natural value anyway
                }
                cg.a_load_ref_reg(exprasmlist,OS_INT,
                  left.location.reference,location.register);
                negate_int_result;
              end;
          end;
        LOC_FPUREGISTER:
          begin
            { reuse the operand FPU register for the result }
            reset_to_fpu_result;
            location.register:=left.location.register;
            negate_float_result;
          end;
        LOC_CFPUREGISTER:
          begin
            { copy the operand to a fresh FPU register before negating it }
            reset_to_fpu_result;
            location.register:=rg.getregisterfpu(exprasmlist,location.size);
            cg.a_loadfpu_reg_reg(exprasmlist,left.location.register,location.register);
            negate_float_result;
          end;
        else
          internalerror(200203225);
      end;
    end;
  213. {*****************************************************************************
  214. TCGMODDIVNODE
  215. *****************************************************************************}
  { Default 64-bit division emitter. 64-bit divisions should already
    have been handled in pass_1 (JM), unless pass_1 is overridden, so
    reaching this generic version is reported as an internal error.
  }
  procedure tcgmoddivnode.emit64_div_reg_reg(signed: boolean; denum,num:tregister64);
    begin
      internalerror(200109052);
    end;
  { Second pass of the div/mod node.

    Evaluates both operands, then either
      - hands 64-bit operands to emit64_div_reg_reg,
      - replaces a division by a constant power of two with a shift, or
      - emits a divisor-is-zero check followed by emit_div_reg_reg or
        emit_mod_reg_reg.
    The result ends up in a register (the numerator register).
  }
  procedure tcgmoddivnode.pass_2;
    var
      hreg1      : tregister;   { numerator, also receives the result }
      hdenom     : tregister;   { temporary register for the denominator }
      power      : longint;     { log2 of a power-of-two constant divisor }
      hl         : tasmlabel;
      pushedregs : tmaybesave;
    begin
      secondpass(left);
      if codegenerror then
        exit;
{$ifndef newra}
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif newra}
      secondpass(right);
{$ifndef newra}
      maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
      if codegenerror then
        exit;
      location_copy(location,left.location);

      if is_64bit(resulttype.def) then
        begin
          { this code valid for 64-bit cpu's only ,
            otherwise helpers are called in pass_1
          }
          { force the numerator into registers first and only then take
            it over as the result location; copying left.location after
            forcing "location" would throw the forced registers away }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);
          location_force_reg(exprasmlist,right.location,OS_64,false);
          emit64_div_reg_reg(is_signed(left.resulttype.def),
            joinreg64(right.location.registerlow,right.location.registerhigh),
            joinreg64(location.registerlow,location.registerhigh));
        end
      else
        begin
          { put numerator in register }
          location_force_reg(exprasmlist,left.location,OS_INT,false);
          hreg1:=left.location.register;

          if (nodetype=divn) and
             (right.nodetype=ordconstn) and
             ispowerof2(tordconstnode(right).value,power) then
            begin
              { for signed numbers, the numerator must be adjusted before the
                shift instruction, but not with unsigned numbers! Otherwise,
                "Cardinal($ffffffff) div 16" overflows! (JM) }
              if is_signed(left.resulttype.def) then
                begin
                  objectlibrary.getlabel(hl);
                  { positive numerators need no adjustment }
                  cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_GT,0,hreg1,hl);
                  { add divisor-1 so that the arithmetic shift rounds
                    towards zero (for power=1 this adds 1) }
                  cg.a_op_const_reg(exprasmlist,OP_ADD,OS_INT,tordconstnode(right).value-1,hreg1);
                  cg.a_label(exprasmlist,hl);
                  cg.a_op_const_reg(exprasmlist,OP_SAR,OS_INT,power,hreg1);
                end
              else { not signed }
                cg.a_op_const_reg(exprasmlist,OP_SHR,OS_INT,power,hreg1);
            end
          else
            begin
              { bring denominator to hdenom; hdenom is only used for
                temporary purposes }
              hdenom := rg.getregisterint(exprasmlist,OS_INT);
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,hdenom);
              { verify if the divisor is zero, if so return an error
                immediately
              }
              objectlibrary.getlabel(hl);
              cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_NE,0,hdenom,hl);
              cg.a_param_const(exprasmlist,OS_S32,200,paramanager.getintparaloc(1));
              cg.a_call_name(exprasmlist,'FPC_HANDLERROR');
              cg.a_label(exprasmlist,hl);
              if nodetype = modn then
                emit_mod_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1)
              else
                emit_div_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1);
              { the denominator register was only needed for the
                operation itself, give it back to the allocator.
                NOTE(review): assumes the emit_*_reg_reg overrides do not
                release denum themselves - confirm against the cpu
                specific descendants }
              rg.ungetregisterint(exprasmlist,hdenom);
            end;

          location_reset(location,LOC_REGISTER,OS_INT);
          location.register:=hreg1;
        end;
      cg.g_overflowcheck(exprasmlist,self);
    end;
{*****************************************************************************
                             TCGSHLSHRNODE
*****************************************************************************}
  { Second pass of the shl/shr node.

    Evaluates both operands, brings the left operand into an OS_INT
    register and shifts it either by a constant (masked with 31) or by
    a count held in a register. 64-bit operands are expected to have
    been handled in the first pass and raise an internal error here.
  }
  procedure tcgshlshrnode.pass_2;
    var
      hcountreg   : tregister;   { register holding a non-constant shift count }
      op          : topcg;
      pushedregs  : tmaybesave;
      freescratch : boolean;     { true if hcountreg was allocated here }
    begin
      freescratch:=false;
      secondpass(left);
{$ifndef newra}
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif newra}
      secondpass(right);
{$ifndef newra}
      maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
      { determine operator }
      case nodetype of
        shln: op:=OP_SHL;
        shrn: op:=OP_SHR;
        else
          { any other node type would leave op uninitialized }
          internalerror(2013120102);
      end;

      if is_64bit(left.resulttype.def) then
        begin
          { already handled in 1st pass }
          internalerror(2002081501);
          (* Normally for 64-bit cpu's this here should be here,
             and only pass_1 need to be overriden, but dunno how to
             do that!
          location_reset(location,LOC_REGISTER,OS_64);
          { load left operator in a register }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);
          if (right.nodetype=ordconstn) then
            begin
              cg64.a_op64_const_reg(exprasmlist,op,tordconstnode(right).value,
                joinreg64(location.registerlow,location.registerhigh));
            end
          else
            begin
              { this should be handled in pass_1 }
              internalerror(2002081501);
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist);
                  cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,hcountreg);
                  freescratch := true;
                end
              else
                hcountreg:=right.location.register;
              cg64.a_op64_reg_reg(exprasmlist,op,hcountreg,
                joinreg64(location.registerlow,location.registerhigh));
              if freescratch then
                cg.free_scratch_reg(exprasmlist,hcountreg);
            end;*)
        end
      else
        begin
          { load left operators in a register }
          location_copy(location,left.location);
          location_force_reg(exprasmlist,location,OS_INT,false);

          { shifting by a constant directly coded: }
          if (right.nodetype=ordconstn) then
            begin
              { l shl 32 should 0 imho, but neither TP nor Delphi do it
                in this way (FK), so the count is simply masked with 31
                instead of clearing the register for counts above 31
              }
              cg.a_op_const_reg(exprasmlist,op,location.size,
                tordconstnode(right).value and 31,location.register);
            end
          else
            begin
              { load right operators in a register - this
                is done since most target cpu which will use this
                node do not support a shift count in a mem. location (cec)
              }
              if right.location.loc<>LOC_REGISTER then
                begin
                  if right.location.loc<>LOC_CREGISTER then
                    location_release(exprasmlist,right.location);
{$ifdef newra}
                  hcountreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
                  hcountreg:=cg.get_scratch_reg_int(exprasmlist,OS_INT);
{$endif}
                  freescratch := true;
                  cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,hcountreg);
                end
              else
                hcountreg:=right.location.register;
              cg.a_op_reg_reg(exprasmlist,op,OS_INT,hcountreg,location.register);
              { give back the count register if it was allocated here }
{$ifdef newra}
              if freescratch then
                rg.ungetregisterint(exprasmlist,hcountreg);
{$else}
              if freescratch then
                cg.free_scratch_reg(exprasmlist,hcountreg);
{$endif}
            end;
        end;
    end;
  421. {*****************************************************************************
  422. TCGNOTNODE
  423. *****************************************************************************}
  { Code generation for "not" on an integer operand: the operand is
    forced into a register (pair), that location becomes the result
    location, and the bitwise NOT is applied through cg64 for 64-bit
    operands or cg otherwise.
  }
  procedure tcgnotnode.second_integer;
    var
      operand_is_64bit : boolean;
    begin
      operand_is_64bit:=is_64bit(left.resulttype.def);

      { both cases evaluate the operand and bring it into registers the
        same way }
      secondpass(left);
      location_force_reg(exprasmlist,left.location,def_cgsize(left.resulttype.def),false);
      location_copy(location,left.location);

      { perform the NOT operation }
      if operand_is_64bit then
        cg64.a_op64_reg_reg(exprasmlist,OP_NOT,left.location.register64,location.register64)
      else
        cg.a_op_reg_reg(exprasmlist,OP_NOT,location.size,location.register,location.register);
    end;
  { Second pass of the not node: integer results are handled by
    second_integer, boolean results by second_boolean.
  }
  procedure tcgnotnode.pass_2;
    begin
      if not is_boolean(resulttype.def) then
        second_integer
      else
        second_boolean;
    end;
  { unit initialization: point the node class variables at the generic
    code generator classes of this unit (the variables themselves are
    declared outside this unit, presumably in nmat) }
  begin
    cunaryminusnode:=tcgunaryminusnode;
    cmoddivnode:=tcgmoddivnode;
    cshlshrnode:=tcgshlshrnode;
    cnotnode:=tcgnotnode;
  end.
  456. {
  457. $Log$
  458. Revision 1.12 2003-06-01 21:38:06 peter
  459. * getregisterfpu size parameter added
  460. * op_const_reg size parameter added
  461. * sparc updates
  462. Revision 1.11 2003/05/30 23:49:18 jonas
  463. * a_load_loc_reg now has an extra size parameter for the destination
  464. register (properly fixes what I worked around in revision 1.106 of
  465. ncgutil.pas)
  466. Revision 1.10 2003/05/23 14:27:35 peter
  467. * remove some unit dependencies
  468. * current_procinfo changes to store more info
  469. Revision 1.9 2003/04/23 20:16:04 peter
  470. + added currency support based on int64
  471. + is_64bit for use in cg units instead of is_64bitint
  472. * removed cgmessage from n386add, replace with internalerrors
  473. Revision 1.8 2003/04/22 10:09:35 daniel
  474. + Implemented the actual register allocator
  475. + Scratch registers unavailable when new register allocator used
  476. + maybe_save/maybe_restore unavailable when new register allocator used
  477. Revision 1.7 2003/03/28 19:16:56 peter
  478. * generic constructor working for i386
  479. * remove fixed self register
  480. * esi added as address register for i386
  481. Revision 1.6 2003/02/19 22:00:14 daniel
  482. * Code generator converted to new register notation
  483. - Horribily outdated todo.txt removed
  484. Revision 1.5 2002/11/25 17:43:18 peter
  485. * splitted defbase in defutil,symutil,defcmp
  486. * merged isconvertable and is_equal into compare_defs(_ext)
  487. * made operator search faster by walking the list only once
  488. Revision 1.4 2002/09/17 18:54:02 jonas
  489. * a_load_reg_reg() now has two size parameters: source and dest. This
  490. allows some optimizations on architectures that don't encode the
  491. register size in the register name.
  492. Revision 1.3 2002/08/23 16:14:48 peter
  493. * tempgen cleanup
  494. * tt_noreuse temp type added that will be used in genentrycode
  495. Revision 1.2 2002/08/15 15:15:55 carl
  496. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  497. * more generic nodes for maths
  498. * several fixes for better m68k support
  499. Revision 1.1 2002/08/14 19:26:55 carl
  500. + generic int_to_real type conversion
  501. + generic unaryminus node
  502. }