ncgmat.pas 23 KB


  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate generic mathematical nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit ncgmat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,cpubase,cgbase;
  23. type
  { Generic code generator node for the unary minus operator.
    pass_2 dispatches to one of the second_* methods depending on the
    type of the operand. }
  tcgunaryminusnode = class(tunaryminusnode)
  protected
    { Changes the sign of the floating point value held in the floating
      point register r; _size is the size of that value.

      Targets should override this routine: the generic version is not
      optimal at all and assumes that floating point values are stored
      in IEEE-754 format. }
    procedure emit_float_sign_change(r: tregister; _size: tcgsize); virtual;
{$ifdef SUPPORT_MMX}
    { Negation of an MMX-able array; has to be supplied by the target. }
    procedure second_mmx; virtual; abstract;
{$endif SUPPORT_MMX}
{$ifndef cpu64bit}
    { Negation of a 64-bit operand on a CPU without 64-bit registers. }
    procedure second_64bit; virtual;
{$endif cpu64bit}
    { Negation of an integer operand of native register size. }
    procedure second_integer; virtual;
    { Negation of a floating point operand. }
    procedure second_float; virtual;
  public
    procedure pass_2; override;
  end;
  { Generic code generator node for the div and mod operators. }
  tcgmoddivnode = class(tmoddivnode)
    procedure pass_2; override;
  protected
    { This routine must do an actual 32-bit division, be it signed or
      unsigned. The result must be stored into the @var(num) register.

      @param(signed Indicates if the division must be signed)
      @param(denum  Register containing the denominator)
      @param(num    Register containing the numerator, will also receive
                    the result)

      The optimizations regarding shifts have already been done and
      emitted, so this really has to do a divide. }
    procedure emit_div_reg_reg(signed: boolean; denum, num: tregister); virtual; abstract;

    { This routine must do an actual 32-bit modulo, be it signed or
      unsigned. The result must be stored into the @var(num) register.

      @param(signed Indicates if the modulo must be signed)
      @param(denum  Register containing the denominator)
      @param(num    Register containing the numerator, will also receive
                    the result)

      The optimizations regarding shifts have already been done and
      emitted, so this really has to do a modulo. }
    procedure emit_mod_reg_reg(signed: boolean; denum, num: tregister); virtual; abstract;

    { This routine must do an actual 64-bit division, be it signed or
      unsigned. The result must be stored into the @var(num) register.

      @param(signed Indicates if the division must be signed)
      @param(denum  Register containing the denominator)
      @param(num    Register containing the numerator, will also receive
                    the result)

      The optimizations regarding shifts have already been done and
      emitted, so this really has to do a divide.

      Currently, this routine should only be implemented on 64-bit
      systems, otherwise a helper is called in the 1st pass. }
    procedure emit64_div_reg_reg(signed: boolean; denum, num: tregister64); virtual;
  end;
  { Generic code generator node for the shl and shr operators. }
  tcgshlshrnode = class(tshlshrnode)
{$ifndef cpu64bit}
    { Shift of a 64-bit operand on a CPU without 64-bit registers. }
    procedure second_64bit; virtual;
{$endif cpu64bit}
    { Shift of an operand of native register size. }
    procedure second_integer; virtual;
    procedure pass_2; override;
  end;
  { Generic code generator node for the not operator. }
  tcgnotnode = class(tnotnode)
  protected
    { Boolean not; has to be supplied by the target. }
    procedure second_boolean; virtual; abstract;
{$ifdef SUPPORT_MMX}
    { Not of an MMX-able array; has to be supplied by the target. }
    procedure second_mmx; virtual; abstract;
{$endif SUPPORT_MMX}
{$ifndef cpu64bit}
    { Bitwise not of a 64-bit operand on a CPU without 64-bit registers. }
    procedure second_64bit; virtual;
{$endif cpu64bit}
    { Bitwise not of an integer operand. }
    procedure second_integer; virtual;
  public
    procedure pass_2; override;
  end;
  103. implementation
  104. uses
  105. globtype,systems,
  106. cutils,verbose,globals,
  107. symconst,symdef,aasmbase,aasmtai,aasmcpu,defutil,
  108. pass_1,pass_2,
  109. ncon,
  110. cpuinfo,
  111. tgobj,ncgutil,cgobj,paramgr
  112. {$ifndef cpu64bit}
  113. ,cg64f32
  114. {$endif cpu64bit}
  115. ;
  116. {*****************************************************************************
  117. TCGUNARYMINUSNODE
  118. *****************************************************************************}
  { Generic sign change of the floating point value in FPU register r.

    The value is stored to a temporary memory location, the sign bit is
    inverted there with an integer XOR, and the value is loaded back
    into r. Only IEEE single (OS_F32) and double (OS_F64) are supported;
    any other size raises an internal error.

    r     : floating point register holding the value, also receives
            the result
    _size : size of the floating point value }
  procedure tcgunaryminusnode.emit_float_sign_change(r: tregister; _size : tcgsize);
    var
      href    : treference;   { start of the temporary holding the value }
      signref : treference;   { 32-bit word of the temporary holding the sign bit }
    begin
      { get a temporary memory reference and store the floating point
        value in it }
      tg.gettemp(exprasmlist,tcgsize2size[_size],tt_normal,href);
      cg.a_loadfpu_reg_ref(exprasmlist,_size,r,href);

      { locate the 32-bit word that contains the sign bit; a separate
        reference is used so that href keeps pointing at the start of
        the value for the reload below }
      signref:=href;
      if _size = OS_F64 then
        begin
          { on little-endian machines the most significant 32-bit word
            of a double is stored at the highest address }
          if target_info.endian = endian_little then
            inc(signref.offset,4);
        end
      else
        { only single and double ieee are supported }
        if _size <> OS_F32 then
          internalerror(20020814);

      { the sign bit is bit 31/63 of single/double: invert it. An XOR is
        required here; OR-ing in the complemented sign bit can only ever
        set the bit, so a negative value would stay negative. }
      cg.a_op_const_ref(exprasmlist,OP_XOR,OS_32,aword($80000000),signref);

      { reload the modified value and give the temporary back }
      cg.a_loadfpu_ref_reg(exprasmlist,_size,href,r);
      tg.ungetiftemp(exprasmlist,href);
    end;
  {$ifndef cpu64bit}
  { Negates a 64-bit operand: the operand is brought into a register
    pair which is then negated in place. }
  procedure tcgunaryminusnode.second_64bit;
    var
      valuereg : tregister64;
    begin
      secondpass(left);

      { the result starts out as a copy of the operand, forced into
        registers }
      location_copy(location,left.location);
      location_force_reg(exprasmlist,location,OS_64,false);

      valuereg:=joinreg64(location.registerlow,location.registerhigh);
      cg64.a_op64_loc_reg(exprasmlist,OP_NEG,location,valuereg);
    end;
  {$endif cpu64bit}
  { Negates a floating point operand: the operand is brought into an
    FPU register, whose sign is then changed by
    emit_float_sign_change. }
  procedure tcgunaryminusnode.second_float;
    var
      operandsize : tcgsize;
    begin
      secondpass(left);
      operandsize:=def_cgsize(left.resulttype.def);
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));

      { first get the operand into location.register ... }
      case left.location.loc of
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            { operand lives in memory: load it into a fresh FPU register }
            reference_release(exprasmlist,left.location.reference);
            location.register:=cg.getfpuregister(exprasmlist,location.size);
            cg.a_loadfpu_ref_reg(exprasmlist,operandsize,
              left.location.reference,location.register);
          end;
        LOC_FPUREGISTER :
          { operand register can be reused for the result }
          location.register:=left.location.register;
        LOC_CFPUREGISTER :
          begin
            { constant register must not be modified: work on a copy }
            location.register:=cg.getfpuregister(exprasmlist,location.size);
            cg.a_loadfpu_reg_reg(exprasmlist,left.location.size,
              left.location.register,location.register);
          end;
        else
          internalerror(200306021);
      end;

      { ... then flip its sign }
      emit_float_sign_change(location.register,operandsize);
    end;
  { Negates an integer operand of native register size in place. }
  procedure tcgunaryminusnode.second_integer;
    begin
      secondpass(left);

      { the result starts out as a copy of the operand, forced into a
        register }
      location_copy(location,left.location);
      location_force_reg(exprasmlist,location,OS_INT,false);

      { negate the register in place }
      cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,
        location.register,location.register);
    end;
  { Selects the code generation routine matching the operand type.
    The checks are made in this order: 64-bit operand (only on CPUs
    without 64-bit registers), MMX-able array (only with SUPPORT_MMX),
    floating point; everything else is handled as an integer. }
  procedure tcgunaryminusnode.pass_2;
    begin
{$ifndef cpu64bit}
      if is_64bit(left.resulttype.def) then
        begin
          second_64bit;
          exit;
        end;
{$endif cpu64bit}
{$ifdef SUPPORT_MMX}
      if (cs_mmx in aktlocalswitches) and is_mmx_able_array(left.resulttype.def) then
        begin
          second_mmx;
          exit;
        end;
{$endif SUPPORT_MMX}
      if left.resulttype.def.deftype=floatdef then
        begin
          second_float;
          exit;
        end;
      second_integer;
    end;
  229. {*****************************************************************************
  230. TCGMODDIVNODE
  231. *****************************************************************************}
  { Default 64-bit division: always raises an internal error.
    64-bit divisions are expected to have been handled in pass_1
    already (unless pass_1 is overridden), so the generic code
    generator must never get here (JM). }
  procedure tcgmoddivnode.emit64_div_reg_reg(signed: boolean; denum,num:tregister64);
    begin
      internalerror(200109052);
    end;
  { Generates code for a div or mod node.

    Both operands are evaluated first. A 64-bit result is delegated to
    emit64_div_reg_reg. Otherwise a division by a constant power of two
    is turned into a shift, and every other case gets a run-time check
    for a zero divisor followed by emit_div_reg_reg/emit_mod_reg_reg.
    An overflow check on the result is emitted at the end. }
  procedure tcgmoddivnode.pass_2;
    var
      hreg1    : tregister;     { numerator, also receives the result }
      hdenom   : tregister;     { temporary register for the denominator }
      power    : longint;       { log2 of a constant power-of-two divisor }
      hl       : tasmlabel;
      paraloc1 : tparalocation;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;
      location_copy(location,left.location);

{$ifndef cpu64bit}
      if is_64bit(resulttype.def) then
        begin
          { this code valid for 64-bit cpu's only ,
            otherwise helpers are called in pass_1
          }
          { Force the numerator itself into registers and only then
            take it over as result location. Forcing "location" and
            copying left.location over it afterwards would throw the
            freshly loaded register pair away again. }
          location_force_reg(exprasmlist,left.location,OS_64,false);
          location_copy(location,left.location);
          location_force_reg(exprasmlist,right.location,OS_64,false);
          emit64_div_reg_reg(is_signed(left.resulttype.def),
            joinreg64(right.location.registerlow,right.location.registerhigh),
            joinreg64(location.registerlow,location.registerhigh));
        end
      else
{$endif cpu64bit}
        begin
          { put numerator in register }
          location_force_reg(exprasmlist,left.location,OS_INT,false);
          hreg1:=left.location.register;
          if (nodetype=divn) and
             (right.nodetype=ordconstn) and
             ispowerof2(tordconstnode(right).value,power) then
            begin
              { for signed numbers, the numerator must be adjusted before the
                shift instruction, but not with unsigned numbers! Otherwise,
                "Cardinal($ffffffff) div 16" overflows! (JM) }
              if is_signed(left.resulttype.def) then
                begin
                  { the adjustment is skipped via hl when the comparison
                    of the numerator against 0 with OC_GT succeeds }
                  objectlibrary.getlabel(hl);
                  cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_GT,0,hreg1,hl);
                  if power=1 then
                    cg.a_op_const_reg(exprasmlist,OP_ADD,OS_INT,1,hreg1)
                  else
                    cg.a_op_const_reg(exprasmlist,OP_ADD,OS_INT,tordconstnode(right).value-1,hreg1);
                  cg.a_label(exprasmlist,hl);
                  cg.a_op_const_reg(exprasmlist,OP_SAR,OS_INT,power,hreg1);
                end
              else { not signed }
                cg.a_op_const_reg(exprasmlist,OP_SHR,OS_INT,power,hreg1);
            end
          else
            begin
              { bring denominator to hdenom }
              { hdenom is always free, it's }
              { only used for temporary     }
              { purposes                    }
              { NOTE(review): hdenom is not handed back with
                cg.ungetregister in this routine - confirm whether the
                emit_*_reg_reg implementations release it }
              hdenom := cg.getintregister(exprasmlist,OS_INT);
              if right.location.loc<>LOC_CREGISTER then
                location_release(exprasmlist,right.location);
              cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,hdenom);
              { verify if the divisor is zero, if so return an error
                immediately
              }
              objectlibrary.getlabel(hl);
              cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_NE,0,hdenom,hl);
              { divisor is zero: call FPC_HANDLERROR with the constant 200 }
              paraloc1:=paramanager.getintparaloc(pocall_default,1);
              paramanager.allocparaloc(exprasmlist,paraloc1);
              cg.a_param_const(exprasmlist,OS_S32,200,paraloc1);
              paramanager.freeparaloc(exprasmlist,paraloc1);
              cg.a_call_name(exprasmlist,'FPC_HANDLERROR');
              cg.a_label(exprasmlist,hl);
              if nodetype = modn then
                emit_mod_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1)
              else
                emit_div_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1);
            end;
          { the result ends up in the numerator register }
          location_reset(location,LOC_REGISTER,OS_INT);
          location.register:=hreg1;
        end;
      cg.g_overflowcheck(exprasmlist,location,resulttype.def);
    end;
  326. {*****************************************************************************
  327. TCGSHLRSHRNODE
  328. *****************************************************************************}
  {$ifndef cpu64bit}
  { 64-bit shift on a CPU without 64-bit registers: always raises an
    internal error, because such shifts are expected to be already
    handled in the 1st pass.

    This method is only compiled when cpu64bit is NOT defined, so the
    former "ifdef cpu64bit" variant inside it could never be compiled;
    it has been removed together with the locals only it used. }
  procedure tcgshlshrnode.second_64bit;
    begin
      { already handled in 1st pass }
      internalerror(2002081501);
    end;
  {$endif cpu64bit}
  { Generates a shift of an operand of native register size.
    Expects both operands to have been through secondpass already
    (pass_2 does that before calling this method). }
  procedure tcgshlshrnode.second_integer;
    var
      shiftop     : topcg;
      countreg    : tregister;
      countiscopy : boolean;   { true if countreg was allocated here }
    begin
      { determine operator }
      case nodetype of
        shln: shiftop:=OP_SHL;
        shrn: shiftop:=OP_SHR;
      end;

      { the value to shift goes into a register, which also becomes
        the result }
      location_copy(location,left.location);
      location_force_reg(exprasmlist,location,OS_INT,false);

      { a constant shift count is coded directly into the instruction }
      if right.nodetype=ordconstn then
        begin
          { the count is masked with 31: "l shl 32" should be 0 imho,
            but neither TP nor Delphi do it in this way (FK) }
          cg.a_op_const_reg(exprasmlist,shiftop,location.size,
            tordconstnode(right).value and 31,location.register);
          exit;
        end;

      { variable shift count: it has to be in a register, since most
        target cpus which will use this node do not support a shift
        count in a memory location (cec) }
      countiscopy:=(right.location.loc<>LOC_REGISTER);
      if countiscopy then
        begin
          countreg:=cg.getintregister(exprasmlist,OS_INT);
          cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,countreg);
        end
      else
        countreg:=right.location.register;

      cg.a_op_reg_reg(exprasmlist,shiftop,OS_INT,countreg,location.register);

      { give back the original count location and the scratch copy }
      if countiscopy then
        begin
          location_release(exprasmlist,right.location);
          cg.ungetregister(exprasmlist,countreg);
        end;
    end;
  { Evaluates both operands and then selects the shift routine
    matching the type of the left operand. }
  procedure tcgshlshrnode.pass_2;
    begin
      secondpass(left);
      secondpass(right);
{$ifndef cpu64bit}
      if is_64bit(left.resulttype.def) then
        begin
          second_64bit;
          exit;
        end;
{$endif cpu64bit}
      second_integer;
    end;
  426. {*****************************************************************************
  427. TCGNOTNODE
  428. *****************************************************************************}
  {$ifndef cpu64bit}
  { Bitwise not of a 64-bit operand: the operand is forced into a
    register pair, which is taken over as result location and then
    complemented. }
  procedure tcgnotnode.second_64bit;
    begin
      secondpass(left);

      { operand into registers, result shares the operand's location }
      location_force_reg(exprasmlist,left.location,
        def_cgsize(left.resulttype.def),false);
      location_copy(location,left.location);

      { perform the NOT operation }
      cg64.a_op64_reg_reg(exprasmlist,OP_NOT,
        left.location.register64,location.register64);
    end;
  {$endif cpu64bit}
  { Bitwise not of an integer operand: the operand is forced into a
    register, which is taken over as result location and complemented
    in place. }
  procedure tcgnotnode.second_integer;
    begin
      secondpass(left);

      { operand into a register, result shares the operand's location }
      location_force_reg(exprasmlist,left.location,
        def_cgsize(left.resulttype.def),false);
      location_copy(location,left.location);

      { perform the NOT operation }
      cg.a_op_reg_reg(exprasmlist,OP_NOT,location.size,
        location.register,location.register);
    end;
  { Selects the code generation routine for the not operator.
    The checks are made in this order: boolean result, MMX-able array
    (only with SUPPORT_MMX), 64-bit operand (only on CPUs without
    64-bit registers); everything else is handled as an integer. }
  procedure tcgnotnode.pass_2;
    begin
      if is_boolean(resulttype.def) then
        begin
          second_boolean;
          exit;
        end;
{$ifdef SUPPORT_MMX}
      if (cs_mmx in aktlocalswitches) and is_mmx_able_array(left.resulttype.def) then
        begin
          second_mmx;
          exit;
        end;
{$endif SUPPORT_MMX}
{$ifndef cpu64bit}
      if is_64bit(left.resulttype.def) then
        begin
          second_64bit;
          exit;
        end;
{$endif cpu64bit}
      second_integer;
    end;
  { unit initialization: assign the generic code generator classes of
    this unit to the node class variables }
  begin
    cunaryminusnode:=tcgunaryminusnode;
    cmoddivnode:=tcgmoddivnode;
    cshlshrnode:=tcgshlshrnode;
    cnotnode:=tcgnotnode;
  end.
  468. {
  469. $Log$
  470. Revision 1.25 2004-01-23 15:12:49 florian
  471. * fixed generic shl/shr operations
  472. + added register allocation hook calls for arm specific operand types:
  473. register set and shifter op
  474. Revision 1.24 2004/01/20 12:59:37 florian
  475. * common addnode code for x86-64 and i386
  476. Revision 1.23 2003/12/06 01:15:22 florian
  477. * reverted Peter's alloctemp patch; hopefully properly
  478. Revision 1.22 2003/12/03 23:13:20 peter
  479. * delayed paraloc allocation, a_param_*() gets extra parameter
  480. if it needs to allocate temp or real paralocation
  481. * optimized/simplified int-real loading
  482. Revision 1.21 2003/10/10 17:48:13 peter
  483. * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
  484. * tregisteralloctor renamed to trgobj
  485. * removed rgobj from a lot of units
  486. * moved location_* and reference_* to cgobj
  487. * first things for mmx register allocation
  488. Revision 1.20 2003/10/09 21:31:37 daniel
  489. * Register allocator splitted, ans abstract now
  490. Revision 1.19 2003/10/01 20:34:48 peter
  491. * procinfo unit contains tprocinfo
  492. * cginfo renamed to cgbase
  493. * moved cgmessage to verbose
  494. * fixed ppc and sparc compiles
  495. Revision 1.18 2003/09/10 08:31:47 marco
  496. * Patch from Peter for paraloc
  497. Revision 1.17 2003/09/03 15:55:00 peter
  498. * NEWRA branch merged
  499. Revision 1.16 2003/09/03 11:18:37 florian
  500. * fixed arm concatcopy
  501. + arm support in the common compiler sources added
  502. * moved some generic cg code around
  503. + tfputype added
  504. * ...
  505. Revision 1.15.2.2 2003/08/31 15:46:26 peter
  506. * more updates for tregister
  507. Revision 1.15.2.1 2003/08/31 13:50:15 daniel
  508. * Remove sorting and use pregenerated indexes
  509. * Some work on making things compile
  510. Revision 1.15 2003/07/02 22:18:04 peter
  511. * paraloc splitted in callerparaloc,calleeparaloc
  512. * sparc calling convention updates
  513. Revision 1.14 2003/06/07 18:57:04 jonas
  514. + added freeintparaloc
  515. * ppc get/freeintparaloc now check whether the parameter regs are
  516. properly allocated/deallocated (and get an extra list para)
  517. * ppc a_call_* now internalerrors if pi_do_call is not yet set
  518. * fixed lot of missing pi_do_call's
  519. Revision 1.13 2003/06/03 21:11:09 peter
  520. * cg.a_load_* get a from and to size specifier
  521. * makeregsize only accepts newregister
  522. * i386 uses generic tcgnotnode,tcgunaryminus
  523. Revision 1.12 2003/06/01 21:38:06 peter
  524. * getregisterfpu size parameter added
  525. * op_const_reg size parameter added
  526. * sparc updates
  527. Revision 1.11 2003/05/30 23:49:18 jonas
  528. * a_load_loc_reg now has an extra size parameter for the destination
  529. register (properly fixes what I worked around in revision 1.106 of
  530. ncgutil.pas)
  531. Revision 1.10 2003/05/23 14:27:35 peter
  532. * remove some unit dependencies
  533. * current_procinfo changes to store more info
  534. Revision 1.9 2003/04/23 20:16:04 peter
  535. + added currency support based on int64
  536. + is_64bit for use in cg units instead of is_64bitint
  537. * removed cgmessage from n386add, replace with internalerrors
  538. Revision 1.8 2003/04/22 10:09:35 daniel
  539. + Implemented the actual register allocator
  540. + Scratch registers unavailable when new register allocator used
  541. + maybe_save/maybe_restore unavailable when new register allocator used
  542. Revision 1.7 2003/03/28 19:16:56 peter
  543. * generic constructor working for i386
  544. * remove fixed self register
  545. * esi added as address register for i386
  546. Revision 1.6 2003/02/19 22:00:14 daniel
  547. * Code generator converted to new register notation
  548. - Horribily outdated todo.txt removed
  549. Revision 1.5 2002/11/25 17:43:18 peter
  550. * splitted defbase in defutil,symutil,defcmp
  551. * merged isconvertable and is_equal into compare_defs(_ext)
  552. * made operator search faster by walking the list only once
  553. Revision 1.4 2002/09/17 18:54:02 jonas
  554. * a_load_reg_reg() now has two size parameters: source and dest. This
  555. allows some optimizations on architectures that don't encode the
  556. register size in the register name.
  557. Revision 1.3 2002/08/23 16:14:48 peter
  558. * tempgen cleanup
  559. * tt_noreuse temp type added that will be used in genentrycode
  560. Revision 1.2 2002/08/15 15:15:55 carl
  561. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  562. * more generic nodes for maths
  563. * several fixes for better m68k support
  564. Revision 1.1 2002/08/14 19:26:55 carl
  565. + generic int_to_real type conversion
  566. + generic unaryminus node
  567. }