ncgmat.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. Generate generic mathematical nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit ncgmat;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nmat,cpubase,cgbase;
  23. type
  24. tcgunaryminusnode = class(tunaryminusnode)
  25. protected
  26. { This routine is called to change the sign of the
  27. floating point value in the floating point
  28. register r.
  29. This routine should be overriden, since
  30. the generic version is not optimal at all. The
  31. generic version assumes that floating
  32. point values are stored in the register
  33. in IEEE-754 format.
  34. }
  35. procedure emit_float_sign_change(r: tregister; _size : tcgsize);virtual;
  36. {$ifdef SUPPORT_MMX}
  37. procedure second_mmx;virtual;abstract;
  38. {$endif SUPPORT_MMX}
  39. procedure second_64bit;virtual;
  40. procedure second_integer;virtual;
  41. procedure second_float;virtual;
  42. public
  43. procedure pass_2;override;
  44. end;
  45. tcgmoddivnode = class(tmoddivnode)
  46. procedure pass_2;override;
  47. protected
  48. { This routine must do an actual 32-bit division, be it
  49. signed or unsigned. The result must set into the the
  50. @var(num) register.
  51. @param(signed Indicates if the division must be signed)
  52. @param(denum Register containing the denominator
  53. @param(num Register containing the numerator, will also receive result)
  54. The actual optimizations regarding shifts have already
  55. been done and emitted, so this should really a do a divide.
  56. }
  57. procedure emit_div_reg_reg(signed: boolean;denum,num : tregister);virtual;abstract;
  58. { This routine must do an actual 32-bit modulo, be it
  59. signed or unsigned. The result must set into the the
  60. @var(num) register.
  61. @param(signed Indicates if the modulo must be signed)
  62. @param(denum Register containing the denominator
  63. @param(num Register containing the numerator, will also receive result)
  64. The actual optimizations regarding shifts have already
  65. been done and emitted, so this should really a do a modulo.
  66. }
  67. procedure emit_mod_reg_reg(signed: boolean;denum,num : tregister);virtual;abstract;
  68. { This routine must do an actual 64-bit division, be it
  69. signed or unsigned. The result must set into the the
  70. @var(num) register.
  71. @param(signed Indicates if the division must be signed)
  72. @param(denum Register containing the denominator
  73. @param(num Register containing the numerator, will also receive result)
  74. The actual optimizations regarding shifts have already
  75. been done and emitted, so this should really a do a divide.
  76. Currently, this routine should only be implemented on
  77. 64-bit systems, otherwise a helper is called in 1st pass.
  78. }
  79. procedure emit64_div_reg_reg(signed: boolean;denum,num : tregister64);virtual;
  80. end;
  81. tcgshlshrnode = class(tshlshrnode)
  82. procedure second_64bit;virtual;
  83. procedure second_integer;virtual;
  84. procedure pass_2;override;
  85. end;
  86. tcgnotnode = class(tnotnode)
  87. protected
  88. procedure second_boolean;virtual;abstract;
  89. {$ifdef SUPPORT_MMX}
  90. procedure second_mmx;virtual;abstract;
  91. {$endif SUPPORT_MMX}
  92. procedure second_64bit;virtual;
  93. procedure second_integer;virtual;
  94. public
  95. procedure pass_2;override;
  96. end;
  97. implementation
  98. uses
  99. globtype,systems,
  100. cutils,verbose,globals,
  101. symconst,symdef,aasmbase,aasmtai,aasmcpu,defutil,
  102. pass_1,pass_2,
  103. ncon,
  104. cpuinfo,
  105. tgobj,ncgutil,cgobj,paramgr,cg64f32;
  106. {*****************************************************************************
  107. TCGUNARYMINUSNODE
  108. *****************************************************************************}
  109. procedure tcgunaryminusnode.emit_float_sign_change(r: tregister; _size : tcgsize);
  110. var
  111. href : treference;
  112. hreg : tregister;
  113. begin
  114. { get a temporary memory reference to store the floating
  115. point value
  116. }
  117. tg.gettemp(exprasmlist,tcgsize2size[_size],tt_normal,href);
  118. { store the floating point value in the temporary memory area }
  119. cg.a_loadfpu_reg_ref(exprasmlist,_size,r,href);
  120. { only single and double ieee are supported }
  121. if _size = OS_F64 then
  122. begin
  123. { on little-endian machine the most significant
  124. 32-bit value is stored at the highest address
  125. }
  126. if target_info.endian = endian_little then
  127. inc(href.offset,4);
  128. end
  129. else
  130. if _size <> OS_F32 then
  131. internalerror(20020814);
  132. hreg := cg.getintregister(exprasmlist,OS_32);
  133. { load value }
  134. cg.a_load_ref_reg(exprasmlist,OS_32,OS_32,href,hreg);
  135. { bitwise complement copied value }
  136. cg.a_op_reg_reg(exprasmlist,OP_NOT,OS_32,hreg,hreg);
  137. { sign-bit is bit 31/63 of single/double }
  138. cg.a_op_const_reg(exprasmlist,OP_AND,OS_32,aword($80000000),hreg);
  139. { or with value in reference memory }
  140. cg.a_op_reg_ref(exprasmlist,OP_OR,OS_32,hreg,href);
  141. cg.ungetregister(exprasmlist,hreg);
  142. { store the floating point value in the temporary memory area }
  143. if _size = OS_F64 then
  144. begin
  145. { on little-endian machine the most significant
  146. 32-bit value is stored at the highest address
  147. }
  148. if target_info.endian = endian_little then
  149. dec(href.offset,4);
  150. end;
  151. cg.a_loadfpu_ref_reg(exprasmlist,_size,href,r);
  152. end;
  153. procedure tcgunaryminusnode.second_64bit;
  154. begin
  155. secondpass(left);
  156. { load left operator in a register }
  157. location_copy(location,left.location);
  158. location_force_reg(exprasmlist,location,OS_64,false);
  159. cg64.a_op64_loc_reg(exprasmlist,OP_NEG,
  160. location,joinreg64(location.registerlow,location.registerhigh));
  161. end;
  162. procedure tcgunaryminusnode.second_float;
  163. begin
  164. secondpass(left);
  165. location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
  166. case left.location.loc of
  167. LOC_REFERENCE,
  168. LOC_CREFERENCE :
  169. begin
  170. reference_release(exprasmlist,left.location.reference);
  171. location.register:=cg.getfpuregister(exprasmlist,location.size);
  172. cg.a_loadfpu_ref_reg(exprasmlist,
  173. def_cgsize(left.resulttype.def),
  174. left.location.reference,location.register);
  175. emit_float_sign_change(location.register,def_cgsize(left.resulttype.def));
  176. end;
  177. LOC_FPUREGISTER:
  178. begin
  179. location.register:=left.location.register;
  180. emit_float_sign_change(location.register,def_cgsize(left.resulttype.def));
  181. end;
  182. LOC_CFPUREGISTER:
  183. begin
  184. location.register:=cg.getfpuregister(exprasmlist,location.size);
  185. cg.a_loadfpu_reg_reg(exprasmlist,left.location.size,left.location.register,location.register);
  186. emit_float_sign_change(location.register,def_cgsize(left.resulttype.def));
  187. end;
  188. else
  189. internalerror(200306021);
  190. end;
  191. end;
  192. procedure tcgunaryminusnode.second_integer;
  193. begin
  194. secondpass(left);
  195. { load left operator in a register }
  196. location_copy(location,left.location);
  197. location_force_reg(exprasmlist,location,OS_INT,false);
  198. cg.a_op_reg_reg(exprasmlist,OP_NEG,OS_INT,location.register,location.register);
  199. end;
  200. procedure tcgunaryminusnode.pass_2;
  201. begin
  202. if is_64bit(left.resulttype.def) then
  203. second_64bit
  204. {$ifdef SUPPORT_MMX}
  205. else
  206. if (cs_mmx in aktlocalswitches) and is_mmx_able_array(left.resulttype.def) then
  207. second_mmx
  208. {$endif SUPPORT_MMX}
  209. else
  210. if (left.resulttype.def.deftype=floatdef) then
  211. second_float
  212. else
  213. second_integer;
  214. end;
  215. {*****************************************************************************
  216. TCGMODDIVNODE
  217. *****************************************************************************}
  218. procedure tcgmoddivnode.emit64_div_reg_reg(signed: boolean; denum,num:tregister64);
  219. begin
  220. { handled in pass_1 already, unless pass_1 is
  221. overriden
  222. }
  223. { should be handled in pass_1 (JM) }
  224. internalerror(200109052);
  225. end;
  226. procedure tcgmoddivnode.pass_2;
  227. var
  228. hreg1 : tregister;
  229. hdenom : tregister;
  230. power : longint;
  231. hl : tasmlabel;
  232. paraloc1 : tparalocation;
  233. begin
  234. secondpass(left);
  235. if codegenerror then
  236. exit;
  237. secondpass(right);
  238. if codegenerror then
  239. exit;
  240. location_copy(location,left.location);
  241. if is_64bit(resulttype.def) then
  242. begin
  243. { this code valid for 64-bit cpu's only ,
  244. otherwise helpers are called in pass_1
  245. }
  246. location_force_reg(exprasmlist,location,OS_64,false);
  247. location_copy(location,left.location);
  248. location_force_reg(exprasmlist,right.location,OS_64,false);
  249. emit64_div_reg_reg(is_signed(left.resulttype.def),
  250. joinreg64(right.location.registerlow,right.location.registerhigh),
  251. joinreg64(location.registerlow,location.registerhigh));
  252. end
  253. else
  254. begin
  255. { put numerator in register }
  256. location_force_reg(exprasmlist,left.location,OS_INT,false);
  257. hreg1:=left.location.register;
  258. if (nodetype=divn) and
  259. (right.nodetype=ordconstn) and
  260. ispowerof2(tordconstnode(right).value,power) then
  261. Begin
  262. { for signed numbers, the numerator must be adjusted before the
  263. shift instruction, but not wih unsigned numbers! Otherwise,
  264. "Cardinal($ffffffff) div 16" overflows! (JM) }
  265. If is_signed(left.resulttype.def) Then
  266. Begin
  267. objectlibrary.getlabel(hl);
  268. cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_GT,0,hreg1,hl);
  269. if power=1 then
  270. cg.a_op_const_reg(exprasmlist,OP_ADD,OS_INT,1,hreg1)
  271. else
  272. cg.a_op_const_reg(exprasmlist,OP_ADD,OS_INT,tordconstnode(right).value-1,hreg1);
  273. cg.a_label(exprasmlist,hl);
  274. cg.a_op_const_reg(exprasmlist,OP_SAR,OS_INT,power,hreg1);
  275. End
  276. Else { not signed }
  277. cg.a_op_const_reg(exprasmlist,OP_SHR,OS_INT,power,hreg1);
  278. End
  279. else
  280. begin
  281. { bring denominator to hdenom }
  282. { hdenom is always free, it's }
  283. { only used for temporary }
  284. { purposes }
  285. hdenom := cg.getintregister(exprasmlist,OS_INT);
  286. if right.location.loc<>LOC_CREGISTER then
  287. location_release(exprasmlist,right.location);
  288. cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,hdenom);
  289. { verify if the divisor is zero, if so return an error
  290. immediately
  291. }
  292. objectlibrary.getlabel(hl);
  293. cg.a_cmp_const_reg_label(exprasmlist,OS_INT,OC_NE,0,hdenom,hl);
  294. paraloc1:=paramanager.getintparaloc(pocall_default,1);
  295. paramanager.allocparaloc(exprasmlist,paraloc1);
  296. cg.a_param_const(exprasmlist,OS_S32,200,paraloc1);
  297. paramanager.freeparaloc(exprasmlist,paraloc1);
  298. cg.a_call_name(exprasmlist,'FPC_HANDLERROR');
  299. cg.a_label(exprasmlist,hl);
  300. if nodetype = modn then
  301. emit_mod_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1)
  302. else
  303. emit_div_reg_reg(is_signed(left.resulttype.def),hdenom,hreg1);
  304. end;
  305. location_reset(location,LOC_REGISTER,OS_INT);
  306. location.register:=hreg1;
  307. end;
  308. cg.g_overflowcheck(exprasmlist,location,resulttype.def);
  309. end;
  310. {*****************************************************************************
  311. TCGSHLRSHRNODE
  312. *****************************************************************************}
  313. procedure tcgshlshrnode.second_64bit;
  314. var
  315. freescratch : boolean;
  316. op : topcg;
  317. begin
  318. {$ifdef cpu64bit}
  319. freescratch:=false;
  320. secondpass(left);
  321. secondpass(right);
  322. { determine operator }
  323. case nodetype of
  324. shln: op:=OP_SHL;
  325. shrn: op:=OP_SHR;
  326. end;
  327. freescratch:=false;
  328. location_reset(location,LOC_REGISTER,OS_64);
  329. { load left operator in a register }
  330. location_force_reg(exprasmlist,left.location,OS_64,false);
  331. location_copy(location,left.location);
  332. if (right.nodetype=ordconstn) then
  333. begin
  334. cg64.a_op64_const_reg(exprasmlist,op,tordconstnode(right).value,
  335. joinreg64(location.registerlow,location.registerhigh));
  336. end
  337. else
  338. begin
  339. { this should be handled in pass_1 }
  340. internalerror(2002081501);
  341. end;
  342. {$else cpu64bit}
  343. { already hanled in 1st pass }
  344. internalerror(2002081501);
  345. {$endif cpu64bit}
  346. end;
  347. procedure tcgshlshrnode.second_integer;
  348. var
  349. freescratch : boolean;
  350. op : topcg;
  351. hcountreg : tregister;
  352. begin
  353. freescratch:=false;
  354. { determine operator }
  355. case nodetype of
  356. shln: op:=OP_SHL;
  357. shrn: op:=OP_SHR;
  358. end;
  359. { load left operators in a register }
  360. location_copy(location,left.location);
  361. location_force_reg(exprasmlist,location,OS_INT,false);
  362. { shifting by a constant directly coded: }
  363. if (right.nodetype=ordconstn) then
  364. begin
  365. { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)
  366. if right.value<=31 then
  367. }
  368. cg.a_op_const_reg(exprasmlist,op,location.size,
  369. tordconstnode(right).value and 31,location.register);
  370. {
  371. else
  372. emit_reg_reg(A_XOR,S_L,hregister1,
  373. hregister1);
  374. }
  375. end
  376. else
  377. begin
  378. { load right operators in a register - this
  379. is done since most target cpu which will use this
  380. node do not support a shift count in a mem. location (cec)
  381. }
  382. if right.location.loc<>LOC_REGISTER then
  383. begin
  384. if right.location.loc<>LOC_CREGISTER then
  385. location_release(exprasmlist,right.location);
  386. hcountreg:=cg.getintregister(exprasmlist,OS_INT);
  387. freescratch := true;
  388. cg.a_load_loc_reg(exprasmlist,right.location.size,right.location,hcountreg);
  389. end
  390. else
  391. hcountreg:=right.location.register;
  392. cg.a_op_reg_reg(exprasmlist,op,OS_INT,hcountreg,location.register);
  393. if freescratch then
  394. cg.ungetregister(exprasmlist,hcountreg);
  395. end;
  396. end;
  397. procedure tcgshlshrnode.pass_2;
  398. begin
  399. secondpass(left);
  400. secondpass(right);
  401. if is_64bit(left.resulttype.def) then
  402. second_64bit
  403. else
  404. second_integer;
  405. end;
  406. {*****************************************************************************
  407. TCGNOTNODE
  408. *****************************************************************************}
  409. procedure tcgnotnode.second_64bit;
  410. begin
  411. secondpass(left);
  412. location_force_reg(exprasmlist,left.location,def_cgsize(left.resulttype.def),false);
  413. location_copy(location,left.location);
  414. { perform the NOT operation }
  415. cg64.a_op64_reg_reg(exprasmlist,OP_NOT,left.location.register64,location.register64);
  416. end;
  417. procedure tcgnotnode.second_integer;
  418. begin
  419. secondpass(left);
  420. location_force_reg(exprasmlist,left.location,def_cgsize(left.resulttype.def),false);
  421. location_copy(location,left.location);
  422. { perform the NOT operation }
  423. cg.a_op_reg_reg(exprasmlist,OP_NOT,location.size,location.register,location.register);
  424. end;
  425. procedure tcgnotnode.pass_2;
  426. begin
  427. if is_boolean(resulttype.def) then
  428. second_boolean
  429. {$ifdef SUPPORT_MMX}
  430. else if (cs_mmx in aktlocalswitches) and is_mmx_able_array(left.resulttype.def) then
  431. second_mmx
  432. {$endif SUPPORT_MMX}
  433. else if is_64bit(left.resulttype.def) then
  434. second_64bit
  435. else
  436. second_integer;
  437. end;
  438. begin
  439. cmoddivnode:=tcgmoddivnode;
  440. cunaryminusnode:=tcgunaryminusnode;
  441. cshlshrnode:=tcgshlshrnode;
  442. cnotnode:=tcgnotnode;
  443. end.
  444. {
  445. $Log$
  446. Revision 1.21 2003-10-10 17:48:13 peter
  447. * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
  448. * tregisteralloctor renamed to trgobj
  449. * removed rgobj from a lot of units
  450. * moved location_* and reference_* to cgobj
  451. * first things for mmx register allocation
  452. Revision 1.20 2003/10/09 21:31:37 daniel
  453. * Register allocator splitted, ans abstract now
  454. Revision 1.19 2003/10/01 20:34:48 peter
  455. * procinfo unit contains tprocinfo
  456. * cginfo renamed to cgbase
  457. * moved cgmessage to verbose
  458. * fixed ppc and sparc compiles
  459. Revision 1.18 2003/09/10 08:31:47 marco
  460. * Patch from Peter for paraloc
  461. Revision 1.17 2003/09/03 15:55:00 peter
  462. * NEWRA branch merged
  463. Revision 1.16 2003/09/03 11:18:37 florian
  464. * fixed arm concatcopy
  465. + arm support in the common compiler sources added
  466. * moved some generic cg code around
  467. + tfputype added
  468. * ...
  469. Revision 1.15.2.2 2003/08/31 15:46:26 peter
  470. * more updates for tregister
  471. Revision 1.15.2.1 2003/08/31 13:50:15 daniel
  472. * Remove sorting and use pregenerated indexes
  473. * Some work on making things compile
  474. Revision 1.15 2003/07/02 22:18:04 peter
  475. * paraloc splitted in callerparaloc,calleeparaloc
  476. * sparc calling convention updates
  477. Revision 1.14 2003/06/07 18:57:04 jonas
  478. + added freeintparaloc
  479. * ppc get/freeintparaloc now check whether the parameter regs are
  480. properly allocated/deallocated (and get an extra list para)
  481. * ppc a_call_* now internalerrors if pi_do_call is not yet set
  482. * fixed lot of missing pi_do_call's
  483. Revision 1.13 2003/06/03 21:11:09 peter
  484. * cg.a_load_* get a from and to size specifier
  485. * makeregsize only accepts newregister
  486. * i386 uses generic tcgnotnode,tcgunaryminus
  487. Revision 1.12 2003/06/01 21:38:06 peter
  488. * getregisterfpu size parameter added
  489. * op_const_reg size parameter added
  490. * sparc updates
  491. Revision 1.11 2003/05/30 23:49:18 jonas
  492. * a_load_loc_reg now has an extra size parameter for the destination
  493. register (properly fixes what I worked around in revision 1.106 of
  494. ncgutil.pas)
  495. Revision 1.10 2003/05/23 14:27:35 peter
  496. * remove some unit dependencies
  497. * current_procinfo changes to store more info
  498. Revision 1.9 2003/04/23 20:16:04 peter
  499. + added currency support based on int64
  500. + is_64bit for use in cg units instead of is_64bitint
  501. * removed cgmessage from n386add, replace with internalerrors
  502. Revision 1.8 2003/04/22 10:09:35 daniel
  503. + Implemented the actual register allocator
  504. + Scratch registers unavailable when new register allocator used
  505. + maybe_save/maybe_restore unavailable when new register allocator used
  506. Revision 1.7 2003/03/28 19:16:56 peter
  507. * generic constructor working for i386
  508. * remove fixed self register
  509. * esi added as address register for i386
  510. Revision 1.6 2003/02/19 22:00:14 daniel
  511. * Code generator converted to new register notation
  512. - Horribily outdated todo.txt removed
  513. Revision 1.5 2002/11/25 17:43:18 peter
  514. * splitted defbase in defutil,symutil,defcmp
  515. * merged isconvertable and is_equal into compare_defs(_ext)
  516. * made operator search faster by walking the list only once
  517. Revision 1.4 2002/09/17 18:54:02 jonas
  518. * a_load_reg_reg() now has two size parameters: source and dest. This
  519. allows some optimizations on architectures that don't encode the
  520. register size in the register name.
  521. Revision 1.3 2002/08/23 16:14:48 peter
  522. * tempgen cleanup
  523. * tt_noreuse temp type added that will be used in genentrycode
  524. Revision 1.2 2002/08/15 15:15:55 carl
  525. * jmpbuf size allocation for exceptions is now cpu specific (as it should)
  526. * more generic nodes for maths
  527. * several fixes for better m68k support
  528. Revision 1.1 2002/08/14 19:26:55 carl
  529. + generic int_to_real type conversion
  530. + generic unaryminus node
  531. }