nppcmat.pas 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. {
  2. $Id$
  3. Copyright (c) 1998-2000 by Florian Klaempfl
  4. Generate PowerPC assembler for math nodes
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit nppcmat;
  19. {$i defines.inc}
  20. interface
  21. uses
  22. node,nmat;
  23. type
  { PowerPC variant of the div/mod node: only the second pass is overridden. }
  tppcmoddivnode = class(tmoddivnode)
    procedure pass_2; override;
  end;
  { PowerPC variant of the shl/shr node: only the second pass is overridden. }
  tppcshlshrnode = class(tshlshrnode)
    procedure pass_2; override;
  end;
  { PowerPC variant of the unary minus node: only the second pass is overridden. }
  tppcunaryminusnode = class(tunaryminusnode)
    procedure pass_2; override;
  end;
  { PowerPC variant of the "not" node: only the second pass is overridden. }
  tppcnotnode = class(tnotnode)
    procedure pass_2; override;
  end;
  36. implementation
  37. uses
  38. globtype,systems,
  39. cutils,verbose,globals,
  40. symconst,symdef,aasm,types,
  41. cgbase,cgobj,temp_gen,pass_1,pass_2,
  42. ncon,
  43. cpubase,
  44. cga,tgcpu,nppcutil,cgcpu,cg64f32;
  {*****************************************************************************
                               TPPCMODDIVNODE
  *****************************************************************************}

  { Second pass (code generation) for 32-bit integer "div" and "mod" nodes.

    Both operands are evaluated, the numerator is forced into a register and
    the result is left in location.register (location.loc = LOC_REGISTER):
      - "div" by a constant power of two is turned into a shift;
      - any other "div" is a single divide instruction taken from divops;
      - "mod" is computed as numerator - (numerator div divider) * divider,
        because the divide instructions used here only yield the quotient.
    emitoverflowcheck(self) is called at the end in every case. }
  procedure tppcmoddivnode.pass_2;
    const
      { indexed by [operand is signed, overflow checking enabled] }
      divops: array[boolean, boolean] of tasmop =
        ((A_DIVWU,A_DIVWUO),(A_DIVW,A_DIVWO));
    var
      power      : longint;
      op         : tasmop;
      numerator,
      divider,
      quotient,
      resultreg  : tregister;
      saved      : boolean;
  begin
    secondpass(left);
    saved:=maybe_savetotemp(right.registers32,left,is_64bitint(left.resulttype.def));
    secondpass(right);
    if saved then
      restorefromtemp(left,is_64bitint(left.resulttype.def));
    set_location(location,left.location);

    resultreg := R_NO;
    { put numerator in register }
    if (left.location.loc in [LOC_REFERENCE,LOC_MEM]) then
      begin
        del_reference(left.location.reference);
        numerator := getregisterint;
        { OS_32 because everything is always converted to longint/ }
        { cardinal in the resulttype pass (JM)                     }
        cg.a_load_ref_reg(exprasmlist,OS_32,left.location.reference,
          numerator);
        resultreg := numerator;
      end
    else
      begin
        numerator := left.location.register;
        { a register variable must not be overwritten: use a fresh }
        { register for the result in that case                     }
        if left.location.loc = LOC_CREGISTER then
          resultreg := getregisterint
        else
          resultreg := numerator;
      end;

    if (nodetype = divn) and
       (right.nodetype = ordconstn) and
       ispowerof2(tordconstnode(right).value,power) then
      begin
        if is_signed(left.resulttype.def) then
          begin
            { From "The PowerPC Compiler Writer's Guide":                   }
            { This code uses the fact that, in the PowerPC architecture,    }
            { the shift right algebraic instructions set the Carry bit if   }
            { the source register contains a negative number and one or     }
            { more 1-bits are shifted out. Otherwise, the carry bit is      }
            { cleared. The addze instruction corrects the quotient, if      }
            { necessary, when the dividend is negative. For example, if     }
            { n = -13, (0xFFFF_FFF3), and k = 2, after executing the srawi  }
            { instruction, q = -4 (0xFFFF_FFFC) and CA = 1. After executing }
            { the addze instruction, q = -3, the correct quotient.          }
            cg.a_op_const_reg_reg(exprasmlist,OP_SAR,OS_32,aword(power),
              numerator,resultreg);
            exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE,resultreg,resultreg));
          end
        else
          { an unsigned numerator must be shifted logically: an algebraic }
          { shift would replicate a set top bit into the quotient         }
          cg.a_op_const_reg_reg(exprasmlist,OP_SHR,OS_32,aword(power),
            numerator,resultreg);
      end
    else
      begin
        { load divider in a register if necessary }
        case right.location.loc of
          LOC_CREGISTER, LOC_REGISTER:
            divider := right.location.register;
          LOC_REFERENCE, LOC_MEM:
            begin
              divider := cg.get_scratch_reg(exprasmlist);
              cg.a_load_ref_reg(exprasmlist,OS_32,
                right.location.reference,divider);
              del_reference(right.location.reference);
            end;
        end;

        { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
        { And on PPC, the only way to catch a div-by-0 is by checking  }
        { the overflow flag (JM)                                       }
        op := divops[is_signed(right.resulttype.def),
                     cs_check_overflow in aktlocalswitches];

        if nodetype = modn then
          begin
            { the numerator is still needed after the divide, so the }
            { quotient goes to a separate register                   }
            quotient := getregisterint;
            exprasmlist.concat(taicpu.op_reg_reg_reg(op,quotient,numerator,
              divider));
            exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLW,quotient,
              quotient,divider));
            { subf rD,rA,rB computes rB - rA: }
            { resultreg := numerator - quotient*divider }
            exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUBF,resultreg,
              quotient,numerator));
            ungetregisterint(quotient);
          end
        else
          exprasmlist.concat(taicpu.op_reg_reg_reg(op,resultreg,numerator,
            divider));

        { free the divider; only done here because the power-of-two }
        { path above never loads one                                }
        if right.location.loc in [LOC_REFERENCE,LOC_MEM] then
          cg.free_scratch_reg(exprasmlist,divider)
        else
          ungetregister(divider);
      end;

    { free used registers }
    if numerator <> resultreg then
      ungetregisterint(numerator);

    { set result location }
    location.loc:=LOC_REGISTER;
    location.register:=resultreg;
    emitoverflowcheck(self);
  end;
  {*****************************************************************************
                               TPPCSHLSHRNODE
  *****************************************************************************}

  { Second pass (code generation) for "shl" and "shr" nodes.

    64-bit left operands are handled as a register pair
    (location.registerhigh/registerlow); the shift is expanded into 32-bit
    instructions, with separate sequences for constant and variable shift
    counts. All other operands take the 32-bit path and leave the result in
    location.register. In both cases location.loc is LOC_REGISTER on exit. }
  procedure tppcshlshrnode.pass_2;
    var
      resultreg, hregister1,hregister2,
      hregisterhigh,hregisterlow : tregister;
      op : topcg;
      asmop1, asmop2: tasmop;
      shiftval: aword;
      saved : boolean;
  begin
    secondpass(left);
    saved:=maybe_savetotemp(right.registers32,left,is_64bitint(left.resulttype.def));
    secondpass(right);
    if saved then
      restorefromtemp(left,is_64bitint(left.resulttype.def));

    if is_64bitint(left.resulttype.def) then
      begin
        location.loc := LOC_REGISTER;
        { get the source halves and decide where the result halves go }
        case left.location.loc of
          LOC_REGISTER, LOC_CREGISTER:
            begin
              hregisterhigh := left.location.registerhigh;
              hregisterlow := left.location.registerlow;
              if left.location.loc = LOC_REGISTER then
                begin
                  location.registerhigh := hregisterhigh;
                  location.registerlow := hregisterlow
                end
              else
                begin
                  { register variables must not be overwritten }
                  location.registerhigh := getregisterint;
                  location.registerlow := getregisterint;
                end;
            end;
          LOC_REFERENCE,LOC_MEM:
            begin
              hregisterhigh := getregisterint;
              location.registerhigh := hregisterhigh;
              hregisterlow := getregisterint;
              location.registerlow := hregisterlow;
              tcg64f32(cg).a_load64_ref_reg(exprasmlist,left.location.reference,
                hregisterlow,hregisterhigh);
              { release the reference only after both halves are loaded, }
              { so its registers cannot be handed out as a destination   }
              { while the second half still has to be read               }
              del_reference(left.location.reference);
            end;
        end;

        if (right.nodetype = ordconstn) then
          begin
            shiftval := aword(tordconstnode(right).value);
            if tordconstnode(right).value > 31 then
              begin
                { one word is shifted out completely; the other one }
                { moves over, shifted by the remaining count        }
                if nodetype = shln then
                  begin
                    if (shiftval and 31) <> 0 then
                      cg.a_op_const_reg_reg(exprasmlist,OP_SHL,OS_32,
                        shiftval and 31,hregisterlow,location.registerhigh)
                    else
                      { shift by exactly 32: plain copy }
                      cg.a_load_reg_reg(exprasmlist,OS_32,hregisterlow,
                        location.registerhigh);
                    cg.a_load_const_reg(exprasmlist,OS_32,0,location.registerlow);
                  end
                else
                  begin
                    if (shiftval and 31) <> 0 then
                      cg.a_op_const_reg_reg(exprasmlist,OP_SHR,OS_32,
                        shiftval and 31,hregisterhigh,location.registerlow)
                    else
                      { shift by exactly 32: plain copy }
                      cg.a_load_reg_reg(exprasmlist,OS_32,hregisterhigh,
                        location.registerlow);
                    cg.a_load_const_reg(exprasmlist,OS_32,0,location.registerhigh);
                  end;
              end
            else if shiftval = 0 then
              begin
                { the rotate-and-insert sequences below need a count of }
                { 1..31 (their mask bounds are 32-shiftval/shiftval-1), }
                { so a zero shift is just a copy of both halves         }
                if location.registerhigh <> hregisterhigh then
                  cg.a_load_reg_reg(exprasmlist,OS_32,hregisterhigh,
                    location.registerhigh);
                if location.registerlow <> hregisterlow then
                  cg.a_load_reg_reg(exprasmlist,OS_32,hregisterlow,
                    location.registerlow);
              end
            else
              begin
                if nodetype = shln then
                  begin
                    { high := high shl n }
                    exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
                      A_RLWINM,location.registerhigh,hregisterhigh,shiftval,
                      0,31-shiftval));
                    { insert the top n bits of low into the bottom of high }
                    exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
                      A_RLWIMI,location.registerhigh,hregisterlow,shiftval,
                      32-shiftval,31));
                    { low := low shl n }
                    exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
                      A_RLWINM,location.registerlow,hregisterlow,shiftval,
                      0,31-shiftval));
                  end
                else
                  begin
                    { low := low shr n }
                    exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
                      A_RLWINM,location.registerlow,hregisterlow,32-shiftval,
                      shiftval,31));
                    { insert the bottom n bits of high into the top of low }
                    exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
                      A_RLWIMI,location.registerlow,hregisterhigh,32-shiftval,
                      0,shiftval-1));
                    { high := high shr n }
                    exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
                      A_RLWINM,location.registerhigh,hregisterhigh,32-shiftval,
                      shiftval,31));
                  end;
              end;
          end
        else
          { no constant shiftcount }
          begin
            case right.location.loc of
              LOC_REGISTER,LOC_CREGISTER:
                begin
                  hregister1 := right.location.register;
                end;
              LOC_REFERENCE,LOC_MEM:
                begin
                  hregister1 := cg.get_scratch_reg(exprasmlist);
                  cg.a_load_ref_reg(exprasmlist,OS_S32,
                    right.location.reference,hregister1);
                  del_reference(right.location.reference);
                end;
            end;

            { The sequence below is written for shl: "high" is the word  }
            { that receives bits from the other one. For shr the roles   }
            { are mirrored, so both the source and the destination pairs }
            { are swapped; the destination pair is swapped back after    }
            { the code has been emitted.                                 }
            if nodetype = shln then
              begin
                asmop1 := A_SLW;
                asmop2 := A_SRW;
              end
            else
              begin
                asmop1 := A_SRW;
                asmop2 := A_SLW;
                resultreg := hregisterhigh;
                hregisterhigh := hregisterlow;
                hregisterlow := resultreg;
                resultreg := location.registerhigh;
                location.registerhigh := location.registerlow;
                location.registerlow := resultreg;
              end;

            getexplicitregisterint(R_0);
            { R_0 := 32 - count }
            exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC,
              R_0,hregister1,32));
            exprasmlist.concat(taicpu.op_reg_reg_reg(asmop1,
              location.registerhigh,hregisterhigh,hregister1));
            { bits crossing the word boundary when count < 32 }
            exprasmlist.concat(taicpu.op_reg_reg_reg(asmop2,
              R_0,hregisterlow,R_0));
            exprasmlist.concat(taicpu.op_reg_reg_reg(A_OR,
              location.registerhigh,location.registerhigh,R_0));
            { R_0 := count - 32 }
            exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBI,
              R_0,hregister1,32));
            { bits crossing the word boundary when count >= 32 }
            exprasmlist.concat(taicpu.op_reg_reg_reg(asmop1,
              R_0,hregisterlow,R_0));
            exprasmlist.concat(taicpu.op_reg_reg_reg(A_OR,
              location.registerhigh,location.registerhigh,R_0));
            exprasmlist.concat(taicpu.op_reg_reg_reg(asmop1,
              location.registerlow,hregisterlow,hregister1));
            ungetregister(R_0);

            { undo the destination swap so that registerhigh/registerlow }
            { again name the real high/low result words                  }
            if nodetype <> shln then
              begin
                resultreg := location.registerhigh;
                location.registerhigh := location.registerlow;
                location.registerlow := resultreg;
              end;

            if right.location.loc in [LOC_MEM,LOC_REFERENCE] then
              cg.free_scratch_reg(exprasmlist,hregister1)
            else
              ungetregister(hregister1);
          end
      end
    else
      begin
        { load left operators in a register }
        if (left.location.loc in [LOC_REFERENCE,LOC_MEM]) then
          begin
            del_reference(left.location.reference);
            hregister1 := getregisterint;
            { OS_32 because everything is always converted to longint/ }
            { cardinal in the resulttype pass (JM)                     }
            cg.a_load_ref_reg(exprasmlist,OS_32,left.location.reference,
              hregister1);
            resultreg := hregister1;
          end
        else
          begin
            hregister1 := left.location.register;
            if left.location.loc = LOC_CREGISTER then
              resultreg := getregisterint
            else
              resultreg := hregister1;
          end;

        { determine operator }
        if nodetype=shln then
          op:=OP_SHL
        else
          op:=OP_SHR;

        { shifting by a constant directly coded: }
        if (right.nodetype=ordconstn) then
          cg.a_op_const_reg_reg(exprasmlist,op,OS_32,
            aword(tordconstnode(right).value and 31),hregister1,resultreg)
        else
          begin
            { load shift count in a register if necessary }
            case right.location.loc of
              LOC_CREGISTER, LOC_REGISTER:
                hregister2 := right.location.register;
              LOC_REFERENCE, LOC_MEM:
                begin
                  hregister2 := cg.get_scratch_reg(exprasmlist);
                  cg.a_load_ref_reg(exprasmlist,OS_32,
                    right.location.reference,hregister2);
                  del_reference(right.location.reference);
                end;
            end;

            { NOTE(review): unlike a_op_const_reg_reg this call passes no }
            { operand size; confirm the argument list and operand order   }
            { against the declaration in cgcpu                            }
            tcgppc(cg).a_op_reg_reg_reg(exprasmlist,op,hregister1,
              hregister2,resultreg);

            if right.location.loc in [LOC_REFERENCE,LOC_MEM] then
              cg.free_scratch_reg(exprasmlist,hregister2)
            else
              ungetregister(hregister2);
          end;

        { set result location }
        location.loc:=LOC_REGISTER;
        location.register:=resultreg;
      end;
  end;
  {*****************************************************************************
                               TPPCUNARYMINUSNODE
  *****************************************************************************}

  { Second pass (code generation) for unary minus.

    64-bit integers are negated as a register pair with a subtract-from-zero
    on the low word whose carry is propagated into the complemented high
    word. 32-bit integers use neg (nego when overflow checking is enabled)
    and floating point values use fneg. emitoverflowcheck(self) is called at
    the end in every case. }
  procedure tppcunaryminusnode.pass_2;
    var
      src1, src2: tregister;
      op: tasmop;
  begin
    secondpass(left);
    if is_64bitint(left.resulttype.def) then
      begin
        clear_location(location);
        location.loc:=LOC_REGISTER;
        { src1/src2 = low/high source word }
        case left.location.loc of
          LOC_REGISTER, LOC_CREGISTER :
            begin
              src1 := left.location.registerlow;
              src2 := left.location.registerhigh;
              if left.location.loc = LOC_REGISTER then
                begin
                  location.registerlow:=src1;
                  location.registerhigh:=src2;
                end
              else
                begin
                  location.registerlow := getregisterint;
                  location.registerhigh := getregisterint;
                end;
            end;
          LOC_REFERENCE,LOC_MEM :
            begin
              location.registerlow:=getregisterint;
              src1 := location.registerlow;
              location.registerhigh:=getregisterint;
              src2 := location.registerhigh;
              tcg64f32(cg).a_load64_ref_reg(exprasmlist,left.location.reference,
                location.registerlow,
                location.registerhigh);
              { release the reference only after both halves are loaded, }
              { so its registers cannot be handed out as a destination   }
              { while the second half still has to be read               }
              del_reference(left.location.reference);
            end;
        end;

        { low := 0 - low; this sets the carry bit exactly when low = 0 }
        exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC,
          location.registerlow,src1,0));
        { high := not(high) + carry, which completes the 64-bit two's }
        { complement; the "o" form also records signed overflow       }
        if not(cs_check_overflow in aktlocalswitches) then
          exprasmlist.concat(taicpu.op_reg_reg(A_SUBFZE,
            location.registerhigh,src2))
        else
          exprasmlist.concat(taicpu.op_reg_reg(A_SUBFZEO,
            location.registerhigh,src2));
      end
    else
      begin
        { a floating point result lives in an fpu register, everything }
        { else in an integer register                                  }
        if left.resulttype.def.deftype = floatdef then
          location.loc:=LOC_FPU
        else
          location.loc:=LOC_REGISTER;
        case left.location.loc of
          LOC_FPU, LOC_REGISTER:
            begin
              src1 := left.location.register;
              location.register := src1;
            end;
          LOC_CFPUREGISTER, LOC_CREGISTER:
            begin
              { register variables must not be overwritten }
              src1 := left.location.register;
              if left.location.loc = LOC_CREGISTER then
                location.register := getregisterint
              else
                location.register := getregisterfpu;
            end;
          LOC_REFERENCE,LOC_MEM:
            begin
              del_reference(left.location.reference);
              if (left.resulttype.def.deftype=floatdef) then
                begin
                  src1 := getregisterfpu;
                  location.register := src1;
                  floatload(tfloatdef(left.resulttype.def).typ,
                    left.location.reference,src1);
                end
              else
                begin
                  src1 := getregisterint;
                  location.register:= src1;
                  cg.a_load_ref_reg(exprasmlist,OS_32,
                    left.location.reference,src1);
                end;
            end;
        end;

        { choose appropriate operand }
        if left.resulttype.def.deftype <> floatdef then
          begin
            if not(cs_check_overflow in aktlocalswitches) then
              op := A_NEG
            else
              op := A_NEGO;
          end
        else
          op := A_FNEG;

        { emit operation }
        exprasmlist.concat(taicpu.op_reg_reg(op,location.register,src1));
      end;

    { Here was a problem...              }
    { Operand to be negated always       }
    { seems to be converted to signed    }
    { 32-bit before doing neg!!          }
    { So this is useless...              }
    { that's not true: -2^31 gives an overflow error if it is negated (FK) }
    emitoverflowcheck(self);
  end;
  {*****************************************************************************
                               TPPCNOTNODE
  *****************************************************************************}

  { Second pass (code generation) for "not".

    Boolean operands: jump locations swap the true/false labels, flag
    locations invert the flags, and register/memory operands are compared
    with 0 so that the result is the EQ flag (location.loc = LOC_FLAGS).
    64-bit integers are complemented as a register pair and all other
    integers as a single register, both with location.loc = LOC_REGISTER. }
  procedure tppcnotnode.pass_2;
    var
      hl : tasmlabel;
      regl, regh: tregister;
  begin
    if is_boolean(resulttype.def) then
      begin
        { the second pass could change the location of left }
        { if it is a register variable, so we've to do      }
        { this before the case statement                    }
        if left.location.loc in [LOC_REFERENCE,LOC_MEM,
          LOC_FLAGS,LOC_REGISTER,LOC_CREGISTER] then
          secondpass(left);

        case left.location.loc of
          LOC_JUMP :
            begin
              { evaluate left with the labels exchanged, then restore them }
              hl:=truelabel;
              truelabel:=falselabel;
              falselabel:=hl;
              secondpass(left);
              maketojumpbool(left,lr_load_regvars);
              hl:=truelabel;
              truelabel:=falselabel;
              falselabel:=hl;
            end;
          LOC_FLAGS :
            location.resflags:=inverse_flags(left.location.resflags);
          LOC_REGISTER, LOC_CREGISTER, LOC_REFERENCE, LOC_MEM :
            begin
              if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
                regl := left.location.register
              else
                begin
                  regl := getregisterint;
                  cg.a_load_ref_reg(exprasmlist,def_cgsize(left.resulttype.def),
                    left.location.reference,regl);
                  { the memory operand is not needed anymore }
                  del_reference(left.location.reference);
                end;
              { "not x" is true exactly when x = 0 }
              location.loc:=LOC_FLAGS;
              location.resflags.cr:=0;
              location.resflags.flag:=F_EQ;
              exprasmlist.concat(taicpu.op_reg_const(A_CMPWI,regl,0));
              ungetregister(regl);
            end;
        end;
      end
    else if is_64bitint(left.resulttype.def) then
      begin
        secondpass(left);
        clear_location(location);
        location.loc:=LOC_REGISTER;
        { make sure left is in a register and set the dest register }
        case left.location.loc of
          LOC_REFERENCE, LOC_MEM, LOC_CREGISTER:
            begin
              location.registerlow := getregisterint;
              location.registerhigh := getregisterint;
              if left.location.loc <> LOC_CREGISTER then
                begin
                  tcg64f32(cg).a_load64_ref_reg(exprasmlist,
                    left.location.reference,location.registerlow,
                    location.registerhigh);
                  { released after the load so that both halves are read }
                  { before the reference's registers can be reused       }
                  del_reference(left.location.reference);
                  regl := location.registerlow;
                  regh := location.registerhigh;
                end
              else
                begin
                  regl := left.location.registerlow;
                  regh := left.location.registerhigh;
                end;
            end;
          LOC_REGISTER:
            begin
              regl := left.location.registerlow;
              location.registerlow := regl;
              regh := left.location.registerhigh;
              location.registerhigh := regh;
            end;
        end;
        { perform the NOT operation }
        exprasmlist.concat(taicpu.op_reg_reg(A_NOT,location.registerhigh,
          regh));
        exprasmlist.concat(taicpu.op_reg_reg(A_NOT,location.registerlow,
          regl));
      end
    else
      begin
        secondpass(left);
        clear_location(location);
        location.loc:=LOC_REGISTER;
        { make sure left is in a register and set the dest register }
        case left.location.loc of
          LOC_REFERENCE, LOC_MEM, LOC_CREGISTER:
            begin
              location.register := getregisterint;
              if left.location.loc <> LOC_CREGISTER then
                begin
                  cg.a_load_ref_reg(exprasmlist,def_cgsize(left.resulttype.def),
                    left.location.reference,location.register);
                  { the memory operand is not needed anymore }
                  del_reference(left.location.reference);
                  regl := location.register;
                end
              else
                regl := left.location.register;
            end;
          LOC_REGISTER:
            begin
              { the operand register is reused for the result }
              regl := left.location.register;
              location.register := regl;
            end;
        end;
        { perform the NOT operation }
        exprasmlist.concat(taicpu.op_reg_reg(A_NOT,location.register,
          regl));
        { release the source reg if it wasn't reused }
        if regl <> location.register then
          ungetregisterint(regl);
      end;
  end;
  { Unit initialization: assign the PowerPC node classes of this unit to    }
  { cmoddivnode, cshlshrnode, cunaryminusnode and cnotnode (presumably the  }
  { class-reference variables declared in nmat - confirm there).            }
  begin
    cnotnode        := tppcnotnode;
    cunaryminusnode := tppcunaryminusnode;
    cshlshrnode     := tppcshlshrnode;
    cmoddivnode     := tppcmoddivnode;
  end.
  573. {
  574. $Log$
  575. Revision 1.2 2002-01-03 14:57:52 jonas
  576. * completed (not compilable yet though)
  577. Revision 1.1 2001/12/29 15:28:58 jonas
  578. * powerpc/cgcpu.pas compiles :)
  579. * several powerpc-related fixes
  580. * cpuasm unit is now based on common tainst unit
  581. + nppcmat unit for powerpc (almost complete)
  582. }