aoptcpu.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses
  22. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai;
  Type
    { CPU-specific assembler optimizer. It extends the generic TAsmOptimizer
      with register-tracking helpers and overrides both peephole passes. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Walks forward from Current and stores in Next the first node that is
        not an instruction, that mentions reg, or that is a call/jump.
        Returns false (and Next=nil) when the walk ended on a call/jump. }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai;
        reg: TRegister): Boolean;

      { Tells whether reg still holds a needed value after instruction p;
        AllUsedRegs is advanced past p as a side effect. }
      function RegUsedAfterInstruction(reg: Tregister; p: tai;
        var AllUsedRegs: TAllUsedRegs): Boolean;

      { Tries to fold a following register move with the given opcode into
        the instruction p by retargeting p's destination register. }
      function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;

      { First peephole pass, invoked per list entry p. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

      { Second peephole pass over the current block. }
      procedure PeepHoleOptPass2; override;
    End;
  33. Implementation
  34. uses
  35. globals,aasmbase,aasmcpu,cpuinfo,verbose;
  { Returns true when instr is an instruction entry (ait_instruction) whose
    opcode equals op. instr itself is not checked for nil. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=(taicpu(instr).opcode=op);
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; reg: TRegister): boolean;
    begin
      result:=false;
      if oper.typ=top_reg then
        result:=(oper.reg=reg);
    end;
  { Returns true when the first two operands of next are the same register
    and that register is also operand 0 of this. The type of this.oper[0]
    is not verified here. }
  function IsSameReg(this,next: taicpu): boolean;
    begin
      result:=false;
      { both leading operands of next must be registers }
      if next.oper[0]^.typ<>top_reg then
        exit;
      if next.oper[1]^.typ<>top_reg then
        exit;
      { next must operate in place... }
      if next.oper[0]^.reg<>next.oper[1]^.reg then
        exit;
      { ...on the register found in operand 0 of this }
      result:=(next.oper[0]^.reg=this.oper[0]^.reg);
    end;
  { Returns true when hp is an instruction whose operand 0 is the register
    reg and whose opcode is not one of the listed opcodes that never write
    a register (nop, FPU compares, branches, stores).
    Returns false for nil or non-instruction entries. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      insn: taicpu;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      insn:=taicpu(hp);
      case insn.opcode of
        { These instructions do not write into a register at all }
        A_NOP,
        A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
        A_BA,A_BC,
        A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
          { keep result=false }
          ;
      else
        { everything else: operand 0 is taken to be the destination }
        result:=(insn.ops>0) and
          (insn.oper[0]^.typ=top_reg) and
          (insn.oper[0]^.reg=reg);
      end;
    end;
  { Returns true when p is a plain move instruction (A_MOV_D, A_MOV_S or
    A_MOVE) that ChangeToCMOV can turn into a conditional move.
    Returns false for nil and for non-instruction entries such as labels. }
  function CanBeCMOV(p: tai): boolean;
    begin
      result:=false;
      if not assigned(p) then
        exit;
      if p.typ<>ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_MOV_D,A_MOV_S,A_MOVE:
          result:=true;
      end;
    end;
  { Rewrites the move instruction p into the conditional move selected by
    cond and appends reg as third operand (the condition register).
    Raises an internal error when cond is not one of C_COP1TRUE,
    C_COP1FALSE, C_EQ, C_NE (2014061705), or when p is not one of
    A_MOV_D / A_MOV_S / A_MOVE (2014061701..2014061704, by condition). }
  procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);

    { Replaces the opcode of p by the double/single/integer variant given;
      reports errnum if p is not a supported move. }
    procedure SwitchOpcode(forDouble,forSingle,forInt: TAsmOp; errnum: longint);
      begin
        case p.opcode of
          A_MOV_D: p.opcode:=forDouble;
          A_MOV_S: p.opcode:=forSingle;
          A_MOVE: p.opcode:=forInt;
        else
          InternalError(errnum);
        end;
      end;

    begin
      case cond of
        C_COP1TRUE:
          SwitchOpcode(A_MOVT_D,A_MOVT_S,A_MOVT,2014061701);
        C_COP1FALSE:
          SwitchOpcode(A_MOVF_D,A_MOVF_S,A_MOVF,2014061702);
        C_EQ:
          SwitchOpcode(A_MOVZ_D,A_MOVZ_S,A_MOVZ,2014061703);
        C_NE:
          SwitchOpcode(A_MOVN_D,A_MOVN_S,A_MOVN,2014061704);
      else
        InternalError(2014061705);
      end;
      { the conditional forms take the tested register as operand 2 }
      p.ops:=3;
      p.loadreg(2,reg);
    end;
  { Returns true when the instruction hp reads reg through one of its
    operands 1..ops-1: either as a register operand, or as base/index of a
    memory reference. Operand 0 is not examined here.
    Returns false for nil or non-instruction entries.

    Fix: the former "(I<2)" filter on register operands ignored operand 2,
    so an instruction such as "addu $t0,$t1,$t0" was reported as not reading
    $t0. Every register operand from index 1 onwards is now considered,
    which can only make the callers more conservative. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      result:=false;
      if not (assigned(hp) and (hp.typ=ait_instruction)) then
        exit;
      p:=taicpu(hp);
      i:=1;
      while (i<p.ops) do
        begin
          case p.oper[i]^.typ of
            top_reg:
              result:=(p.oper[i]^.reg=reg);
            top_ref:
              result:=
                (p.oper[i]^.ref^.base=reg) or
                (p.oper[i]^.ref^.index=reg);
          end;
          if result then
            exit; { bail out as soon as a read of reg is found }
          Inc(i);
        end;
    end;
  { Advances from Current via GetNextInstruction until the walk fails, or
    reaches an entry that is not an instruction, or an instruction that
    mentions reg, or a call/jump. The entry reached is returned in Next.
    If the walk stopped on a call/jump instruction, the function returns
    false and sets Next to nil; otherwise it returns the result of the last
    GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
      { never hand out a call/jump, even if it also mentions reg }
      if Result and
         (Next.typ=ait_instruction) and
         is_calljmp(taicpu(Next).opcode) then
        begin
          Next:=nil;
          Result:=false;
        end;
    end;
  { Updates the used-register set for reg's register type past p and then
    decides whether reg is still needed after p. The answer is true only if
    the set marks reg as used, p itself does not load reg with a new value,
    and one of the following holds for the instruction following p:
    there is none, it reads reg, or it does not overwrite reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      RegUsedAfterInstruction:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p denotes the following instruction, if any }
      if not GetNextInstruction(p,p) then
        begin
          RegUsedAfterInstruction:=true;
          exit;
        end;
      RegUsedAfterInstruction:=
        instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Folds
        op      $reg1,...
        opcode  $reg2,$reg1
        dealloc $reg1
    into
        op      $reg2,...
    where opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.

    p is the "op" instruction; its operand 0 is taken to be $reg1.
    The register allocation markers of both registers are adjusted when
    they can be found. Note that result is never set to true in this
    implementation, so the function returns false even after a fold. }
  function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
    var
      movNode,prevNode: tai;
      allocNode,deallocNode: tai_regalloc;
      foldedReg,targetReg: TRegister;
    begin
      result:=false;
      if taicpu(p).ops<=1 then
        exit;
      foldedReg:=taicpu(p).oper[0]^.reg;

      { the next user of $reg1 must be the expected move reading $reg1 }
      if not GetNextInstructionUsingReg(p,movNode,foldedReg) then
        exit;
      if not MatchInstruction(movNode,opcode) then
        exit;
      if not MatchOperand(taicpu(movNode).oper[1]^,foldedReg) then
        exit;

      { the destination register of mov cannot be used between p and the mov }
      targetReg:=taicpu(movNode).oper[0]^.reg;
      if RegUsedBetween(targetReg,p,movNode) then
        exit;

      { $reg1 must be released right after the mov }
      deallocNode:=FindRegDealloc(foldedReg,tai(movNode.Next));
      if not assigned(deallocNode) then
        exit;

      { $reg1 is not used anymore, try to find its allocation
        and remove it if possible }
      GetLastInstruction(p,prevNode);
      asml.Remove(deallocNode);
      allocNode:=FindRegAlloc(foldedReg,tai(prevNode.Next));
      if assigned(allocNode) then
        begin
          asml.Remove(allocNode);
          allocNode.free;
          deallocNode.free;
        end
      else
        { no matching allocation found: keep the release, directly after p }
        asml.InsertAfter(deallocNode,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movNode,prevNode);
      allocNode:=FindRegAlloc(targetReg,tai(prevNode.Next));
      if assigned(allocNode) then
        begin
          asml.Remove(allocNode);
          asml.InsertBefore(allocNode,p);
          { adjust used regs }
          IncludeRegInUsedRegs(targetReg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,targetReg);
      asml.remove(movNode);
      movNode.free;
    end;
  { First peephole pass for a single list entry p.

    Handled patterns (instruction entries only):
      - A_SLL : "sll/sra by 16" sign extension feeding only a 16-bit store
                is dropped and the store reads the original register.
      - A_SRL : a following "andi" that only masks bits already cleared by
                the shift is removed.
      - A_ANDI: a following sll/sra sign extension is removed when the mask
                already guarantees a clear sign bit; a zero extension that
                only feeds a byte/halfword store is removed.
      - other listed ALU, load and FPU opcodes: a following register move
        is folded into the instruction (see TryRemoveMov).

    p may be replaced by a later instruction when p itself is removed.
    The function result is always false in this implementation.

    Fix: in the A_ANDI "zero extension before store" test the two
    alternatives (255 with A_SB / 65535 with A_SH) were not parenthesised.
    Because "and" binds tighter than "or", the 255/A_SB alternative skipped
    the check that the store actually stores the andi destination register,
    and the 65535/A_SH alternative could be evaluated without a preceding
    successful GetNextInstruction. The alternatives are now grouped. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      next,next2: tai;
      TmpUsedRegs: TAllUsedRegs;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_SLL:
                begin
                  { if this is a sign extension... }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
                    (taicpu(next).oper[2]^.val=16) and
                    { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
                    GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
                    MatchInstruction(next2,A_SH) and
                    (taicpu(next2).oper[0]^.typ=top_reg) and
                    (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    { the initial register may not be reused }
                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      UpdateUsedRegs(TmpUsedRegs, tai(next.next));
                      if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next2,TmpUsedRegs) then
                        begin
                          { store the unextended source register directly }
                          taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
                          asml.remove(p);
                          asml.remove(next);
                          p.free;
                          next.free;
                          p:=next2;
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;
              A_SRL:
                begin
                  { Remove 'andi' in sequences
                      srl   Rx,Ry,16
                      andi  Rx,Rx,65535
                      srl   Rx,Ry,24
                      andi  Rx,Rx,255
                    since 'srl' clears all relevant upper bits }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_ANDI) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    ((
                      (taicpu(p).oper[2]^.val>=16) and
                      (taicpu(next).oper[2]^.val=65535)
                    ) or (
                      (taicpu(p).oper[2]^.val>=24) and
                      (taicpu(next).oper[2]^.val=255)
                    )) then
                    begin
                      asml.remove(next);
                      next.free;
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;
              A_ANDI:
                begin
                  { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SLL) and
                    GetNextInstruction(next,next2) and
                    MatchInstruction(next2,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    IsSameReg(taicpu(p),taicpu(next2)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next2).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
                    ((
                      (taicpu(p).oper[2]^.val<=$7fff) and
                      (taicpu(next).oper[2]^.val=16)
                    ) or (
                      (taicpu(p).oper[2]^.val<=$7f) and
                      (taicpu(next).oper[2]^.val=24)
                    )) then
                    begin
                      asml.remove(next);
                      asml.remove(next2);
                      next.free;
                      next2.free;
                    end
                  { Remove zero extension if register is used only for byte/word memory store }
                  else if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    { the mask/store-size alternatives must be grouped so that
                      the register checks below apply to both of them }
                    (
                      ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
                      ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH))
                    ) and
                    (taicpu(next).oper[0]^.typ=top_reg) and
                    (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next,TmpUsedRegs) then
                        begin
                          { store the unmasked source register directly }
                          taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                          asml.remove(p);
                          p.free;
                          p:=next;
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;
              A_LB,A_LBU,A_LH,A_LHU,A_LW,
              A_ADD,A_ADDU,
              A_ADDI,A_ADDIU,
              A_SUB,A_SUBU,
              A_SRA,A_SRAV,
              A_SRLV,
              A_SLLV,
              A_AND,A_OR,A_XOR,A_ORI,A_XORI:
                TryRemoveMov(p,A_MOVE);
              A_LWC1,
              A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
              A_ABS_s, A_NEG_s, A_SQRT_s,
              A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
                TryRemoveMov(p,A_MOV_s);
              A_LDC1,
              A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
              A_ABS_d, A_NEG_d, A_SQRT_d,
              A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
                TryRemoveMov(p,A_MOV_d);
            end;
          end;
      end;
    end;
  { Second peephole pass: converts short conditional branches around plain
    register moves into conditional moves. Does nothing when the selected
    CPU type is below cpu_mips4.

    Two shapes are recognised for a conditional branch (A_BC) whose
    condition is C_COP1TRUE/C_COP1FALSE, or C_EQ/C_NE against NR_R0:

        bCC xxx                      bCC xxx
        <1..4 movs>                  <1..3 movs>
      xxx:                           b yyy
                                   xxx:
                                     <movs>
                                   yyy:

    The movs are rewritten by ChangeToCMOV and the branch(es) removed.

    Fix: the second shape did not require either mov run to be non-empty
    although both are converted with "repeat ... until" loops. An empty
    first run handed the "b yyy" instruction to ChangeToCMOV and left p
    pointing at an entry that is freed afterwards; an empty second run cast
    a non-mov entry to taicpu. Both runs must now contain at least one mov,
    otherwise the sequence is left untouched. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p: tai;
      l: longint;
      hp1,hp2,hp3: tai;
      condition: tasmcond;
      condreg: tregister;
    begin
      { Currently, everything below is mips4+ }
      if (current_settings.cputype<cpu_mips4) then
        exit;
      p:=BlockStart;
      ClearUsedRegs;
      while (p<>BlockEnd) Do
        begin
          UpdateUsedRegs(tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                case taicpu(p).opcode of
                  A_BC:
                    begin
                      { determine the register the conditional moves must test }
                      condreg:=NR_NO;
                      if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
                        { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
                        condreg:=NR_FCC0
                      else if (taicpu(p).condition in [C_EQ,C_NE]) then
                        begin
                          if (taicpu(p).oper[0]^.reg=NR_R0) then
                            condreg:=taicpu(p).oper[1]^.reg
                          else if (taicpu(p).oper[1]^.reg=NR_R0) then
                            condreg:=taicpu(p).oper[0]^.reg
                        end;
                      if (condreg<>NR_NO) then
                        begin
                          { check for
                                bCC   xxx
                                <several movs>
                             xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while CanBeCMOV(hp1) do       // CanBeCMOV returns False for nil or labels
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        ChangeToCMOV(taicpu(hp1),condition,condreg);
                                        GetNextInstruction(hp1,hp1);
                                      until not CanBeCMOV(hp1);
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
                                      RemoveDelaySlot(hp2);
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         bCC   xxx
                                         <several movs 1>
                                         b     yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { <several movs 1> must hold 1..3 movs; with none,
                                      the first entry after bCC would be the jump itself }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).opcode=A_BA) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        { <several movs 2> must not be empty either }
                                        (l>0) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            ChangeToCMOV(taicpu(hp1),condition,condreg);
                                            GetNextInstruction(hp1,hp1);
                                          until not CanBeCMOV(hp1);
                                          { hp2 is still at b yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            ChangeToCMOV(taicpu(hp1),condition,condreg);
                                            GetNextInstruction(hp1,hp1);
                                          until not CanBeCMOV(hp1);
                                          { remove bCC }
                                          tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs;
                                          RemoveDelaySlot(hp3);
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
                                          RemoveDelaySlot(hp2);
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                end;
              end;
          end;
          UpdateUsedRegs(p);
          p:=tai(p.next);
        end;
    end;
  initialization
    { install this unit's optimizer class as the assembler optimizer }
    casmoptimizer:=TCpuAsmOptimizer;
  end.