aoptcpu.pas 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses
  22. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai;
  Type
    { CPU-specific assembler optimizer for this target.  It extends
      TAsmOptimizer with two peephole passes and the helper queries they
      rely on. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Walks forward from Current until an instruction referencing reg is
        found.  Returns false (and Next=nil) when a call/jump instruction is
        reached. }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai;
        reg: TRegister): Boolean;
      { Tells whether reg still carries a needed value after instruction p,
        based on the tracked register usage in AllUsedRegs and on the
        instruction that follows p. }
      function RegUsedAfterInstruction(reg: TRegister; p: tai;
        var AllUsedRegs: TAllUsedRegs): Boolean;
      { Tries to fold a following register move with the given opcode into
        the instruction p, so that p writes the move's destination directly. }
      function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
      { First peephole pass, applied to the single list entry p. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      { Second peephole pass over the current block (conditional-move
        conversion of short branch sequences). }
      procedure PeepHoleOptPass2; override;
    End;
  33. Implementation
  34. uses
  35. globals,aasmbase,aasmcpu,cpuinfo,verbose;
  { Returns true when instr is a machine instruction (ait_instruction) whose
    opcode equals op.  instr itself is not checked for nil. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=(taicpu(instr).opcode=op);
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; reg: TRegister): boolean;
    begin
      result:=false;
      if oper.typ=top_reg then
        result:=(oper.reg=reg);
    end;
  { Returns true when the first two operands of "next" are one and the same
    register, and that register equals the register stored in operand 0 of
    "this".  The operand type of this.oper[0] is not checked. }
  function IsSameReg(this,next: taicpu): boolean;
    begin
      result:=false;
      if next.oper[0]^.typ<>top_reg then
        exit;
      if next.oper[1]^.typ<>top_reg then
        exit;
      if next.oper[0]^.reg<>next.oper[1]^.reg then
        exit;
      result:=(next.oper[0]^.reg=this.oper[0]^.reg);
    end;
  { Returns true when hp is an instruction whose operand 0 is the register
    reg and whose opcode is not one of the listed opcodes that write no
    register.  Returns false for nil and for non-instruction entries. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      case instr.opcode of
        { These instructions do not write into a register at all }
        A_NOP,
        A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
        A_BA,A_BC,
        A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
          { result stays false }
          ;
      else
        result:=(instr.ops>0) and
          (instr.oper[0]^.typ=top_reg) and
          (instr.oper[0]^.reg=reg);
      end;
    end;
  { Returns true when p is an instruction with one of the plain move opcodes
    (A_MOV_D, A_MOV_S, A_MOVE) that ChangeToCMOV knows how to convert.
    Returns false for nil and for non-instruction entries. }
  function CanBeCMOV(p: tai): boolean;
    begin
      result:=false;
      if not assigned(p) then
        exit;
      if p.typ<>ait_instruction then
        exit;
      result:=(taicpu(p).opcode in [A_MOV_D,A_MOV_S,A_MOVE]);
    end;
  { Turns the plain move instruction p into the conditional move selected by
    cond and appends reg as its third operand.

      p    - instruction to rewrite; its opcode must be A_MOV_D, A_MOV_S or
             A_MOVE
      cond - C_COP1TRUE, C_COP1FALSE, C_EQ or C_NE
      reg  - register stored into operand 2 of the rewritten instruction

    An unsupported opcode raises the internal error belonging to the given
    condition; an unsupported condition raises InternalError(2014061705). }
  procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);

    { Replaces p.opcode by the variant matching its current move kind, or
      raises InternalError(errcode) when p is not a supported move. }
    procedure Retarget(forDouble,forSingle,forInteger: TAsmOp; errcode: longint);
      begin
        case p.opcode of
          A_MOV_D:
            p.opcode:=forDouble;
          A_MOV_S:
            p.opcode:=forSingle;
          A_MOVE:
            p.opcode:=forInteger;
        else
          InternalError(errcode);
        end;
      end;

    begin
      case cond of
        C_COP1TRUE:
          Retarget(A_MOVT_D,A_MOVT_S,A_MOVT,2014061701);
        C_COP1FALSE:
          Retarget(A_MOVF_D,A_MOVF_S,A_MOVF,2014061702);
        C_EQ:
          Retarget(A_MOVZ_D,A_MOVZ_S,A_MOVZ,2014061703);
        C_NE:
          Retarget(A_MOVN_D,A_MOVN_S,A_MOVN,2014061704);
      else
        InternalError(2014061705);
      end;
      { the conditional forms take the condition register as a third operand }
      p.ops:=3;
      p.loadreg(2,reg);
    end;
  { Returns true when the instruction hp reads reg through any operand other
    than operand 0: either directly as a register operand, or as base/index
    register of a memory reference.  Returns false for nil and for
    non-instruction entries.

    Operand 0 is deliberately not examined here; regLoadedWithNewValue
    treats it as the operand being written.

    Previously a register operand only counted when it sat at index 1, so a
    register read through operand 2 (e.g. the second source of a
    three-register instruction) went unnoticed and the register could be
    reported as dead while it was still being read. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      result:=false;
      if not (assigned(hp) and (hp.typ=ait_instruction)) then
        exit;
      p:=taicpu(hp);
      for i:=1 to p.ops-1 do
        begin
          case p.oper[i]^.typ of
            top_reg:
              result:=(p.oper[i]^.reg=reg);
            top_ref:
              result:=
                (p.oper[i]^.ref^.base=reg) or
                (p.oper[i]^.ref^.index=reg);
          end;
          { Bailout if we found something }
          if result then
            exit;
        end;
    end;
  { Searches forward from Current for the next list entry that matters for
    reg.

      Current - entry to start after
      Next    - receives the entry the search stopped at
      reg     - register being tracked

    The search stops at the first entry that is not an instruction, that
    references reg, or that is a call/jump.  Returns false when no further
    entry exists or when the search stopped at a call/jump; in the latter
    case Next is set to nil.

    The call/jump test after the loop is only applied to real instructions:
    the loop may also stop at a non-instruction entry, and casting such an
    entry to taicpu to read its opcode is not valid. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
      until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
        (is_calljmp(taicpu(Next).opcode));
      if Result and
        (Next.typ=ait_instruction) and
        is_calljmp(taicpu(Next).opcode) then
        begin
          Result:=false;
          Next:=nil;
        end;
    end;
  { Decides whether reg still holds a value that is needed after p.

      reg         - register to examine
      p           - instruction after which the register is examined
      AllUsedRegs - register usage tracking; it is updated with the entries
                    following p as a side effect

    The answer is true only when the tracking marks reg as used, p itself
    does not overwrite reg, and the instruction following p either does not
    exist, reads reg, or does not overwrite reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p denotes the instruction following the original p }
      if not GetNextInstruction(p,p) then
        begin
          Result:=true;
          exit;
        end;
      Result:=instructionLoadsFromReg(reg,p) or
        not regLoadedWithNewValue(reg,p);
    end;
  { Fold
      op      $reg1,...
      opcode  $reg2,$reg1
      dealloc $reg1
    into
      op      $reg2,...
    opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.

      p      - the "op" instruction; its operand 0 is redirected on success
      opcode - opcode of the move instruction that is to be folded away

    The function result is false in every case; the transformation shows
    only in the modified instruction list. }
  function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
    var
      movInstr,prevInstr: tai;
      regAlloc,regDealloc: tai_regalloc;
      tmpReg,dstReg: TRegister;
    begin
      result:=false;
      if taicpu(p).ops<=1 then
        exit;
      tmpReg:=taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p,movInstr,tmpReg) then
        exit;
      if not MatchInstruction(movInstr,opcode) then
        exit;
      if not MatchOperand(taicpu(movInstr).oper[1]^,tmpReg) then
        exit;
      dstReg:=taicpu(movInstr).oper[0]^.reg;
      { the destination register of mov cannot be used between p and the mov }
      if RegUsedBetween(dstReg,p,movInstr) then
        exit;
      regDealloc:=FindRegDealloc(tmpReg,tai(movInstr.Next));
      if not assigned(regDealloc) then
        exit;

      { tmpReg is not used anymore, try to find its allocation
        and remove it if possible }
      GetLastInstruction(p,prevInstr);
      asml.Remove(regDealloc);
      regAlloc:=FindRegAlloc(tmpReg,tai(prevInstr.Next));
      if not assigned(regAlloc) then
        asml.InsertAfter(regDealloc,p)
      else
        begin
          asml.Remove(regAlloc);
          regAlloc.free;
          regDealloc.free;
        end;

      { try to move the allocation of the target register }
      GetLastInstruction(movInstr,prevInstr);
      regAlloc:=FindRegAlloc(dstReg,tai(prevInstr.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          asml.InsertBefore(regAlloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstReg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstReg);
      asml.remove(movInstr);
      movInstr.free;
    end;
  { First peephole pass for a single list entry.

      p - entry to optimize; when p itself is removed it is set to the
          instruction that took over its role

    Handled patterns:
      * sll/sra by 16 feeding only a 16-bit store: the sign extension is
        dropped
      * srl followed by an andi that masks bits the shift already cleared:
        the andi is dropped
      * andi followed by a sll/sra sign extension that cannot change the
        value: the extension is dropped
      * andi 255/65535 feeding only a byte/halfword store: the andi is
        dropped
      * otherwise, for the listed arithmetic opcodes, a following register
        move is folded into the instruction (TryRemoveMov)

    The function result is never set to true by this routine.

    In the "andi + store" condition the two alternatives (255 with sb,
    65535 with sh) are grouped in parentheses.  Without them "and" binds
    tighter than "or", so the sb alternative skipped the register check on
    the store and the sh alternative skipped the constant/next-instruction
    checks. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      next,next2: tai;
      TmpUsedRegs: TAllUsedRegs;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_SLL:
                begin
                  { if this is a sign extension... }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
                    (taicpu(next).oper[2]^.val=16) and
                    { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
                    GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
                    MatchInstruction(next2,A_SH) and
                    (taicpu(next2).oper[0]^.typ=top_reg) and
                    (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    { the initial register may not be reused }
                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      UpdateUsedRegs(TmpUsedRegs, tai(next.next));
                      if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next2,TmpUsedRegs) then
                        begin
                          { store the unextended source register directly }
                          taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
                          asml.remove(p);
                          asml.remove(next);
                          p.free;
                          next.free;
                          p:=next2;
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;
              A_SRL:
                begin
                  { Remove 'andi' in sequences
                      srl   Rx,Ry,16
                      andi  Rx,Rx,65535

                      srl   Rx,Ry,24
                      andi  Rx,Rx,255
                    since 'srl' clears all relevant upper bits }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_ANDI) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    ((
                      (taicpu(p).oper[2]^.val>=16) and
                      (taicpu(next).oper[2]^.val=65535)
                    ) or (
                      (taicpu(p).oper[2]^.val>=24) and
                      (taicpu(next).oper[2]^.val=255)
                    )) then
                    begin
                      asml.remove(next);
                      next.free;
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;
              A_ANDI:
                begin
                  { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SLL) and
                    GetNextInstruction(next,next2) and
                    MatchInstruction(next2,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    IsSameReg(taicpu(p),taicpu(next2)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next2).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
                    ((
                      (taicpu(p).oper[2]^.val<=$7fff) and
                      (taicpu(next).oper[2]^.val=16)
                    ) or (
                      (taicpu(p).oper[2]^.val<=$7f) and
                      (taicpu(next).oper[2]^.val=24)
                    )) then
                    begin
                      asml.remove(next);
                      asml.remove(next2);
                      next.free;
                      next2.free;
                    end
                  { Remove zero extension if register is used only for byte/word memory store }
                  else if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    { the parentheses around the "or" are essential, see header }
                    (
                      ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
                      ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH))
                    ) and
                    (taicpu(next).oper[0]^.typ=top_reg) and
                    (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,next,TmpUsedRegs) then
                        begin
                          { store the unmasked source register directly }
                          taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                          asml.remove(p);
                          p.free;
                          p:=next;
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;
              A_ADD,A_ADDU,
              A_ADDI,A_ADDIU,
              A_SUB,A_SUBU,
              A_SRA,A_SRAV,
              A_SRLV,
              A_SLLV,
              A_AND,A_OR,A_XOR,A_ORI,A_XORI:
                TryRemoveMov(p,A_MOVE);
              A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
              A_ABS_s, A_NEG_s, A_SQRT_s,
              A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
                TryRemoveMov(p,A_MOV_s);
              A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
              A_ABS_d, A_NEG_d, A_SQRT_d,
              A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
                TryRemoveMov(p,A_MOV_d);
            end;
          end;
      end;
    end;
  { Second peephole pass over the block BlockStart..BlockEnd.

    Converts short conditional branch sequences consisting only of plain
    moves into conditional moves and removes the branches:

        bCC xxx                      bCC xxx
        <1..4 movs>                  <1..3 movs 1>
      xxx:                           b yyy
                                   xxx:
                                     <movs 2>
                                   yyy:

    Only branches on a coprocessor condition, or eq/ne comparisons against
    NR_R0, are considered.  Nothing is done when the selected cpu type is
    below cpu_mips4.

    Both move groups of the second pattern must be non-empty: the
    conversion loops are "repeat ... until" loops and would otherwise hand
    the branch or a label to ChangeToCMOV. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p: tai;
      l: longint;
      hp1,hp2,hp3: tai;
      condition: tasmcond;
      condreg: tregister;
    begin
      { Currently, everything below is mips4+ }
      if (current_settings.cputype<cpu_mips4) then
        exit;
      p:=BlockStart;
      ClearUsedRegs;
      while (p<>BlockEnd) Do
        begin
          UpdateUsedRegs(tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                case taicpu(p).opcode of
                  A_BC:
                    begin
                      { determine the register the conditional moves must test }
                      condreg:=NR_NO;
                      if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
                        { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
                        condreg:=NR_FCC0
                      else if (taicpu(p).condition in [C_EQ,C_NE]) then
                        begin
                          if (taicpu(p).oper[0]^.reg=NR_R0) then
                            condreg:=taicpu(p).oper[1]^.reg
                          else if (taicpu(p).oper[1]^.reg=NR_R0) then
                            condreg:=taicpu(p).oper[0]^.reg
                        end;
                      if (condreg<>NR_NO) then
                        begin
                          { check for
                              bCC   xxx
                              <several movs>
                            xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while CanBeCMOV(hp1) do       // CanBeCMOV returns False for nil or labels
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        ChangeToCMOV(taicpu(hp1),condition,condreg);
                                        GetNextInstruction(hp1,hp1);
                                      until not CanBeCMOV(hp1);
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
                                      RemoveDelaySlot(hp2);
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                      bCC   xxx
                                      <several movs 1>
                                      b     yyy
                                    xxx:
                                      <several movs 2>
                                    yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { <several movs 1> must not be empty, it is
                                      converted by a repeat..until loop below }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).opcode=A_BA) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { <several movs 2> must not be empty either }
                                      if (l>0) and
                                        assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            ChangeToCMOV(taicpu(hp1),condition,condreg);
                                            GetNextInstruction(hp1,hp1);
                                          until not CanBeCMOV(hp1);
                                          { hp2 is still at b yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            ChangeToCMOV(taicpu(hp1),condition,condreg);
                                            GetNextInstruction(hp1,hp1);
                                          until not CanBeCMOV(hp1);
                                          { remove bCC }
                                          tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs;
                                          RemoveDelaySlot(hp3);
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
                                          RemoveDelaySlot(hp2);
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                end;
              end;
          end;
          UpdateUsedRegs(p);
          p:=tai(p.next);
        end;
    end;
  { Unit initialization: makes TCpuAsmOptimizer the class stored in
    casmoptimizer. }
  initialization
    casmoptimizer:=TCpuAsmOptimizer;
  end.