aoptcpu.pas 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses
  22. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
  23. Type
  24. TAsmOpSet = set of TAsmOp;
  25. TCpuAsmOptimizer = class(TAsmOptimizer)
  26. function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
  27. function GetNextInstructionUsingReg(Current: tai;
  28. var Next: tai; reg: TRegister): Boolean;
  29. function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
  30. function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
  31. function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
  32. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  33. procedure PeepHoleOptPass2; override;
  34. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  35. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  36. End;
  37. Implementation
  38. uses
  39. cutils,globtype,globals,aasmbase,cpuinfo,verbose;
  40. function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
  41. begin
  42. result :=
  43. (instr.typ = ait_instruction) and
  44. (taicpu(instr).opcode = op);
  45. end;
  46. function MatchOperand(const oper: TOper; reg: TRegister): boolean;
  47. begin
  48. result:=(oper.typ=top_reg) and (oper.reg=reg);
  49. end;
  50. function IsSameReg(this,next: taicpu): boolean;
  51. begin
  52. result:=(next.oper[0]^.typ=top_reg) and
  53. (next.oper[1]^.typ=top_reg) and
  54. (next.oper[0]^.reg=next.oper[1]^.reg) and
  55. (next.oper[0]^.reg=this.oper[0]^.reg);
  56. end;
  57. function CanBeCMOV(p: tai; condreg: tregister): boolean;
  58. begin
  59. result:=assigned(p) and (p.typ=ait_instruction) and
  60. ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or
  61. (
  62. { register with condition must not be overwritten }
  63. (taicpu(p).opcode=A_MOVE) and
  64. (taicpu(p).oper[0]^.reg<>condreg)
  65. ));
  66. end;
  67. procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);
  68. begin
  69. case cond of
  70. C_COP1TRUE:
  71. case p.opcode of
  72. A_MOV_D: p.opcode:=A_MOVT_D;
  73. A_MOV_S: p.opcode:=A_MOVT_S;
  74. A_MOVE: p.opcode:=A_MOVT;
  75. else
  76. InternalError(2014061701);
  77. end;
  78. C_COP1FALSE:
  79. case p.opcode of
  80. A_MOV_D: p.opcode:=A_MOVF_D;
  81. A_MOV_S: p.opcode:=A_MOVF_S;
  82. A_MOVE: p.opcode:=A_MOVF;
  83. else
  84. InternalError(2014061702);
  85. end;
  86. C_EQ:
  87. case p.opcode of
  88. A_MOV_D: p.opcode:=A_MOVZ_D;
  89. A_MOV_S: p.opcode:=A_MOVZ_S;
  90. A_MOVE: p.opcode:=A_MOVZ;
  91. else
  92. InternalError(2014061703);
  93. end;
  94. C_NE:
  95. case p.opcode of
  96. A_MOV_D: p.opcode:=A_MOVN_D;
  97. A_MOV_S: p.opcode:=A_MOVN_S;
  98. A_MOVE: p.opcode:=A_MOVN;
  99. else
  100. InternalError(2014061704);
  101. end;
  102. else
  103. InternalError(2014061705);
  104. end;
  105. p.ops:=3;
  106. p.loadreg(2,reg);
  107. end;
  108. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  109. var
  110. p: taicpu;
  111. i: longint;
  112. begin
  113. result:=false;
  114. if not (assigned(hp) and (hp.typ=ait_instruction)) then
  115. exit;
  116. p:=taicpu(hp);
  117. i:=1;
  118. while(i<p.ops) do
  119. begin
  120. case p.oper[I]^.typ of
  121. top_reg:
  122. result:=(p.oper[I]^.reg=reg) and (I<2);
  123. top_ref:
  124. result:=
  125. (p.oper[I]^.ref^.base=reg) or
  126. (p.oper[I]^.ref^.index=reg);
  127. end;
  128. if result then exit; {Bailout if we found something}
  129. Inc(I);
  130. end;
  131. end;
  132. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  133. var
  134. p: taicpu;
  135. begin
  136. p:=taicpu(hp);
  137. result:=false;
  138. if not ((assigned(hp)) and (hp.typ=ait_instruction)) then
  139. exit;
  140. case p.opcode of
  141. { These instructions do not write into a register at all }
  142. A_NOP,
  143. A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
  144. A_BA,A_BC,
  145. A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
  146. exit;
  147. end;
  148. result:=(p.ops>0) and (p.oper[0]^.typ=top_reg) and
  149. (p.oper[0]^.reg=reg);
  150. end;
  151. function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  152. var
  153. i : Longint;
  154. begin
  155. result:=false;
  156. for i:=0 to taicpu(p1).ops-1 do
  157. if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
  158. begin
  159. result:=true;
  160. exit;
  161. end;
  162. end;
  163. function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  164. var Next: tai; reg: TRegister): Boolean;
  165. begin
  166. Next:=Current;
  167. repeat
  168. Result:=GetNextInstruction(Next,Next);
  169. until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
  170. (is_calljmp(taicpu(Next).opcode));
  171. end;
  172. function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
  173. var
  174. next,hp1: tai;
  175. alloc,dealloc: tai_regalloc;
  176. begin
  177. { Fold
  178. op $reg1,...
  179. opcode $reg2,$reg1
  180. dealloc $reg1
  181. into
  182. op $reg2,...
  183. opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
  184. }
  185. result:=false;
  186. if (taicpu(p).ops>0) and
  187. GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  188. MatchInstruction(next,opcode) and
  189. MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
  190. { the destination register of mov cannot be used between p and next }
  191. (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then
  192. begin
  193. dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
  194. if assigned(dealloc) then
  195. begin
  196. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  197. and remove it if possible }
  198. GetLastInstruction(p,hp1);
  199. asml.Remove(dealloc);
  200. alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  201. if assigned(alloc) then
  202. begin
  203. asml.Remove(alloc);
  204. alloc.free;
  205. dealloc.free;
  206. end
  207. else
  208. asml.InsertAfter(dealloc,p);
  209. { try to move the allocation of the target register }
  210. GetLastInstruction(next,hp1);
  211. alloc:=FindRegAlloc(taicpu(next).oper[0]^.reg,tai(hp1.Next));
  212. if assigned(alloc) then
  213. begin
  214. asml.Remove(alloc);
  215. asml.InsertBefore(alloc,p);
  216. { adjust used regs }
  217. IncludeRegInUsedRegs(taicpu(next).oper[0]^.reg,UsedRegs);
  218. end;
  219. { finally get rid of the mov }
  220. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  221. asml.remove(next);
  222. next.free;
  223. result:=true;
  224. end
  225. else // no dealloc found
  226. begin
  227. { try to optimize the typical call sequence
  228. lw $reg, (whatever)
  229. <alloc volatile registers (including $reg!!)>
  230. move $t9,$reg
  231. jalr $t9
  232. if $reg is nonvolatile, its value may be used after call
  233. and we cannot safely replace it with $t9 }
  234. if (opcode=A_MOVE) and
  235. (taicpu(next).oper[0]^.reg=NR_R25) and
  236. GetNextInstruction(next,hp1) and
  237. MatchInstruction(hp1,A_JALR) and
  238. MatchOperand(taicpu(hp1).oper[0]^,NR_R25) and
  239. assigned(FindRegAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) then
  240. begin
  241. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  242. asml.remove(next);
  243. next.free;
  244. result:=true;
  245. end;
  246. end;
  247. end;
  248. end;
  249. function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
  250. begin
  251. result:=(next.opcode in storeops) and
  252. MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and
  253. { Ry cannot be modified between move and store }
  254. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  255. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
  256. if result then
  257. begin
  258. next.loadreg(0,taicpu(p).oper[1]^.reg);
  259. asml.remove(p);
  260. p.free;
  261. p:=next;
  262. end;
  263. end;
  264. function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
  265. begin
  266. result:=(next.ops>1) and
  267. (next.oper[1]^.typ=top_ref) and
  268. (next.oper[1]^.ref^.refaddr<>addr_full) and
  269. (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  270. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  271. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
  272. if result then
  273. begin
  274. next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  275. asml.remove(p);
  276. p.free;
  277. p:=next;
  278. end;
  279. end;
  280. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  281. var
  282. next,next2: tai;
  283. begin
  284. result:=false;
  285. case p.typ of
  286. ait_instruction:
  287. begin
  288. case taicpu(p).opcode of
  289. A_BC:
  290. begin
  291. { BEQ/BNE with same register are bogus, but can be generated for code like
  292. "if lo(qwordvar)=cardinal(qwordvar) ...",
  293. optimizations below can also yield them, e.g. if one register was initially R0. }
  294. if (taicpu(p).condition in [C_EQ,C_NE]) and
  295. (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
  296. begin
  297. if (taicpu(p).condition=C_NE) then
  298. begin
  299. if (taicpu(p).oper[2]^.typ = top_ref) and
  300. (taicpu(p).oper[2]^.ref^.symbol is TAsmLabel) then
  301. TAsmLabel(taicpu(p).oper[2]^.ref^.symbol).decrefs;
  302. RemoveDelaySlot(p);
  303. GetNextInstruction(p,next);
  304. end
  305. else
  306. begin
  307. next:=taicpu.op_sym(A_BA,taicpu(p).oper[2]^.ref^.symbol);
  308. taicpu(next).fileinfo:=taicpu(p).fileinfo;
  309. asml.insertbefore(next,p);
  310. end;
  311. asml.remove(p);
  312. p.Free;
  313. p:=next;
  314. result:=true;
  315. end;
  316. end;
  317. A_SEH:
  318. begin
  319. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  320. MatchInstruction(next,A_SH) and
  321. MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and
  322. (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  323. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  324. begin
  325. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  326. asml.remove(p);
  327. p.free;
  328. p:=next;
  329. result:=true;
  330. end
  331. else
  332. result:=TryRemoveMov(p,A_MOVE);
  333. end;
  334. A_SEB:
  335. { TODO: can be handled similar to A_SEH, but it's almost never encountered }
  336. result:=TryRemoveMov(p,A_MOVE);
  337. A_SLL:
  338. begin
  339. { if this is a sign extension... }
  340. if (taicpu(p).oper[2]^.typ=top_const) and
  341. GetNextInstruction(p,next) and
  342. MatchInstruction(next,A_SRA) and
  343. IsSameReg(taicpu(p),taicpu(next)) and
  344. (taicpu(next).oper[2]^.typ=top_const) and
  345. (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
  346. (taicpu(next).oper[2]^.val=16) and
  347. { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
  348. GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
  349. MatchInstruction(next2,A_SH) and
  350. (taicpu(next2).oper[0]^.typ=top_reg) and
  351. (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  352. { the initial register may not be reused }
  353. (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
  354. begin
  355. if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then
  356. begin
  357. taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
  358. asml.remove(p);
  359. asml.remove(next);
  360. p.free;
  361. next.free;
  362. p:=next2;
  363. result:=true;
  364. end;
  365. end
  366. else
  367. result:=TryRemoveMov(p,A_MOVE);
  368. end;
  369. A_SRL:
  370. begin
  371. { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions }
  372. { Remove 'andi' in sequences
  373. srl Rx,Ry,16
  374. andi Rx,Rx,65535
  375. srl Rx,Ry,24
  376. andi Rx,Rx,255
  377. since 'srl' clears all relevant upper bits }
  378. if (taicpu(p).oper[2]^.typ=top_const) and
  379. GetNextInstruction(p,next) and
  380. MatchInstruction(next,A_ANDI) and
  381. IsSameReg(taicpu(p),taicpu(next)) and
  382. (taicpu(next).oper[2]^.typ=top_const) and
  383. ((
  384. (taicpu(p).oper[2]^.val>=16) and
  385. (taicpu(next).oper[2]^.val=65535)
  386. ) or (
  387. (taicpu(p).oper[2]^.val>=24) and
  388. (taicpu(next).oper[2]^.val=255)
  389. )) then
  390. begin
  391. asml.remove(next);
  392. next.free;
  393. result:=true;
  394. end
  395. else
  396. result:=TryRemoveMov(p,A_MOVE);
  397. end;
  398. A_ANDI:
  399. begin
  400. { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
  401. if (taicpu(p).oper[2]^.typ=top_const) and
  402. GetNextInstruction(p,next) and
  403. MatchInstruction(next,A_SLL) and
  404. GetNextInstruction(next,next2) and
  405. MatchInstruction(next2,A_SRA) and
  406. IsSameReg(taicpu(p),taicpu(next)) and
  407. IsSameReg(taicpu(p),taicpu(next2)) and
  408. (taicpu(next).oper[2]^.typ=top_const) and
  409. (taicpu(next2).oper[2]^.typ=top_const) and
  410. (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
  411. ((
  412. (taicpu(p).oper[2]^.val<=$7fff) and
  413. (taicpu(next).oper[2]^.val=16)
  414. ) or (
  415. (taicpu(p).oper[2]^.val<=$7f) and
  416. (taicpu(next).oper[2]^.val=24)
  417. )) then
  418. begin
  419. asml.remove(next);
  420. asml.remove(next2);
  421. next.free;
  422. next2.free;
  423. result:=true;
  424. end
  425. { Remove zero extension if register is used only for byte/word memory store }
  426. else if (taicpu(p).oper[2]^.typ=top_const) and
  427. GetNextInstruction(p,next) and
  428. ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
  429. ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and
  430. (taicpu(next).oper[0]^.typ=top_reg) and
  431. (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  432. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  433. begin
  434. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  435. asml.remove(p);
  436. p.free;
  437. p:=next;
  438. result:=true;
  439. end
  440. else
  441. result:=TryRemoveMov(p,A_MOVE);
  442. end;
  443. A_MOV_S:
  444. begin
  445. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  446. (next.typ=ait_instruction) then
  447. begin
  448. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then
  449. result:=true;
  450. end;
  451. end;
  452. A_MOV_D:
  453. begin
  454. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  455. (next.typ=ait_instruction) then
  456. begin
  457. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then
  458. result:=true;
  459. end;
  460. end;
  461. A_MOVE:
  462. begin
  463. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  464. (next.typ=ait_instruction) and
  465. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
  466. begin
  467. { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) }
  468. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then
  469. result:=true
  470. else if TryRemoveMovToRefIndex(p,taicpu(next)) then
  471. result:=true
  472. { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any
  473. MOVE Rx,Ry; opcode Rx,Rz,Rx ==> opcode Rx,Rz,Ry }
  474. else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_AND,A_ANDI,A_SLLV,A_SRLV,A_SRAV]) and
  475. MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  476. begin
  477. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  478. begin
  479. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  480. asml.remove(p);
  481. p.free;
  482. p:=next;
  483. result:=true;
  484. end
  485. { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE,
  486. providing further optimization possibilities }
  487. else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
  488. begin
  489. taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
  490. asml.remove(p);
  491. p.free;
  492. p:=next;
  493. result:=true;
  494. end;
  495. end
  496. { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any }
  497. else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU,
  498. A_SLL,A_SRL,A_SRA,A_SLLV,A_SRLV,A_SRAV,A_AND,A_ANDI,A_OR,A_ORI,A_XOR,A_XORI]) and
  499. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  500. begin
  501. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  502. begin
  503. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  504. asml.remove(p);
  505. p.free;
  506. p:=next;
  507. result:=true;
  508. end
  509. else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
  510. begin
  511. taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
  512. asml.remove(p);
  513. p.free;
  514. p:=next;
  515. result:=true;
  516. end;
  517. end
  518. { MULT[U] and cond.branches must be handled separately due to different operand numbers }
  519. else if (taicpu(next).opcode in [A_MULT,A_MULTU,A_BC]) and
  520. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  521. begin
  522. if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  523. begin
  524. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  525. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  526. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  527. asml.remove(p);
  528. p.free;
  529. p:=next;
  530. result:=true;
  531. end
  532. else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  533. begin
  534. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  535. if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  536. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  537. asml.remove(p);
  538. p.free;
  539. p:=next;
  540. result:=true;
  541. end;
  542. end
  543. else if TryRemoveMov(p,A_MOVE) then
  544. begin
  545. { Ended up with move between same register? Suicide then. }
  546. if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
  547. begin
  548. GetNextInstruction(p,next);
  549. asml.remove(p);
  550. p.free;
  551. p:=next;
  552. result:=true;
  553. end;
  554. end;
  555. end;
  556. end;
  557. A_ADDIU:
  558. begin
  559. { ADDIU Rx,Ry,const; load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry)
  560. ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry)
  561. ADDIU Rx,Ry,const; load Rx,(Rx) ==> load Rx,const(Ry)
  562. ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) }
  563. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  564. (next.typ=ait_instruction) and
  565. (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and
  566. (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and
  567. (taicpu(next).oper[1]^.ref^.offset=0) and
  568. (taicpu(next).oper[1]^.ref^.symbol=nil) and
  569. (
  570. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or
  571. (
  572. (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and
  573. (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW])
  574. )
  575. ) and
  576. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
  577. begin
  578. case taicpu(p).oper[2]^.typ of
  579. top_const:
  580. taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
  581. top_ref:
  582. taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^;
  583. else
  584. InternalError(2014100401);
  585. end;
  586. taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  587. asml.remove(p);
  588. p.free;
  589. p:=next;
  590. result:=true;
  591. end
  592. else
  593. result:=TryRemoveMov(p,A_MOVE);
  594. end;
  595. A_ADD,A_ADDU,A_OR:
  596. begin
  597. if MatchOperand(taicpu(p).oper[1]^,NR_R0) then
  598. begin
  599. taicpu(p).freeop(1);
  600. taicpu(p).oper[1]:=taicpu(p).oper[2];
  601. taicpu(p).oper[2]:=nil;
  602. taicpu(p).ops:=2;
  603. taicpu(p).opercnt:=2;
  604. taicpu(p).opcode:=A_MOVE;
  605. result:=true;
  606. end
  607. else if MatchOperand(taicpu(p).oper[2]^,NR_R0) then
  608. begin
  609. taicpu(p).freeop(2);
  610. taicpu(p).ops:=2;
  611. taicpu(p).opercnt:=2;
  612. taicpu(p).opcode:=A_MOVE;
  613. result:=true;
  614. end
  615. else
  616. result:=TryRemoveMov(p,A_MOVE);
  617. end;
  618. A_LB,A_LBU,A_LH,A_LHU,A_LW,
  619. A_ADDI,
  620. A_SUB,A_SUBU,
  621. A_SRA,A_SRAV,
  622. A_SRLV,
  623. A_SLLV,
  624. A_MFLO,A_MFHI,
  625. A_AND,A_XOR,A_ORI,A_XORI:
  626. result:=TryRemoveMov(p,A_MOVE);
  627. A_LWC1,
  628. A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
  629. A_ABS_s, A_NEG_s, A_SQRT_s,
  630. A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
  631. result:=TryRemoveMov(p,A_MOV_s);
  632. A_LDC1,
  633. A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
  634. A_ABS_d, A_NEG_d, A_SQRT_d,
  635. A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
  636. result:=TryRemoveMov(p,A_MOV_d);
  637. end;
  638. end;
  639. end;
  640. end;
  641. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  642. var
  643. p: tai;
  644. l: longint;
  645. hp1,hp2,hp3: tai;
  646. condition: tasmcond;
  647. condreg: tregister;
  648. begin
  649. { Currently, everything below is mips4+ }
  650. if (current_settings.cputype<cpu_mips4) then
  651. exit;
  652. p:=BlockStart;
  653. ClearUsedRegs;
  654. while (p<>BlockEnd) Do
  655. begin
  656. UpdateUsedRegs(tai(p.next));
  657. case p.typ of
  658. ait_instruction:
  659. begin
  660. case taicpu(p).opcode of
  661. A_BC:
  662. begin
  663. condreg:=NR_NO;
  664. if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
  665. { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
  666. condreg:=NR_FCC0
  667. else if (taicpu(p).condition in [C_EQ,C_NE]) then
  668. begin
  669. if (taicpu(p).oper[0]^.reg=NR_R0) then
  670. condreg:=taicpu(p).oper[1]^.reg
  671. else if (taicpu(p).oper[1]^.reg=NR_R0) then
  672. condreg:=taicpu(p).oper[0]^.reg
  673. end;
  674. if (condreg<>NR_NO) then
  675. begin
  676. { check for
  677. bCC xxx
  678. <several movs>
  679. xxx:
  680. }
  681. l:=0;
  682. GetNextInstruction(p, hp1);
  683. while CanBeCMOV(hp1,condreg) do // CanBeCMOV returns False for nil or labels
  684. begin
  685. inc(l);
  686. GetNextInstruction(hp1,hp1);
  687. end;
  688. if assigned(hp1) then
  689. begin
  690. if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
  691. begin
  692. if (l<=4) and (l>0) then
  693. begin
  694. condition:=inverse_cond(taicpu(p).condition);
  695. hp2:=p;
  696. GetNextInstruction(p,hp1);
  697. p:=hp1;
  698. repeat
  699. ChangeToCMOV(taicpu(hp1),condition,condreg);
  700. GetNextInstruction(hp1,hp1);
  701. until not CanBeCMOV(hp1,condreg);
  702. { wait with removing else GetNextInstruction could
  703. ignore the label if it was the only usage in the
  704. jump moved away }
  705. tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
  706. RemoveDelaySlot(hp2);
  707. asml.remove(hp2);
  708. hp2.free;
  709. continue;
  710. end;
  711. end
  712. else
  713. begin
  714. { check further for
  715. bCC xxx
  716. <several movs 1>
  717. b yyy
  718. xxx:
  719. <several movs 2>
  720. yyy:
  721. }
  722. { hp2 points to jmp yyy }
  723. hp2:=hp1;
  724. { skip hp1 to xxx }
  725. GetNextInstruction(hp1, hp1);
  726. if assigned(hp2) and
  727. assigned(hp1) and
  728. (l<=3) and
  729. (hp2.typ=ait_instruction) and
  730. (taicpu(hp2).opcode=A_BA) and
  731. { real label and jump, no further references to the
  732. label are allowed }
  733. (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and
  734. FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
  735. begin
  736. l:=0;
  737. { skip hp1 to <several moves 2> }
  738. GetNextInstruction(hp1, hp1);
  739. while CanBeCMOV(hp1,condreg) do
  740. begin
  741. inc(l);
  742. GetNextInstruction(hp1, hp1);
  743. end;
  744. { hp1 points to yyy: }
  745. if assigned(hp1) and (l<=3) and
  746. FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then
  747. begin
  748. condition:=inverse_cond(taicpu(p).condition);
  749. GetNextInstruction(p,hp1);
  750. hp3:=p;
  751. p:=hp1;
  752. while CanBeCMOV(hp1,condreg) do
  753. begin
  754. ChangeToCMOV(taicpu(hp1),condition,condreg);
  755. GetNextInstruction(hp1,hp1);
  756. end;
  757. { hp2 is still at b yyy }
  758. GetNextInstruction(hp2,hp1);
  759. { hp2 is now at xxx: }
  760. condition:=inverse_cond(condition);
  761. GetNextInstruction(hp1,hp1);
  762. { hp1 is now at <several movs 2> }
  763. while CanBeCMOV(hp1,condreg) do
  764. begin
  765. ChangeToCMOV(taicpu(hp1),condition,condreg);
  766. GetNextInstruction(hp1,hp1);
  767. end;
  768. { remove bCC }
  769. tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs;
  770. RemoveDelaySlot(hp3);
  771. asml.remove(hp3);
  772. hp3.free;
  773. { remove jmp }
  774. tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
  775. RemoveDelaySlot(hp2);
  776. asml.remove(hp2);
  777. hp2.free;
  778. continue;
  779. end;
  780. end;
  781. end;
  782. end;
  783. end;
  784. end;
  785. end;
  786. end;
  787. end;
  788. UpdateUsedRegs(p);
  789. p:=tai(p.next);
  790. end;
  791. end;
  792. begin
  793. casmoptimizer:=TCpuAsmOptimizer;
  794. end.