aoptcpu.pas 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_AOPTCPU}
  21. Interface
  22. uses
  23. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
Type
  { Set of assembler opcodes; TryRemoveMovBeforeStore receives the accepted
    store opcodes through a value of this type. }
  TAsmOpSet = set of TAsmOp;

  { CPU specific peephole optimizer. The class is assigned to casmoptimizer
    in the initialization part of this unit. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { True when Reg is a register operand of p1 whose operation type is
      operand_write or operand_readwrite. }
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
    { Advances from Current until the search fails, a non-instruction is
      reached, an instruction mentioning reg is found, or a call/jump is hit.
      Next receives the stopping point; the result is that of the last
      GetNextInstruction call. }
    function GetNextInstructionUsingReg(Current: tai;
      var Next: tai; reg: TRegister): Boolean;
    { Tries to fold "op reg1,...; opcode reg2,reg1" into "op reg2,...". }
    function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
    { Tries to replace the base register of the reference in next by the
      source register of the move p, removing p. }
    function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
    { Tries to let the store next (opcode must be in storeops) store the
      source register of the move p directly, removing p. }
    function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    { True when hp is an instruction, other than the listed non-writing
      opcodes, whose first operand is the register reg. }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { True when hp is an instruction that uses reg as a non-write-only
      register operand or as base/index of a reference operand. }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  End;
  40. Implementation
  41. uses
  42. cutils,globtype,globals,aasmbase,cpuinfo,verbose;
  43. function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
  44. begin
  45. result :=
  46. (instr.typ = ait_instruction) and
  47. (taicpu(instr).opcode = op);
  48. end;
  49. function MatchOperand(const oper: TOper; reg: TRegister): boolean;
  50. begin
  51. result:=(oper.typ=top_reg) and (oper.reg=reg);
  52. end;
  53. function IsSameReg(this,next: taicpu): boolean;
  54. begin
  55. result:=(next.oper[0]^.typ=top_reg) and
  56. (next.oper[1]^.typ=top_reg) and
  57. (next.oper[0]^.reg=next.oper[1]^.reg) and
  58. (next.oper[0]^.reg=this.oper[0]^.reg);
  59. end;
  60. function CanBeCMOV(p: tai; condreg: tregister): boolean;
  61. begin
  62. result:=assigned(p) and (p.typ=ait_instruction) and
  63. ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or
  64. (
  65. { register with condition must not be overwritten }
  66. (taicpu(p).opcode=A_MOVE) and
  67. (taicpu(p).oper[0]^.reg<>condreg)
  68. ));
  69. end;
  70. procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);
  71. begin
  72. case cond of
  73. C_COP1TRUE:
  74. case p.opcode of
  75. A_MOV_D: p.opcode:=A_MOVT_D;
  76. A_MOV_S: p.opcode:=A_MOVT_S;
  77. A_MOVE: p.opcode:=A_MOVT;
  78. else
  79. InternalError(2014061701);
  80. end;
  81. C_COP1FALSE:
  82. case p.opcode of
  83. A_MOV_D: p.opcode:=A_MOVF_D;
  84. A_MOV_S: p.opcode:=A_MOVF_S;
  85. A_MOVE: p.opcode:=A_MOVF;
  86. else
  87. InternalError(2014061702);
  88. end;
  89. C_EQ:
  90. case p.opcode of
  91. A_MOV_D: p.opcode:=A_MOVZ_D;
  92. A_MOV_S: p.opcode:=A_MOVZ_S;
  93. A_MOVE: p.opcode:=A_MOVZ;
  94. else
  95. InternalError(2014061703);
  96. end;
  97. C_NE:
  98. case p.opcode of
  99. A_MOV_D: p.opcode:=A_MOVN_D;
  100. A_MOV_S: p.opcode:=A_MOVN_S;
  101. A_MOVE: p.opcode:=A_MOVN;
  102. else
  103. InternalError(2014061704);
  104. end;
  105. else
  106. InternalError(2014061705);
  107. end;
  108. p.ops:=3;
  109. p.loadreg(2,reg);
  110. end;
  111. {$ifdef DEBUG_AOPTCPU}
  112. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  113. begin
  114. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  115. end;
  116. {$else DEBUG_AOPTCPU}
  117. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  118. begin
  119. end;
  120. {$endif DEBUG_AOPTCPU}
  121. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  122. var
  123. p: taicpu;
  124. i: longint;
  125. begin
  126. result:=false;
  127. if not (assigned(hp) and (hp.typ=ait_instruction)) then
  128. exit;
  129. p:=taicpu(hp);
  130. i:=0;
  131. while(i<p.ops) do
  132. begin
  133. case p.oper[I]^.typ of
  134. top_reg:
  135. result:=(p.oper[I]^.reg=reg) and (p.spilling_get_operation_type(I)<>operand_write);
  136. top_ref:
  137. result:=
  138. (p.oper[I]^.ref^.base=reg) or
  139. (p.oper[I]^.ref^.index=reg);
  140. else
  141. ;
  142. end;
  143. if result then exit; {Bailout if we found something}
  144. Inc(I);
  145. end;
  146. end;
  147. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  148. var
  149. p: taicpu;
  150. begin
  151. p:=taicpu(hp);
  152. result:=false;
  153. if not ((assigned(hp)) and (hp.typ=ait_instruction)) then
  154. exit;
  155. case p.opcode of
  156. { These instructions do not write into a register at all }
  157. A_NOP,
  158. A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
  159. A_BA,A_BC,
  160. A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
  161. exit;
  162. else
  163. ;
  164. end;
  165. result:=(p.ops>0) and (p.oper[0]^.typ=top_reg) and
  166. (p.oper[0]^.reg=reg);
  167. end;
  168. function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  169. var
  170. i : Longint;
  171. begin
  172. result:=false;
  173. for i:=0 to taicpu(p1).ops-1 do
  174. if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
  175. begin
  176. result:=true;
  177. exit;
  178. end;
  179. end;
  180. function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  181. var Next: tai; reg: TRegister): Boolean;
  182. begin
  183. Next:=Current;
  184. repeat
  185. Result:=GetNextInstruction(Next,Next);
  186. until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
  187. (is_calljmp(taicpu(Next).opcode));
  188. end;
  189. function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
  190. var
  191. next,hp1: tai;
  192. alloc,dealloc: tai_regalloc;
  193. begin
  194. { Fold
  195. op $reg1,...
  196. opcode $reg2,$reg1
  197. dealloc $reg1
  198. into
  199. op $reg2,...
  200. opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
  201. }
  202. result:=false;
  203. if (taicpu(p).ops>0) and
  204. GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  205. MatchInstruction(next,opcode) and
  206. MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
  207. { the destination register of mov cannot be used between p and next }
  208. (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then
  209. begin
  210. dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
  211. if assigned(dealloc) then
  212. begin
  213. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  214. and remove it if possible }
  215. GetLastInstruction(p,hp1);
  216. asml.Remove(dealloc);
  217. alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  218. if assigned(alloc) then
  219. begin
  220. asml.Remove(alloc);
  221. alloc.free;
  222. dealloc.free;
  223. end
  224. else
  225. asml.InsertAfter(dealloc,p);
  226. { try to move the allocation of the target register }
  227. GetLastInstruction(next,hp1);
  228. alloc:=FindRegAlloc(taicpu(next).oper[0]^.reg,tai(hp1.Next));
  229. if assigned(alloc) then
  230. begin
  231. asml.Remove(alloc);
  232. asml.InsertBefore(alloc,p);
  233. { adjust used regs }
  234. IncludeRegInUsedRegs(taicpu(next).oper[0]^.reg,UsedRegs);
  235. end;
  236. { finally get rid of the mov }
  237. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  238. DebugMsg('Peephole: Move removed 1',next);
  239. asml.remove(next);
  240. next.free;
  241. result:=true;
  242. end
  243. else // no dealloc found
  244. begin
  245. { try to optimize the typical call sequence
  246. lw $reg, (whatever)
  247. <alloc volatile registers (including $reg!!)>
  248. move $t9,$reg
  249. jalr $t9
  250. if $reg is nonvolatile, its value may be used after call
  251. and we cannot safely replace it with $t9 }
  252. if (opcode=A_MOVE) and
  253. (taicpu(next).oper[0]^.reg=NR_R25) and
  254. GetNextInstruction(next,hp1) and
  255. MatchInstruction(hp1,A_JALR) and
  256. MatchOperand(taicpu(hp1).oper[0]^,NR_R25) and
  257. assigned(FindRegAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) then
  258. begin
  259. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  260. DebugMsg('Peephole: Move removed 2',next);
  261. asml.remove(next);
  262. next.free;
  263. result:=true;
  264. end;
  265. end;
  266. end;
  267. end;
  268. function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
  269. begin
  270. result:=(next.opcode in storeops) and
  271. MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and
  272. { Ry cannot be modified between move and store }
  273. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  274. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
  275. if result then
  276. begin
  277. next.loadreg(0,taicpu(p).oper[1]^.reg);
  278. DebugMsg('Peephole: Move removed 3',p);
  279. asml.remove(p);
  280. p.free;
  281. p:=next;
  282. end;
  283. end;
  284. function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
  285. begin
  286. result:=(next.ops>1) and
  287. (next.oper[1]^.typ=top_ref) and
  288. (next.oper[1]^.ref^.refaddr<>addr_full) and
  289. (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  290. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  291. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
  292. if result then
  293. begin
  294. AllocRegBetween(taicpu(p).oper[1]^.reg,p,next,UsedRegs);
  295. next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  296. DebugMsg('Peephole: Move removed 4',p);
  297. asml.remove(p);
  298. p.free;
  299. p:=next;
  300. end;
  301. end;
  302. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  303. var
  304. next,next2: tai;
  305. begin
  306. result:=false;
  307. case p.typ of
  308. ait_instruction:
  309. begin
  310. case taicpu(p).opcode of
  311. A_BC:
  312. begin
  313. { BEQ/BNE with same register are bogus, but can be generated for code like
  314. "if lo(qwordvar)=cardinal(qwordvar) ...",
  315. optimizations below can also yield them, e.g. if one register was initially R0. }
  316. if (taicpu(p).condition in [C_EQ,C_NE]) and
  317. (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
  318. begin
  319. if (taicpu(p).condition=C_NE) then
  320. begin
  321. if (taicpu(p).oper[2]^.typ = top_ref) and
  322. (taicpu(p).oper[2]^.ref^.symbol is TAsmLabel) then
  323. TAsmLabel(taicpu(p).oper[2]^.ref^.symbol).decrefs;
  324. RemoveDelaySlot(p);
  325. GetNextInstruction(p,next);
  326. end
  327. else
  328. begin
  329. next:=taicpu.op_sym(A_BA,taicpu(p).oper[2]^.ref^.symbol);
  330. taicpu(next).fileinfo:=taicpu(p).fileinfo;
  331. asml.insertbefore(next,p);
  332. end;
  333. asml.remove(p);
  334. p.Free;
  335. p:=next;
  336. result:=true;
  337. end;
  338. end;
  339. A_SEH:
  340. begin
  341. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  342. MatchInstruction(next,A_SH) and
  343. MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and
  344. (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  345. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  346. begin
  347. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  348. asml.remove(p);
  349. p.free;
  350. p:=next;
  351. result:=true;
  352. end
  353. else
  354. result:=TryRemoveMov(p,A_MOVE);
  355. end;
  356. A_SEB:
  357. { TODO: can be handled similar to A_SEH, but it's almost never encountered }
  358. result:=TryRemoveMov(p,A_MOVE);
  359. A_SLL:
  360. begin
  361. { if this is a sign extension... }
  362. if (taicpu(p).oper[2]^.typ=top_const) and
  363. GetNextInstruction(p,next) and
  364. MatchInstruction(next,A_SRA) and
  365. IsSameReg(taicpu(p),taicpu(next)) and
  366. (taicpu(next).oper[2]^.typ=top_const) and
  367. (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
  368. (taicpu(next).oper[2]^.val=16) and
  369. { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
  370. GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
  371. MatchInstruction(next2,A_SH) and
  372. (taicpu(next2).oper[0]^.typ=top_reg) and
  373. (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  374. { the initial register may not be reused }
  375. (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
  376. begin
  377. if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then
  378. begin
  379. taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
  380. asml.remove(p);
  381. asml.remove(next);
  382. p.free;
  383. next.free;
  384. p:=next2;
  385. result:=true;
  386. end;
  387. end
  388. else
  389. result:=TryRemoveMov(p,A_MOVE);
  390. end;
  391. A_SRL:
  392. begin
  393. { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions }
  394. { Remove 'andi' in sequences
  395. srl Rx,Ry,16
  396. andi Rx,Rx,65535
  397. srl Rx,Ry,24
  398. andi Rx,Rx,255
  399. since 'srl' clears all relevant upper bits }
  400. if (taicpu(p).oper[2]^.typ=top_const) and
  401. GetNextInstruction(p,next) and
  402. MatchInstruction(next,A_ANDI) and
  403. IsSameReg(taicpu(p),taicpu(next)) and
  404. (taicpu(next).oper[2]^.typ=top_const) and
  405. ((
  406. (taicpu(p).oper[2]^.val>=16) and
  407. (taicpu(next).oper[2]^.val=65535)
  408. ) or (
  409. (taicpu(p).oper[2]^.val>=24) and
  410. (taicpu(next).oper[2]^.val=255)
  411. )) then
  412. begin
  413. asml.remove(next);
  414. next.free;
  415. result:=true;
  416. end
  417. else
  418. result:=TryRemoveMov(p,A_MOVE);
  419. end;
  420. A_ANDI:
  421. begin
  422. { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
  423. if (taicpu(p).oper[2]^.typ=top_const) and
  424. GetNextInstruction(p,next) and
  425. MatchInstruction(next,A_SLL) and
  426. GetNextInstruction(next,next2) and
  427. MatchInstruction(next2,A_SRA) and
  428. IsSameReg(taicpu(p),taicpu(next)) and
  429. IsSameReg(taicpu(p),taicpu(next2)) and
  430. (taicpu(next).oper[2]^.typ=top_const) and
  431. (taicpu(next2).oper[2]^.typ=top_const) and
  432. (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
  433. ((
  434. (taicpu(p).oper[2]^.val<=$7fff) and
  435. (taicpu(next).oper[2]^.val=16)
  436. ) or (
  437. (taicpu(p).oper[2]^.val<=$7f) and
  438. (taicpu(next).oper[2]^.val=24)
  439. )) then
  440. begin
  441. asml.remove(next);
  442. asml.remove(next2);
  443. next.free;
  444. next2.free;
  445. result:=true;
  446. end
  447. { Remove zero extension if register is used only for byte/word memory store }
  448. else if (taicpu(p).oper[2]^.typ=top_const) and
  449. GetNextInstruction(p,next) and
  450. ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
  451. ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and
  452. (taicpu(next).oper[0]^.typ=top_reg) and
  453. (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  454. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  455. begin
  456. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  457. asml.remove(p);
  458. p.free;
  459. p:=next;
  460. result:=true;
  461. end
  462. else
  463. result:=TryRemoveMov(p,A_MOVE);
  464. end;
  465. A_MOV_S:
  466. begin
  467. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  468. (next.typ=ait_instruction) then
  469. begin
  470. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then
  471. result:=true;
  472. end;
  473. end;
  474. A_MOV_D:
  475. begin
  476. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  477. (next.typ=ait_instruction) then
  478. begin
  479. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then
  480. result:=true;
  481. end;
  482. end;
  483. A_MOVE:
  484. begin
  485. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  486. (next.typ=ait_instruction) and
  487. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
  488. begin
  489. { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) }
  490. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then
  491. result:=true
  492. else if TryRemoveMovToRefIndex(p,taicpu(next)) then
  493. result:=true
  494. { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any
  495. MOVE Rx,Ry; opcode Rx,Rz,Rx ==> opcode Rx,Rz,Ry }
  496. else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_AND,A_ANDI,A_SLLV,A_SRLV,A_SRAV]) and
  497. MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  498. begin
  499. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  500. begin
  501. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  502. asml.remove(p);
  503. p.free;
  504. p:=next;
  505. result:=true;
  506. end
  507. { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE,
  508. providing further optimization possibilities }
  509. else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
  510. begin
  511. taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
  512. asml.remove(p);
  513. p.free;
  514. p:=next;
  515. result:=true;
  516. end;
  517. end
  518. { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any }
  519. else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU,
  520. A_SLL,A_SRL,A_SRA,A_SLLV,A_SRLV,A_SRAV,A_AND,A_ANDI,A_OR,A_ORI,A_XOR,A_XORI]) and
  521. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  522. begin
  523. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  524. begin
  525. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  526. asml.remove(p);
  527. p.free;
  528. p:=next;
  529. result:=true;
  530. end
  531. else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
  532. begin
  533. taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
  534. asml.remove(p);
  535. p.free;
  536. p:=next;
  537. result:=true;
  538. end;
  539. end
  540. { MULT[U] and cond.branches must be handled separately due to different operand numbers }
  541. else if (taicpu(next).opcode in [A_MULT,A_MULTU,A_BC]) and
  542. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  543. begin
  544. if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  545. begin
  546. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  547. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  548. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  549. asml.remove(p);
  550. p.free;
  551. p:=next;
  552. result:=true;
  553. end
  554. else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  555. begin
  556. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  557. if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  558. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  559. asml.remove(p);
  560. p.free;
  561. p:=next;
  562. result:=true;
  563. end;
  564. end
  565. else if TryRemoveMov(p,A_MOVE) then
  566. begin
  567. { Ended up with move between same register? Suicide then. }
  568. if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
  569. begin
  570. GetNextInstruction(p,next);
  571. asml.remove(p);
  572. p.free;
  573. p:=next;
  574. result:=true;
  575. end;
  576. end;
  577. end;
  578. end;
  579. A_ADDIU:
  580. begin
  581. { ADDIU Rx,Ry,const; load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry)
  582. ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry)
  583. ADDIU Rx,Ry,const; load Rx,(Rx) ==> load Rx,const(Ry)
  584. ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) }
  585. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  586. (next.typ=ait_instruction) and
  587. (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and
  588. (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and
  589. (taicpu(next).oper[1]^.ref^.offset=0) and
  590. (taicpu(next).oper[1]^.ref^.symbol=nil) and
  591. (
  592. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or
  593. (
  594. (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and
  595. (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW])
  596. )
  597. ) and
  598. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
  599. begin
  600. case taicpu(p).oper[2]^.typ of
  601. top_const:
  602. taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
  603. top_ref:
  604. taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^;
  605. else
  606. InternalError(2014100401);
  607. end;
  608. taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  609. asml.remove(p);
  610. p.free;
  611. p:=next;
  612. result:=true;
  613. end
  614. else
  615. result:=TryRemoveMov(p,A_MOVE);
  616. end;
  617. A_ADD,A_ADDU,A_OR:
  618. begin
  619. if MatchOperand(taicpu(p).oper[1]^,NR_R0) then
  620. begin
  621. taicpu(p).freeop(1);
  622. taicpu(p).oper[1]:=taicpu(p).oper[2];
  623. taicpu(p).oper[2]:=nil;
  624. taicpu(p).ops:=2;
  625. taicpu(p).opercnt:=2;
  626. taicpu(p).opcode:=A_MOVE;
  627. result:=true;
  628. end
  629. else if MatchOperand(taicpu(p).oper[2]^,NR_R0) then
  630. begin
  631. taicpu(p).freeop(2);
  632. taicpu(p).ops:=2;
  633. taicpu(p).opercnt:=2;
  634. taicpu(p).opcode:=A_MOVE;
  635. result:=true;
  636. end
  637. else
  638. result:=TryRemoveMov(p,A_MOVE);
  639. end;
  640. A_LB,A_LBU,A_LH,A_LHU,A_LW,
  641. A_ADDI,
  642. A_SUB,A_SUBU,
  643. A_SRA,A_SRAV,
  644. A_SRLV,
  645. A_SLLV,
  646. A_MFLO,A_MFHI,
  647. A_AND,A_XOR,A_ORI,A_XORI:
  648. result:=TryRemoveMov(p,A_MOVE);
  649. A_LWC1,
  650. A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
  651. A_ABS_s, A_NEG_s, A_SQRT_s,
  652. A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
  653. result:=TryRemoveMov(p,A_MOV_s);
  654. A_LDC1,
  655. A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
  656. A_ABS_d, A_NEG_d, A_SQRT_d,
  657. A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
  658. result:=TryRemoveMov(p,A_MOV_d);
  659. else
  660. ;
  661. end;
  662. end;
  663. else
  664. ;
  665. end;
  666. end;
{ Second peephole pass over the block BlockStart..BlockEnd.
  Looks for conditional branches (A_BC) that merely jump over a short run
  of register moves and replaces them by conditional moves:
    - "bCC xxx; <1..4 movs>; xxx:"
    - "bCC xxx; <up to 3 movs>; b yyy; xxx: <up to 3 movs>; yyy:"
  Only conditions C_COP1TRUE/C_COP1FALSE (condition register NR_FCC0) and
  C_EQ/C_NE against NR_R0 are handled. Nothing is done when the selected
  cpu type is below cpu_mips4. }
procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  var
    p: tai;
    l: longint;
    hp1,hp2,hp3: tai;
    condition: tasmcond;
    condreg: tregister;
  begin
    { Currently, everything below is mips4+ }
    if (current_settings.cputype<cpu_mips4) then
      exit;
    p:=BlockStart;
    ClearUsedRegs;
    while (p<>BlockEnd) Do
      begin
        UpdateUsedRegs(tai(p.next));
        case p.typ of
          ait_instruction:
            begin
              case taicpu(p).opcode of
                A_BC:
                  begin
                    { determine the register the branch condition depends on;
                      NR_NO means the branch is not a candidate }
                    condreg:=NR_NO;
                    if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
                      { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
                      condreg:=NR_FCC0
                    else if (taicpu(p).condition in [C_EQ,C_NE]) then
                      begin
                        if (taicpu(p).oper[0]^.reg=NR_R0) then
                          condreg:=taicpu(p).oper[1]^.reg
                        else if (taicpu(p).oper[1]^.reg=NR_R0) then
                          condreg:=taicpu(p).oper[0]^.reg
                      end;
                    if (condreg<>NR_NO) then
                      begin
                        { check for
                            bCC   xxx
                            <several movs>
                         xxx:
                        }
                        { l counts the convertible moves following the branch }
                        l:=0;
                        GetNextInstruction(p, hp1);
                        while CanBeCMOV(hp1,condreg) do       // CanBeCMOV returns False for nil or labels
                          begin
                            inc(l);
                            GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    { the moves must execute when the branch is NOT taken }
                                    condition:=inverse_cond(taicpu(p).condition);
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      ChangeToCMOV(taicpu(hp1),condition,condreg);
                                      GetNextInstruction(hp1,hp1);
                                    until not CanBeCMOV(hp1,condreg);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
                                    RemoveDelaySlot(hp2);
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points to the first converted move }
                                    continue;
                                  end;
                              end
                            else
                              begin
                                { check further for
                                    bCC   xxx
                                    <several movs 1>
                                    b     yyy
                                 xxx:
                                    <several movs 2>
                                 yyy:
                                }
                                { hp2 points to jmp yyy }
                                hp2:=hp1;
                                { skip hp1 to xxx }
                                GetNextInstruction(hp1, hp1);
                                if assigned(hp2) and
                                  assigned(hp1) and
                                  (l<=3) and
                                  (hp2.typ=ait_instruction) and
                                  (taicpu(hp2).opcode=A_BA) and
                                  { real label and jump, no further references to the
                                    label are allowed }
                                  (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and
                                  FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
                                  begin
                                    l:=0;
                                    { skip hp1 to <several moves 2> }
                                    GetNextInstruction(hp1, hp1);
                                    while CanBeCMOV(hp1,condreg) do
                                      begin
                                        inc(l);
                                        GetNextInstruction(hp1, hp1);
                                      end;
                                    { hp1 points to yyy: }
                                    if assigned(hp1) and (l<=3) and
                                      FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then
                                      begin
                                        { first group: executed when the branch is not taken }
                                        condition:=inverse_cond(taicpu(p).condition);
                                        GetNextInstruction(p,hp1);
                                        hp3:=p;
                                        p:=hp1;
                                        while CanBeCMOV(hp1,condreg) do
                                          begin
                                            ChangeToCMOV(taicpu(hp1),condition,condreg);
                                            GetNextInstruction(hp1,hp1);
                                          end;
                                        { hp2 is still at b yyy }
                                        GetNextInstruction(hp2,hp1);
                                        { hp1 is now at xxx: }
                                        { second group: executed when the branch is taken }
                                        condition:=inverse_cond(condition);
                                        GetNextInstruction(hp1,hp1);
                                        { hp1 is now at <several movs 2> }
                                        while CanBeCMOV(hp1,condreg) do
                                          begin
                                            ChangeToCMOV(taicpu(hp1),condition,condreg);
                                            GetNextInstruction(hp1,hp1);
                                          end;
                                        { remove bCC }
                                        tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs;
                                        RemoveDelaySlot(hp3);
                                        asml.remove(hp3);
                                        hp3.free;
                                        { remove jmp }
                                        tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
                                        RemoveDelaySlot(hp2);
                                        asml.remove(hp2);
                                        hp2.free;
                                        { p already points to the first converted move }
                                        continue;
                                      end;
                                  end;
                              end;
                          end;
                      end;
                  end;
                else
                  ;
              end;
            end;
          else
            ;
        end;
        UpdateUsedRegs(p);
        p:=tai(p.next);
      end;
  end;
{ Unit initialization: makes TCpuAsmOptimizer the class referenced by
  casmoptimizer. }
begin
  casmoptimizer:=TCpuAsmOptimizer;
end.