aoptcpu.pas 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_AOPTCPU}
  21. Interface
  22. uses
  23. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
  Type
    { Set of assembler opcodes, used to hand a group of opcodes (for example
      the store instructions accepted by TryRemoveMovBeforeStore) to a helper. }
    TAsmOpSet = set of TAsmOp;

    { CPU-specific peephole optimizer derived from TAsmOptimizer. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Turns a conditional jump into an unconditional one. The caller must
        already know that p is a conditional jump. }
      procedure MakeUnconditional(p: taicpu); override;

      { True when one of the register operands of p1 equal to Reg is written
        or read-modified by p1. }
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

      { Advances from Current until an item is found that is not an
        instruction, mentions reg, or is a call/jump; that item is returned
        in Next. The result is False when the end of the list was reached. }
      function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;

      { Tries to fold "op reg1,...; <opcode> reg2,reg1" into "op reg2,...". }
      function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;

      { Tries to drop the move p when its target is only used as the base
        register of the memory reference in next. }
      function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;

      { Tries to drop the move p when its target is only stored by next,
        where next must be one of the opcodes in storeops. }
      function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;

      { Per-instruction optimizations of the first peephole pass. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

      { Second peephole pass: replaces short branch-over-moves sequences by
        conditional moves. }
      procedure PeepHoleOptPass2; override;

      { True when hp has reg as its (register) operand 0 and is not one of the
        opcodes known not to write a register. }
      function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;

      { True when hp reads reg, either directly or as base/index of a reference. }
      function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;

      { Outputs a debug message into the assembler file. }
      procedure DebugMsg(const s: string; p: tai);
    End;
  43. Implementation
  44. uses
  45. cutils,globtype,globals,aasmbase,cpuinfo,verbose;
  { Returns True when instr is a machine instruction whose opcode is op.
    instr must not be nil. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=taicpu(instr).opcode=op;
    end;
  { Returns True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; reg: TRegister): boolean;
    begin
      result:=false;
      if oper.typ=top_reg then
        result:=oper.reg=reg;
    end;
  { Returns True when operands 0 and 1 of next are both registers, are the
    same register, and that register also equals operand 0 of this.
    Operand 0 of this is compared as a register without a type check. }
  function IsSameReg(this,next: taicpu): boolean;
    begin
      result:=false;
      if (next.oper[0]^.typ<>top_reg) or (next.oper[1]^.typ<>top_reg) then
        exit;
      if next.oper[0]^.reg<>next.oper[1]^.reg then
        exit;
      result:=next.oper[0]^.reg=this.oper[0]^.reg;
    end;
  { Returns True when p is an instruction that ChangeToCMOV can handle:
    A_MOV_D and A_MOV_S always qualify, A_MOVE only when it does not
    overwrite condreg. Nil and non-instruction items yield False. }
  function CanBeCMOV(p: tai; condreg: tregister): boolean;
    begin
      result:=false;
      if (not assigned(p)) or (p.typ<>ait_instruction) then
        exit;
      case taicpu(p).opcode of
        A_MOV_D,A_MOV_S:
          result:=true;
        A_MOVE:
          { register with condition must not be overwritten }
          result:=taicpu(p).oper[0]^.reg<>condreg;
      else
        ;
      end;
    end;
  { Rewrites the move p into the conditional move that matches cond and
    appends reg as third operand.

    p    - instruction with opcode A_MOV_D, A_MOV_S or A_MOVE
    cond - C_COP1TRUE, C_COP1FALSE, C_EQ or C_NE
    reg  - register loaded into operand 2 of the rewritten instruction

    An unsupported cond raises internal error 2014061705; a supported cond
    with an unsupported opcode raises 2014061701..2014061704. }
  procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);

    { Replaces the opcode of p by the variant matching its current move
      type, or reports errcode when p is not one of the three moves. }
    procedure SelectVariant(dblop,sglop,intop: TAsmOp; errcode: longint);
      begin
        case p.opcode of
          A_MOV_D:
            p.opcode:=dblop;
          A_MOV_S:
            p.opcode:=sglop;
          A_MOVE:
            p.opcode:=intop;
        else
          InternalError(errcode);
        end;
      end;

    begin
      case cond of
        C_COP1TRUE:
          SelectVariant(A_MOVT_D,A_MOVT_S,A_MOVT,2014061701);
        C_COP1FALSE:
          SelectVariant(A_MOVF_D,A_MOVF_S,A_MOVF,2014061702);
        C_EQ:
          SelectVariant(A_MOVZ_D,A_MOVZ_S,A_MOVZ,2014061703);
        C_NE:
          SelectVariant(A_MOVN_D,A_MOVN_S,A_MOVN,2014061704);
      else
        InternalError(2014061705);
      end;
      { the conditional forms take the condition register as extra operand }
      p.ops:=3;
      p.loadreg(2,reg);
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: inserts s as an assembler comment directly before p. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Regular variant: deliberately does nothing. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Converts a conditional jump into an unconditional jump. Only call this
    procedure on an instruction that you already know is a conditional jump.

    After the inherited handling, the last operand of p is copied into
    operand slot 0 and all other operands are released, so that p ends up
    with a single operand. An instruction that already has exactly one
    operand is left as the inherited method produced it. }
  procedure TCpuAsmOptimizer.MakeUnconditional(p: taicpu);
    var
      last, slot: Byte;
    begin
      inherited MakeUnconditional(p);
      last:=p.ops-1;
      if last<>0 then
        begin
          { keep the last operand, drop everything behind slot 0 }
          p.loadoper(0,p.oper[last]^);
          for slot:=last downto 1 do
            p.freeop(slot);
          p.ops:=1;
          p.opercnt:=1;
        end;
    end;
  { Returns True when hp is an instruction that reads reg.

    A register operand counts when it equals reg and its operation type is
    not operand_write; a reference operand counts when reg is its base or
    index register. Nil and non-instruction items yield False. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      idx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for idx:=0 to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              result:=(instr.oper[idx]^.reg=reg) and
                (instr.spilling_get_operation_type(idx)<>operand_write);
            top_ref:
              result:=(instr.oper[idx]^.ref^.base=reg) or
                (instr.oper[idx]^.ref^.index=reg);
          else
            ;
          end;
          { stop at the first operand that reads the register }
          if result then
            exit;
        end;
    end;
  { Returns True when hp is an instruction whose operand 0 is the register
    reg, unless its opcode is one of those listed below as not writing a
    register at all. Nil and non-instruction items yield False. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { These instructions do not write into a register at all }
      if instr.opcode in [A_NOP,
                          A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
                          A_BA,A_BC,
                          A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1] then
        exit;
      if (instr.ops>0) and (instr.oper[0]^.typ=top_reg) then
        result:=instr.oper[0]^.reg=reg;
    end;
  { Returns True when p1 is an instruction that has Reg as a register operand
    whose operation type is operand_write or operand_readwrite.

    Nil and non-instruction items (labels, register allocations, ...) yield
    False instead of being cast to taicpu, matching the guards used by
    RegLoadedWithNewValue and InstructionLoadsFromReg. }
  function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      instr: taicpu;
      i : Longint;
    begin
      result:=false;
      { only real instructions carry operands }
      if not (assigned(p1) and (p1.typ=ait_instruction)) then
        exit;
      instr:=taicpu(p1);
      for i:=0 to instr.ops-1 do
        if (instr.oper[i]^.typ=top_reg) and
          (instr.oper[i]^.reg=Reg) and
          (instr.spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
          begin
            result:=true;
            exit;
          end;
    end;
  { Walks forward from Current using GetNextInstruction and stops at the
    first item that
      - could not be fetched (result False), or
      - is not an instruction, or
      - mentions reg according to RegInInstruction, or
      - is a call or jump according to is_calljmp.
    Next receives the item the walk stopped at. The result is the result of
    the last GetNextInstruction call, so callers still have to check what
    kind of item Next is.
    Note: a check of cs_opt_level3 in current_settings.optimizerswitches
    used to be part of the stop condition and is disabled. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
        end;
    end;
  { Fold
      op $reg1,...
      opcode $reg2,$reg1
      dealloc $reg1
    into
      op $reg2,...
    opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.

    p      - the "op" instruction; its operand 0 is retargeted on success
    opcode - the move opcode to look for
    Returns True when the move was removed. }
  function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
    var
      mov,hp: tai;
      regalloc,regdealloc: tai_regalloc;
      srcreg: TRegister;
    begin
      result:=false;
      if taicpu(p).ops<=0 then
        exit;
      { operand 0 of p stays untouched until the final loadreg below }
      srcreg:=taicpu(p).oper[0]^.reg;
      if not GetNextInstructionUsingReg(p,mov,srcreg) then
        exit;
      if not MatchInstruction(mov,opcode) then
        exit;
      if not MatchOperand(taicpu(mov).oper[1]^,srcreg) then
        exit;
      { the destination register of mov cannot be used between p and mov }
      if RegUsedBetween(taicpu(mov).oper[0]^.reg,p,mov) then
        exit;

      regdealloc:=FindRegDealloc(srcreg,tai(mov.Next));
      if assigned(regdealloc) then
        begin
          { srcreg is not used anymore, try to find its allocation
            and remove it if possible }
          GetLastInstruction(p,hp);
          asml.Remove(regdealloc);
          regalloc:=FindRegAlloc(srcreg,tai(hp.Next));
          if assigned(regalloc) then
            begin
              asml.Remove(regalloc);
              regalloc.free;
              regdealloc.free;
            end
          else
            asml.InsertAfter(regdealloc,p);

          { try to move the allocation of the target register }
          GetLastInstruction(mov,hp);
          regalloc:=FindRegAlloc(taicpu(mov).oper[0]^.reg,tai(hp.Next));
          if assigned(regalloc) then
            begin
              asml.Remove(regalloc);
              asml.InsertBefore(regalloc,p);
              { adjust used regs }
              IncludeRegInUsedRegs(taicpu(mov).oper[0]^.reg,UsedRegs);
            end;

          { finally get rid of the mov }
          taicpu(p).loadreg(0,taicpu(mov).oper[0]^.reg);
          DebugMsg('Peephole: Move removed 1',mov);
          asml.remove(mov);
          mov.free;
          result:=true;
        end
      { no dealloc found: try to optimize the typical call sequence
          lw $reg, (whatever)
          <alloc volatile registers (including $reg!!)>
          move $t9,$reg
          jalr $t9
        if $reg is nonvolatile, its value may be used after call
        and we cannot safely replace it with $t9 }
      else if (opcode=A_MOVE) and
        (taicpu(mov).oper[0]^.reg=NR_R25) and
        GetNextInstruction(mov,hp) and
        MatchInstruction(hp,A_JALR) and
        MatchOperand(taicpu(hp).oper[0]^,NR_R25) and
        assigned(FindRegAlloc(srcreg,tai(p.next))) then
        begin
          taicpu(p).loadreg(0,taicpu(mov).oper[0]^.reg);
          DebugMsg('Peephole: Move removed 2',mov);
          asml.remove(mov);
          mov.free;
          result:=true;
        end;
    end;
  { Tries to turn
      move Rx,Ry
      store Rx,(ref)
      dealloc Rx
    into
      store Ry,(ref)

    p        - the move; on success it is freed and p is set to next
    next     - candidate store instruction
    storeops - opcodes accepted as store
    Returns True when the move was removed. }
  function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
    begin
      result:=false;
      if not (next.opcode in storeops) then
        exit;
      if not MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) then
        exit;
      { Ry cannot be modified between move and store }
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next) then
        exit;
      { Rx must be released after the store }
      if not Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
        exit;

      result:=true;
      next.loadreg(0,taicpu(p).oper[1]^.reg);
      DebugMsg('Peephole: Move removed 3',p);
      asml.remove(p);
      p.free;
      p:=next;
    end;
  { Tries to drop the move p (move Rx,Ry) when Rx is the base register of the
    reference in operand 1 of next and Rx is deallocated after next; the
    reference is then rewritten to use Ry as base.

    p    - the move; on success it is freed and p is set to next
    next - instruction whose operand 1 may be a reference based on Rx
    Returns True when the move was removed. }
  function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
    begin
      result:=false;
      if next.ops<=1 then
        exit;
      if next.oper[1]^.typ<>top_ref then
        exit;
      if next.oper[1]^.ref^.refaddr=addr_full then
        exit;
      if next.oper[1]^.ref^.base<>taicpu(p).oper[0]^.reg then
        exit;
      { Ry cannot be modified between the move and its use }
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next) then
        exit;
      if not Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
        exit;

      result:=true;
      AllocRegBetween(taicpu(p).oper[1]^.reg,p,next,UsedRegs);
      next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
      DebugMsg('Peephole: Move removed 4',p);
      asml.remove(p);
      p.free;
      p:=next;
    end;
  { First-pass peephole optimizations for the item at p.

    p - item to examine; when an optimization removes the current
        instruction, p is updated to the instruction to continue with.
    Returns True when the instruction stream was changed.

    Only instructions are handled; the opcode selects the pattern set:
      A_BC            bogus BEQ/BNE comparing a register with itself
      A_SEH, A_SLL    sign extension feeding only a 16-bit store
      A_SRL           redundant ANDI after a shift
      A_ANDI          redundant sign extension / zero extension before store
      A_MOV_S/A_MOV_D move feeding only a floating point store
      A_MOVE          move feeding a store, address or ALU instruction
      A_ADDIU         address computation folded into load/store
      A_ADD/ADDU/OR   operation with R0 turned into MOVE
    Most other arms just try to fold a following move via TryRemoveMov. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      next,next2: tai;

    { Unlinks and frees the instruction p points to and continues at
      replacement. }
    procedure ReplaceCurrent(replacement: tai);
      begin
        asml.remove(p);
        p.free;
        p:=replacement;
      end;

    { Replaces every source operand (1 and 2) of instr that is register
      oldreg by newreg. Returns True when at least one was replaced.
      Both operands must be handled: for "op Rd,Rx,Rx" replacing only one
      would leave a read of a register whose defining move gets deleted. }
    function ReplaceSourceReg(instr: taicpu; oldreg,newreg: TRegister): boolean;
      begin
        result:=false;
        if MatchOperand(instr.oper[1]^,oldreg) then
          begin
            instr.loadreg(1,newreg);
            result:=true;
          end;
        if (instr.ops>2) and MatchOperand(instr.oper[2]^,oldreg) then
          begin
            instr.loadreg(2,newreg);
            result:=true;
          end;
      end;

    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_BC:
                begin
                  { BEQ/BNE with same register are bogus, but can be generated for code like
                    "if lo(qwordvar)=cardinal(qwordvar) ...",
                    optimizations below can also yield them, e.g. if one register was initially R0. }
                  if (taicpu(p).condition in [C_EQ,C_NE]) and
                    (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
                    begin
                      if (taicpu(p).condition=C_NE) then
                        begin
                          { never taken: drop the branch and its delay slot }
                          if (taicpu(p).oper[2]^.typ = top_ref) and
                            (taicpu(p).oper[2]^.ref^.symbol is TAsmLabel) then
                            TAsmLabel(taicpu(p).oper[2]^.ref^.symbol).decrefs;
                          RemoveDelaySlot(p);
                          GetNextInstruction(p,next);
                        end
                      else
                        begin
                          { always taken: replace by an unconditional branch }
                          next:=taicpu.op_sym(A_BA,taicpu(p).oper[2]^.ref^.symbol);
                          taicpu(next).fileinfo:=taicpu(p).fileinfo;
                          asml.insertbefore(next,p);
                        end;
                      ReplaceCurrent(next);
                      result:=true;
                    end;
                end;

              A_SEH:
                begin
                  { seh Rx,Ry; sh Rx,(ref); dealloc Rx ==> sh Ry,(ref) }
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    MatchInstruction(next,A_SH) and
                    MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and
                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and
                    Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                    begin
                      taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                      ReplaceCurrent(next);
                      result:=true;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              A_SEB:
                { TODO: can be handled similar to A_SEH, but it's almost never encountered }
                result:=TryRemoveMov(p,A_MOVE);

              A_SLL:
                begin
                  { if this is a sign extension... }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
                    (taicpu(next).oper[2]^.val=16) and
                    { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
                    GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
                    MatchInstruction(next2,A_SH) and
                    (taicpu(next2).oper[0]^.typ=top_reg) and
                    (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    { the initial register may not be reused }
                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
                    begin
                      if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then
                        begin
                          taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
                          asml.remove(p);
                          asml.remove(next);
                          p.free;
                          next.free;
                          p:=next2;
                          result:=true;
                        end;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              A_SRL:
                begin
                  { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions }
                  { Remove 'andi' in sequences
                      srl Rx,Ry,16
                      andi Rx,Rx,65535

                      srl Rx,Ry,24
                      andi Rx,Rx,255
                    since 'srl' clears all relevant upper bits }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_ANDI) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    ((
                      (taicpu(p).oper[2]^.val>=16) and
                      (taicpu(next).oper[2]^.val=65535)
                    ) or (
                      (taicpu(p).oper[2]^.val>=24) and
                      (taicpu(next).oper[2]^.val=255)
                    )) then
                    begin
                      asml.remove(next);
                      next.free;
                      result:=true;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              A_ANDI:
                begin
                  { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SLL) and
                    GetNextInstruction(next,next2) and
                    MatchInstruction(next2,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    IsSameReg(taicpu(p),taicpu(next2)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next2).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
                    ((
                      (taicpu(p).oper[2]^.val<=$7fff) and
                      (taicpu(next).oper[2]^.val=16)
                    ) or (
                      (taicpu(p).oper[2]^.val<=$7f) and
                      (taicpu(next).oper[2]^.val=24)
                    )) then
                    begin
                      asml.remove(next);
                      asml.remove(next2);
                      next.free;
                      next2.free;
                      result:=true;
                    end
                  { Remove zero extension if register is used only for byte/word memory store.
                    The two mask/store alternatives are parenthesized as one group:
                    'and' binds tighter than 'or', so without the parentheses the
                    register and dealloc checks would only guard the A_SH alternative. }
                  else if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    (
                      ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
                      ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH))
                    ) and
                    (taicpu(next).oper[0]^.typ=top_reg) and
                    (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                    begin
                      taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                      ReplaceCurrent(next);
                      result:=true;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              A_MOV_S:
                begin
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) then
                    result:=TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]);
                end;

              A_MOV_D:
                begin
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) then
                    result:=TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]);
                end;

              A_MOVE:
                begin
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
                    begin
                      { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) }
                      if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then
                        result:=true
                      else if TryRemoveMovToRefIndex(p,taicpu(next)) then
                        result:=true
                      { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any
                        MOVE Rx,Ry; opcode Rx,Rz,Rx  ==> opcode Rx,Rz,Ry
                        MOVE Rx,Ry; opcode Rx,Rx,Rx  ==> opcode Rx,Ry,Ry }
                      else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_AND,A_ANDI,A_SLLV,A_SRLV,A_SRAV]) and
                        MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
                        begin
                          { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE,
                            providing further optimization possibilities }
                          if ReplaceSourceReg(taicpu(next),taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg) then
                            begin
                              ReplaceCurrent(next);
                              result:=true;
                            end;
                        end
                      { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any }
                      else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU,
                        A_SLL,A_SRL,A_SRA,A_SLLV,A_SRLV,A_SRAV,A_AND,A_ANDI,A_OR,A_ORI,A_XOR,A_XORI]) and
                        Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                        begin
                          if ReplaceSourceReg(taicpu(next),taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg) then
                            begin
                              ReplaceCurrent(next);
                              result:=true;
                            end;
                        end
                      { MULT[U] and cond.branches must be handled separately due to different operand numbers }
                      else if (taicpu(next).opcode in [A_MULT,A_MULTU,A_BC]) and
                        Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                        begin
                          if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
                            begin
                              taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                              if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
                                taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
                              ReplaceCurrent(next);
                              result:=true;
                            end
                          else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
                            begin
                              taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
                              if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
                                taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                              ReplaceCurrent(next);
                              result:=true;
                            end;
                        end
                      else if TryRemoveMov(p,A_MOVE) then
                        begin
                          { a following move was folded into p, so the stream changed
                            even if p itself survives }
                          result:=true;
                          { Ended up with move between same register? Suicide then. }
                          if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
                            begin
                              GetNextInstruction(p,next);
                              ReplaceCurrent(next);
                            end;
                        end;
                    end;
                end;

              A_ADDIU:
                begin
                  { ADDIU Rx,Ry,const;    load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry)
                    ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry)
                    ADDIU Rx,Ry,const;    load Rx,(Rx) ==> load Rx,const(Ry)
                    ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) }
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) and
                    (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and
                    (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and
                    (taicpu(next).oper[1]^.ref^.offset=0) and
                    (taicpu(next).oper[1]^.ref^.symbol=nil) and
                    (
                      Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or
                      (
                        (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and
                        (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW])
                      )
                    ) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
                    begin
                      case taicpu(p).oper[2]^.typ of
                        top_const:
                          taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
                        top_ref:
                          taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^;
                      else
                        InternalError(2014100401);
                      end;
                      taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
                      ReplaceCurrent(next);
                      result:=true;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              A_ADD,A_ADDU,A_OR:
                begin
                  { op Rx,R0,Ry ==> move Rx,Ry }
                  if MatchOperand(taicpu(p).oper[1]^,NR_R0) then
                    begin
                      taicpu(p).freeop(1);
                      taicpu(p).oper[1]:=taicpu(p).oper[2];
                      taicpu(p).oper[2]:=nil;
                      taicpu(p).ops:=2;
                      taicpu(p).opercnt:=2;
                      taicpu(p).opcode:=A_MOVE;
                      result:=true;
                    end
                  { op Rx,Ry,R0 ==> move Rx,Ry }
                  else if MatchOperand(taicpu(p).oper[2]^,NR_R0) then
                    begin
                      taicpu(p).freeop(2);
                      taicpu(p).ops:=2;
                      taicpu(p).opercnt:=2;
                      taicpu(p).opcode:=A_MOVE;
                      result:=true;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              A_LB,A_LBU,A_LH,A_LHU,A_LW,
              A_ADDI,
              A_SUB,A_SUBU,
              A_SRA,A_SRAV,
              A_SRLV,
              A_SLLV,
              A_MFLO,A_MFHI,
              A_AND,A_XOR,A_ORI,A_XORI:
                result:=TryRemoveMov(p,A_MOVE);

              A_LWC1,
              A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
              A_ABS_s, A_NEG_s, A_SQRT_s,
              A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
                result:=TryRemoveMov(p,A_MOV_s);

              A_LDC1,
              A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
              A_ABS_d, A_NEG_d, A_SQRT_d,
              A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
                result:=TryRemoveMov(p,A_MOV_d);
            else
              ;
            end;
          end;
      else
        ;
      end;
    end;
  { Second peephole pass. Walks the block from BlockStart to BlockEnd and
    replaces conditional branches that only skip a few moves by conditional
    moves. Two shapes are recognized:

        bCC xxx                bCC xxx
        <several movs>         <several movs 1>
      xxx:                     b yyy
                             xxx:
                               <several movs 2>
                             yyy:

    Nothing is done when current_settings.cputype is below cpu_mips4. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p: tai;
      movcount: longint;
      scan,jmp,branch,elselabel: tai;
      cmovcond: tasmcond;
      condreg: tregister;

    { Label referenced by the last operand of the branch/jump instr. }
    function TargetOf(instr: tai): tasmlabel;
      begin
        result:=tasmlabel(taicpu(instr).oper[taicpu(instr).ops-1]^.ref^.symbol);
      end;

    { Converts the run of CMOV-able moves starting at first, using
      condition c and the current condreg. }
    procedure ConvertMoves(first: tai; c: tasmcond);
      begin
        while CanBeCMOV(first,condreg) do
          begin
            ChangeToCMOV(taicpu(first),c,condreg);
            GetNextInstruction(first,first);
          end;
      end;

    { Releases the label reference of instr, removes its delay slot and
      deletes the instruction itself. }
    procedure DropBranch(instr: tai);
      begin
        TargetOf(instr).decrefs;
        RemoveDelaySlot(instr);
        asml.remove(instr);
        instr.free;
      end;

    begin
      { Currently, everything below is mips4+ }
      if (current_settings.cputype<cpu_mips4) then
        exit;
      p:=BlockStart;
      ClearUsedRegs;
      while (p<>BlockEnd) Do
        begin
          UpdateUsedRegs(tai(p.next));
          if (p.typ=ait_instruction) and (taicpu(p).opcode=A_BC) then
            begin
              { determine the register the branch condition depends on }
              condreg:=NR_NO;
              if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
                { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
                condreg:=NR_FCC0
              else if (taicpu(p).condition in [C_EQ,C_NE]) then
                begin
                  if (taicpu(p).oper[0]^.reg=NR_R0) then
                    condreg:=taicpu(p).oper[1]^.reg
                  else if (taicpu(p).oper[1]^.reg=NR_R0) then
                    condreg:=taicpu(p).oper[0]^.reg
                end;

              if (condreg<>NR_NO) then
                begin
                  { count the moves directly following the branch;
                    CanBeCMOV returns False for nil or labels }
                  movcount:=0;
                  GetNextInstruction(p,scan);
                  while CanBeCMOV(scan,condreg) do
                    begin
                      inc(movcount);
                      GetNextInstruction(scan,scan);
                    end;

                  if assigned(scan) then
                    begin
                      if FindLabel(TargetOf(p),scan) then
                        begin
                          { bCC xxx; <several movs>; xxx: }
                          if (movcount<=4) and (movcount>0) then
                            begin
                              cmovcond:=inverse_cond(taicpu(p).condition);
                              jmp:=p;
                              GetNextInstruction(p,scan);
                              p:=scan;
                              ConvertMoves(scan,cmovcond);
                              { wait with removing else GetNextInstruction could
                                ignore the label if it was the only usage in the
                                jump moved away }
                              DropBranch(jmp);
                              continue;
                            end;
                        end
                      else
                        begin
                          { check further for
                              bCC xxx
                              <several movs 1>
                              b yyy
                            xxx:
                              <several movs 2>
                            yyy:
                          }
                          { jmp points to b yyy }
                          jmp:=scan;
                          { skip scan to xxx }
                          GetNextInstruction(scan,scan);
                          if assigned(jmp) and
                            assigned(scan) and
                            (movcount<=3) and
                            (jmp.typ=ait_instruction) and
                            (taicpu(jmp).opcode=A_BA) and
                            { real label and jump, no further references to the
                              label are allowed }
                            (TargetOf(p).getrefs<=2) and
                            FindLabel(TargetOf(p),scan) then
                            begin
                              movcount:=0;
                              { elselabel points to label xxx }
                              elselabel:=scan;
                              { scan points to <several moves 2> }
                              GetNextInstruction(elselabel,scan);
                              while CanBeCMOV(scan,condreg) do
                                begin
                                  inc(movcount);
                                  GetNextInstruction(scan,scan);
                                end;
                              if movcount=0 then
                                scan:=elselabel;
                              { scan points to yyy: }
                              if assigned(scan) and
                                (movcount<=3) and
                                FindLabel(TargetOf(jmp),scan) then
                                begin
                                  { first run executes when the branch is not taken }
                                  cmovcond:=inverse_cond(taicpu(p).condition);
                                  GetNextInstruction(p,scan);
                                  branch:=p;
                                  p:=scan;
                                  ConvertMoves(scan,cmovcond);
                                  { second run executes when the branch is taken }
                                  cmovcond:=inverse_cond(cmovcond);
                                  GetNextInstruction(elselabel,scan);
                                  ConvertMoves(scan,cmovcond);
                                  { remove bCC }
                                  DropBranch(branch);
                                  { remove jmp }
                                  if (p=jmp) then
                                    GetNextInstruction(jmp,p);
                                  DropBranch(jmp);
                                  continue;
                                end;
                            end;
                        end;
                    end;
                end;
            end;
          UpdateUsedRegs(p);
          p:=tai(p.next);
        end;
    end;
  initialization
    { Unit start-up: store this optimizer class in casmoptimizer. }
    casmoptimizer:=TCpuAsmOptimizer;
  end.