aoptcpu.pas 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for MIPS
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$ifdef EXTDEBUG}
  21. {$define DEBUG_AOPTCPU}
  22. {$endif EXTDEBUG}
  23. Interface
  24. uses
  25. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
Type
  { Set of assembler opcodes; TryRemoveMovBeforeStore takes one to describe
    which store opcodes it should accept }
  TAsmOpSet = set of TAsmOp;

  { CPU specific peephole optimizer; registered as casmoptimizer in the
    initialization part of this unit }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { Converts a conditional jump into an unconditional jump. Only call this
      procedure on an instruction that you already know is a conditional jump }
    procedure MakeUnconditional(p: taicpu); override;
    { True if one of the register operands of p1 equals Reg and is written
      (operand_write or operand_readwrite) }
    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
    { Steps forward from Current until there is no next instruction, a
      non-instruction is hit, an instruction mentions reg, or a call/jump is
      reached; returns the result of the last GetNextInstruction call }
    function GetNextInstructionUsingReg(Current: tai;
      var Next: tai; reg: TRegister): Boolean;
    { Folds "op $reg1,...; opcode $reg2,$reg1" into "op $reg2,..." }
    function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
    { Replaces the base register of the reference in next by the source of
      the move p and removes p }
    function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
    { Lets the store next use the source of the move p directly and removes p }
    function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
    { Pass 1 peephole optimizations on the single item p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { Pass 2: turns short branch-over-moves sequences into conditional moves
      (only for cpu_mips4 and above) }
    procedure PeepHoleOptPass2; override;
    { True if hp is an instruction whose first operand is the register reg
      and whose opcode is not one of the known non-writing ones }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { True if hp reads reg, either as a non write-only register operand or as
      base/index of a reference }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  End;
  45. Implementation
  46. uses
  47. cutils,globtype,globals,aasmbase,cpuinfo,verbose;
  48. function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
  49. begin
  50. result :=
  51. (instr.typ = ait_instruction) and
  52. (taicpu(instr).opcode = op);
  53. end;
  54. function MatchOperand(const oper: TOper; reg: TRegister): boolean;
  55. begin
  56. result:=(oper.typ=top_reg) and (oper.reg=reg);
  57. end;
  58. function IsSameReg(this,next: taicpu): boolean;
  59. begin
  60. result:=(next.oper[0]^.typ=top_reg) and
  61. (next.oper[1]^.typ=top_reg) and
  62. (next.oper[0]^.reg=next.oper[1]^.reg) and
  63. (next.oper[0]^.reg=this.oper[0]^.reg);
  64. end;
  65. function CanBeCMOV(p: tai; condreg: tregister): boolean;
  66. begin
  67. result:=assigned(p) and (p.typ=ait_instruction) and
  68. ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or
  69. (
  70. { register with condition must not be overwritten }
  71. (taicpu(p).opcode=A_MOVE) and
  72. (taicpu(p).oper[0]^.reg<>condreg)
  73. ));
  74. end;
  75. procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);
  76. begin
  77. case cond of
  78. C_COP1TRUE:
  79. case p.opcode of
  80. A_MOV_D: p.opcode:=A_MOVT_D;
  81. A_MOV_S: p.opcode:=A_MOVT_S;
  82. A_MOVE: p.opcode:=A_MOVT;
  83. else
  84. InternalError(2014061701);
  85. end;
  86. C_COP1FALSE:
  87. case p.opcode of
  88. A_MOV_D: p.opcode:=A_MOVF_D;
  89. A_MOV_S: p.opcode:=A_MOVF_S;
  90. A_MOVE: p.opcode:=A_MOVF;
  91. else
  92. InternalError(2014061702);
  93. end;
  94. C_EQ:
  95. case p.opcode of
  96. A_MOV_D: p.opcode:=A_MOVZ_D;
  97. A_MOV_S: p.opcode:=A_MOVZ_S;
  98. A_MOVE: p.opcode:=A_MOVZ;
  99. else
  100. InternalError(2014061703);
  101. end;
  102. C_NE:
  103. case p.opcode of
  104. A_MOV_D: p.opcode:=A_MOVN_D;
  105. A_MOV_S: p.opcode:=A_MOVN_S;
  106. A_MOVE: p.opcode:=A_MOVN;
  107. else
  108. InternalError(2014061704);
  109. end;
  110. else
  111. InternalError(2014061705);
  112. end;
  113. p.ops:=3;
  114. p.loadreg(2,reg);
  115. end;
  116. {$ifdef DEBUG_AOPTCPU}
  117. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  118. begin
  119. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  120. end;
  121. {$else DEBUG_AOPTCPU}
  122. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  123. begin
  124. end;
  125. {$endif DEBUG_AOPTCPU}
  126. { Converts a conditional jump into an unconditional jump. Only call this
  127. procedure on an instruction that you already know is a conditional jump }
  128. procedure TCpuAsmOptimizer.MakeUnconditional(p: taicpu);
  129. var
  130. idx, topidx: Byte;
  131. begin
  132. inherited MakeUnconditional(p);
  133. topidx := p.ops-1;
  134. if topidx = 0 then
  135. Exit;
  136. { Move destination address into first register, then delete the rest }
  137. p.loadoper(0, p.oper[topidx]^);
  138. for idx := topidx downto 1 do
  139. p.freeop(idx);
  140. p.ops := 1;
  141. p.opercnt := 1;
  142. end;
  143. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  144. var
  145. p: taicpu;
  146. i: longint;
  147. begin
  148. result:=false;
  149. if not (assigned(hp) and (hp.typ=ait_instruction)) then
  150. exit;
  151. p:=taicpu(hp);
  152. i:=0;
  153. while(i<p.ops) do
  154. begin
  155. case p.oper[I]^.typ of
  156. top_reg:
  157. result:=(p.oper[I]^.reg=reg) and (p.spilling_get_operation_type(I)<>operand_write);
  158. top_ref:
  159. result:=
  160. (p.oper[I]^.ref^.base=reg) or
  161. (p.oper[I]^.ref^.index=reg);
  162. else
  163. ;
  164. end;
  165. if result then exit; {Bailout if we found something}
  166. Inc(I);
  167. end;
  168. end;
  169. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  170. var
  171. p: taicpu;
  172. begin
  173. result:=false;
  174. if not ((assigned(hp)) and (hp.typ=ait_instruction)) then
  175. exit;
  176. p:=taicpu(hp);
  177. case p.opcode of
  178. { These instructions do not write into a register at all }
  179. A_NOP,
  180. A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
  181. A_BA,A_BC,
  182. A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
  183. exit;
  184. else
  185. ;
  186. end;
  187. result:=(p.ops>0) and (p.oper[0]^.typ=top_reg) and
  188. (p.oper[0]^.reg=reg);
  189. end;
  190. function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
  191. var
  192. i : Longint;
  193. begin
  194. result:=false;
  195. for i:=0 to taicpu(p1).ops-1 do
  196. if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
  197. begin
  198. result:=true;
  199. exit;
  200. end;
  201. end;
  202. function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  203. var Next: tai; reg: TRegister): Boolean;
  204. begin
  205. Next:=Current;
  206. repeat
  207. Result:=GetNextInstruction(Next,Next);
  208. until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
  209. (is_calljmp(taicpu(Next).opcode));
  210. end;
  211. function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
  212. var
  213. next,hp1: tai;
  214. alloc,dealloc: tai_regalloc;
  215. begin
  216. { Fold
  217. op $reg1,...
  218. opcode $reg2,$reg1
  219. dealloc $reg1
  220. into
  221. op $reg2,...
  222. opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
  223. }
  224. result:=false;
  225. if (taicpu(p).ops>0) and
  226. GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  227. MatchInstruction(next,opcode) and
  228. MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
  229. { the destination register of mov cannot be used between p and next }
  230. (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then
  231. begin
  232. dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
  233. if assigned(dealloc) then
  234. begin
  235. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  236. and remove it if possible }
  237. GetLastInstruction(p,hp1);
  238. asml.Remove(dealloc);
  239. alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  240. if assigned(alloc) then
  241. begin
  242. asml.Remove(alloc);
  243. alloc.free;
  244. dealloc.free;
  245. end
  246. else
  247. asml.InsertAfter(dealloc,p);
  248. { try to move the allocation of the target register }
  249. GetLastInstruction(next,hp1);
  250. alloc:=FindRegAlloc(taicpu(next).oper[0]^.reg,tai(hp1.Next));
  251. if assigned(alloc) then
  252. begin
  253. asml.Remove(alloc);
  254. asml.InsertBefore(alloc,p);
  255. { adjust used regs }
  256. IncludeRegInUsedRegs(taicpu(next).oper[0]^.reg,UsedRegs);
  257. end;
  258. { finally get rid of the mov }
  259. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  260. DebugMsg('Peephole: Move removed 1',next);
  261. asml.remove(next);
  262. next.free;
  263. result:=true;
  264. end
  265. else // no dealloc found
  266. begin
  267. { try to optimize the typical call sequence
  268. lw $reg, (whatever)
  269. <alloc volatile registers (including $reg!!)>
  270. move $t9,$reg
  271. jalr $t9
  272. if $reg is nonvolatile, its value may be used after call
  273. and we cannot safely replace it with $t9 }
  274. if (opcode=A_MOVE) and
  275. (taicpu(next).oper[0]^.reg=NR_R25) and
  276. GetNextInstruction(next,hp1) and
  277. MatchInstruction(hp1,A_JALR) and
  278. MatchOperand(taicpu(hp1).oper[0]^,NR_R25) and
  279. assigned(FindRegAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) then
  280. begin
  281. taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
  282. DebugMsg('Peephole: Move removed 2',next);
  283. asml.remove(next);
  284. next.free;
  285. result:=true;
  286. end;
  287. end;
  288. end;
  289. end;
  290. function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
  291. begin
  292. result:=(next.opcode in storeops) and
  293. MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and
  294. { Ry cannot be modified between move and store }
  295. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  296. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
  297. if result then
  298. begin
  299. next.loadreg(0,taicpu(p).oper[1]^.reg);
  300. DebugMsg('Peephole: Move removed 3',p);
  301. asml.remove(p);
  302. p.free;
  303. p:=next;
  304. end;
  305. end;
  306. function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
  307. begin
  308. result:=(next.ops>1) and
  309. (next.oper[1]^.typ=top_ref) and
  310. (next.oper[1]^.ref^.refaddr<>addr_full) and
  311. (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  312. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  313. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
  314. if result then
  315. begin
  316. AllocRegBetween(taicpu(p).oper[1]^.reg,p,next,UsedRegs);
  317. next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  318. DebugMsg('Peephole: Move removed 4',p);
  319. asml.remove(p);
  320. p.free;
  321. p:=next;
  322. end;
  323. end;
  324. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  325. var
  326. next,next2: tai;
  327. begin
  328. result:=false;
  329. case p.typ of
  330. ait_instruction:
  331. begin
  332. case taicpu(p).opcode of
  333. A_BC:
  334. begin
  335. { BEQ/BNE with same register are bogus, but can be generated for code like
  336. "if lo(qwordvar)=cardinal(qwordvar) ...",
  337. optimizations below can also yield them, e.g. if one register was initially R0. }
  338. if (taicpu(p).condition in [C_EQ,C_NE]) and
  339. (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
  340. begin
  341. if (taicpu(p).condition=C_NE) then
  342. begin
  343. if (taicpu(p).oper[2]^.typ = top_ref) and
  344. (taicpu(p).oper[2]^.ref^.symbol is TAsmLabel) then
  345. TAsmLabel(taicpu(p).oper[2]^.ref^.symbol).decrefs;
  346. RemoveDelaySlot(p);
  347. GetNextInstruction(p,next);
  348. end
  349. else
  350. begin
  351. next:=taicpu.op_sym(A_BA,taicpu(p).oper[2]^.ref^.symbol);
  352. taicpu(next).fileinfo:=taicpu(p).fileinfo;
  353. asml.insertbefore(next,p);
  354. end;
  355. asml.remove(p);
  356. p.Free;
  357. p:=next;
  358. result:=true;
  359. end;
  360. end;
  361. A_SEH:
  362. begin
  363. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  364. MatchInstruction(next,A_SH) and
  365. MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and
  366. (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and
  367. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  368. begin
  369. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  370. asml.remove(p);
  371. p.free;
  372. p:=next;
  373. result:=true;
  374. end
  375. else
  376. result:=TryRemoveMov(p,A_MOVE);
  377. end;
  378. A_SEB:
  379. { TODO: can be handled similar to A_SEH, but it's almost never encountered }
  380. result:=TryRemoveMov(p,A_MOVE);
  381. A_SLL:
  382. begin
  383. { if this is a sign extension... }
  384. if (taicpu(p).oper[2]^.typ=top_const) and
  385. GetNextInstruction(p,next) and
  386. MatchInstruction(next,A_SRA) and
  387. IsSameReg(taicpu(p),taicpu(next)) and
  388. (taicpu(next).oper[2]^.typ=top_const) and
  389. (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
  390. (taicpu(next).oper[2]^.val=16) and
  391. { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
  392. GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
  393. MatchInstruction(next2,A_SH) and
  394. (taicpu(next2).oper[0]^.typ=top_reg) and
  395. (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  396. { the initial register may not be reused }
  397. (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
  398. begin
  399. if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then
  400. begin
  401. taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
  402. asml.remove(p);
  403. asml.remove(next);
  404. p.free;
  405. next.free;
  406. p:=next2;
  407. result:=true;
  408. end;
  409. end
  410. else
  411. result:=TryRemoveMov(p,A_MOVE);
  412. end;
  413. A_SRL:
  414. begin
  415. { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions }
  416. { Remove 'andi' in sequences
  417. srl Rx,Ry,16
  418. andi Rx,Rx,65535
  419. srl Rx,Ry,24
  420. andi Rx,Rx,255
  421. since 'srl' clears all relevant upper bits }
  422. if (taicpu(p).oper[2]^.typ=top_const) and
  423. GetNextInstruction(p,next) and
  424. MatchInstruction(next,A_ANDI) and
  425. IsSameReg(taicpu(p),taicpu(next)) and
  426. (taicpu(next).oper[2]^.typ=top_const) and
  427. ((
  428. (taicpu(p).oper[2]^.val>=16) and
  429. (taicpu(next).oper[2]^.val=65535)
  430. ) or (
  431. (taicpu(p).oper[2]^.val>=24) and
  432. (taicpu(next).oper[2]^.val=255)
  433. )) then
  434. begin
  435. asml.remove(next);
  436. next.free;
  437. result:=true;
  438. end
  439. else
  440. result:=TryRemoveMov(p,A_MOVE);
  441. end;
  442. A_ANDI:
  443. begin
  444. { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
  445. if (taicpu(p).oper[2]^.typ=top_const) and
  446. GetNextInstruction(p,next) and
  447. MatchInstruction(next,A_SLL) and
  448. GetNextInstruction(next,next2) and
  449. MatchInstruction(next2,A_SRA) and
  450. IsSameReg(taicpu(p),taicpu(next)) and
  451. IsSameReg(taicpu(p),taicpu(next2)) and
  452. (taicpu(next).oper[2]^.typ=top_const) and
  453. (taicpu(next2).oper[2]^.typ=top_const) and
  454. (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
  455. ((
  456. (taicpu(p).oper[2]^.val<=$7fff) and
  457. (taicpu(next).oper[2]^.val=16)
  458. ) or (
  459. (taicpu(p).oper[2]^.val<=$7f) and
  460. (taicpu(next).oper[2]^.val=24)
  461. )) then
  462. begin
  463. asml.remove(next);
  464. asml.remove(next2);
  465. next.free;
  466. next2.free;
  467. result:=true;
  468. end
  469. { Remove zero extension if register is used only for byte/word memory store }
  470. else if (taicpu(p).oper[2]^.typ=top_const) and
  471. GetNextInstruction(p,next) and
  472. ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
  473. ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and
  474. (taicpu(next).oper[0]^.typ=top_reg) and
  475. (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
  476. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  477. begin
  478. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  479. asml.remove(p);
  480. p.free;
  481. p:=next;
  482. result:=true;
  483. end
  484. else
  485. result:=TryRemoveMov(p,A_MOVE);
  486. end;
  487. A_MOV_S:
  488. begin
  489. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  490. (next.typ=ait_instruction) then
  491. begin
  492. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then
  493. result:=true;
  494. end;
  495. end;
  496. A_MOV_D:
  497. begin
  498. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  499. (next.typ=ait_instruction) then
  500. begin
  501. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then
  502. result:=true;
  503. end;
  504. end;
  505. A_MOVE:
  506. begin
  507. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  508. (next.typ=ait_instruction) and
  509. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
  510. begin
  511. { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) }
  512. if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then
  513. result:=true
  514. else if TryRemoveMovToRefIndex(p,taicpu(next)) then
  515. result:=true
  516. { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any
  517. MOVE Rx,Ry; opcode Rx,Rz,Rx ==> opcode Rx,Rz,Ry }
  518. else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_AND,A_ANDI,A_SLLV,A_SRLV,A_SRAV]) and
  519. MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  520. begin
  521. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  522. begin
  523. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  524. asml.remove(p);
  525. p.free;
  526. p:=next;
  527. result:=true;
  528. end
  529. { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE,
  530. providing further optimization possibilities }
  531. else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
  532. begin
  533. taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
  534. asml.remove(p);
  535. p.free;
  536. p:=next;
  537. result:=true;
  538. end;
  539. end
  540. { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any }
  541. else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU,
  542. A_SLL,A_SRL,A_SRA,A_SLLV,A_SRLV,A_SRAV,A_AND,A_ANDI,A_OR,A_ORI,A_XOR,A_XORI]) and
  543. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  544. begin
  545. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  546. begin
  547. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  548. asml.remove(p);
  549. p.free;
  550. p:=next;
  551. result:=true;
  552. end
  553. else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
  554. begin
  555. taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
  556. asml.remove(p);
  557. p.free;
  558. p:=next;
  559. result:=true;
  560. end;
  561. end
  562. { MULT[U] and cond.branches must be handled separately due to different operand numbers }
  563. else if (taicpu(next).opcode in [A_MULT,A_MULTU,A_BC]) and
  564. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
  565. begin
  566. if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  567. begin
  568. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  569. if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  570. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  571. asml.remove(p);
  572. p.free;
  573. p:=next;
  574. result:=true;
  575. end
  576. else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
  577. begin
  578. taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
  579. if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
  580. taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
  581. asml.remove(p);
  582. p.free;
  583. p:=next;
  584. result:=true;
  585. end;
  586. end
  587. else if TryRemoveMov(p,A_MOVE) then
  588. begin
  589. { Ended up with move between same register? Suicide then. }
  590. if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
  591. begin
  592. GetNextInstruction(p,next);
  593. asml.remove(p);
  594. p.free;
  595. p:=next;
  596. result:=true;
  597. end;
  598. end;
  599. end;
  600. end;
  601. A_ADDIU:
  602. begin
  603. { ADDIU Rx,Ry,const; load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry)
  604. ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry)
  605. ADDIU Rx,Ry,const; load Rx,(Rx) ==> load Rx,const(Ry)
  606. ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) }
  607. if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
  608. (next.typ=ait_instruction) and
  609. (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and
  610. (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and
  611. (taicpu(next).oper[1]^.ref^.offset=0) and
  612. (taicpu(next).oper[1]^.ref^.symbol=nil) and
  613. (
  614. Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or
  615. (
  616. (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and
  617. (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW])
  618. )
  619. ) and
  620. (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
  621. begin
  622. case taicpu(p).oper[2]^.typ of
  623. top_const:
  624. taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
  625. top_ref:
  626. taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^;
  627. else
  628. InternalError(2014100401);
  629. end;
  630. taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  631. asml.remove(p);
  632. p.free;
  633. p:=next;
  634. result:=true;
  635. end
  636. else
  637. result:=TryRemoveMov(p,A_MOVE);
  638. end;
  639. A_ADD,A_ADDU,A_OR:
  640. begin
  641. if MatchOperand(taicpu(p).oper[1]^,NR_R0) then
  642. begin
  643. taicpu(p).freeop(1);
  644. taicpu(p).oper[1]:=taicpu(p).oper[2];
  645. taicpu(p).oper[2]:=nil;
  646. taicpu(p).ops:=2;
  647. taicpu(p).opercnt:=2;
  648. taicpu(p).opcode:=A_MOVE;
  649. result:=true;
  650. end
  651. else if MatchOperand(taicpu(p).oper[2]^,NR_R0) then
  652. begin
  653. taicpu(p).freeop(2);
  654. taicpu(p).ops:=2;
  655. taicpu(p).opercnt:=2;
  656. taicpu(p).opcode:=A_MOVE;
  657. result:=true;
  658. end
  659. else
  660. result:=TryRemoveMov(p,A_MOVE);
  661. end;
  662. A_LB,A_LBU,A_LH,A_LHU,A_LW,
  663. A_ADDI,
  664. A_SUB,A_SUBU,
  665. A_SRA,A_SRAV,
  666. A_SRLV,
  667. A_SLLV,
  668. A_MFLO,A_MFHI,
  669. A_AND,A_XOR,A_ORI,A_XORI:
  670. result:=TryRemoveMov(p,A_MOVE);
  671. A_LWC1,
  672. A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
  673. A_ABS_s, A_NEG_s, A_SQRT_s,
  674. A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
  675. result:=TryRemoveMov(p,A_MOV_s);
  676. A_LDC1,
  677. A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
  678. A_ABS_d, A_NEG_d, A_SQRT_d,
  679. A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
  680. result:=TryRemoveMov(p,A_MOV_d);
  681. else
  682. ;
  683. end;
  684. end;
  685. else
  686. ;
  687. end;
  688. end;
  689. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  690. var
  691. p: tai;
  692. l: longint;
  693. hp1,hp2,hp3,hp4: tai;
  694. condition: tasmcond;
  695. condreg: tregister;
  696. begin
  697. { Currently, everything below is mips4+ }
  698. if (current_settings.cputype<cpu_mips4) then
  699. exit;
  700. p:=BlockStart;
  701. ClearUsedRegs;
  702. while (p<>BlockEnd) Do
  703. begin
  704. UpdateUsedRegs(tai(p.next));
  705. case p.typ of
  706. ait_instruction:
  707. begin
  708. case taicpu(p).opcode of
  709. A_BC:
  710. begin
  711. condreg:=NR_NO;
  712. if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
  713. { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
  714. condreg:=NR_FCC0
  715. else if (taicpu(p).condition in [C_EQ,C_NE]) then
  716. begin
  717. if (taicpu(p).oper[0]^.reg=NR_R0) then
  718. condreg:=taicpu(p).oper[1]^.reg
  719. else if (taicpu(p).oper[1]^.reg=NR_R0) then
  720. condreg:=taicpu(p).oper[0]^.reg
  721. end;
  722. if (condreg<>NR_NO) then
  723. begin
  724. { check for
  725. bCC xxx
  726. <several movs>
  727. xxx:
  728. }
  729. l:=0;
  730. GetNextInstruction(p, hp1);
  731. while CanBeCMOV(hp1,condreg) do // CanBeCMOV returns False for nil or labels
  732. begin
  733. inc(l);
  734. GetNextInstruction(hp1,hp1);
  735. end;
  736. if assigned(hp1) then
  737. begin
  738. if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
  739. begin
  740. if (l<=4) and (l>0) then
  741. begin
  742. condition:=inverse_cond(taicpu(p).condition);
  743. hp2:=p;
  744. GetNextInstruction(p,hp1);
  745. p:=hp1;
  746. repeat
  747. ChangeToCMOV(taicpu(hp1),condition,condreg);
  748. GetNextInstruction(hp1,hp1);
  749. until not CanBeCMOV(hp1,condreg);
  750. { wait with removing else GetNextInstruction could
  751. ignore the label if it was the only usage in the
  752. jump moved away }
  753. tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
  754. RemoveDelaySlot(hp2);
  755. asml.remove(hp2);
  756. hp2.free;
  757. continue;
  758. end;
  759. end
  760. else
  761. begin
  762. { check further for
  763. bCC xxx
  764. <several movs 1>
  765. b yyy
  766. xxx:
  767. <several movs 2>
  768. yyy:
  769. }
  770. { hp2 points to b yyy }
  771. hp2:=hp1;
  772. { skip hp1 to xxx }
  773. GetNextInstruction(hp1, hp1);
  774. if assigned(hp2) and
  775. assigned(hp1) and
  776. (l<=3) and
  777. (hp2.typ=ait_instruction) and
  778. (taicpu(hp2).opcode=A_BA) and
  779. { real label and jump, no further references to the
  780. label are allowed }
  781. (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and
  782. FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
  783. begin
  784. l:=0;
  785. hp4:=hp1;
  786. { hp4 points to label xxx }
  787. GetNextInstruction(hp4, hp1);
  788. { hp1 points to <several moves 2> }
  789. while CanBeCMOV(hp1,condreg) do
  790. begin
  791. inc(l);
  792. GetNextInstruction(hp1, hp1);
  793. end;
  794. if l=0 then
  795. hp1:=hp4;
  796. { hp1 points to yyy: }
  797. if assigned(hp1) and
  798. (l<=3) and
  799. FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then
  800. begin
  801. condition:=inverse_cond(taicpu(p).condition);
  802. GetNextInstruction(p,hp1);
  803. hp3:=p;
  804. p:=hp1;
  805. while CanBeCMOV(hp1,condreg) do
  806. begin
  807. ChangeToCMOV(taicpu(hp1),condition,condreg);
  808. GetNextInstruction(hp1,hp1);
  809. end;
  810. condition:=inverse_cond(condition);
  811. GetNextInstruction(hp4, hp1);
  812. { hp1 points to <several moves 2> }
  813. while CanBeCMOV(hp1,condreg) do
  814. begin
  815. ChangeToCMOV(taicpu(hp1),condition,condreg);
  816. GetNextInstruction(hp1,hp1);
  817. end;
  818. { remove bCC }
  819. tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs;
  820. RemoveDelaySlot(hp3);
  821. asml.remove(hp3);
  822. hp3.free;
  823. { remove jmp }
  824. if (p=hp2) then
  825. GetNextInstruction(hp2,p);
  826. tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
  827. RemoveDelaySlot(hp2);
  828. asml.remove(hp2);
  829. hp2.free;
  830. continue;
  831. end;
  832. end;
  833. end;
  834. end;
  835. end;
  836. end;
  837. else
  838. ;
  839. end;
  840. end;
  841. else
  842. ;
  843. end;
  844. UpdateUsedRegs(p);
  845. p:=tai(p.next);
  846. end;
  847. end;
{ Unit initialization: assigns this unit's optimizer class to casmoptimizer.
  NOTE(review): presumably casmoptimizer is the class reference the generic
  optimizer driver instantiates; confirm where it is declared. }
begin
  casmoptimizer:=TCpuAsmOptimizer;
end.