aoptcpu.pas 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
{
    Copyright (c) 1998-2004 by Jonas Maebe

    This unit calls the optimization procedures to optimize the assembler
    code for MIPS

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses
  22. cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
  type
    { Opcode set; TryRemoveMovBeforeStore receives one to name the store
      opcodes a preceding move may be folded into. }
    TAsmOpSet = set of TAsmOp;

    { Peephole optimizer specialisation of TAsmOptimizer.  It overrides the
      per-instruction first pass and the whole second pass, and declares the
      helper methods those passes share. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { True when p1 has a register operand equal to Reg whose operation
        type is write or read-write. }
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;

      { Walks forward from Current and returns in Next the entry at which
        the walk stopped; fails (Next=nil) when a call/jump stopped it. }
      function GetNextInstructionUsingReg(Current: tai;
        var Next: tai; reg: TRegister): boolean;

      { Folds "op $reg1,...; opcode $reg2,$reg1" into "op $reg2,...". }
      function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;

      { Replaces the base register of next's memory reference by the
        source register of the move p, then removes p. }
      function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;

      { Makes the store next write the source register of the move p
        directly, then removes p. }
      function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;

      { First peephole pass, applied to the single list entry p. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

      { Second peephole pass over the current block. }
      procedure PeepHoleOptPass2; override;
    end;
  35. Implementation
  36. uses
  37. cutils,globtype,globals,aasmbase,cpuinfo,verbose;
  { Tells whether instr is an instruction entry carrying opcode op.
    instr is dereferenced without a nil check, so it must be assigned. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ<>ait_instruction then
        result:=false
      else
        result:=(taicpu(instr).opcode=op);
    end;
  { Tells whether oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; reg: TRegister): boolean;
    begin
      result:=false;
      if oper.typ=top_reg then
        result:=(oper.reg=reg);
    end;
  { True when the first two operands of next are one and the same register
    and that register equals the register field of this' first operand.
    The operand type of this.oper[0] is not checked here. }
  function IsSameReg(this,next: taicpu): boolean;
    begin
      result:=false;
      if next.oper[0]^.typ<>top_reg then
        exit;
      if next.oper[1]^.typ<>top_reg then
        exit;
      if next.oper[0]^.reg<>next.oper[1]^.reg then
        exit;
      result:=(next.oper[0]^.reg=this.oper[0]^.reg);
    end;
  { Reports whether hp is an instruction whose first operand is the
    register reg, excluding the opcodes listed below, which do not write
    into a register at all.  Returns false for nil and for non-instruction
    entries. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      { These instructions do not write into a register at all }
      if instr.opcode in [A_NOP,
                          A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
                          A_BA,A_BC,
                          A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1] then
        exit;
      if instr.ops>0 then
        result:=(instr.oper[0]^.typ=top_reg) and
                (instr.oper[0]^.reg=reg);
    end;
  { Decides whether p may be rewritten as a conditional move.  A_MOV_D and
    A_MOV_S always qualify; A_MOVE qualifies only when it does not
    overwrite condreg, the register holding the condition.  Anything else,
    including nil and non-instruction entries, yields false. }
  function CanBeCMOV(p: tai; condreg: tregister): boolean;
    begin
      result:=false;
      if (not assigned(p)) or (p.typ<>ait_instruction) then
        exit;
      if taicpu(p).opcode in [A_MOV_D,A_MOV_S] then
        result:=true
      else if taicpu(p).opcode=A_MOVE then
        { register with condition must not be overwritten }
        result:=(taicpu(p).oper[0]^.reg<>condreg);
    end;
  { Turns the plain move p (A_MOV_D, A_MOV_S or A_MOVE) into the
    conditional move selected by cond and appends reg as third operand.
    Raises an internal error for an unsupported opcode (2014061701..04,
    depending on cond) or an unsupported condition (2014061705). }
  procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);

    { Swaps p's opcode for the matching conditional variant; errcode is
      reported when p is none of the three supported moves. }
    procedure Substitute(op_d,op_s,op_int: TAsmOp; errcode: longint);
      begin
        case p.opcode of
          A_MOV_D:
            p.opcode:=op_d;
          A_MOV_S:
            p.opcode:=op_s;
          A_MOVE:
            p.opcode:=op_int;
        else
          InternalError(errcode);
        end;
      end;

    begin
      case cond of
        C_COP1TRUE:
          Substitute(A_MOVT_D,A_MOVT_S,A_MOVT,2014061701);
        C_COP1FALSE:
          Substitute(A_MOVF_D,A_MOVF_S,A_MOVF,2014061702);
        C_EQ:
          Substitute(A_MOVZ_D,A_MOVZ_S,A_MOVZ,2014061703);
        C_NE:
          Substitute(A_MOVN_D,A_MOVN_S,A_MOVN,2014061704);
      else
        InternalError(2014061705);
      end;
      { the conditional forms take the condition register as operand 2 }
      p.ops:=3;
      p.loadreg(2,reg);
    end;
  { Scans the operands of hp from index 1 upwards and reports whether one
    of them involves reg: a register operand counts only at index 1, a
    memory reference counts when reg is its base or its index.  Operand 0
    is never inspected.  Returns false for nil and for non-instruction
    entries. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      idx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);
      for idx:=1 to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              result:=(instr.oper[idx]^.reg=reg) and (idx<2);
            top_ref:
              result:=(instr.oper[idx]^.ref^.base=reg) or
                      (instr.oper[idx]^.ref^.index=reg);
          end;
          { stop at the first hit }
          if result then
            exit;
        end;
    end;
  { Returns true when p1 has a register operand equal to Reg whose
    operation type, as reported by spilling_get_operation_type, is write
    or read-write.  p1 is cast to taicpu without a type check. }
  function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
    var
      instr: taicpu;
      idx: longint;
    begin
      result:=false;
      instr:=taicpu(p1);
      for idx:=0 to instr.ops-1 do
        begin
          if instr.oper[idx]^.typ<>top_reg then
            continue;
          if instr.oper[idx]^.reg<>Reg then
            continue;
          if instr.spilling_get_operation_type(idx) in [operand_write,operand_readwrite] then
            begin
              result:=true;
              exit;
            end;
        end;
    end;
  { Advances from Current with GetNextInstruction until the list ends, a
    non-instruction entry is reached, an instruction in which reg occurs
    (per RegInInstruction) is reached, or a call/jump is reached.  Next
    receives the entry at which the walk stopped.  When that entry is a
    call or jump the search is treated as failed: the result is false and
    Next is set to nil. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): boolean;
    begin
      Next:=Current;
      Result:=GetNextInstruction(Next,Next);
      while Result and
            (Next.typ=ait_instruction) and
            (not RegInInstruction(reg,Next)) and
            (not is_calljmp(taicpu(Next).opcode)) do
        Result:=GetNextInstruction(Next,Next);

      { never hand out a call/jump, even if it references reg }
      if Result and
         (Next.typ=ait_instruction) and
         is_calljmp(taicpu(Next).opcode) then
        begin
          Result:=false;
          Next:=nil;
        end;
    end;
  { Folds

        op      $reg1,...
        opcode  $reg2,$reg1
        dealloc $reg1

    into

        op      $reg2,...

    where opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.

    p is the "op" instruction; it stays in the list and only has its first
    operand replaced.  Returns true when the move was removed. }
  function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
    var
      mov,prev: tai;
      alloc,dealloc: tai_regalloc;
      tmpreg,dstreg: TRegister;
    begin
      result:=false;
      if not (
           (taicpu(p).ops>0) and
           GetNextInstructionUsingReg(p,mov,taicpu(p).oper[0]^.reg) and
           MatchInstruction(mov,opcode) and
           MatchOperand(taicpu(mov).oper[1]^,taicpu(p).oper[0]^.reg) and
           { the destination register of mov cannot be used between p and mov }
           (not RegUsedBetween(taicpu(mov).oper[0]^.reg,p,mov))
         ) then
        exit;

      { $reg1 and $reg2 of the pattern above }
      tmpreg:=taicpu(p).oper[0]^.reg;
      dstreg:=taicpu(mov).oper[0]^.reg;

      dealloc:=FindRegDealloc(tmpreg,tai(mov.Next));
      if assigned(dealloc) then
        begin
          { $reg1 is not used anymore, try to find its allocation
            and remove it if possible }
          GetLastInstruction(p,prev);
          asml.Remove(dealloc);
          alloc:=FindRegAlloc(tmpreg,tai(prev.Next));
          if assigned(alloc) then
            begin
              asml.Remove(alloc);
              alloc.free;
              dealloc.free;
            end
          else
            { no allocation found: keep the deallocation, right behind p }
            asml.InsertAfter(dealloc,p);

          { try to move the allocation of the target register in front of p }
          GetLastInstruction(mov,prev);
          alloc:=FindRegAlloc(dstreg,tai(prev.Next));
          if assigned(alloc) then
            begin
              asml.Remove(alloc);
              asml.InsertBefore(alloc,p);
              { adjust used regs }
              IncludeRegInUsedRegs(dstreg,UsedRegs);
            end;

          { finally get rid of the mov }
          taicpu(p).loadreg(0,dstreg);
          asml.remove(mov);
          mov.free;
          result:=true;
        end
      { no dealloc found: try to optimize the typical call sequence

            lw   $reg, (whatever)
            <alloc volatile registers>
            move $t9,$reg
            jalr $t9

        Do not do so if the used register might contain a
        register variable. }
      else if (opcode=A_MOVE) and
              not(cs_opt_regvar in current_settings.optimizerswitches) and
              (dstreg=NR_R25) and
              GetNextInstruction(mov,prev) and
              MatchInstruction(prev,A_JALR) and
              MatchOperand(taicpu(prev).oper[0]^,NR_R25) then
        begin
          taicpu(p).loadreg(0,dstreg);
          asml.remove(mov);
          mov.free;
          result:=true;
        end;
    end;
  { Handles "move Rx,Ry; store Rx,(ref); dealloc Rx": when next is one of
    the stores in storeops, stores Rx, Ry is unmodified between p and
    next, and a deallocation of Rx is found after next, the store is made
    to write Ry directly and the move p is removed and freed.  On success
    p is set to next and true is returned. }
  function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
    begin
      result:=false;
      if not (next.opcode in storeops) then
        exit;
      if not MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) then
        exit;
      { Ry cannot be modified between move and store }
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next) then
        exit;
      if not Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
        exit;

      result:=true;
      next.loadreg(0,taicpu(p).oper[1]^.reg);
      asml.remove(p);
      p.free;
      p:=next;
    end;
  { Handles "move Rx,Ry; op Rz,(Rx); dealloc Rx": when the second operand
    of next is a memory reference (not addr_full) whose base is Rx, Ry is
    unmodified between p and next, and a deallocation of Rx is found after
    next, the base register is replaced by Ry and the move p is removed
    and freed.  On success p is set to next and true is returned. }
  function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
    begin
      result:=false;
      if next.ops<=1 then
        exit;
      if next.oper[1]^.typ<>top_ref then
        exit;
      if next.oper[1]^.ref^.refaddr=addr_full then
        exit;
      if next.oper[1]^.ref^.base<>taicpu(p).oper[0]^.reg then
        exit;
      if RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next) then
        exit;
      if not Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
        exit;

      result:=true;
      next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
      asml.remove(p);
      p.free;
      p:=next;
    end;
  { First peephole pass for a single list entry.

    p is the entry to examine; entries that are not instructions are left
    alone.  Depending on the opcode of p one of the rewrites documented at
    each case label is attempted.  Several rewrites remove p itself; p then
    points to the instruction that absorbed it.

    The result is set only by the A_ADDIU case (true when the ADDIU was
    folded into the following load/store, otherwise whatever TryRemoveMov
    returns); every other case leaves it false, even when it changed the
    list. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      next,next2: tai;
    begin
      result:=false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_SEH:
                begin
                  { SEH Rx,Ry; SH Rx,(ref); dealloc Rx ==> SH Ry,(ref) }
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    MatchInstruction(next,A_SH) and
                    MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and
                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and
                    Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                    begin
                      taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                      asml.remove(p);
                      p.free;
                      p:=next;
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;

              A_SEB:
                { TODO: can be handled similar to A_SEH, but it's almost never encountered }
                TryRemoveMov(p,A_MOVE);

              A_SLL:
                begin
                  { if this is a sign extension (SLL by 16 followed by SRA by 16 on the same register)... }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
                    (taicpu(next).oper[2]^.val=16) and
                    { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
                    GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
                    MatchInstruction(next2,A_SH) and
                    (taicpu(next2).oper[0]^.typ=top_reg) and
                    (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    { the initial register may not be reused }
                    (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
                    begin
                      { the extended value must die after the store; otherwise nothing is changed }
                      if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then
                        begin
                          taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
                          asml.remove(p);
                          asml.remove(next);
                          p.free;
                          next.free;
                          p:=next2;
                        end;
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;

              A_SRL:
                begin
                  { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions }
                  { Remove 'andi' in sequences
                      srl  Rx,Ry,16
                      andi Rx,Rx,65535

                      srl  Rx,Ry,24
                      andi Rx,Rx,255
                    since 'srl' clears all relevant upper bits }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_ANDI) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    ((
                      (taicpu(p).oper[2]^.val>=16) and
                      (taicpu(next).oper[2]^.val=65535)
                    ) or (
                      (taicpu(p).oper[2]^.val>=24) and
                      (taicpu(next).oper[2]^.val=255)
                    )) then
                    begin
                      asml.remove(next);
                      next.free;
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;

              A_ANDI:
                begin
                  { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
                  if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    MatchInstruction(next,A_SLL) and
                    GetNextInstruction(next,next2) and
                    MatchInstruction(next2,A_SRA) and
                    IsSameReg(taicpu(p),taicpu(next)) and
                    IsSameReg(taicpu(p),taicpu(next2)) and
                    (taicpu(next).oper[2]^.typ=top_const) and
                    (taicpu(next2).oper[2]^.typ=top_const) and
                    (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
                    ((
                      (taicpu(p).oper[2]^.val<=$7fff) and
                      (taicpu(next).oper[2]^.val=16)
                    ) or (
                      (taicpu(p).oper[2]^.val<=$7f) and
                      (taicpu(next).oper[2]^.val=24)
                    )) then
                    begin
                      asml.remove(next);
                      asml.remove(next2);
                      next.free;
                      next2.free;
                    end
                  { Remove zero extension if register is used only for byte/word memory store.
                    The SB/SH alternatives are parenthesised as a group: 'and' binds
                    tighter than 'or', so without the parentheses the SB alternative
                    would succeed without the register and dealloc checks, and the SH
                    alternative would be evaluated even if the checks in front of it
                    had failed. }
                  else if (taicpu(p).oper[2]^.typ=top_const) and
                    GetNextInstruction(p,next) and
                    (
                      ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
                      ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH))
                    ) and
                    (taicpu(next).oper[0]^.typ=top_reg) and
                    (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                    assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                    begin
                      taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                      asml.remove(p);
                      p.free;
                      p:=next;
                    end
                  else
                    TryRemoveMov(p,A_MOVE);
                end;

              A_MOV_S:
                begin
                  { MOV.S Fx,Fy; SWC1 Fx,(ref); dealloc Fx ==> SWC1 Fy,(ref) }
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) then
                    TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]);
                end;

              A_MOV_D:
                begin
                  { MOV.D Fx,Fy; SDC1 Fx,(ref); dealloc Fx ==> SDC1 Fy,(ref) }
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) then
                    TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]);
                end;

              A_MOVE:
                begin
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
                    begin
                      { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) }
                      if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then
                        { optimization successful }
                      else if TryRemoveMovToRefIndex(p,taicpu(next)) then
                        { successful as well }
                      { MOVE Rx,Ry; opcode Rx,Rx,any              ==> opcode Rx,Ry,any
                        MOVE Rx,Ry; opcode Rx,Rz,Rx               ==> opcode Rx,Rz,Ry }
                      else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU]) and
                        MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
                        begin
                          if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
                            Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                            begin
                              taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end
                          { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE,
                            providing further optimization possibilities }
                          else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) and
                            Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                            begin
                              taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end;
                        end
                      { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx  ==> opcode Rz,Ry,any }
                      else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU]) and
                        Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                        begin
                          if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
                            begin
                              taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end
                          else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
                            begin
                              taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end;
                        end
                      { MULT[U] must be handled separately due to different operand numbers }
                      else if (taicpu(next).opcode in [A_MULT,A_MULTU]) and
                        Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
                        begin
                          if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
                            begin
                              taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end
                          else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
                            begin
                              taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end;
                        end
                      else if TryRemoveMov(p,A_MOVE) then
                        begin
                          { Ended up with move between same register? Suicide then. }
                          if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
                            begin
                              GetNextInstruction(p,next);
                              asml.remove(p);
                              p.free;
                              p:=next;
                            end;
                        end;
                      { TODO: MOVE Rx,Ry; Bcc Rx,Rz,label; dealloc Rx   ==> Bcc Ry,Rz,label  }
                    end;
                end;

              A_ADDIU:
                begin
                  { ADDIU  Rx,Ry,const;     load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry)
                    ADDIU  Rx,Ry,%lo(sym);  load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry)
                    ADDIU  Rx,Ry,const;     load Rx,(Rx)                   ==> load Rx,const(Ry)
                    ADDIU  Rx,Ry,%lo(sym);  load Rx,(Rx)                   ==> load Rx,%lo(sym)(Ry) }
                  if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
                    (next.typ=ait_instruction) and
                    (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and
                    (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and
                    (taicpu(next).oper[1]^.ref^.offset=0) and
                    (taicpu(next).oper[1]^.ref^.symbol=nil) and
                    (
                      Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or
                      (
                        (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and
                        (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW])
                      )
                    ) and
                    (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
                    begin
                      { move the displacement of the ADDIU into the memory reference... }
                      case taicpu(p).oper[2]^.typ of
                        top_const:
                          taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
                        top_ref:
                          taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^;
                      else
                        InternalError(2014100401);
                      end;
                      { ...and address relative to Ry instead of Rx }
                      taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
                      asml.remove(p);
                      p.free;
                      p:=next;
                      result:=true;
                    end
                  else
                    result:=TryRemoveMov(p,A_MOVE);
                end;

              { integer results that may be followed by a removable MOVE }
              A_LB,A_LBU,A_LH,A_LHU,A_LW,
              A_ADD,A_ADDU,
              A_ADDI,
              A_SUB,A_SUBU,
              A_SRA,A_SRAV,
              A_SRLV,
              A_SLLV,
              A_MFLO,A_MFHI,
              A_AND,A_OR,A_XOR,A_ORI,A_XORI:
                TryRemoveMov(p,A_MOVE);

              { single precision results that may be followed by a removable MOV.S }
              A_LWC1,
              A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
              A_ABS_s, A_NEG_s, A_SQRT_s,
              A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
                TryRemoveMov(p,A_MOV_s);

              { double precision results that may be followed by a removable MOV.D }
              A_LDC1,
              A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
              A_ABS_d, A_NEG_d, A_SQRT_d,
              A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
                TryRemoveMov(p,A_MOV_d);
            end;
          end;
      end;
    end;
  { Second peephole pass over BlockStart..BlockEnd.

    Replaces short conditional-branch sequences by conditional moves:

        bCC xxx                      bCC xxx
        <several movs>               <several movs 1>
      xxx:                           b yyy
                                   xxx:
                                     <several movs 2>
                                   yyy:

    Nothing is done for CPU types below cpu_mips4. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p: tai;
      movcount: longint;
      cur,uncond,condbranch: tai;
      condition: tasmcond;
      condreg: tregister;

    { Label symbol held by the last operand of the jump instruction j }
    function JumpTarget(j: tai): tasmlabel;
      begin
        result:=tasmlabel(taicpu(j).oper[taicpu(j).ops-1]^.ref^.symbol);
      end;

    begin
      { Currently, everything below is mips4+ }
      if (current_settings.cputype<cpu_mips4) then
        exit;
      p:=BlockStart;
      ClearUsedRegs;
      while (p<>BlockEnd) do
        begin
          UpdateUsedRegs(tai(p.next));
          if (p.typ=ait_instruction) and
             (taicpu(p).opcode=A_BC) then
            begin
              { find the register the branch condition depends on }
              condreg:=NR_NO;
              if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
                { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
                condreg:=NR_FCC0
              else if (taicpu(p).condition in [C_EQ,C_NE]) then
                begin
                  { only comparisons against NR_R0 are handled }
                  if (taicpu(p).oper[0]^.reg=NR_R0) then
                    condreg:=taicpu(p).oper[1]^.reg
                  else if (taicpu(p).oper[1]^.reg=NR_R0) then
                    condreg:=taicpu(p).oper[0]^.reg;
                end;

              if (condreg<>NR_NO) then
                begin
                  { count the convertible moves that follow the branch;
                    CanBeCMOV returns False for nil or labels }
                  movcount:=0;
                  GetNextInstruction(p,cur);
                  while CanBeCMOV(cur,condreg) do
                    begin
                      inc(movcount);
                      GetNextInstruction(cur,cur);
                    end;

                  if assigned(cur) then
                    begin
                      if FindLabel(JumpTarget(p),cur) then
                        begin
                          { bCC xxx
                            <several movs>
                            xxx: }
                          if (movcount>0) and (movcount<=4) then
                            begin
                              condition:=inverse_cond(taicpu(p).condition);
                              condbranch:=p;
                              GetNextInstruction(p,cur);
                              p:=cur;
                              repeat
                                ChangeToCMOV(taicpu(cur),condition,condreg);
                                GetNextInstruction(cur,cur);
                              until not CanBeCMOV(cur,condreg);
                              { wait with removing else GetNextInstruction could
                                ignore the label if it was the only usage in the
                                jump moved away }
                              JumpTarget(condbranch).decrefs;
                              RemoveDelaySlot(condbranch);
                              asml.remove(condbranch);
                              condbranch.free;
                              continue;
                            end;
                        end
                      else
                        begin
                          { check further for
                              bCC xxx
                              <several movs 1>
                              b yyy
                            xxx:
                              <several movs 2>
                            yyy: }

                          { uncond points to the candidate for "b yyy" }
                          uncond:=cur;
                          { advance cur towards xxx }
                          GetNextInstruction(cur,cur);
                          if assigned(uncond) and
                             assigned(cur) and
                             (movcount<=3) and
                             (uncond.typ=ait_instruction) and
                             (taicpu(uncond).opcode=A_BA) and
                             { real label and jump, no further references to the
                               label are allowed }
                             (JumpTarget(p).getrefs<=2) and
                             FindLabel(JumpTarget(p),cur) then
                            begin
                              movcount:=0;
                              { advance cur to <several movs 2> }
                              GetNextInstruction(cur,cur);
                              while CanBeCMOV(cur,condreg) do
                                begin
                                  inc(movcount);
                                  GetNextInstruction(cur,cur);
                                end;
                              { cur should now be at yyy: }
                              if assigned(cur) and
                                 FindLabel(JumpTarget(uncond),cur) then
                                begin
                                  { first group executes when the branch is not taken }
                                  condition:=inverse_cond(taicpu(p).condition);
                                  GetNextInstruction(p,cur);
                                  condbranch:=p;
                                  p:=cur;
                                  repeat
                                    ChangeToCMOV(taicpu(cur),condition,condreg);
                                    GetNextInstruction(cur,cur);
                                  until not CanBeCMOV(cur,condreg);
                                  { uncond is still at b yyy; step from it to the second group }
                                  GetNextInstruction(uncond,cur);
                                  { second group executes when the branch is taken }
                                  condition:=inverse_cond(condition);
                                  GetNextInstruction(cur,cur);
                                  { cur is now at <several movs 2> }
                                  repeat
                                    ChangeToCMOV(taicpu(cur),condition,condreg);
                                    GetNextInstruction(cur,cur);
                                  until not CanBeCMOV(cur,condreg);
                                  { remove bCC }
                                  JumpTarget(condbranch).decrefs;
                                  RemoveDelaySlot(condbranch);
                                  asml.remove(condbranch);
                                  condbranch.free;
                                  { remove jmp }
                                  JumpTarget(uncond).decrefs;
                                  RemoveDelaySlot(uncond);
                                  asml.remove(uncond);
                                  uncond.free;
                                  continue;
                                end;
                            end;
                        end;
                    end;
                end;
            end;
          UpdateUsedRegs(p);
          p:=tai(p.next);
        end;
    end;
  { Unit initialization: assign TCpuAsmOptimizer to casmoptimizer. }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
  end.