aoptcpu.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the AVR optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses cpubase, cgbase, aasmtai, aopt, aoptcpub;
  22. Type
  { CPU specific peephole optimizer; it specialises the generic TAsmOptimizer
    by overriding the register query and the two peephole passes. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { Steps forward from Current and stores the instruction it stopped at in Next }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
    { Register usage query, extended for the multiply opcodes }
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;

    { uses the same constructor as TAopObj }

    { First peephole pass; returns true when the instruction stream was changed }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { Second peephole pass; the implementation in this unit is empty }
    procedure PeepHoleOptPass2; override;
  end;
  30. Implementation
  31. uses
  32. cutils,
  33. cpuinfo,
  34. aasmbase,aasmcpu,
  35. globals,globtype,
  36. cgutils;
  type
    { Set of assembler opcodes; lets MatchInstruction test an instruction
      against several opcodes at once. }
    TAsmOpSet = set of TAsmOp;
  { Returns true when p is an instruction whose condition field is still
    C_None; any other kind of list entry yields false. }
  function CanBeCond(p : tai) : boolean;
    begin
      if p.typ=ait_instruction then
        result:=taicpu(p).condition=C_None
      else
        result:=false;
    end;
  { Field by field comparison of two memory references. Both references are
    considered equal only if base, index, scale factor, offset, symbol,
    relative symbol, reference kind and addressing mode all agree. }
  function RefsEqual(const r1, r2: treference): boolean;
    var
      sameRegisters, sameSymbols, sameMode: boolean;
    begin
      sameRegisters :=
        (r1.base = r2.base) and
        (r1.index = r2.index) and
        (r1.scalefactor = r2.scalefactor);
      sameSymbols :=
        (r1.symbol = r2.symbol) and
        (r1.relsymbol = r2.relsymbol);
      sameMode :=
        (r1.refaddr = r2.refaddr) and
        (r1.addressmode = r2.addressmode);
      Result := (r1.offset = r2.offset) and sameRegisters and sameSymbols and sameMode;
    end;
  { Compares two operands. They match when they have the same operand type
    and, depending on that type, the same constant value, the same register
    or an equal memory reference. Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ=oper2.typ then
        case oper1.typ of
          top_const:
            Result:=oper1.val = oper2.val;
          top_reg:
            Result:=oper1.reg = oper2.reg;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
        end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        Result := oper.reg = reg
      else
        Result := false;
    end;
  { Returns true when instr is an instruction with opcode op. }
  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
    begin
      if instr.typ = ait_instruction then
        Result := taicpu(instr).opcode = op
      else
        Result := false;
    end;
  { Returns true when instr is an instruction whose opcode is a member of ops. }
  function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
    begin
      if instr.typ = ait_instruction then
        Result := taicpu(instr).opcode in ops
      else
        Result := false;
    end;
  { Reports whether Reg takes part in p1.

    The multiply opcodes listed below are always reported as touching R0 and
    R1, whatever their explicit operands are (presumably because the product
    is delivered in that register pair - confirm against the instruction set
    manual). Every other case is answered by the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    var
      implicitlyUsed: Boolean;
    begin
      implicitlyUsed:=false;
      if p1.typ = ait_instruction then
        if taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU] then
          implicitlyUsed:=(getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1);

      if implicitlyUsed then
        Result:=true
      else
        Result:=inherited RegInInstruction(Reg, p1);
    end;
  { Walks forward from Current and leaves the entry it stopped at in Next.
    The result is the value returned by the last GetNextInstruction call.

    The walk stops after the first step unless level 3 optimizations are
    enabled. With level 3 it continues until GetNextInstruction fails, a
    non-instruction entry is reached, an instruction using reg is found, or
    a call/jump instruction is hit. Callers therefore still have to check
    what kind of entry Next is. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        { the checks are ordered so that Next is only inspected once it is
          known to be a valid instruction }
        if not(cs_opt_level3 in current_settings.optimizerswitches) then
          break;
        if not(Result) then
          break;
        if Next.typ<>ait_instruction then
          break;
        if RegInInstruction(reg,Next) then
          break;
      until is_calljmp(taicpu(Next).opcode);
    end;
  { First peephole pass for a single list entry.

    p is the entry to inspect. When a pattern removes p itself, p is advanced
    to the entry that follows it. The result is true when the instruction
    list was changed in a way the caller has to know about; the STS->OUT and
    LDS->IN rewrites change the opcode in place and leave the result alone.

    Patterns handled:
      - <op> + cp reg,r1 + breq/brne/brmi/brpl : drop the compare
      - ldi + cp                               : cpi
      - sts/lds on a low address               : out/in
      - in + ori/andi + out                    : sbi/cbi
      - clr + mov/ld, clr + adc/sbc            : drop the clr
      - push,push,pop,pop                      : movw
      - mov + user of the copy                 : use the source directly
      - mov rX,rX                              : removed
      - mov,mov on register pairs              : movw
      - mov rX,.. directly followed by mov rX,.. : drop the first mov }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1,hp2,hp3: tai;
      alloc, dealloc: tai_regalloc;
      i: integer;
    begin
      result := false;
      case p.typ of
        ait_instruction:
          begin
            {
              change
              <op> reg,x,y
              cp reg,r1
              into
              <op> reg,x,y

              r1 is used as the register holding zero (see the clr/adc
              pattern below), so the compare only re-derives the Z and N
              flags that <op> has already set.
            }
            { this optimization can be applied only to the currently enabled operations because
              the other operations do not update all flags and FPC does not track flag usage }
            if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
                                    A_INC,A_LSL,A_LSR,
                                    A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
              GetNextInstruction(p, hp1) and
              MatchInstruction(hp1, A_CP) and
              (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
              (taicpu(hp1).oper[1]^.reg = NR_R1) and
              GetNextInstruction(hp1, hp2) and
              { be careful here, following instructions could use other flags
                however after a jump fpc never depends on the value of flags }
              { All above instructions set Z and N according to the following
                Z := result = 0;
                N := most significant bit of the result;
                EQ = Z=1; NE = Z=0;
                MI = N=1; PL = N=0; }
              MatchInstruction(hp2, A_BRxx) and
              (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) { and
              no flag allocation tracking implemented yet on avr
              assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
              begin
                { move flag allocation if possible }
                { no flag allocation tracking implemented yet on avr
                GetLastInstruction(hp1, hp2);
                hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
                if assigned(hp2) then
                  begin
                    asml.Remove(hp2);
                    asml.insertbefore(hp2, p);
                  end;
                }
                asml.remove(hp1);
                hp1.free;
                Result:=true;
              end
            else
              case taicpu(p).opcode of
                A_LDI:
                  begin
                    { turn
                      ldi reg0, imm
                      cp reg1, reg0
                      dealloc reg0
                      into
                      cpi reg1, imm

                      reg1 has to be one of r16..r31 for the immediate form
                    }
                    if (taicpu(p).ops=2) and
                      (taicpu(p).oper[0]^.typ=top_reg) and
                      (taicpu(p).oper[1]^.typ=top_const) and
                      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                      (hp1.typ=ait_instruction) and
                      (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                      (taicpu(hp1).opcode=A_CP) and
                      (taicpu(hp1).ops=2) and
                      (taicpu(hp1).oper[1]^.typ=top_reg) and
                      (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
                      (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
                      assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
                      begin
                        taicpu(hp1).opcode:=A_CPI;
                        taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);

                        { the temporary register is no longer needed, so its
                          allocation markers go away together with the ldi }
                        alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
                        if assigned(alloc) and assigned(dealloc) then
                          begin
                            asml.Remove(alloc);
                            alloc.Free;
                            asml.Remove(dealloc);
                            dealloc.Free;
                          end;

                        GetNextInstruction(p,hp1);
                        asml.Remove(p);
                        p.Free;
                        p:=hp1;
                        result:=true;
                      end;
                  end;
                A_STS:
                  { a store to a plain absolute address in the range 32..95 is
                    turned into an out to port (address-32) }
                  if (taicpu(p).oper[0]^.ref^.symbol=nil) and
                    (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
                    (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
                    (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
                    (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
                    (taicpu(p).oper[0]^.ref^.offset>=32) and
                    (taicpu(p).oper[0]^.ref^.offset<=95) then
                    begin
                      taicpu(p).opcode:=A_OUT;
                      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
                    end;
                A_LDS:
                  { a load from a plain absolute address in the range 32..95 is
                    turned into an in from port (address-32) }
                  if (taicpu(p).oper[1]^.ref^.symbol=nil) and
                    (taicpu(p).oper[1]^.ref^.relsymbol=nil) and
                    (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
                    (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
                    (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
                    (taicpu(p).oper[1]^.ref^.offset>=32) and
                    (taicpu(p).oper[1]^.ref^.offset<=95) then
                    begin
                      taicpu(p).opcode:=A_IN;
                      taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
                    end;
                A_IN:
                  { sbi/cbi can only encode the I/O addresses 0..31, so the
                    port number is validated before the successors are
                    inspected }
                  { NOTE(review): rX no longer holds the port value after the
                    rewrite; nothing here checks that rX is dead after the
                    out - confirm that this is guaranteed by the code
                    generator }
                  if (taicpu(p).oper[1]^.typ=top_const) and
                    (taicpu(p).oper[1]^.val>=0) and
                    (taicpu(p).oper[1]^.val<=31) and
                    GetNextInstruction(p,hp1) then
                    begin
                      {
                        in rX,Y
                        ori rX,n
                        out Y,rX
                        into
                        sbi Y,lg(n)
                      }
                      if MatchInstruction(hp1,A_ORI) and
                        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                        (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
                        GetNextInstruction(hp1,hp2) and
                        MatchInstruction(hp2,A_OUT) and
                        MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
                        MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
                        begin
                          taicpu(p).opcode:=A_SBI;
                          taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
                          { BsrByte already returns the 0-based bit index }
                          taicpu(p).loadconst(1,BsrByte(byte(taicpu(hp1).oper[1]^.val)));
                          asml.Remove(hp1);
                          hp1.Free;
                          asml.Remove(hp2);
                          hp2.Free;
                          result:=true;
                        end
                      {
                        in rX,Y
                        andi rX,not(n)
                        out Y,rX
                        into
                        cbi Y,lg(n)
                      }
                      else if MatchInstruction(hp1,A_ANDI) and
                        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                        (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
                        GetNextInstruction(hp1,hp2) and
                        MatchInstruction(hp2,A_OUT) and
                        MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
                        MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
                        begin
                          taicpu(p).opcode:=A_CBI;
                          taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
                          { truncate to a byte first, exactly as the PopCnt
                            test above does; the index is 0-based }
                          taicpu(p).loadconst(1,BsrByte(byte(not(taicpu(hp1).oper[1]^.val))));
                          asml.Remove(hp1);
                          hp1.Free;
                          asml.Remove(hp2);
                          hp2.Free;
                          result:=true;
                        end;
                    end;
                A_CLR:
                  begin
                    { turn the common
                      clr rX
                      mov/ld rX, rY
                      into
                      mov/ld rX, rY
                    }
                    if (taicpu(p).ops=1) and
                      (taicpu(p).oper[0]^.typ=top_reg) and
                      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                      (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                      (hp1.typ=ait_instruction) and
                      (taicpu(hp1).opcode in [A_MOV,A_LD]) and
                      (taicpu(hp1).ops>0) and
                      (taicpu(hp1).oper[0]^.typ=top_reg) and
                      (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
                      begin
                        asml.Remove(p);
                        p.Free;
                        p:=hp1;
                        result:=true;
                      end
                    { turn
                      clr rX
                      ...
                      adc rY, rX
                      into
                      ...
                      adc rY, r1
                    }
                    else if (taicpu(p).ops=1) and
                      (taicpu(p).oper[0]^.typ=top_reg) and
                      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                      (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                      (hp1.typ=ait_instruction) and
                      (taicpu(hp1).opcode in [A_ADC,A_SBC]) and
                      (taicpu(hp1).ops=2) and
                      (taicpu(hp1).oper[1]^.typ=top_reg) and
                      (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
                      (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
                      assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
                      begin
                        taicpu(hp1).oper[1]^.reg:=NR_R1;

                        alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
                        if assigned(alloc) and assigned(dealloc) then
                          begin
                            asml.Remove(alloc);
                            alloc.Free;
                            asml.Remove(dealloc);
                            dealloc.Free;
                          end;

                        GetNextInstruction(p,hp1);
                        asml.Remove(p);
                        p.free;
                        p:=hp1;
                        result:=true;
                      end;
                  end;
                A_PUSH:
                  begin
                    { turn
                      push reg0
                      push reg1
                      pop reg3
                      pop reg2
                      into
                      movw reg2,reg0

                      only valid on cpus which provide movw, the same
                      capability test is used for the mov,mov fold below
                    }
                    if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
                      (taicpu(p).ops=1) and
                      (taicpu(p).oper[0]^.typ=top_reg) and
                      GetNextInstruction(p,hp1) and
                      (hp1.typ=ait_instruction) and
                      (taicpu(hp1).opcode=A_PUSH) and
                      (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                      ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                      GetNextInstruction(hp1,hp2) and
                      (hp2.typ=ait_instruction) and
                      (taicpu(hp2).opcode=A_POP) and
                      GetNextInstruction(hp2,hp3) and
                      (hp3.typ=ait_instruction) and
                      (taicpu(hp3).opcode=A_POP) and
                      (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
                      ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
                      begin
                        taicpu(p).ops:=2;
                        taicpu(p).opcode:=A_MOVW;
                        { operand 1 has to be filled from operand 0 before
                          operand 0 is overwritten with the destination }
                        taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
                        taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
                        asml.Remove(hp1);
                        hp1.Free;
                        asml.Remove(hp2);
                        hp2.Free;
                        asml.Remove(hp3);
                        hp3.Free;
                        result:=true;
                      end;
                  end;
                A_MOV:
                  begin
                    { turn
                      mov reg0, reg1
                      push reg0
                      dealloc reg0
                      into
                      push reg1
                    }
                    if (taicpu(p).ops=2) and
                      (taicpu(p).oper[0]^.typ = top_reg) and
                      (taicpu(p).oper[1]^.typ = top_reg) and
                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
                      (hp1.typ = ait_instruction) and
                      (taicpu(hp1).opcode in [A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_EOR,A_AND,A_OR]) and
                      RegInInstruction(taicpu(p).oper[0]^.reg, hp1) and
                      (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
                      {(taicpu(hp1).ops=1) and
                      (taicpu(hp1).oper[0]^.typ = top_reg) and
                      (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and }
                      assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
                      begin
                        { let the user of the copy read the original register }
                        for i := 0 to taicpu(hp1).ops-1 do
                          if taicpu(hp1).oper[i]^.typ=top_reg then
                            if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
                              taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;

                        alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
                        if assigned(alloc) and assigned(dealloc) then
                          begin
                            asml.Remove(alloc);
                            alloc.Free;
                            asml.Remove(dealloc);
                            dealloc.Free;
                          end;

                        GetNextInstruction(p,hp1);
                        asml.Remove(p);
                        p.free;
                        p:=hp1;
                        result:=true;
                      end
                    { remove
                      mov reg0,reg0
                    }
                    else if (taicpu(p).ops=2) and
                      (taicpu(p).oper[0]^.typ = top_reg) and
                      (taicpu(p).oper[1]^.typ = top_reg) and
                      (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      begin
                        GetNextInstruction(p,hp1);
                        asml.remove(p);
                        p.free;
                        p:=hp1;
                        result:=true;
                      end
                    { fold
                      mov reg2,reg0
                      mov reg3,reg1
                      to
                      movw reg2,reg0
                    }
                    else if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
                      (taicpu(p).ops=2) and
                      (taicpu(p).oper[0]^.typ = top_reg) and
                      (taicpu(p).oper[1]^.typ = top_reg) and
                      getnextinstruction(p,hp1) and
                      (hp1.typ = ait_instruction) and
                      (taicpu(hp1).opcode = A_MOV) and
                      (taicpu(hp1).ops=2) and
                      (taicpu(hp1).oper[0]^.typ = top_reg) and
                      (taicpu(hp1).oper[1]^.typ = top_reg) and
                      (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                      ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                      ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
                      (getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
                      begin
                        { the high register is now written by p already, so
                          its allocation marker has to precede p }
                        alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
                        if assigned(alloc) then
                          begin
                            asml.Remove(alloc);
                            asml.InsertBefore(alloc,p);
                          end;

                        taicpu(p).opcode:=A_MOVW;
                        asml.remove(hp1);
                        hp1.free;
                        result:=true;
                      end
                    {
                      This removes the first mov from
                      mov rX,...
                      mov rX,...

                      hp1 is fetched again here: the conditions above may have
                      left it unassigned, pointing at a non-instruction entry
                      or at an instruction further down the list
                    }
                    else if GetNextInstruction(p,hp1) and
                      MatchInstruction(hp1,A_MOV) then
                      while (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_MOV) and
                        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
                        { don't remove the first mov if the second is a mov rX,rX }
                        not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
                        begin
                          asml.remove(p);
                          p.free;
                          p:=hp1;
                          GetNextInstruction(hp1,hp1);
                          result:=true;
                          if not assigned(hp1) then
                            break;
                        end;
                  end;
              end;
          end;
      end;
    end;
  { Second peephole pass. Intentionally empty: this unit performs no
    optimizations in pass 2. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    begin
      { nothing to do }
    end;
  { Unit start-up code: registers TCpuAsmOptimizer as the assembler
    optimizer class by assigning it to casmoptimizer. }
  initialization
    casmoptimizer:=TCpuAsmOptimizer;
  end.