aoptcpu.pas 66 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  24. Type
  25. TCpuAsmOptimizer = class(TAsmOptimizer)
  26. { uses the same constructor as TAopObj }
  27. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  28. procedure PeepHoleOptPass2;override;
  29. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  30. procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  31. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  32. var AllUsedRegs: TAllUsedRegs): Boolean;
  33. { gets the next tai object after current that contains info relevant
  34. to the optimizer in p1 which used the given register or does a
  35. change in program flow.
  36. If there is none, it returns false and
  37. sets p1 to nil }
  38. Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
  39. { outputs a debug message into the assembler file }
  40. procedure DebugMsg(const s: string; p: tai);
  41. End;
  42. TCpuPreRegallocScheduler = class(TAsmScheduler)
  43. function SchedulerPass1Cpu(var p: tai): boolean;override;
  44. procedure SwapRegLive(p, hp1: taicpu);
  45. end;
  46. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  47. { uses the same constructor as TAopObj }
  48. procedure PeepHoleOptPass2;override;
  49. End;
  50. Implementation
  51. uses
  52. cutils,verbose,globals,
  53. systems,
  54. cpuinfo,
  55. cgobj,cgutils,procinfo,
  56. aasmbase,aasmdata;
  57. function CanBeCond(p : tai) : boolean;
  58. begin
  59. result:=
  60. (p.typ=ait_instruction) and
  61. (taicpu(p).condition=C_None) and
  62. (taicpu(p).opcode<>A_PLD) and
  63. ((taicpu(p).opcode<>A_BLX) or
  64. (taicpu(p).oper[0]^.typ=top_reg));
  65. end;
  66. function RefsEqual(const r1, r2: treference): boolean;
  67. begin
  68. refsequal :=
  69. (r1.offset = r2.offset) and
  70. (r1.base = r2.base) and
  71. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  72. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  73. (r1.relsymbol = r2.relsymbol) and
  74. (r1.signindex = r2.signindex) and
  75. (r1.shiftimm = r2.shiftimm) and
  76. (r1.addressmode = r2.addressmode) and
  77. (r1.shiftmode = r2.shiftmode);
  78. end;
  79. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  80. begin
  81. result :=
  82. (instr.typ = ait_instruction) and
  83. (taicpu(instr).opcode = op) and
  84. ((cond = []) or (taicpu(instr).condition in cond)) and
  85. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  86. end;
  87. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  88. begin
  89. result := oper1.typ = oper2.typ;
  90. if result then
  91. case oper1.typ of
  92. top_const:
  93. Result:=oper1.val = oper2.val;
  94. top_reg:
  95. Result:=oper1.reg = oper2.reg;
  96. top_conditioncode:
  97. Result:=oper1.cc = oper2.cc;
  98. top_ref:
  99. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  100. else Result:=false;
  101. end
  102. end;
  103. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  104. begin
  105. result := (oper.typ = top_reg) and (oper.reg = reg);
  106. end;
  107. procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
  108. begin
  109. if (taicpu(movp).condition = C_EQ) and
  110. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  111. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  112. begin
  113. asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
  114. asml.remove(movp);
  115. movp.free;
  116. end;
  117. end;
  118. function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  119. var
  120. p: taicpu;
  121. begin
  122. p := taicpu(hp);
  123. regLoadedWithNewValue := false;
  124. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  125. exit;
  126. case p.opcode of
  127. { These operands do not write into a register at all }
  128. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
  129. exit;
  130. {Take care of post/preincremented store and loads, they will change their base register}
  131. A_STR, A_LDR:
  132. regLoadedWithNewValue :=
  133. (taicpu(p).oper[1]^.typ=top_ref) and
  134. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  135. (taicpu(p).oper[1]^.ref^.base = reg);
  136. { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
  137. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  138. regLoadedWithNewValue :=
  139. (p.oper[1]^.typ = top_reg) and
  140. (p.oper[1]^.reg = reg);
  141. {Loads to oper2 from coprocessor}
  142. {
  143. MCR/MRC is currently not supported in FPC
  144. A_MRC:
  145. regLoadedWithNewValue :=
  146. (p.oper[2]^.typ = top_reg) and
  147. (p.oper[2]^.reg = reg);
  148. }
  149. {Loads to all register in the registerset}
  150. A_LDM:
  151. regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
  152. end;
  153. if regLoadedWithNewValue then
  154. exit;
  155. case p.oper[0]^.typ of
  156. {This is the case}
  157. top_reg:
  158. regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
  159. { LDRD }
  160. (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
  161. {LDM/STM might write a new value to their index register}
  162. top_ref:
  163. regLoadedWithNewValue :=
  164. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  165. (taicpu(p).oper[0]^.ref^.base = reg);
  166. end;
  167. end;
  168. function AlignedToQWord(const ref : treference) : boolean;
  169. begin
  170. { (safe) heuristics to ensure alignment }
  171. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  172. (((ref.offset>=0) and
  173. ((ref.offset mod 8)=0) and
  174. ((ref.base=NR_R13) or
  175. (ref.index=NR_R13))
  176. ) or
  177. ((ref.offset<=0) and
  178. { when using NR_R11, it has always a value of <qword align>+4 }
  179. ((abs(ref.offset+4) mod 8)=0) and
  180. (current_procinfo.framepointer=NR_R11) and
  181. ((ref.base=NR_R11) or
  182. (ref.index=NR_R11))
  183. )
  184. );
  185. end;
  186. function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  187. var
  188. p: taicpu;
  189. i: longint;
  190. begin
  191. instructionLoadsFromReg := false;
  192. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  193. exit;
  194. p:=taicpu(hp);
  195. i:=1;
  196. {For these instructions we have to start on oper[0]}
  197. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  198. A_CMP, A_CMN, A_TST, A_TEQ,
  199. A_B, A_BL, A_BX, A_BLX,
  200. A_SMLAL, A_UMLAL]) then i:=0;
  201. while(i<p.ops) do
  202. begin
  203. case p.oper[I]^.typ of
  204. top_reg:
  205. instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
  206. { STRD }
  207. ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
  208. top_regset:
  209. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  210. top_shifterop:
  211. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  212. top_ref:
  213. instructionLoadsFromReg :=
  214. (p.oper[I]^.ref^.base = reg) or
  215. (p.oper[I]^.ref^.index = reg);
  216. end;
  217. if instructionLoadsFromReg then exit; {Bailout if we found something}
  218. Inc(I);
  219. end;
  220. end;
  221. function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
  222. var AllUsedRegs: TAllUsedRegs): Boolean;
  223. begin
  224. AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
  225. RegUsedAfterInstruction :=
  226. AllUsedRegs[getregtype(reg)].IsUsed(reg) and
  227. not(regLoadedWithNewValue(reg,p)) and
  228. (
  229. not(GetNextInstruction(p,p)) or
  230. instructionLoadsFromReg(reg,p) or
  231. not(regLoadedWithNewValue(reg,p))
  232. );
  233. end;
  234. function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
  235. var Next: tai; reg: TRegister): Boolean;
  236. begin
  237. Next:=Current;
  238. repeat
  239. Result:=GetNextInstruction(Next,Next);
  240. until not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
  241. (is_calljmp(taicpu(Next).opcode)) or (RegInInstruction(NR_PC,Next));
  242. end;
  243. {$ifdef DEBUG_AOPTCPU}
  244. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  245. begin
  246. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  247. end;
  248. {$else DEBUG_AOPTCPU}
  249. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  250. begin
  251. end;
  252. {$endif DEBUG_AOPTCPU}
  253. procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  254. var
  255. alloc,
  256. dealloc : tai_regalloc;
  257. hp1 : tai;
  258. begin
  259. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  260. (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
  261. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  262. { don't mess with moves to pc }
  263. (taicpu(movp).oper[0]^.reg<>NR_PC) and
  264. { don't mess with moves to lr }
  265. (taicpu(movp).oper[0]^.reg<>NR_R14) and
  266. { the destination register of the mov might not be used beween p and movp }
  267. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  268. {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
  269. not (
  270. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  271. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
  272. ) then
  273. begin
  274. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  275. if assigned(dealloc) then
  276. begin
  277. DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
  278. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  279. and remove it if possible }
  280. GetLastInstruction(p,hp1);
  281. asml.Remove(dealloc);
  282. alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
  283. if assigned(alloc) then
  284. begin
  285. asml.Remove(alloc);
  286. alloc.free;
  287. dealloc.free;
  288. end
  289. else
  290. asml.InsertAfter(dealloc,p);
  291. { try to move the allocation of the target register }
  292. GetLastInstruction(movp,hp1);
  293. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  294. if assigned(alloc) then
  295. begin
  296. asml.Remove(alloc);
  297. asml.InsertBefore(alloc,p);
  298. { adjust used regs }
  299. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  300. end;
  301. { finally get rid of the mov }
  302. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  303. asml.remove(movp);
  304. movp.free;
  305. end;
  306. end;
  307. end;
  308. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  309. var
  310. hp1,hp2: tai;
  311. i, i2: longint;
  312. TmpUsedRegs: TAllUsedRegs;
  313. tempop: tasmop;
  314. function IsPowerOf2(const value: DWord): boolean; inline;
  315. begin
  316. Result:=(value and (value - 1)) = 0;
  317. end;
  318. begin
  319. result := false;
  320. case p.typ of
  321. ait_instruction:
  322. begin
  323. {
  324. change
  325. <op> reg,x,y
  326. cmp reg,#0
  327. into
  328. <op>s reg,x,y
  329. }
  330. { this optimization can applied only to the currently enabled operations because
  331. the other operations do not update all flags and FPC does not track flag usage }
  332. if ((taicpu(p).opcode in [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,
  333. A_ORR,A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA])
  334. ) and
  335. (taicpu(p).oppostfix = PF_None) and
  336. (taicpu(p).condition = C_None) and
  337. GetNextInstruction(p, hp1) and
  338. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  339. (taicpu(hp1).oper[1]^.typ = top_const) and
  340. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  341. (taicpu(hp1).oper[1]^.val = 0) and
  342. GetNextInstruction(hp1, hp2) and
  343. { be careful here, following instructions could use other flags
  344. however after a jump fpc never depends on the value of flags }
  345. { All above instructions set Z and N according to the following
  346. Z := result = 0;
  347. N := result[31];
  348. EQ = Z=1; NE = Z=0;
  349. MI = N=1; PL = N=0; }
  350. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  351. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  352. begin
  353. DebugMsg('Peephole OpCmp2OpS done', p);
  354. taicpu(p).oppostfix:=PF_S;
  355. { move flag allocation if possible }
  356. GetLastInstruction(hp1, hp2);
  357. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  358. if assigned(hp2) then
  359. begin
  360. asml.Remove(hp2);
  361. asml.insertbefore(hp2, p);
  362. end;
  363. asml.remove(hp1);
  364. hp1.free;
  365. end
  366. else
  367. case taicpu(p).opcode of
  368. A_STR:
  369. begin
  370. { change
  371. str reg1,ref
  372. ldr reg2,ref
  373. into
  374. str reg1,ref
  375. mov reg2,reg1
  376. }
  377. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  378. (taicpu(p).oppostfix=PF_None) and
  379. GetNextInstruction(p,hp1) and
  380. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  381. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  382. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  383. begin
  384. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  385. begin
  386. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  387. asml.remove(hp1);
  388. hp1.free;
  389. end
  390. else
  391. begin
  392. taicpu(hp1).opcode:=A_MOV;
  393. taicpu(hp1).oppostfix:=PF_None;
  394. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  395. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  396. end;
  397. result := true;
  398. end
  399. { change
  400. str reg1,ref
  401. str reg2,ref
  402. into
  403. strd reg1,ref
  404. }
  405. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  406. (taicpu(p).oppostfix=PF_None) and
  407. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  408. GetNextInstruction(p,hp1) and
  409. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  410. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  411. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  412. { str ensures that either base or index contain no register, else ldr wouldn't
  413. use an offset either
  414. }
  415. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  416. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  417. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  418. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  419. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  420. begin
  421. DebugMsg('Peephole StrStr2Strd done', p);
  422. taicpu(p).oppostfix:=PF_D;
  423. asml.remove(hp1);
  424. hp1.free;
  425. end;
  426. end;
  427. A_LDR:
  428. begin
  429. { change
  430. ldr reg1,ref
  431. ldr reg2,ref
  432. into ...
  433. }
  434. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  435. GetNextInstruction(p,hp1) and
  436. { ldrd is not allowed here }
  437. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  438. begin
  439. {
  440. ...
  441. ldr reg1,ref
  442. mov reg2,reg1
  443. }
  444. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  445. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  446. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  447. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  448. begin
  449. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  450. begin
  451. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  452. asml.remove(hp1);
  453. hp1.free;
  454. end
  455. else
  456. begin
  457. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  458. taicpu(hp1).opcode:=A_MOV;
  459. taicpu(hp1).oppostfix:=PF_None;
  460. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  461. end;
  462. result := true;
  463. end
  464. {
  465. ...
  466. ldrd reg1,ref
  467. }
  468. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  469. { ldrd does not allow any postfixes ... }
  470. (taicpu(p).oppostfix=PF_None) and
  471. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  472. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  473. { ldr ensures that either base or index contain no register, else ldr wouldn't
  474. use an offset either
  475. }
  476. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  477. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  478. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  479. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  480. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  481. begin
  482. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  483. taicpu(p).oppostfix:=PF_D;
  484. asml.remove(hp1);
  485. hp1.free;
  486. end;
  487. end;
  488. { Remove superfluous mov after ldr
  489. changes
  490. ldr reg1, ref
  491. mov reg2, reg1
  492. to
  493. ldr reg2, ref
  494. conditions are:
  495. * no ldrd usage
  496. * reg1 must be released after mov
  497. * mov can not contain shifterops
  498. * ldr+mov have the same conditions
  499. * mov does not set flags
  500. }
  501. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  502. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  503. end;
  504. A_MOV:
  505. begin
  506. { fold
  507. mov reg1,reg0, shift imm1
  508. mov reg1,reg1, shift imm2
  509. }
  510. if (taicpu(p).ops=3) and
  511. (taicpu(p).oper[2]^.typ = top_shifterop) and
  512. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  513. getnextinstruction(p,hp1) and
  514. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  515. (taicpu(hp1).ops=3) and
  516. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  517. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  518. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  519. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  520. begin
  521. { fold
  522. mov reg1,reg0, lsl 16
  523. mov reg1,reg1, lsr 16
  524. strh reg1, ...
  525. dealloc reg1
  526. to
  527. strh reg1, ...
  528. dealloc reg1
  529. }
  530. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  531. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  532. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  533. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  534. getnextinstruction(hp1,hp2) and
  535. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  536. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  537. begin
  538. CopyUsedRegs(TmpUsedRegs);
  539. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  540. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  541. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  542. begin
  543. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  544. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  545. asml.remove(p);
  546. asml.remove(hp1);
  547. p.free;
  548. hp1.free;
  549. p:=hp2;
  550. end;
  551. ReleaseUsedRegs(TmpUsedRegs);
  552. end
  553. { fold
  554. mov reg1,reg0, shift imm1
  555. mov reg1,reg1, shift imm2
  556. to
  557. mov reg1,reg0, shift imm1+imm2
  558. }
  559. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  560. { asr makes no use after a lsr, the asr can be foled into the lsr }
  561. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  562. begin
  563. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  564. { avoid overflows }
  565. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  566. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  567. SM_ROR:
  568. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  569. SM_ASR:
  570. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  571. SM_LSR,
  572. SM_LSL:
  573. begin
  574. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  575. InsertLLItem(p.previous, p.next, hp1);
  576. p.free;
  577. p:=hp1;
  578. end;
  579. else
  580. internalerror(2008072803);
  581. end;
  582. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  583. asml.remove(hp1);
  584. hp1.free;
  585. result := true;
  586. end
  587. { fold
  588. mov reg1,reg0, shift imm1
  589. mov reg1,reg1, shift imm2
  590. mov reg1,reg1, shift imm3 ...
  591. }
  592. else if getnextinstruction(hp1,hp2) and
  593. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  594. (taicpu(hp2).ops=3) and
  595. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  596. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  597. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  598. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  599. begin
  600. { mov reg1,reg0, lsl imm1
  601. mov reg1,reg1, lsr/asr imm2
  602. mov reg1,reg1, lsl imm3 ...
  603. if imm3<=imm1 and imm2>=imm3
  604. to
  605. mov reg1,reg0, lsl imm1
  606. mov reg1,reg1, lsr/asr imm2-imm3
  607. }
  608. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  609. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  610. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  611. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  612. begin
  613. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  614. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  615. asml.remove(hp2);
  616. hp2.free;
  617. result := true;
  618. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  619. begin
  620. asml.remove(hp1);
  621. hp1.free;
  622. end;
  623. end
  624. { mov reg1,reg0, lsr/asr imm1
  625. mov reg1,reg1, lsl imm2
  626. mov reg1,reg1, lsr/asr imm3 ...
  627. if imm3>=imm1 and imm2>=imm1
  628. to
  629. mov reg1,reg0, lsl imm2-imm1
  630. mov reg1,reg1, lsr/asr imm3 ...
  631. }
  632. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  633. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  634. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  635. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  636. begin
  637. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  638. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  639. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  640. asml.remove(p);
  641. p.free;
  642. p:=hp2;
  643. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  644. begin
  645. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  646. asml.remove(hp1);
  647. hp1.free;
  648. p:=hp2;
  649. end;
  650. result := true;
  651. end;
  652. end;
  653. end;
  654. { Change the common
  655. mov r0, r0, lsr #24
  656. and r0, r0, #255
  657. and remove the superfluous and
  658. This could be extended to handle more cases.
  659. }
  660. if (taicpu(p).ops=3) and
  661. (taicpu(p).oper[2]^.typ = top_shifterop) and
  662. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  663. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  664. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  665. getnextinstruction(p,hp1) and
  666. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  667. (taicpu(hp1).ops=3) and
  668. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  669. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  670. (taicpu(hp1).oper[2]^.typ = top_const) and
  671. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  672. For LSR #25 and an AndConst of 255 that whould go like this:
  673. 255 and ((2 shl (32-25))-1)
  674. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  675. LSR #25 and AndConst of 254:
  676. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  677. }
  678. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  679. begin
  680. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  681. asml.remove(hp1);
  682. hp1.free;
  683. end;
  684. {
  685. optimize
  686. mov rX, yyyy
  687. ....
  688. }
  689. if (taicpu(p).ops = 2) and
  690. GetNextInstruction(p,hp1) and
  691. (tai(hp1).typ = ait_instruction) then
  692. begin
  693. {
  694. This changes the very common
  695. mov r0, #0
  696. str r0, [...]
  697. mov r0, #0
  698. str r0, [...]
  699. and removes all superfluous mov instructions
  700. }
  701. if (taicpu(p).oper[1]^.typ = top_const) and
  702. (taicpu(hp1).opcode=A_STR) then
  703. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  704. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  705. GetNextInstruction(hp1, hp2) and
  706. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  707. (taicpu(hp2).ops = 2) and
  708. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  709. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  710. begin
  711. DebugMsg('Peephole MovStrMov done', hp2);
  712. GetNextInstruction(hp2,hp1);
  713. asml.remove(hp2);
  714. hp2.free;
  715. if not assigned(hp1) then break;
  716. end
  717. {
  718. This removes the first mov from
  719. mov rX,...
  720. mov rX,...
  721. }
  722. else if taicpu(hp1).opcode=A_MOV then
  723. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  724. (taicpu(hp1).ops = 2) and
  725. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  726. { don't remove the first mov if the second is a mov rX,rX }
  727. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  728. begin
  729. DebugMsg('Peephole MovMov done', p);
  730. asml.remove(p);
  731. p.free;
  732. p:=hp1;
  733. GetNextInstruction(hp1,hp1);
  734. if not assigned(hp1) then
  735. break;
  736. end;
  737. end;
  738. {
  739. change
  740. mov r1, r0
  741. add r1, r1, #1
  742. to
  743. add r1, r0, #1
  744. Todo: Make it work for mov+cmp too
  745. CAUTION! If this one is successful p might not be a mov instruction anymore!
  746. }
  747. if (taicpu(p).ops = 2) and
  748. (taicpu(p).oper[1]^.typ = top_reg) and
  749. (taicpu(p).oppostfix = PF_NONE) and
  750. GetNextInstruction(p, hp1) and
  751. (tai(hp1).typ = ait_instruction) and
  752. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  753. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  754. {MOV and MVN might only have 2 ops}
  755. (taicpu(hp1).ops = 3) and
  756. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  757. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  758. (taicpu(hp1).oper[1]^.typ = top_reg) and
  759. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  760. begin
  761. { When we get here we still don't know if the registers match}
  762. for I:=1 to 2 do
  763. {
  764. If the first loop was successful p will be replaced with hp1.
  765. The checks will still be ok, because all required information
  766. will also be in hp1 then.
  767. }
  768. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  769. begin
  770. DebugMsg('Peephole RedundantMovProcess done', hp1);
  771. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  772. if p<>hp1 then
  773. begin
  774. asml.remove(p);
  775. p.free;
  776. p:=hp1;
  777. end;
  778. end;
  779. end;
  780. { This folds shifterops into following instructions
  781. mov r0, r1, lsl #8
  782. add r2, r3, r0
  783. to
  784. add r2, r3, r1, lsl #8
  785. CAUTION! If this one is successful p might not be a mov instruction anymore!
  786. }
  787. if (taicpu(p).opcode = A_MOV) and
  788. (taicpu(p).ops = 3) and
  789. (taicpu(p).oper[1]^.typ = top_reg) and
  790. (taicpu(p).oper[2]^.typ = top_shifterop) and
  791. (taicpu(p).oppostfix = PF_NONE) and
  792. GetNextInstruction(p, hp1) and
  793. (tai(hp1).typ = ait_instruction) and
  794. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  795. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  796. (taicpu(hp1).oppostfix = PF_NONE) and
  797. (taicpu(hp1).condition = taicpu(p).condition) and
  798. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  799. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  800. A_CMP, A_CMN]) and
  801. (
  802. {Only ONE of the two src operands is allowed to match}
  803. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  804. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  805. ) then
  806. begin
  807. CopyUsedRegs(TmpUsedRegs);
  808. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  809. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  810. I2:=0
  811. else
  812. I2:=1;
  813. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  814. for I:=I2 to taicpu(hp1).ops-1 do
  815. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  816. begin
  817. { If the parameter matched on the second op from the RIGHT
  818. we have to switch the parameters, this will not happen for CMP
  819. were we're only evaluating the most right parameter
  820. }
  821. if I <> taicpu(hp1).ops-1 then
  822. begin
  823. {The SUB operators need to be changed when we swap parameters}
  824. case taicpu(hp1).opcode of
  825. A_SUB: tempop:=A_RSB;
  826. A_SBC: tempop:=A_RSC;
  827. A_RSB: tempop:=A_SUB;
  828. A_RSC: tempop:=A_SBC;
  829. else tempop:=taicpu(hp1).opcode;
  830. end;
  831. if taicpu(hp1).ops = 3 then
  832. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  833. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  834. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  835. else
  836. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  837. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  838. taicpu(p).oper[2]^.shifterop^);
  839. end
  840. else
  841. if taicpu(hp1).ops = 3 then
  842. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  843. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  844. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  845. else
  846. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  847. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  848. taicpu(p).oper[2]^.shifterop^);
  849. asml.insertbefore(hp2, p);
  850. asml.remove(p);
  851. asml.remove(hp1);
  852. p.free;
  853. hp1.free;
  854. p:=hp2;
  855. GetNextInstruction(p,hp1);
  856. DebugMsg('Peephole FoldShiftProcess done', p);
  857. break;
  858. end;
  859. ReleaseUsedRegs(TmpUsedRegs);
  860. end;
  861. {
  862. Often we see shifts and then a superfluous mov to another register
  863. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  864. }
  865. if (taicpu(p).opcode = A_MOV) and
  866. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  867. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  868. end;
  869. A_ADD,
  870. A_ADC,
  871. A_RSB,
  872. A_RSC,
  873. A_SUB,
  874. A_SBC,
  875. A_AND,
  876. A_BIC,
  877. A_EOR,
  878. A_ORR,
  879. A_MLA,
  880. A_MUL:
  881. begin
  882. {
  883. change
  884. and reg2,reg1,const1
  885. and reg2,reg2,const2
  886. to
  887. and reg2,reg1,(const1 and const2)
  888. }
  889. if (taicpu(p).opcode = A_AND) and
  890. (taicpu(p).oper[1]^.typ = top_reg) and
  891. (taicpu(p).oper[2]^.typ = top_const) and
  892. GetNextInstruction(p, hp1) and
  893. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  894. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  895. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  896. (taicpu(hp1).oper[2]^.typ = top_const) then
  897. begin
  898. DebugMsg('Peephole AndAnd2And done', p);
  899. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  900. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  901. asml.remove(hp1);
  902. hp1.free;
  903. end;
  904. {
  905. change
  906. add/sub reg2,reg1,const1
  907. str/ldr reg3,[reg2,const2]
  908. dealloc reg2
  909. to
  910. str/ldr reg3,[reg1,const2+/-const1]
  911. }
  912. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  913. (taicpu(p).oper[1]^.typ = top_reg) and
  914. (taicpu(p).oper[2]^.typ = top_const) then
  915. begin
  916. hp1:=p;
  917. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  918. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  919. (MatchInstruction(hp1, A_LDR, [C_None], []) or
  920. MatchInstruction(hp1, A_STR, [C_None], [])) and
  921. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  922. { don't optimize if the register is stored/overwritten }
  923. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  924. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  925. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  926. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  927. ldr postfix }
  928. (((taicpu(p).opcode=A_ADD) and
  929. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  930. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<4096)) or
  931. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<256)
  932. )
  933. ) or
  934. ((taicpu(p).opcode=A_SUB) and
  935. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  936. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<4096)) or
  937. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<256)
  938. )
  939. )
  940. ) do
  941. begin
  942. { neither reg1 nor reg2 might be changed inbetween }
  943. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  944. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  945. break;
  946. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  947. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  948. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  949. begin
  950. { remember last instruction }
  951. hp2:=hp1;
  952. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  953. hp1:=p;
  954. { fix all ldr/str }
  955. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  956. begin
  957. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  958. if taicpu(p).opcode=A_ADD then
  959. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  960. else
  961. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  962. if hp1=hp2 then
  963. break;
  964. end;
  965. GetNextInstruction(p,hp1);
  966. asml.remove(p);
  967. p.free;
  968. p:=hp1;
  969. break;
  970. end;
  971. end;
  972. end;
  973. {
  974. change
  975. add reg1, ...
  976. mov reg2, reg1
  977. to
  978. add reg2, ...
  979. }
  980. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  981. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  982. end;
  983. A_CMP:
  984. begin
  985. {
  986. change
  987. cmp reg,const1
  988. moveq reg,const1
  989. movne reg,const2
  990. to
  991. cmp reg,const1
  992. movne reg,const2
  993. }
  994. if (taicpu(p).oper[1]^.typ = top_const) and
  995. GetNextInstruction(p, hp1) and
  996. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  997. (taicpu(hp1).oper[1]^.typ = top_const) and
  998. GetNextInstruction(hp1, hp2) and
  999. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1000. (taicpu(hp1).oper[1]^.typ = top_const) then
  1001. begin
  1002. RemoveRedundantMove(p, hp1, asml);
  1003. RemoveRedundantMove(p, hp2, asml);
  1004. end;
  1005. end;
  1006. end;
  1007. end;
  1008. end;
  1009. end;
  1010. { instructions modifying the CPSR can be only the last instruction }
  1011. function MustBeLast(p : tai) : boolean;
  1012. begin
  1013. Result:=(p.typ=ait_instruction) and
  1014. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  1015. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  1016. (taicpu(p).oppostfix=PF_S));
  1017. end;
  1018. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  1019. var
  1020. p,hp1,hp2: tai;
  1021. l : longint;
  1022. condition : tasmcond;
  1023. hp3: tai;
  1024. WasLast: boolean;
  1025. { UsedRegs, TmpUsedRegs: TRegSet; }
  1026. begin
  1027. p := BlockStart;
  1028. { UsedRegs := []; }
  1029. while (p <> BlockEnd) Do
  1030. begin
  1031. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  1032. case p.Typ Of
  1033. Ait_Instruction:
  1034. begin
  1035. case taicpu(p).opcode Of
  1036. A_B:
  1037. if taicpu(p).condition<>C_None then
  1038. begin
  1039. { check for
  1040. Bxx xxx
  1041. <several instructions>
  1042. xxx:
  1043. }
  1044. l:=0;
  1045. WasLast:=False;
  1046. GetNextInstruction(p, hp1);
  1047. while assigned(hp1) and
  1048. (l<=4) and
  1049. CanBeCond(hp1) and
  1050. { stop on labels }
  1051. not(hp1.typ=ait_label) do
  1052. begin
  1053. inc(l);
  1054. if MustBeLast(hp1) then
  1055. begin
  1056. WasLast:=True;
  1057. GetNextInstruction(hp1,hp1);
  1058. break;
  1059. end
  1060. else
  1061. GetNextInstruction(hp1,hp1);
  1062. end;
  1063. if assigned(hp1) then
  1064. begin
  1065. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1066. begin
  1067. if (l<=4) and (l>0) then
  1068. begin
  1069. condition:=inverse_cond(taicpu(p).condition);
  1070. hp2:=p;
  1071. GetNextInstruction(p,hp1);
  1072. p:=hp1;
  1073. repeat
  1074. if hp1.typ=ait_instruction then
  1075. taicpu(hp1).condition:=condition;
  1076. if MustBeLast(hp1) then
  1077. begin
  1078. GetNextInstruction(hp1,hp1);
  1079. break;
  1080. end
  1081. else
  1082. GetNextInstruction(hp1,hp1);
  1083. until not(assigned(hp1)) or
  1084. not(CanBeCond(hp1)) or
  1085. (hp1.typ=ait_label);
  1086. { wait with removing else GetNextInstruction could
  1087. ignore the label if it was the only usage in the
  1088. jump moved away }
  1089. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1090. asml.remove(hp2);
  1091. hp2.free;
  1092. continue;
  1093. end;
  1094. end
  1095. else
  1096. { do not perform further optimizations if there is inctructon
  1097. in block #1 which can not be optimized.
  1098. }
  1099. if not WasLast then
  1100. begin
  1101. { check further for
  1102. Bcc xxx
  1103. <several instructions 1>
  1104. B yyy
  1105. xxx:
  1106. <several instructions 2>
  1107. yyy:
  1108. }
  1109. { hp2 points to jmp yyy }
  1110. hp2:=hp1;
  1111. { skip hp1 to xxx }
  1112. GetNextInstruction(hp1, hp1);
  1113. if assigned(hp2) and
  1114. assigned(hp1) and
  1115. (l<=3) and
  1116. (hp2.typ=ait_instruction) and
  1117. (taicpu(hp2).is_jmp) and
  1118. (taicpu(hp2).condition=C_None) and
  1119. { real label and jump, no further references to the
  1120. label are allowed }
  1121. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1122. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1123. begin
  1124. l:=0;
  1125. { skip hp1 to <several moves 2> }
  1126. GetNextInstruction(hp1, hp1);
  1127. while assigned(hp1) and
  1128. CanBeCond(hp1) do
  1129. begin
  1130. inc(l);
  1131. GetNextInstruction(hp1, hp1);
  1132. end;
  1133. { hp1 points to yyy: }
  1134. if assigned(hp1) and
  1135. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1136. begin
  1137. condition:=inverse_cond(taicpu(p).condition);
  1138. GetNextInstruction(p,hp1);
  1139. hp3:=p;
  1140. p:=hp1;
  1141. repeat
  1142. if hp1.typ=ait_instruction then
  1143. taicpu(hp1).condition:=condition;
  1144. GetNextInstruction(hp1,hp1);
  1145. until not(assigned(hp1)) or
  1146. not(CanBeCond(hp1));
  1147. { hp2 is still at jmp yyy }
  1148. GetNextInstruction(hp2,hp1);
  1149. { hp2 is now at xxx: }
  1150. condition:=inverse_cond(condition);
  1151. GetNextInstruction(hp1,hp1);
  1152. { hp1 is now at <several movs 2> }
  1153. repeat
  1154. taicpu(hp1).condition:=condition;
  1155. GetNextInstruction(hp1,hp1);
  1156. until not(assigned(hp1)) or
  1157. not(CanBeCond(hp1)) or
  1158. (hp1.typ=ait_label);
  1159. {
  1160. asml.remove(hp1.next)
  1161. hp1.next.free;
  1162. asml.remove(hp1);
  1163. hp1.free;
  1164. }
  1165. { remove Bcc }
  1166. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1167. asml.remove(hp3);
  1168. hp3.free;
  1169. { remove jmp }
  1170. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1171. asml.remove(hp2);
  1172. hp2.free;
  1173. continue;
  1174. end;
  1175. end;
  1176. end;
  1177. end;
  1178. end;
  1179. end;
  1180. end;
  1181. end;
  1182. p := tai(p.next)
  1183. end;
  1184. end;
  1185. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  1186. begin
  1187. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  1188. Result:=true
  1189. else
  1190. Result:=inherited RegInInstruction(Reg, p1);
  1191. end;
  1192. const
  1193. { set of opcode which might or do write to memory }
  1194. { TODO : extend armins.dat to contain r/w info }
  1195. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  1196. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  1197. { adjust the register live information when swapping the two instructions p and hp1,
  1198. they must follow one after the other }
  1199. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  1200. procedure CheckLiveEnd(reg : tregister);
  1201. var
  1202. supreg : TSuperRegister;
  1203. regtype : TRegisterType;
  1204. begin
  1205. if reg=NR_NO then
  1206. exit;
  1207. regtype:=getregtype(reg);
  1208. supreg:=getsupreg(reg);
  1209. if (cg.rg[regtype].live_end[supreg]=hp1) and
  1210. RegInInstruction(reg,p) then
  1211. cg.rg[regtype].live_end[supreg]:=p;
  1212. end;
  1213. procedure CheckLiveStart(reg : TRegister);
  1214. var
  1215. supreg : TSuperRegister;
  1216. regtype : TRegisterType;
  1217. begin
  1218. if reg=NR_NO then
  1219. exit;
  1220. regtype:=getregtype(reg);
  1221. supreg:=getsupreg(reg);
  1222. if (cg.rg[regtype].live_start[supreg]=p) and
  1223. RegInInstruction(reg,hp1) then
  1224. cg.rg[regtype].live_start[supreg]:=hp1;
  1225. end;
  1226. var
  1227. i : longint;
  1228. r : TSuperRegister;
  1229. begin
  1230. { assumption: p is directly followed by hp1 }
  1231. { if live of any reg used by p starts at p and hp1 uses this register then
  1232. set live start to hp1 }
  1233. for i:=0 to p.ops-1 do
  1234. case p.oper[i]^.typ of
  1235. Top_Reg:
  1236. CheckLiveStart(p.oper[i]^.reg);
  1237. Top_Ref:
  1238. begin
  1239. CheckLiveStart(p.oper[i]^.ref^.base);
  1240. CheckLiveStart(p.oper[i]^.ref^.index);
  1241. end;
  1242. Top_Shifterop:
  1243. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  1244. Top_RegSet:
  1245. for r:=RS_R0 to RS_R15 do
  1246. if r in p.oper[i]^.regset^ then
  1247. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1248. end;
  1249. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  1250. set live end to p }
  1251. for i:=0 to hp1.ops-1 do
  1252. case hp1.oper[i]^.typ of
  1253. Top_Reg:
  1254. CheckLiveEnd(hp1.oper[i]^.reg);
  1255. Top_Ref:
  1256. begin
  1257. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  1258. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  1259. end;
  1260. Top_Shifterop:
  1261. CheckLiveStart(hp1.oper[i]^.shifterop^.rs);
  1262. Top_RegSet:
  1263. for r:=RS_R0 to RS_R15 do
  1264. if r in hp1.oper[i]^.regset^ then
  1265. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1266. end;
  1267. end;
  1268. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  1269. { TODO : schedule also forward }
  1270. { TODO : schedule distance > 1 }
  1271. var
  1272. hp1,hp2,hp3,hp4,hp5 : tai;
  1273. list : TAsmList;
  1274. begin
  1275. result:=true;
  1276. list:=TAsmList.Create;
  1277. p:=BlockStart;
  1278. while p<>BlockEnd Do
  1279. begin
  1280. if (p.typ=ait_instruction) and
  1281. GetNextInstruction(p,hp1) and
  1282. (hp1.typ=ait_instruction) and
  1283. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1284. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  1285. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  1286. not(RegModifiedByInstruction(NR_PC,p))
  1287. ) or
  1288. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  1289. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  1290. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  1291. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1292. )
  1293. ) or
  1294. { try to prove that the memory accesses don't overlapp }
  1295. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1296. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1297. (taicpu(p).oppostfix=PF_None) and
  1298. (taicpu(hp1).oppostfix=PF_None) and
  1299. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1300. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1301. { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
  1302. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1303. )
  1304. )
  1305. ) and
  1306. GetNextInstruction(hp1,hp2) and
  1307. (hp2.typ=ait_instruction) and
  1308. { loaded register used by next instruction? }
  1309. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1310. { loaded register not used by previous instruction? }
  1311. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1312. { same condition? }
  1313. (taicpu(p).condition=taicpu(hp1).condition) and
  1314. { first instruction might not change the register used as base }
  1315. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1316. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1317. ) and
  1318. { first instruction might not change the register used as index }
  1319. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1320. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1321. ) then
  1322. begin
  1323. hp3:=tai(p.Previous);
  1324. hp5:=tai(p.next);
  1325. asml.Remove(p);
  1326. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1327. { before the instruction? }
  1328. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1329. begin
  1330. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1331. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1332. begin
  1333. hp4:=hp3;
  1334. hp3:=tai(hp3.Previous);
  1335. asml.Remove(hp4);
  1336. list.Concat(hp4);
  1337. end
  1338. else
  1339. hp3:=tai(hp3.Previous);
  1340. end;
  1341. list.Concat(p);
  1342. SwapRegLive(taicpu(p),taicpu(hp1));
  1343. { after the instruction? }
  1344. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1345. begin
  1346. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1347. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1348. begin
  1349. hp4:=hp5;
  1350. hp5:=tai(hp5.next);
  1351. asml.Remove(hp4);
  1352. list.Concat(hp4);
  1353. end
  1354. else
  1355. hp5:=tai(hp5.Next);
  1356. end;
  1357. asml.Remove(hp1);
  1358. {$ifdef DEBUG_PREREGSCHEDULER}
  1359. asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1360. {$endif DEBUG_PREREGSCHEDULER}
  1361. asml.InsertBefore(hp1,hp2);
  1362. asml.InsertListBefore(hp2,list);
  1363. p:=tai(p.next)
  1364. end
  1365. else if p.typ=ait_instruction then
  1366. p:=hp1
  1367. else
  1368. p:=tai(p.next);
  1369. end;
  1370. list.Free;
  1371. end;
  1372. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  1373. begin
  1374. { TODO: Add optimizer code }
  1375. end;
  1376. begin
  1377. casmoptimizer:=TCpuAsmOptimizer;
  1378. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1379. End.