aoptcpu.pas 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  Type
    { Peephole optimizer object for the ARM code generator. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { uses the same constructor as TAopObj }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      procedure PeepHoleOptPass2;override;
      Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
      { Tries to fold "movp" (a plain register move of the result of "p")
        into "p" itself; "optimizer" is only used for the debug message. }
      procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
      { Updates AllUsedRegs with the items following "p" and reports whether
        "reg" is still considered in use after "p". }
      function RegUsedAfterInstruction(reg: Tregister; p: tai;
                                       var AllUsedRegs: TAllUsedRegs): Boolean;
      { Starting after Current, stores in Next the first optimizer-relevant
        tai that uses the given register or changes the program flow.
        Returns false if no further item could be fetched. }
      Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
      { outputs a debug message into the assembler file }
      procedure DebugMsg(const s: string; p: tai);
    protected
      { Tries to merge a following add/sub on the base register of the
        load/store "p" into a post-indexed addressing mode. }
      function LookForPostindexedPattern(p: taicpu): boolean;
    End;

    { Instruction scheduler that runs before register allocation. }
    TCpuPreRegallocScheduler = class(TAsmScheduler)
      function SchedulerPass1Cpu(var p: tai): boolean;override;
      procedure SwapRegLive(p, hp1: taicpu);
    end;

    { Thumb-2 variant of the optimizer; only pass 2 is overridden here. }
    TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
      { uses the same constructor as TAopObj }
      procedure PeepHoleOptPass2;override;
    End;
  52. Implementation
  53. uses
  54. cutils,verbose,globals,
  55. systems,
  56. cpuinfo,
  57. cgobj,cgutils,procinfo,
  58. aasmbase,aasmdata;
  { Returns true if p is an instruction that currently carries no condition
    (C_None), is not a PLD and - in case of a BLX - has a register as its
    first operand. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional, or PLD: rejected }
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { BLX is only accepted in its register form }
      if instr.opcode=A_BLX then
        result:=instr.oper[0]^.typ=top_reg
      else
        result:=true;
    end;
  { Field-wise comparison of two references: base/index registers, offset,
    symbols, scale, sign, shift and addressing mode all have to be equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers and displacement }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      { ARM specific part }
      Result:=
        (r1.signindex = r2.signindex) and
        (r1.shiftimm = r2.shiftimm) and
        (r1.addressmode = r2.addressmode) and
        (r1.shiftmode = r2.shiftmode);
    end;
  { Returns true if instr is an instruction with opcode op whose condition
    is contained in cond and whose postfix is contained in postfix.
    An empty set for cond or postfix accepts any condition/postfix. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. They match if they are of the same type and, for
    constants, registers, condition codes and references, carry the same
    value. Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_const:
          Result:=oper1.val = oper2.val;
        top_reg:
          Result:=oper1.reg = oper2.reg;
        top_conditioncode:
          Result:=oper1.cc = oper2.cc;
        top_ref:
          Result:=RefsEqual(oper1.ref^, oper2.ref^);
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg=reg;
    end;
  { Removes movp from asml (leaving a comment behind) if it is executed on
    C_EQ and its first operand register and second operand value equal those
    of the compare cmpp.
    NOTE(review): the operand types are not checked here; presumably the
    caller guarantees "cmp reg,#const" / "mov reg,#const" - confirm. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    begin
      if taicpu(movp).condition<>C_EQ then
        exit;
      if taicpu(cmpp).oper[0]^.reg<>taicpu(movp).oper[0]^.reg then
        exit;
      if taicpu(cmpp).oper[1]^.val<>taicpu(movp).oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true if the instruction hp writes a new value into reg.

    hp may be nil or a non-instruction; false is returned in that case.
    Compare/test/branch style instructions never write a register. For
    LDR/STR a pre-/postindexed base register counts as written, for the long
    multiplies both destination registers and for LDM every register of the
    register set. For all remaining instructions operand 0 is looked at. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      { an instruction without operands cannot write reg; this also protects
        the oper[0] access below }
      if p.ops = 0 then
        exit;
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        {Take care of post/preincremented store and loads, they will change their base register}
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR only reads its first operand, so the generic oper[0] check
              below must not be applied to it }
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        {Loads to oper2 from coprocessor}
        {
        MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        {Loads to all register in the registerset}
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      case p.oper[0]^.typ of
        { the common case: operand 0 is the destination register }
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD also writes the register following operand 0 }
            (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
        {LDM/STM might write a new value to their index register}
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (safe) heuristics to decide whether ref is aligned to 8 bytes.
    Only answers true for the EABI style ABIs and only for references that
    are relative to R13 (offset >= 0, multiple of 8) or to R11 when R11 is
    the frame pointer (offset <= 0, offset+4 a multiple of 8). }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { R13 relative }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;
      { R11 relative:
        when using NR_R11, it has always a value of <qword align>+4 }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if one of the inspected operands of the instruction hp
    refers to reg: directly, as member of a register set, as shift register
    of a shifter operand or as base/index of a reference.
    Operand 0 is skipped except for the opcodes listed below. hp may be nil
    or a non-instruction; the result is false then. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      idx,
      firstop: longint;
    begin
      instructionLoadsFromReg := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      instr:=taicpu(hp);
      {For these instructions we have to start on oper[0]}
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;
      for idx:=firstop to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              instructionLoadsFromReg := (instr.oper[idx]^.reg = reg) or
                { STRD also stores the register following operand 0 }
                ((idx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              instructionLoadsFromReg := (getsupreg(reg) in instr.oper[idx]^.regset^);
            top_shifterop:
              instructionLoadsFromReg := instr.oper[idx]^.shifterop^.rs = reg;
            top_ref:
              instructionLoadsFromReg :=
                (instr.oper[idx]^.ref^.base = reg) or
                (instr.oper[idx]^.ref^.index = reg);
          end;
          { stop at the first hit }
          if instructionLoadsFromReg then
            exit;
        end;
    end;
  { Updates the register usage information in AllUsedRegs with the items
    following p and returns whether reg is still regarded as used after p.
    reg is regarded as unused if it is not marked as used, if p itself
    loads it with a new value, or if the next instruction overwrites it
    without reading it first. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
                                                    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      Result:=false;
      if not(AllUsedRegs[getregtype(reg)].IsUsed(reg)) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p is the instruction following the original p }
      if not(GetNextInstruction(p,p)) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Walks forward from Current by means of GetNextInstruction and stops at
    the first item which is not an instruction, which uses reg, which is a
    call/jump or which uses the program counter. The item is returned in
    Next; the function result is the result of the last GetNextInstruction
    call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not(Result) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Inserts the text s as assembler comment in front of p. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note : tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Debug output is disabled: nothing is emitted. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Tries to turn
      <op> reg1, ...        (p)
      mov  reg2, reg1       (movp)
    into
      <op> reg2, ...
    p     : instruction whose operand 0 is the register read by the mov
    movp  : candidate mov; it is checked here, removed and freed on success
    optimizer : name used in the debug message only

    The mov must be a two operand register move with the same condition as p
    and without postfix, must not target PC or R14, its target must not be
    used between p and movp, and reg1 must be deallocated after movp. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov might not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
         ) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              { GetLastInstruction may fail (nothing relevant in front of p);
                never dereference hp1 in that case }
              if not(GetLastInstruction(p,hp1)) then
                hp1:=nil;
              asml.Remove(dealloc);
              if assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { allocation not found: keep the deallocation, but directly after p }
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register }
              if GetLastInstruction(movp,hp1) then
                alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p must be a load/store whose operand 1 is a reference. Returns true if
    the add/sub has been folded into p (the add/sub is removed and freed).

    The optimization is only applied if
      * p uses a plain [reg1] reference (no index, offset 0)
      * the add/sub is unconditional, has exactly three operands and both
        destination and first source are reg1
      * regX is not reg1
      * a constant fits into the offset field of the load/store
      * a register regY is not regX and is not modified between p and the
        add/sub, because its value is now read at p already
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
         (hp1.typ=ait_instruction) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         (MatchInstruction(hp1, A_ADD, [C_None], [PF_None]) or
          MatchInstruction(hp1, A_SUB, [C_None], [PF_None])) and
         { a fourth operand would be a shifter operand which cannot be
           expressed by the code below }
         (taicpu(hp1).ops=3) and
         MatchOperand(taicpu(hp1).oper[0]^, p.oper[1]^.ref^.base) and
         MatchOperand(taicpu(hp1).oper[1]^, p.oper[1]^.ref^.base) and
         { don't apply the optimization if the base register is loaded }
         (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
         (
           { constant: must fit into the offset field; 12 bits are only
             assumed for the plain word access, 8 bits otherwise }
           ((taicpu(hp1).oper[2]^.typ=top_const) and
            ((abs(taicpu(hp1).oper[2]^.val)<256) or
             ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix=PF_None)))
           ) or
           { register: must not be the transferred register and must still
             hold the same value at p }
           ((taicpu(hp1).oper[2]^.typ=top_reg) and
            (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
            not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1))
           )
         ) then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              { the index is the second source operand (regY), not the base }
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  359. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  360. var
  361. hp1,hp2: tai;
  362. i, i2: longint;
  363. TmpUsedRegs: TAllUsedRegs;
  364. tempop: tasmop;
  365. function IsPowerOf2(const value: DWord): boolean; inline;
  366. begin
  367. Result:=(value and (value - 1)) = 0;
  368. end;
  369. begin
  370. result := false;
  371. case p.typ of
  372. ait_instruction:
  373. begin
  374. {
  375. change
  376. <op> reg,x,y
  377. cmp reg,#0
  378. into
  379. <op>s reg,x,y
  380. }
  381. { this optimization can applied only to the currently enabled operations because
  382. the other operations do not update all flags and FPC does not track flag usage }
  383. if ((taicpu(p).opcode in [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,
  384. A_ORR,A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA])
  385. ) and
  386. (taicpu(p).oppostfix = PF_None) and
  387. (taicpu(p).condition = C_None) and
  388. GetNextInstruction(p, hp1) and
  389. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  390. (taicpu(hp1).oper[1]^.typ = top_const) and
  391. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  392. (taicpu(hp1).oper[1]^.val = 0) and
  393. GetNextInstruction(hp1, hp2) and
  394. { be careful here, following instructions could use other flags
  395. however after a jump fpc never depends on the value of flags }
  396. { All above instructions set Z and N according to the following
  397. Z := result = 0;
  398. N := result[31];
  399. EQ = Z=1; NE = Z=0;
  400. MI = N=1; PL = N=0; }
  401. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  402. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  403. begin
  404. DebugMsg('Peephole OpCmp2OpS done', p);
  405. taicpu(p).oppostfix:=PF_S;
  406. { move flag allocation if possible }
  407. GetLastInstruction(hp1, hp2);
  408. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  409. if assigned(hp2) then
  410. begin
  411. asml.Remove(hp2);
  412. asml.insertbefore(hp2, p);
  413. end;
  414. asml.remove(hp1);
  415. hp1.free;
  416. end
  417. else
  418. case taicpu(p).opcode of
  419. A_STR:
  420. begin
  421. { change
  422. str reg1,ref
  423. ldr reg2,ref
  424. into
  425. str reg1,ref
  426. mov reg2,reg1
  427. }
  428. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  429. (taicpu(p).oppostfix=PF_None) and
  430. GetNextInstruction(p,hp1) and
  431. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  432. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  433. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  434. begin
  435. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  436. begin
  437. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  438. asml.remove(hp1);
  439. hp1.free;
  440. end
  441. else
  442. begin
  443. taicpu(hp1).opcode:=A_MOV;
  444. taicpu(hp1).oppostfix:=PF_None;
  445. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  446. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  447. end;
  448. result := true;
  449. end
  450. { change
  451. str reg1,ref
  452. str reg2,ref
  453. into
  454. strd reg1,ref
  455. }
  456. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  457. (taicpu(p).oppostfix=PF_None) and
  458. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  459. GetNextInstruction(p,hp1) and
  460. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  461. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  462. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  463. { str ensures that either base or index contain no register, else ldr wouldn't
  464. use an offset either
  465. }
  466. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  467. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  468. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  469. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  470. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  471. begin
  472. DebugMsg('Peephole StrStr2Strd done', p);
  473. taicpu(p).oppostfix:=PF_D;
  474. asml.remove(hp1);
  475. hp1.free;
  476. end;
  477. LookForPostindexedPattern(taicpu(p));
  478. end;
  479. A_LDR:
  480. begin
  481. { change
  482. ldr reg1,ref
  483. ldr reg2,ref
  484. into ...
  485. }
  486. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  487. GetNextInstruction(p,hp1) and
  488. { ldrd is not allowed here }
  489. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  490. begin
  491. {
  492. ...
  493. ldr reg1,ref
  494. mov reg2,reg1
  495. }
  496. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  497. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  498. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  499. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  500. begin
  501. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  502. begin
  503. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  504. asml.remove(hp1);
  505. hp1.free;
  506. end
  507. else
  508. begin
  509. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  510. taicpu(hp1).opcode:=A_MOV;
  511. taicpu(hp1).oppostfix:=PF_None;
  512. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  513. end;
  514. result := true;
  515. end
  516. {
  517. ...
  518. ldrd reg1,ref
  519. }
  520. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  521. { ldrd does not allow any postfixes ... }
  522. (taicpu(p).oppostfix=PF_None) and
  523. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  524. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  525. { ldr ensures that either base or index contain no register, else ldr wouldn't
  526. use an offset either
  527. }
  528. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  529. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  530. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  531. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  532. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  533. begin
  534. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  535. taicpu(p).oppostfix:=PF_D;
  536. asml.remove(hp1);
  537. hp1.free;
  538. end;
  539. end;
  540. LookForPostindexedPattern(taicpu(p));
  541. { Remove superfluous mov after ldr
  542. changes
  543. ldr reg1, ref
  544. mov reg2, reg1
  545. to
  546. ldr reg2, ref
  547. conditions are:
  548. * no ldrd usage
  549. * reg1 must be released after mov
  550. * mov can not contain shifterops
  551. * ldr+mov have the same conditions
  552. * mov does not set flags
  553. }
  554. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  555. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  556. end;
  557. A_MOV:
  558. begin
  559. { fold
  560. mov reg1,reg0, shift imm1
  561. mov reg1,reg1, shift imm2
  562. }
  563. if (taicpu(p).ops=3) and
  564. (taicpu(p).oper[2]^.typ = top_shifterop) and
  565. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  566. getnextinstruction(p,hp1) and
  567. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  568. (taicpu(hp1).ops=3) and
  569. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  570. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  571. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  572. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  573. begin
  574. { fold
  575. mov reg1,reg0, lsl 16
  576. mov reg1,reg1, lsr 16
  577. strh reg1, ...
  578. dealloc reg1
  579. to
  580. strh reg1, ...
  581. dealloc reg1
  582. }
  583. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  584. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  585. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  586. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  587. getnextinstruction(hp1,hp2) and
  588. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  589. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  590. begin
  591. CopyUsedRegs(TmpUsedRegs);
  592. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  593. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  594. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  595. begin
  596. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  597. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  598. asml.remove(p);
  599. asml.remove(hp1);
  600. p.free;
  601. hp1.free;
  602. p:=hp2;
  603. end;
  604. ReleaseUsedRegs(TmpUsedRegs);
  605. end
  606. { fold
  607. mov reg1,reg0, shift imm1
  608. mov reg1,reg1, shift imm2
  609. to
  610. mov reg1,reg0, shift imm1+imm2
  611. }
  612. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  613. { asr makes no use after a lsr, the asr can be foled into the lsr }
  614. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  615. begin
  616. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  617. { avoid overflows }
  618. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  619. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  620. SM_ROR:
  621. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  622. SM_ASR:
  623. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  624. SM_LSR,
  625. SM_LSL:
  626. begin
  627. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  628. InsertLLItem(p.previous, p.next, hp1);
  629. p.free;
  630. p:=hp1;
  631. end;
  632. else
  633. internalerror(2008072803);
  634. end;
  635. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  636. asml.remove(hp1);
  637. hp1.free;
  638. result := true;
  639. end
  640. { fold
  641. mov reg1,reg0, shift imm1
  642. mov reg1,reg1, shift imm2
  643. mov reg1,reg1, shift imm3 ...
  644. }
  645. else if getnextinstruction(hp1,hp2) and
  646. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  647. (taicpu(hp2).ops=3) and
  648. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  649. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  650. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  651. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  652. begin
  653. { mov reg1,reg0, lsl imm1
  654. mov reg1,reg1, lsr/asr imm2
  655. mov reg1,reg1, lsl imm3 ...
  656. if imm3<=imm1 and imm2>=imm3
  657. to
  658. mov reg1,reg0, lsl imm1
  659. mov reg1,reg1, lsr/asr imm2-imm3
  660. }
  661. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  662. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  663. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  664. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  665. begin
  666. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  667. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  668. asml.remove(hp2);
  669. hp2.free;
  670. result := true;
  671. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  672. begin
  673. asml.remove(hp1);
  674. hp1.free;
  675. end;
  676. end
  677. { mov reg1,reg0, lsr/asr imm1
  678. mov reg1,reg1, lsl imm2
  679. mov reg1,reg1, lsr/asr imm3 ...
  680. if imm3>=imm1 and imm2>=imm1
  681. to
  682. mov reg1,reg0, lsl imm2-imm1
  683. mov reg1,reg1, lsr/asr imm3 ...
  684. }
  685. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  686. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  687. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  688. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  689. begin
  690. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  691. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  692. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  693. asml.remove(p);
  694. p.free;
  695. p:=hp2;
  696. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  697. begin
  698. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  699. asml.remove(hp1);
  700. hp1.free;
  701. p:=hp2;
  702. end;
  703. result := true;
  704. end;
  705. end;
  706. end;
  707. { Change the common
  708. mov r0, r0, lsr #24
  709. and r0, r0, #255
  710. and remove the superfluous and
  711. This could be extended to handle more cases.
  712. }
  713. if (taicpu(p).ops=3) and
  714. (taicpu(p).oper[2]^.typ = top_shifterop) and
  715. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  716. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  717. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  718. getnextinstruction(p,hp1) and
  719. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  720. (taicpu(hp1).ops=3) and
  721. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  722. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  723. (taicpu(hp1).oper[2]^.typ = top_const) and
  724. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  725. For LSR #25 and an AndConst of 255 that whould go like this:
  726. 255 and ((2 shl (32-25))-1)
  727. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  728. LSR #25 and AndConst of 254:
  729. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  730. }
  731. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  732. begin
  733. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  734. asml.remove(hp1);
  735. hp1.free;
  736. end;
  737. {
  738. optimize
  739. mov rX, yyyy
  740. ....
  741. }
  742. if (taicpu(p).ops = 2) and
  743. GetNextInstruction(p,hp1) and
  744. (tai(hp1).typ = ait_instruction) then
  745. begin
  746. {
  747. This changes the very common
  748. mov r0, #0
  749. str r0, [...]
  750. mov r0, #0
  751. str r0, [...]
  752. and removes all superfluous mov instructions
  753. }
  754. if (taicpu(p).oper[1]^.typ = top_const) and
  755. (taicpu(hp1).opcode=A_STR) then
  756. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  757. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  758. GetNextInstruction(hp1, hp2) and
  759. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  760. (taicpu(hp2).ops = 2) and
  761. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  762. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  763. begin
  764. DebugMsg('Peephole MovStrMov done', hp2);
  765. GetNextInstruction(hp2,hp1);
  766. asml.remove(hp2);
  767. hp2.free;
  768. if not assigned(hp1) then break;
  769. end
  770. {
  771. This removes the first mov from
  772. mov rX,...
  773. mov rX,...
  774. }
  775. else if taicpu(hp1).opcode=A_MOV then
  776. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  777. (taicpu(hp1).ops = 2) and
  778. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  779. { don't remove the first mov if the second is a mov rX,rX }
  780. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  781. begin
  782. DebugMsg('Peephole MovMov done', p);
  783. asml.remove(p);
  784. p.free;
  785. p:=hp1;
  786. GetNextInstruction(hp1,hp1);
  787. if not assigned(hp1) then
  788. break;
  789. end;
  790. end;
  791. {
  792. change
  793. mov r1, r0
  794. add r1, r1, #1
  795. to
  796. add r1, r0, #1
  797. Todo: Make it work for mov+cmp too
  798. CAUTION! If this one is successful p might not be a mov instruction anymore!
  799. }
  800. if (taicpu(p).ops = 2) and
  801. (taicpu(p).oper[1]^.typ = top_reg) and
  802. (taicpu(p).oppostfix = PF_NONE) and
  803. GetNextInstruction(p, hp1) and
  804. (tai(hp1).typ = ait_instruction) and
  805. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  806. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  807. {MOV and MVN might only have 2 ops}
  808. (taicpu(hp1).ops = 3) and
  809. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  810. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  811. (taicpu(hp1).oper[1]^.typ = top_reg) and
  812. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  813. begin
  814. { When we get here we still don't know if the registers match}
  815. for I:=1 to 2 do
  816. {
  817. If the first loop was successful p will be replaced with hp1.
  818. The checks will still be ok, because all required information
  819. will also be in hp1 then.
  820. }
  821. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  822. begin
  823. DebugMsg('Peephole RedundantMovProcess done', hp1);
  824. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  825. if p<>hp1 then
  826. begin
  827. asml.remove(p);
  828. p.free;
  829. p:=hp1;
  830. end;
  831. end;
  832. end;
  833. { This folds shifterops into following instructions
  834. mov r0, r1, lsl #8
  835. add r2, r3, r0
  836. to
  837. add r2, r3, r1, lsl #8
  838. CAUTION! If this one is successful p might not be a mov instruction anymore!
  839. }
  840. if (taicpu(p).opcode = A_MOV) and
  841. (taicpu(p).ops = 3) and
  842. (taicpu(p).oper[1]^.typ = top_reg) and
  843. (taicpu(p).oper[2]^.typ = top_shifterop) and
  844. (taicpu(p).oppostfix = PF_NONE) and
  845. GetNextInstruction(p, hp1) and
  846. (tai(hp1).typ = ait_instruction) and
  847. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  848. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  849. (taicpu(hp1).oppostfix = PF_NONE) and
  850. (taicpu(hp1).condition = taicpu(p).condition) and
  851. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  852. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  853. A_CMP, A_CMN]) and
  854. (
  855. {Only ONE of the two src operands is allowed to match}
  856. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  857. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  858. ) then
  859. begin
  860. CopyUsedRegs(TmpUsedRegs);
  861. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  862. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  863. I2:=0
  864. else
  865. I2:=1;
  866. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  867. for I:=I2 to taicpu(hp1).ops-1 do
  868. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  869. begin
  870. { If the parameter matched on the second op from the RIGHT
  871. we have to switch the parameters, this will not happen for CMP
  872. were we're only evaluating the most right parameter
  873. }
  874. if I <> taicpu(hp1).ops-1 then
  875. begin
  876. {The SUB operators need to be changed when we swap parameters}
  877. case taicpu(hp1).opcode of
  878. A_SUB: tempop:=A_RSB;
  879. A_SBC: tempop:=A_RSC;
  880. A_RSB: tempop:=A_SUB;
  881. A_RSC: tempop:=A_SBC;
  882. else tempop:=taicpu(hp1).opcode;
  883. end;
  884. if taicpu(hp1).ops = 3 then
  885. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  886. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  887. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  888. else
  889. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  890. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  891. taicpu(p).oper[2]^.shifterop^);
  892. end
  893. else
  894. if taicpu(hp1).ops = 3 then
  895. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  896. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  897. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  898. else
  899. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  900. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  901. taicpu(p).oper[2]^.shifterop^);
  902. asml.insertbefore(hp2, p);
  903. asml.remove(p);
  904. asml.remove(hp1);
  905. p.free;
  906. hp1.free;
  907. p:=hp2;
  908. GetNextInstruction(p,hp1);
  909. DebugMsg('Peephole FoldShiftProcess done', p);
  910. break;
  911. end;
  912. ReleaseUsedRegs(TmpUsedRegs);
  913. end;
  914. {
  915. Often we see shifts and then a superfluous mov to another register
  916. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  917. }
  918. if (taicpu(p).opcode = A_MOV) and
  919. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  920. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  921. end;
  922. A_ADD,
  923. A_ADC,
  924. A_RSB,
  925. A_RSC,
  926. A_SUB,
  927. A_SBC,
  928. A_AND,
  929. A_BIC,
  930. A_EOR,
  931. A_ORR,
  932. A_MLA,
  933. A_MUL:
  934. begin
  935. {
  936. change
  937. and reg2,reg1,const1
  938. and reg2,reg2,const2
  939. to
  940. and reg2,reg1,(const1 and const2)
  941. }
  942. if (taicpu(p).opcode = A_AND) and
  943. (taicpu(p).oper[1]^.typ = top_reg) and
  944. (taicpu(p).oper[2]^.typ = top_const) and
  945. GetNextInstruction(p, hp1) and
  946. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  947. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  948. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  949. (taicpu(hp1).oper[2]^.typ = top_const) then
  950. begin
  951. DebugMsg('Peephole AndAnd2And done', p);
  952. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  953. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  954. asml.remove(hp1);
  955. hp1.free;
  956. end;
  957. {
  958. change
  959. add/sub reg2,reg1,const1
  960. str/ldr reg3,[reg2,const2]
  961. dealloc reg2
  962. to
  963. str/ldr reg3,[reg1,const2+/-const1]
  964. }
  965. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  966. (taicpu(p).oper[1]^.typ = top_reg) and
  967. (taicpu(p).oper[2]^.typ = top_const) then
  968. begin
  969. hp1:=p;
  970. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  971. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  972. (MatchInstruction(hp1, A_LDR, [C_None], []) or
  973. MatchInstruction(hp1, A_STR, [C_None], [])) and
  974. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  975. { don't optimize if the register is stored/overwritten }
  976. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  977. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  978. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  979. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  980. ldr postfix }
  981. (((taicpu(p).opcode=A_ADD) and
  982. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  983. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<4096)) or
  984. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<256)
  985. )
  986. ) or
  987. ((taicpu(p).opcode=A_SUB) and
  988. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  989. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<4096)) or
  990. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<256)
  991. )
  992. )
  993. ) do
  994. begin
  995. { neither reg1 nor reg2 might be changed inbetween }
  996. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  997. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  998. break;
  999. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1000. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1001. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1002. begin
  1003. { remember last instruction }
  1004. hp2:=hp1;
  1005. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1006. hp1:=p;
  1007. { fix all ldr/str }
  1008. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1009. begin
  1010. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1011. if taicpu(p).opcode=A_ADD then
  1012. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1013. else
  1014. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1015. if hp1=hp2 then
  1016. break;
  1017. end;
  1018. GetNextInstruction(p,hp1);
  1019. asml.remove(p);
  1020. p.free;
  1021. p:=hp1;
  1022. break;
  1023. end;
  1024. end;
  1025. end;
  1026. {
  1027. change
  1028. add reg1, ...
  1029. mov reg2, reg1
  1030. to
  1031. add reg2, ...
  1032. }
  1033. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1034. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1035. end;
  1036. A_CMP:
  1037. begin
  1038. {
  1039. change
  1040. cmp reg,const1
  1041. moveq reg,const1
  1042. movne reg,const2
  1043. to
  1044. cmp reg,const1
  1045. movne reg,const2
  1046. }
  1047. if (taicpu(p).oper[1]^.typ = top_const) and
  1048. GetNextInstruction(p, hp1) and
  1049. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1050. (taicpu(hp1).oper[1]^.typ = top_const) and
  1051. GetNextInstruction(hp1, hp2) and
  1052. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1053. (taicpu(hp1).oper[1]^.typ = top_const) then
  1054. begin
  1055. RemoveRedundantMove(p, hp1, asml);
  1056. RemoveRedundantMove(p, hp2, asml);
  1057. end;
  1058. end;
  1059. end;
  1060. end;
  1061. end;
  1062. end;
  { Tells whether p has to be the final instruction of a conditionally
    executed sequence.

    True when p is an instruction and at least one of the following holds:
      - its opcode is one of BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF, CMFE;
      - its first operand is the register PC;
      - it carries the S postfix.
    (Original note: instructions modifying the CPSR can be only the last
    instruction.) }
  function MustBeLast(p : tai) : boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      { anything that is not an instruction never qualifies }
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);
      if insn.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (insn.ops>=1) and
              (insn.oper[0]^.typ=top_reg) and
              (insn.oper[0]^.reg=NR_PC) then
        { first operand is PC }
        Result:=true
      else
        Result:=(insn.oppostfix=PF_S);
    end;
  { Second peephole pass: replaces short forward conditional branches by
    conditionally executed instructions.

    Walks the block from BlockStart to BlockEnd and looks at every
    conditional B instruction.  Two shapes are handled:

      1)  Bcc xxx                      2)  Bcc xxx
          <up to 4 instructions>           <up to 3 instructions 1>
        xxx:                               B   yyy
                                         xxx:
                                           <instructions 2>
                                         yyy:

    In shape 1 the skipped instructions receive the inverse condition and
    the branch is removed.  In shape 2 block 1 receives the inverse
    condition, block 2 the original condition, and both branches are
    removed.  An instruction for which MustBeLast is true ends block 1 of
    shape 1 and disables shape 2 altogether. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                            Bxx   xxx
                            <several instructions>
                          xxx:
                        }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        { count the instructions that could be made conditional }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 remembers the Bcc, p moves on to the
                                      first instruction that was skipped by it }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized.
                              }
                              if not WasLast then
                                begin
                                  { check further for
                                      Bcc   xxx
                                      <several instructions 1>
                                      B   yyy
                                    xxx:
                                      <several instructions 2>
                                    yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy:

                                        l now holds the number of instructions in
                                        block 2.  The rewrite below uses a
                                        repeat..until loop for block 2, i.e. it
                                        touches at least one item; with an empty
                                        block 2 that item would not belong to
                                        block 2 at all, so such a sequence is left
                                        alone. }
                                      if assigned(hp1) and
                                        (l>0) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          { block 1: executed when the Bcc was not taken }
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            { same guard as in the loop for block 1:
                                              never treat a non-instruction item as
                                              a taicpu }
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1)) or
                                            (hp1.typ=ait_label);
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is involved in p1.

    A BL instruction is answered with true for every register; any other
    item is passed on to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      Result:=(p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL);
      if not Result then
        Result:=inherited RegInInstruction(Reg, p1);
    end;
  1245. const
  1246. { Set of opcodes which might or do write to memory.
          It is consulted by SchedulerPass1Cpu: a load is only moved in front
          of an instruction whose opcode is in this set if one of the
          additional store-specific checks there succeeds. }
  1247. { TODO : extend armins.dat to contain r/w info }
  1248. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  1249. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  { Adjust the register live information when swapping the two instructions
    p and hp1; they must follow one after the other (p first).

    For every register operand of p (plain register, base/index of a
    reference, shift register of a shifter operand, members of a register
    set) whose recorded live start is p and which hp1 uses as well, the live
    start is moved to hp1.  Symmetrically, for every register operand of hp1
    whose recorded live end is hp1 and which p uses as well, the live end is
    moved to p. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { live range of reg ends at hp1 but p uses reg too: it ends at p now }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { live range of reg starts at p but hp1 uses reg too: it starts at hp1 now }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop adjusts live ENDs, so the shift register has to go
              through CheckLiveEnd like every other operand kind here }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Scheduling pass run before register allocation.

    Scans the block from BlockStart to BlockEnd for the sequence

        p        (any instruction)
        load     (LDR/LDRB/LDRH/LDRSB/LDRSH)
        user     (instruction using the loaded register)

    and, when the checks below allow it, moves p - together with the
    register deallocation markers that belong to it - behind the load, so
    that the load no longer sits directly in front of its user.

    p is overwritten with BlockStart on entry and used as the scan cursor.
    The function always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      ldr_ins,use_ins,scan_back,scan_fwd,dealloc_item : tai;
      moved_items : TAsmList;
    begin
      result:=true;
      moved_items:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,ldr_ins) and
            (ldr_ins.typ=ait_instruction) and
            (taicpu(ldr_ins).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { p must not be able to change the loaded memory location: either it
              is not a potential memory writer (and leaves PC alone) ... }
            ( (not(taicpu(p).opcode in opcode_could_mem_write) and
               not(RegModifiedByInstruction(NR_PC,p))
              ) or
              { ... or it is a store while the load is PC based or refers to a
                symbol with offset 0 ... }
              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
               ((taicpu(ldr_ins).oper[1]^.ref^.base=NR_PC) or
                (assigned(taicpu(ldr_ins).oper[1]^.ref^.symboldata) and
                 (taicpu(ldr_ins).oper[1]^.ref^.offset=0)
                )
               ) or
               { ... or store and load share the base register, use neither
                 postfix nor index, and their offsets lie far enough apart that
                 the accesses cannot overlap }
               ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                (taicpu(p).oper[1]^.ref^.base=taicpu(ldr_ins).oper[1]^.ref^.base) and
                (taicpu(p).oppostfix=PF_None) and
                (taicpu(ldr_ins).oppostfix=PF_None) and
                (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                (taicpu(ldr_ins).oper[1]^.ref^.index=NR_NO) and
                (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(ldr_ins).oper[1]^.ref^.offset)>=
                  max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],
                      tcgsize2size[reg_cgsize(taicpu(ldr_ins).oper[0]^.reg)]))
               )
              )
            ) and
            GetNextInstruction(ldr_ins,use_ins) and
            (use_ins.typ=ait_instruction) and
            { the instruction after the load consumes the loaded register }
            (RegInInstruction(taicpu(ldr_ins).oper[0]^.reg,use_ins)) and
            { p itself does not touch the loaded register }
            not(RegInInstruction(taicpu(ldr_ins).oper[0]^.reg,p)) and
            { p and the load are executed under the same condition }
            (taicpu(p).condition=taicpu(ldr_ins).condition) and
            { p does not modify the base register of the load }
            ((taicpu(ldr_ins).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(ldr_ins).oper[1]^.ref^.base,p))
            ) and
            { p does not modify the index register of the load }
            ((taicpu(ldr_ins).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(ldr_ins).oper[1]^.ref^.index,p))
            ) then
            begin
              scan_back:=tai(p.Previous);
              scan_fwd:=tai(p.next);
              asml.Remove(p);

              { collect the dealloc markers in front of p that name a register
                of p; the scan ends at the previous instruction }
              while assigned(scan_back) and (scan_back.typ<>ait_instruction) do
                begin
                  dealloc_item:=scan_back;
                  scan_back:=tai(scan_back.Previous);
                  if (dealloc_item.typ=ait_regalloc) and
                    (tai_regalloc(dealloc_item).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(dealloc_item).reg,p) then
                    begin
                      asml.Remove(dealloc_item);
                      moved_items.Concat(dealloc_item);
                    end;
                end;

              moved_items.Concat(p);
              SwapRegLive(taicpu(p),taicpu(ldr_ins));

              { same for the dealloc markers between p and the load }
              while assigned(scan_fwd) and (scan_fwd.typ<>ait_instruction) do
                begin
                  dealloc_item:=scan_fwd;
                  scan_fwd:=tai(scan_fwd.Next);
                  if (dealloc_item.typ=ait_regalloc) and
                    (tai_regalloc(dealloc_item).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(dealloc_item).reg,p) then
                    begin
                      asml.Remove(dealloc_item);
                      moved_items.Concat(dealloc_item);
                    end;
                end;

              { re-insert the load and then the collected items directly in
                front of the user of the loaded register }
              asml.Remove(ldr_ins);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),use_ins);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(ldr_ins,use_ins);
              asml.InsertListBefore(use_ins,moved_items);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            { nothing to reschedule: continue with the instruction found by
              GetNextInstruction }
            p:=ldr_ins
          else
            p:=tai(p.next);
        end;
      moved_items.Free;
    end;
  { Second peephole pass of the Thumb-2 optimizer class.
    Currently an empty stub: it performs no optimization at all. }
  1425. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  1426. begin
  1427. { TODO: Add optimizer code }
  1428. end;
  { Unit initialization: stores the optimizer and the pre-register-allocation
    scheduler class of this unit in casmoptimizer and cpreregallocscheduler.
    NOTE(review): both targets are declared outside this chunk - presumably
    class-reference variables read by the generic optimizer driver; confirm. }
  1429. begin
  1430. casmoptimizer:=TCpuAsmOptimizer;
  1431. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1432. End.