aoptcpu.pas 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  24. Type
  { ARM peephole optimizer derived from TAsmOptimizer.
    No constructor is declared here, so the inherited one is used. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { CPU specific part of the first peephole pass; works on list entry p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { CPU specific second peephole pass }
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): boolean; override;
    { If movp is a plain "mov" that copies the destination register of p,
      retarget p to the mov's destination and delete the mov.
      optimizer is only used as a label inside the debug message. }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    function RegUsedAfterInstruction(reg: TRegister; p: tai;
      var AllUsedRegs: TAllUsedRegs): boolean;
    { Steps forward from Current and stores in Next the first entry that is
      not an instruction, uses reg, is a call/jump, or involves the PC
      (i.e. may change program flow).
      Returns false when no further entry could be fetched. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): boolean;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  protected
    { tries to merge "ldr/str ...,[base]" with a following add/sub on base
      into one post-indexed access }
    function LookForPostindexedPattern(p: taicpu): boolean;
  end;
  { ARM instruction scheduler derived from TAsmScheduler }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific scheduling step applied to list entry p }
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    { NOTE(review): judging by the name this fixes up register liveness
      information when p and hp1 are exchanged - confirm against the
      implementation, which is not visible here }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Thumb-2 flavour of the ARM peephole optimizer.
    No constructor is declared here, so the inherited one is used;
    only the second peephole pass is replaced. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    procedure PeepHoleOptPass2; override;
  end;
  52. Implementation
  53. uses
  54. cutils,verbose,globals,
  55. systems,
  56. cpuinfo,
  57. cgobj,cgutils,procinfo,
  58. aasmbase,aasmdata;
  { Returns true if p is an instruction that
      - carries no condition yet,
      - is not PLD, and
      - is not BLX, unless the BLX takes a register as first operand. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      if instr.condition<>C_None then
        exit;
      if instr.opcode=A_PLD then
        exit;
      { only the BLX case looks at the operand }
      result:=(instr.opcode<>A_BLX) or
              (instr.oper[0]^.typ=top_reg);
    end;
  { Field-by-field comparison of two references; true only if every
    compared field (offset, base, index, scalefactor, symbol, refaddr,
    relsymbol, signindex, shiftimm, addressmode, shiftmode) matches. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers and displacement }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      { ARM specific addressing details }
      if (r1.signindex<>r2.signindex) or
         (r1.shiftimm<>r2.shiftimm) or
         (r1.addressmode<>r2.addressmode) or
         (r1.shiftmode<>r2.shiftmode) then
        exit;
      Result:=true;
    end;
  { Checks whether instr is an instruction whose opcode, condition and
    postfix are each contained in the given set.
    An empty set acts as a wildcard for that property. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (op<>[]) and not(taicpu(instr).opcode in op) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Checks whether instr is an instruction with exactly opcode op whose
    condition and postfix are contained in the given sets.
    An empty cond or postfix set acts as a wildcard. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. They match if they have the same operand type
    and, for constants, registers, condition codes and references, the
    same payload. Any other operand type never matches. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_reg:
            Result:=oper1.reg = oper2.reg;
          top_const:
            Result:=oper1.val = oper2.val;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          top_conditioncode:
            Result:=oper1.cc = oper2.cc;
          else
            Result:=false;
        end;
    end;
  { True if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg = reg
      else
        Result:=false;
    end;
  { Removes movp from asml when it is a "moveq reg,#const" that merely
    re-establishes what the compare cmpp ("cmp reg,#const") has just
    proven: same register, same constant.

    cmpp  the compare instruction
    movp  the conditional move that may be redundant; it is removed from
          the list and freed when the pattern applies
    asml  the list both instructions live in

    A comment entry documenting the removal is left in the list. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    begin
      if (taicpu(movp).condition = C_EQ) and
         { the operand records are variant: only read .reg/.val once the
           operand type has been verified, otherwise a register operand
           could be compared as if it were a constant }
         (taicpu(cmpp).oper[0]^.typ = top_reg) and
         (taicpu(movp).oper[0]^.typ = top_reg) and
         (taicpu(cmpp).oper[1]^.typ = top_const) and
         (taicpu(movp).oper[1]^.typ = top_const) and
         (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
         (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
        begin
          { insert the note first, movp is still a valid anchor here }
          asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
          asml.remove(movp);
          movp.free;
        end;
    end;
  { Returns true if instruction hp writes a new value into reg.

    reg  register to look for
    hp   list entry to inspect; nil or a non-instruction yields false

    Handled explicitly:
      - compares, branches, SWI, MSR and PLD write no register at all
      - LDR/STR with pre-/post-indexing write their base register
      - UMULL/UMLAL/SMULL/SMLAL write oper[0] and oper[1]
      - LDM writes every register of its register set
      - LDM/STM with pre-/post-indexing write their base register (oper[0])
    For everything else (except STR) oper[0] is taken as the destination. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR only reads its first operand: it must not fall through to
              the generic "oper[0] is the destination" check below, else the
              stored register would be reported as overwritten }
            if p.opcode=A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor }
        {
          MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;
      if regLoadedWithNewValue then
        exit;
      { an instruction without operands has no oper[0] to inspect }
      if p.ops=0 then
        exit;
      case p.oper[0]^.typ of
        { the common case: oper[0] is the destination register }
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following oper[0] }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and
             (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { Conservative heuristic telling whether ref is known to be 8 byte aligned.
    Only answers true for the abi_eabi, abi_armeb and abi_eabihf ABIs and
    only for two shapes of reference:
      - R13 as base or index with a non-negative offset divisible by 8
      - R11 as base or index while R11 is the frame pointer, with an offset
        <= 0 for which offset+4 is divisible by 8 }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { stack pointer relative access }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          Result:=true;
          exit;
        end;
      { frame pointer relative access:
        when using NR_R11, it has always a value of <qword align>+4 }
      Result:=(ref.offset<=0) and
              ((abs(ref.offset+4) mod 8)=0) and
              (current_procinfo.framepointer=NR_R11) and
              ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if instruction hp reads reg through one of its operands:
    as a plain register, as member of a register set, as shift register of
    a shifter operand, or as base/index of a reference.
    nil or a non-instruction yields false.

    Scanning normally starts at oper[1], because oper[0] is the destination;
    for stores, LDM/STM, PLD, compares, branches and SMLAL/UMLAL oper[0]
    is scanned as well. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      idx: longint;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);
      { pick the first operand that can be a source }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        idx:=0
      else
        idx:=1;
      while idx<instr.ops do
        begin
          case instr.oper[idx]^.typ of
            top_ref:
              Result:=(instr.oper[idx]^.ref^.base = reg) or
                      (instr.oper[idx]^.ref^.index = reg);
            top_shifterop:
              Result:=instr.oper[idx]^.shifterop^.rs = reg;
            top_regset:
              Result:=getsupreg(reg) in instr.oper[idx]^.regset^;
            top_reg:
              Result:=(instr.oper[idx]^.reg = reg) or
                      { STRD also stores the register following oper[0] }
                      ((idx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                       (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
          end;
          { stop at the first operand that reads reg }
          if Result then
            exit;
          inc(idx);
        end;
    end;
  { Decides whether reg still holds a needed value after instruction p.

    AllUsedRegs is first updated with the entries following p. The result
    is true only if reg is marked as used there, p itself does not load a
    new value into reg, and additionally one of the following holds:
      - there is no next instruction,
      - the next instruction reads reg,
      - the next instruction does not load a new value into reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p denotes the following instruction }
      if not GetNextInstruction(p,p) then
        begin
          Result:=true;
          exit;
        end;
      Result:=instructionLoadsFromReg(reg,p) or
              not regLoadedWithNewValue(reg,p);
    end;
  { Starting behind Current, advances Next until one of these holds:
      - GetNextInstruction fails (the function then returns false),
      - Next is not an instruction,
      - Next uses reg,
      - Next is a call or jump,
      - Next uses the program counter.
    In all but the first case the result is true. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Inserts s as a comment entry in front of p in the assembler list.
    Without DEBUG_AOPTCPU the procedure is an empty inline stub. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Turns
        <op>  regA, ...        (p)
        mov   regB, regA       (movp)
    into
        <op>  regB, ...
    provided that
      - movp is a two operand mov without postfix, carrying p's condition,
      - regB is neither PC nor R14 (lr),
      - regB is not used between p and movp,
      - p is not a MUL/MLA whose oper[1] is regB,
      - a deallocation of regA is found behind movp.
    Register (de)allocation entries are adjusted accordingly.
    optimizer is only used inside the debug message. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      regAlloc,
      regDealloc : tai_regalloc;
      prevInstr : tai;
      srcReg,
      dstReg : TRegister;
    begin
      if not MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) then
        exit;
      { We can't optimize if there is a shiftop }
      if taicpu(movp).ops<>2 then
        exit;
      srcReg:=taicpu(p).oper[0]^.reg;
      if not MatchOperand(taicpu(movp).oper[1]^, srcReg) then
        exit;
      dstReg:=taicpu(movp).oper[0]^.reg;
      { don't mess with moves to pc or lr }
      if (dstReg=NR_PC) or (dstReg=NR_R14) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(dstReg,p,movp) then
        exit;
      { MUL and MLA do not allow oper[0] and oper[1] to be the same register }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = dstReg) then
        exit;

      regDealloc:=FindRegDeAlloc(srcReg,tai(movp.Next));
      if not assigned(regDealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);

      { srcReg is not used anymore, try to find its allocation
        and remove it if possible }
      GetLastInstruction(p,prevInstr);
      asml.Remove(regDealloc);
      regAlloc:=FindRegAlloc(srcReg,tai(prevInstr.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          regAlloc.free;
          regDealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but right behind p }
        asml.InsertAfter(regDealloc,p);

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,prevInstr);
      regAlloc:=FindRegAlloc(dstReg,tai(prevInstr.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          asml.InsertBefore(regAlloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstReg,UsedRegs);
        end;

      { finally let p write the target directly and get rid of the mov }
      taicpu(p).loadreg(0,dstReg);
      asml.remove(movp);
      movp.free;
    end;
  318. {
  319. optimize
  320. ldr/str regX,[reg1]
  321. ...
  322. add/sub reg1,reg1,regY/const
  323. into
  324. ldr/str regX,[reg1], regY/const
  325. }
  { Tries to merge
        ldr/str regX,[base]
        ...
        add/sub base,base,regY/const
    into the post-indexed form
        ldr/str regX,[base], regY/const

    p  the load/store instruction to start from; its oper[1] has to be a
       plain "[base]" reference (offset addressing, no index, offset 0)

    Returns true if the add/sub was folded into p and removed from the list.

    The add/sub must be unconditional, without postfix and have exactly
    three operands: a fourth (shifter) operand cannot be expressed by the
    rewrite done here and must not be dropped silently. }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
      step : taicpu;
      base : TRegister;
      stepOk : boolean;
    begin
      Result:=false;
      { oper[1] must really be a reference before its fields are read }
      if (p.oper[1]^.typ<>top_ref) or
         (p.oper[1]^.ref^.addressmode<>AM_OFFSET) or
         (p.oper[1]^.ref^.index<>NR_NO) or
         (p.oper[1]^.ref^.offset<>0) then
        exit;
      base:=p.oper[1]^.ref^.base;

      if not GetNextInstructionUsingReg(p, hp1, base) then
        exit;
      { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
      if not MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) then
        exit;
      step:=taicpu(hp1);
      { exactly "add/sub base,base,<reg or const>" }
      if (step.ops<>3) or
         not MatchOperand(step.oper[0]^, base) or
         not MatchOperand(step.oper[1]^, base) then
        exit;

      { operands are variant records: only touch .reg or .val after the
        operand type has been established }
      case step.oper[2]^.typ of
        top_reg:
          { don't apply the optimization if the (new) index register is loaded }
          stepOk:=(p.oper[0]^.reg<>step.oper[2]^.reg) and
                  not(RegModifiedBetween(step.oper[2]^.reg,p,hp1));
        top_const:
          { valid offset? }
          stepOk:=(abs(step.oper[2]^.val)<256) or
                  ((abs(step.oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]));
        else
          stepOk:=false;
      end;
      if not stepOk then
        exit;

      { don't apply the optimization if the base register is loaded }
      if (p.oper[0]^.reg=base) or
         RegModifiedBetween(base,p,hp1) then
        exit;

      DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
      p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
      if step.oper[2]^.typ=top_const then
        begin
          { a subtraction becomes a negative post-increment }
          if step.opcode=A_ADD then
            p.oper[1]^.ref^.offset:=step.oper[2]^.val
          else
            p.oper[1]^.ref^.offset:=-step.oper[2]^.val;
        end
      else
        begin
          p.oper[1]^.ref^.index:=step.oper[2]^.reg;
          if step.opcode=A_ADD then
            p.oper[1]^.ref^.signindex:=1
          else
            p.oper[1]^.ref^.signindex:=-1;
        end;
      asml.Remove(hp1);
      hp1.Free;
      Result:=true;
    end;
  377. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  378. var
  379. hp1,hp2: tai;
  380. i, i2: longint;
  381. TmpUsedRegs: TAllUsedRegs;
  382. tempop: tasmop;
  383. function IsPowerOf2(const value: DWord): boolean; inline;
  384. begin
  385. Result:=(value and (value - 1)) = 0;
  386. end;
  387. begin
  388. result := false;
  389. case p.typ of
  390. ait_instruction:
  391. begin
  392. {
  393. change
  394. <op> reg,x,y
  395. cmp reg,#0
  396. into
  397. <op>s reg,x,y
  398. }
  399. { this optimization can applied only to the currently enabled operations because
  400. the other operations do not update all flags and FPC does not track flag usage }
  401. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  402. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  403. GetNextInstruction(p, hp1) and
  404. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  405. (taicpu(hp1).oper[1]^.typ = top_const) and
  406. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  407. (taicpu(hp1).oper[1]^.val = 0) and
  408. GetNextInstruction(hp1, hp2) and
  409. { be careful here, following instructions could use other flags
  410. however after a jump fpc never depends on the value of flags }
  411. { All above instructions set Z and N according to the following
  412. Z := result = 0;
  413. N := result[31];
  414. EQ = Z=1; NE = Z=0;
  415. MI = N=1; PL = N=0; }
  416. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  417. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  418. begin
  419. DebugMsg('Peephole OpCmp2OpS done', p);
  420. taicpu(p).oppostfix:=PF_S;
  421. { move flag allocation if possible }
  422. GetLastInstruction(hp1, hp2);
  423. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  424. if assigned(hp2) then
  425. begin
  426. asml.Remove(hp2);
  427. asml.insertbefore(hp2, p);
  428. end;
  429. asml.remove(hp1);
  430. hp1.free;
  431. end
  432. else
  433. case taicpu(p).opcode of
  434. A_STR:
  435. begin
  436. { change
  437. str reg1,ref
  438. ldr reg2,ref
  439. into
  440. str reg1,ref
  441. mov reg2,reg1
  442. }
  443. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  444. (taicpu(p).oppostfix=PF_None) and
  445. GetNextInstruction(p,hp1) and
  446. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  447. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  448. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  449. begin
  450. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  451. begin
  452. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  453. asml.remove(hp1);
  454. hp1.free;
  455. end
  456. else
  457. begin
  458. taicpu(hp1).opcode:=A_MOV;
  459. taicpu(hp1).oppostfix:=PF_None;
  460. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  461. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  462. end;
  463. result := true;
  464. end
  465. { change
  466. str reg1,ref
  467. str reg2,ref
  468. into
  469. strd reg1,ref
  470. }
  471. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  472. (taicpu(p).oppostfix=PF_None) and
  473. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  474. GetNextInstruction(p,hp1) and
  475. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  476. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  477. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  478. { str ensures that either base or index contain no register, else ldr wouldn't
  479. use an offset either
  480. }
  481. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  482. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  483. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  484. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  485. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  486. begin
  487. DebugMsg('Peephole StrStr2Strd done', p);
  488. taicpu(p).oppostfix:=PF_D;
  489. asml.remove(hp1);
  490. hp1.free;
  491. end;
  492. LookForPostindexedPattern(taicpu(p));
  493. end;
  494. A_LDR:
  495. begin
  496. { change
  497. ldr reg1,ref
  498. ldr reg2,ref
  499. into ...
  500. }
  501. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  502. GetNextInstruction(p,hp1) and
  503. { ldrd is not allowed here }
  504. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  505. begin
  506. {
  507. ...
  508. ldr reg1,ref
  509. mov reg2,reg1
  510. }
  511. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  512. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  513. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  514. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  515. begin
  516. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  517. begin
  518. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  519. asml.remove(hp1);
  520. hp1.free;
  521. end
  522. else
  523. begin
  524. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  525. taicpu(hp1).opcode:=A_MOV;
  526. taicpu(hp1).oppostfix:=PF_None;
  527. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  528. end;
  529. result := true;
  530. end
  531. {
  532. ...
  533. ldrd reg1,ref
  534. }
  535. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  536. { ldrd does not allow any postfixes ... }
  537. (taicpu(p).oppostfix=PF_None) and
  538. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  539. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  540. { ldr ensures that either base or index contain no register, else ldr wouldn't
  541. use an offset either
  542. }
  543. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  544. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  545. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  546. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  547. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  548. begin
  549. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  550. taicpu(p).oppostfix:=PF_D;
  551. asml.remove(hp1);
  552. hp1.free;
  553. end;
  554. end;
  555. LookForPostindexedPattern(taicpu(p));
  556. { Remove superfluous mov after ldr
  557. changes
  558. ldr reg1, ref
  559. mov reg2, reg1
  560. to
  561. ldr reg2, ref
  562. conditions are:
  563. * no ldrd usage
  564. * reg1 must be released after mov
  565. * mov can not contain shifterops
  566. * ldr+mov have the same conditions
  567. * mov does not set flags
  568. }
  569. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  570. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  571. end;
  572. A_MOV:
  573. begin
  574. { fold
  575. mov reg1,reg0, shift imm1
  576. mov reg1,reg1, shift imm2
  577. }
  578. if (taicpu(p).ops=3) and
  579. (taicpu(p).oper[2]^.typ = top_shifterop) and
  580. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  581. getnextinstruction(p,hp1) and
  582. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  583. (taicpu(hp1).ops=3) and
  584. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  585. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  586. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  587. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  588. begin
  589. { fold
  590. mov reg1,reg0, lsl 16
  591. mov reg1,reg1, lsr 16
  592. strh reg1, ...
  593. dealloc reg1
  594. to
  595. strh reg1, ...
  596. dealloc reg1
  597. }
  598. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  599. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  600. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  601. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  602. getnextinstruction(hp1,hp2) and
  603. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  604. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  605. begin
  606. CopyUsedRegs(TmpUsedRegs);
  607. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  608. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  609. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  610. begin
  611. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  612. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  613. asml.remove(p);
  614. asml.remove(hp1);
  615. p.free;
  616. hp1.free;
  617. p:=hp2;
  618. end;
  619. ReleaseUsedRegs(TmpUsedRegs);
  620. end
  621. { fold
  622. mov reg1,reg0, shift imm1
  623. mov reg1,reg1, shift imm2
  624. to
  625. mov reg1,reg0, shift imm1+imm2
  626. }
  627. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  628. { asr makes no use after a lsr, the asr can be foled into the lsr }
  629. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  630. begin
  631. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  632. { avoid overflows }
  633. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  634. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  635. SM_ROR:
  636. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  637. SM_ASR:
  638. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  639. SM_LSR,
  640. SM_LSL:
  641. begin
  642. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  643. InsertLLItem(p.previous, p.next, hp1);
  644. p.free;
  645. p:=hp1;
  646. end;
  647. else
  648. internalerror(2008072803);
  649. end;
  650. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  651. asml.remove(hp1);
  652. hp1.free;
  653. result := true;
  654. end
  655. { fold
  656. mov reg1,reg0, shift imm1
  657. mov reg1,reg1, shift imm2
  658. mov reg1,reg1, shift imm3 ...
  659. }
  660. else if getnextinstruction(hp1,hp2) and
  661. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  662. (taicpu(hp2).ops=3) and
  663. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  664. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  665. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  666. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  667. begin
  668. { mov reg1,reg0, lsl imm1
  669. mov reg1,reg1, lsr/asr imm2
  670. mov reg1,reg1, lsl imm3 ...
  671. if imm3<=imm1 and imm2>=imm3
  672. to
  673. mov reg1,reg0, lsl imm1
  674. mov reg1,reg1, lsr/asr imm2-imm3
  675. }
  676. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  677. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  678. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  679. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  680. begin
  681. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  682. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  683. asml.remove(hp2);
  684. hp2.free;
  685. result := true;
  686. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  687. begin
  688. asml.remove(hp1);
  689. hp1.free;
  690. end;
  691. end
  692. { mov reg1,reg0, lsr/asr imm1
  693. mov reg1,reg1, lsl imm2
  694. mov reg1,reg1, lsr/asr imm3 ...
  695. if imm3>=imm1 and imm2>=imm1
  696. to
  697. mov reg1,reg0, lsl imm2-imm1
  698. mov reg1,reg1, lsr/asr imm3 ...
  699. }
  700. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  701. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  702. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  703. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  704. begin
  705. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  706. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  707. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  708. asml.remove(p);
  709. p.free;
  710. p:=hp2;
  711. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  712. begin
  713. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  714. asml.remove(hp1);
  715. hp1.free;
  716. p:=hp2;
  717. end;
  718. result := true;
  719. end;
  720. end;
  721. end;
  722. { Change the common
  723. mov r0, r0, lsr #24
  724. and r0, r0, #255
  725. and remove the superfluous and
  726. This could be extended to handle more cases.
  727. }
  728. if (taicpu(p).ops=3) and
  729. (taicpu(p).oper[2]^.typ = top_shifterop) and
  730. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  731. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  732. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  733. getnextinstruction(p,hp1) and
  734. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  735. (taicpu(hp1).ops=3) and
  736. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  737. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  738. (taicpu(hp1).oper[2]^.typ = top_const) and
  739. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  740. For LSR #25 and an AndConst of 255 that whould go like this:
  741. 255 and ((2 shl (32-25))-1)
  742. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  743. LSR #25 and AndConst of 254:
  744. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  745. }
  746. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  747. begin
  748. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  749. asml.remove(hp1);
  750. hp1.free;
  751. end;
  752. {
  753. optimize
  754. mov rX, yyyy
  755. ....
  756. }
  757. if (taicpu(p).ops = 2) and
  758. GetNextInstruction(p,hp1) and
  759. (tai(hp1).typ = ait_instruction) then
  760. begin
  761. {
  762. This changes the very common
  763. mov r0, #0
  764. str r0, [...]
  765. mov r0, #0
  766. str r0, [...]
  767. and removes all superfluous mov instructions
  768. }
  769. if (taicpu(p).oper[1]^.typ = top_const) and
  770. (taicpu(hp1).opcode=A_STR) then
  771. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  772. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  773. GetNextInstruction(hp1, hp2) and
  774. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  775. (taicpu(hp2).ops = 2) and
  776. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  777. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  778. begin
  779. DebugMsg('Peephole MovStrMov done', hp2);
  780. GetNextInstruction(hp2,hp1);
  781. asml.remove(hp2);
  782. hp2.free;
  783. if not assigned(hp1) then break;
  784. end
  785. {
  786. This removes the first mov from
  787. mov rX,...
  788. mov rX,...
  789. }
  790. else if taicpu(hp1).opcode=A_MOV then
  791. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  792. (taicpu(hp1).ops = 2) and
  793. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  794. { don't remove the first mov if the second is a mov rX,rX }
  795. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  796. begin
  797. DebugMsg('Peephole MovMov done', p);
  798. asml.remove(p);
  799. p.free;
  800. p:=hp1;
  801. GetNextInstruction(hp1,hp1);
  802. if not assigned(hp1) then
  803. break;
  804. end;
  805. end;
  806. {
  807. change
  808. mov r1, r0
  809. add r1, r1, #1
  810. to
  811. add r1, r0, #1
  812. Todo: Make it work for mov+cmp too
  813. CAUTION! If this one is successful p might not be a mov instruction anymore!
  814. }
  815. if (taicpu(p).ops = 2) and
  816. (taicpu(p).oper[1]^.typ = top_reg) and
  817. (taicpu(p).oppostfix = PF_NONE) and
  818. GetNextInstruction(p, hp1) and
  819. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  820. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  821. [C_NONE, taicpu(hp1).condition], []) and
  822. {MOV and MVN might only have 2 ops}
  823. (taicpu(hp1).ops = 3) and
  824. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  825. (taicpu(hp1).oper[1]^.typ = top_reg) and
  826. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  827. begin
  828. { When we get here we still don't know if the registers match}
  829. for I:=1 to 2 do
  830. {
  831. If the first loop was successful p will be replaced with hp1.
  832. The checks will still be ok, because all required information
  833. will also be in hp1 then.
  834. }
  835. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  836. begin
  837. DebugMsg('Peephole RedundantMovProcess done', hp1);
  838. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  839. if p<>hp1 then
  840. begin
  841. asml.remove(p);
  842. p.free;
  843. p:=hp1;
  844. end;
  845. end;
  846. end;
  847. { This folds shifterops into following instructions
  848. mov r0, r1, lsl #8
  849. add r2, r3, r0
  850. to
  851. add r2, r3, r1, lsl #8
  852. CAUTION! If this one is successful p might not be a mov instruction anymore!
  853. }
  854. if (taicpu(p).opcode = A_MOV) and
  855. (taicpu(p).ops = 3) and
  856. (taicpu(p).oper[1]^.typ = top_reg) and
  857. (taicpu(p).oper[2]^.typ = top_shifterop) and
  858. (taicpu(p).oppostfix = PF_NONE) and
  859. GetNextInstruction(p, hp1) and
  860. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  861. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  862. A_CMP, A_CMN],
  863. [taicpu(p).condition], [PF_None]) and
  864. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  865. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  866. (
  867. {Only ONE of the two src operands is allowed to match}
  868. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  869. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  870. ) then
  871. begin
  872. CopyUsedRegs(TmpUsedRegs);
  873. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  874. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  875. I2:=0
  876. else
  877. I2:=1;
  878. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  879. for I:=I2 to taicpu(hp1).ops-1 do
  880. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  881. begin
  882. { If the parameter matched on the second op from the RIGHT
  883. we have to switch the parameters, this will not happen for CMP
  884. were we're only evaluating the most right parameter
  885. }
  886. if I <> taicpu(hp1).ops-1 then
  887. begin
  888. {The SUB operators need to be changed when we swap parameters}
  889. case taicpu(hp1).opcode of
  890. A_SUB: tempop:=A_RSB;
  891. A_SBC: tempop:=A_RSC;
  892. A_RSB: tempop:=A_SUB;
  893. A_RSC: tempop:=A_SBC;
  894. else tempop:=taicpu(hp1).opcode;
  895. end;
  896. if taicpu(hp1).ops = 3 then
  897. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  898. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  899. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  900. else
  901. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  902. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  903. taicpu(p).oper[2]^.shifterop^);
  904. end
  905. else
  906. if taicpu(hp1).ops = 3 then
  907. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  908. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  909. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  910. else
  911. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  912. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  913. taicpu(p).oper[2]^.shifterop^);
  914. asml.insertbefore(hp2, p);
  915. asml.remove(p);
  916. asml.remove(hp1);
  917. p.free;
  918. hp1.free;
  919. p:=hp2;
  920. GetNextInstruction(p,hp1);
  921. DebugMsg('Peephole FoldShiftProcess done', p);
  922. break;
  923. end;
  924. ReleaseUsedRegs(TmpUsedRegs);
  925. end;
  926. {
  927. Often we see shifts and then a superfluous mov to another register
  928. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  929. }
  930. if (taicpu(p).opcode = A_MOV) and
  931. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  932. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  933. end;
  934. A_ADD,
  935. A_ADC,
  936. A_RSB,
  937. A_RSC,
  938. A_SUB,
  939. A_SBC,
  940. A_AND,
  941. A_BIC,
  942. A_EOR,
  943. A_ORR,
  944. A_MLA,
  945. A_MUL:
  946. begin
  947. {
  948. change
  949. and reg2,reg1,const1
  950. and reg2,reg2,const2
  951. to
  952. and reg2,reg1,(const1 and const2)
  953. }
  954. if (taicpu(p).opcode = A_AND) and
  955. (taicpu(p).oper[1]^.typ = top_reg) and
  956. (taicpu(p).oper[2]^.typ = top_const) and
  957. GetNextInstruction(p, hp1) and
  958. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  959. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  960. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  961. (taicpu(hp1).oper[2]^.typ = top_const) then
  962. begin
  963. DebugMsg('Peephole AndAnd2And done', p);
  964. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  965. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  966. asml.remove(hp1);
  967. hp1.free;
  968. end;
  969. {
  970. change
  971. add/sub reg2,reg1,const1
  972. str/ldr reg3,[reg2,const2]
  973. dealloc reg2
  974. to
  975. str/ldr reg3,[reg1,const2+/-const1]
  976. }
  977. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  978. (taicpu(p).oper[1]^.typ = top_reg) and
  979. (taicpu(p).oper[2]^.typ = top_const) then
  980. begin
  981. hp1:=p;
  982. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  983. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  984. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  985. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  986. { don't optimize if the register is stored/overwritten }
  987. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  988. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  989. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  990. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  991. ldr postfix }
  992. (((taicpu(p).opcode=A_ADD) and
  993. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  994. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<4096)) or
  995. (abs(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val)<256)
  996. )
  997. ) or
  998. ((taicpu(p).opcode=A_SUB) and
  999. (((taicpu(hp1).oppostfix in [PF_None,PF_B]) and
  1000. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<4096)) or
  1001. (abs(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val)<256)
  1002. )
  1003. )
  1004. ) do
  1005. begin
  1006. { neither reg1 nor reg2 might be changed inbetween }
  1007. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1008. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1009. break;
  1010. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1011. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1012. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1013. begin
  1014. { remember last instruction }
  1015. hp2:=hp1;
  1016. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1017. hp1:=p;
  1018. { fix all ldr/str }
  1019. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1020. begin
  1021. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1022. if taicpu(p).opcode=A_ADD then
  1023. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1024. else
  1025. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1026. if hp1=hp2 then
  1027. break;
  1028. end;
  1029. GetNextInstruction(p,hp1);
  1030. asml.remove(p);
  1031. p.free;
  1032. p:=hp1;
  1033. break;
  1034. end;
  1035. end;
  1036. end;
  1037. {
  1038. change
  1039. add reg1, ...
  1040. mov reg2, reg1
  1041. to
  1042. add reg2, ...
  1043. }
  1044. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1045. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1046. end;
  1047. A_CMP:
  1048. begin
  1049. {
  1050. change
  1051. cmp reg,const1
  1052. moveq reg,const1
  1053. movne reg,const2
  1054. to
  1055. cmp reg,const1
  1056. movne reg,const2
  1057. }
  1058. if (taicpu(p).oper[1]^.typ = top_const) and
  1059. GetNextInstruction(p, hp1) and
  1060. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1061. (taicpu(hp1).oper[1]^.typ = top_const) and
  1062. GetNextInstruction(hp1, hp2) and
  1063. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1064. (taicpu(hp1).oper[1]^.typ = top_const) then
  1065. begin
  1066. RemoveRedundantMove(p, hp1, asml);
  1067. RemoveRedundantMove(p, hp2, asml);
  1068. end;
  1069. end;
  1070. end;
  1071. end;
  1072. end;
  1073. end;
  { Tells whether p has to be the final instruction of a run that is turned
    into conditionally executed code.

    Returns true only for an instruction that
      - is one of the listed call/compare/test/SWI opcodes, or
      - carries the S postfix, or
      - has the program counter as its first (register) operand.
    Anything that is not an ait_instruction yields false.

    The original note on this function gives the motivation: instructions
    modifying the CPSR can be only the last instruction. }
  function MustBeLast(p : tai) : boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      { only real instructions can qualify }
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);
      if insn.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if insn.oppostfix=PF_S then
        Result:=true
      else
        { writes to PC: first operand is the register NR_PC }
        Result:=(insn.ops>=1) and
          (insn.oper[0]^.typ=top_reg) and
          (insn.oper[0]^.reg=NR_PC);
    end;
  { Second peephole pass: replaces short forward conditional branches by
    conditionally executed instructions.

    The pass walks the block from BlockStart to BlockEnd and only looks at
    conditional A_B instructions. Two shapes are recognised:

      1)     Bcc xxx                     2)     Bcc xxx
             <1..4 instructions>                <instructions 1>
         xxx:                                   B   yyy
                                            xxx:
                                                <instructions 2>
                                            yyy:

    In shape 1 the skipped instructions receive the inverse condition and the
    branch is removed. In shape 2 the first group receives the inverse
    condition, the second group the original condition, and both branches are
    removed. Every scanned instruction has to satisfy CanBeCond; an
    instruction for which MustBeLast is true ends the scan of shape 1 and
    disables shape 2 (WasLast).

    The reference count of each label whose branch is removed is decremented.

    Fix relative to the previous revision: the loop that re-conditions
    <instructions 2> now checks hp1.typ=ait_instruction before casting to
    taicpu, exactly like the two other re-conditioning loops in this
    procedure. Previously the cast was unconditional, so a non-instruction
    node reached at that point had its memory overwritten. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                               Bxx   xxx
                            <several instructions>
                            xxx:
                        }
                        { l counts the instructions scanned after the branch }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                { nothing may follow this instruction inside the
                                  conditional run, so the scan ends right after it }
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 keeps the branch; p moves on to the first
                                      instruction behind it }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points at the next node to inspect }
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an instruction
                                in block #1 which can not be optimized.
                              }
                              if not WasLast then
                                begin
                                  { check further for
                                         Bcc   xxx
                                      <several instructions 1>
                                         B   yyy
                                      xxx:
                                      <several instructions 2>
                                      yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 keeps the Bcc; p moves on to the first
                                            instruction behind it }
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            { only touch real instructions; casting any
                                              other node to taicpu would corrupt it }
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1)) or
                                            (hp1.typ=ait_label);
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          { p already points at the next node to inspect }
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is referenced by p1.

    An A_BL instruction is answered with true for every register, whatever
    Reg is. All other nodes are delegated to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      { everything that is not a BL goes to the generic check }
      if (p1.typ <> ait_instruction) or (taicpu(p1).opcode <> A_BL) then
        begin
          Result:=inherited RegInInstruction(Reg, p1);
          exit;
        end;
      Result:=true;
    end;
  const
    { Set of opcodes which might write, or do write, to memory.
      SchedulerPass1Cpu consults it before moving a load in front of the
      preceding instruction: if that instruction's opcode is in this set the
      load is only moved when one of the additional store checks succeeds. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
      A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  { Adjusts the register live information when the two instructions p and hp1
    are swapped; they must follow one after the other (p first, hp1 second).

    Two corrections are applied to cg.rg[...].live_start / live_end:
      - a register used by p whose live range starts at p, and which hp1 also
        references, now starts at hp1;
      - a register used by hp1 whose live range ends at hp1, and which p also
        references, now ends at p.

    Registers are collected from plain register operands, the base and index
    of references, the shift register of shifter operands and the members
    (R0..R15) of register sets. NR_NO entries are ignored.

    Fix relative to the previous revision: the live-end loop called
    CheckLiveStart for the shift register of hp1's shifter operands. Every
    other operand kind in that loop uses CheckLiveEnd, and the loop's purpose
    is the live-end correction, so the shifter operand now uses CheckLiveEnd
    as well. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { if the live range of reg ends at hp1 and p references reg,
      move the end to p }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { if the live range of reg starts at p and hp1 references reg,
      move the start to hp1 }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { was CheckLiveStart: this loop handles the live END of hp1's registers }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Scheduling pass run before register allocation: moves a load one
    instruction earlier when its result is needed immediately afterwards.

    For every instruction p in BlockStart..BlockEnd that is directly followed
    by a load (LDR/LDRB/LDRH/LDRSB/LDRSH) whose destination register is
    referenced by the instruction after the load, the sequence

        p, load, consumer      becomes      load, p, consumer

    provided that
      - p is not a potential memory write (opcode_could_mem_write) and does
        not modify PC, or p is a store and the load is PC relative / refers to
        a symbol with offset 0, or p is a plain STR/STRB/STRH on the same base
        register, without index, whose offset differs from the load's offset
        by at least the larger of the two operand sizes;
      - p does not reference the loaded register;
      - p and the load carry the same condition;
      - p does not modify the base or index register of the load.

    Register deallocation markers next to p that mention a register of p are
    moved together with p, and the live ranges are corrected via SwapRegLive.

    p is overwritten with BlockStart on entry; the function always returns
    true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      loadins,consumer,backscan,fwdscan,moved : tai;
      carried : TAsmList;
    begin
      result:=true;
      { collects p plus the dealloc markers that travel with it }
      carried:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          { anything that is not an instruction is simply stepped over }
          if p.typ<>ait_instruction then
            begin
              p:=tai(p.next);
              continue;
            end;

          if GetNextInstruction(p,loadins) and
            (loadins.typ=ait_instruction) and
            (taicpu(loadins).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction changes potentially a memory location }
            ( (not(taicpu(p).opcode in opcode_could_mem_write) and
               not(RegModifiedByInstruction(NR_PC,p))
              ) or
              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
               ((taicpu(loadins).oper[1]^.ref^.base=NR_PC) or
                (assigned(taicpu(loadins).oper[1]^.ref^.symboldata) and
                 (taicpu(loadins).oper[1]^.ref^.offset=0)
                )
               ) or
               { try to prove that the memory accesses don't overlap }
               ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                (taicpu(p).oper[1]^.ref^.base=taicpu(loadins).oper[1]^.ref^.base) and
                (taicpu(p).oppostfix=PF_None) and
                (taicpu(loadins).oppostfix=PF_None) and
                (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                (taicpu(loadins).oper[1]^.ref^.index=NR_NO) and
                { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(loadins).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(loadins).oper[0]^.reg)]))
               )
              )
            ) and
            GetNextInstruction(loadins,consumer) and
            (consumer.typ=ait_instruction) and
            { loaded register used by next instruction? }
            (RegInInstruction(taicpu(loadins).oper[0]^.reg,consumer)) and
            { loaded register not used by previous instruction? }
            not(RegInInstruction(taicpu(loadins).oper[0]^.reg,p)) and
            { same condition? }
            (taicpu(p).condition=taicpu(loadins).condition) and
            { first instruction might not change the register used as base }
            ((taicpu(loadins).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadins).oper[1]^.ref^.base,p))
            ) and
            { first instruction might not change the register used as index }
            ((taicpu(loadins).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadins).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember both neighbours before p leaves the list }
              backscan:=tai(p.Previous);
              fwdscan:=tai(p.next);
              asml.Remove(p);

              { dealloc markers in front of p that mention one of p's
                registers move together with p }
              while assigned(backscan) and (backscan.typ<>ait_instruction) do
                begin
                  moved:=backscan;
                  backscan:=tai(backscan.Previous);
                  if (moved.typ=ait_regalloc) and
                    (tai_regalloc(moved).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(moved).reg,p) then
                    begin
                      asml.Remove(moved);
                      carried.Concat(moved);
                    end;
                end;

              carried.Concat(p);
              SwapRegLive(taicpu(p),taicpu(loadins));

              { the same for dealloc markers behind p }
              while assigned(fwdscan) and (fwdscan.typ<>ait_instruction) do
                begin
                  moved:=fwdscan;
                  fwdscan:=tai(fwdscan.next);
                  if (moved.typ=ait_regalloc) and
                    (tai_regalloc(moved).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(moved).reg,p) then
                    begin
                      asml.Remove(moved);
                      carried.Concat(moved);
                    end;
                end;

              { re-insert in swapped order: load first, then p and its markers,
                both in front of the consumer }
              asml.Remove(loadins);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),consumer);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(loadins,consumer);
              asml.InsertListBefore(consumer,carried);
              p:=tai(p.next)
            end
          else
            { nothing to reschedule here: go on with the following instruction }
            p:=loadins;
        end;
      carried.Free;
    end;
  { Second peephole pass of TCpuThumb2AsmOptimizer.
    Placeholder: the body contains no statements, so calling it changes nothing. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  { Unit initialization: stores this unit's optimizer class and
    pre-register-allocation scheduler class in the class-reference variables
    casmoptimizer and cpreregallocscheduler. }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
  End.