aoptcpu.pas 96 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
  24. Type
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor is declared here; the inherited one is used }

    { CPU specific overrides of the generic assembler optimizer }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): boolean; override;

    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    function RegUsedAfterInstruction(reg: TRegister; p: tai;
                                     var AllUsedRegs: TAllUsedRegs): boolean;

    { Steps forward from Current and leaves in Next the first entry that
      is not an instruction, that mentions reg or the program counter,
      or that is a call/jump.
      The result is false when no further instruction could be fetched. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): boolean;

    { outputs a debug message into the assembler file
      (only when DEBUG_AOPTCPU is defined, otherwise it does nothing) }
    procedure DebugMsg(const s: string; p: tai);
  protected
    function LookForPostindexedPattern(p: taicpu): boolean;
  end;
  { ARM flavour of the instruction scheduler, derived from TAsmScheduler }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific part of scheduler pass 1 }
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Thumb-2 variant of the optimizer; it replaces both peephole passes of
    TCpuAsmOptimizer }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor is declared here; the inherited one is used }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
  end;
  53. function MustBeLast(p : tai) : boolean;
  54. Implementation
  55. uses
  56. cutils,verbose,globals,
  57. systems,
  58. cpuinfo,
  59. cgobj,cgutils,procinfo,
  60. aasmbase,aasmdata;
  { Returns true when p is an instruction that currently carries no
    condition and whose opcode is not one of the exclusions tested below
    (IT..ITTTT, CBZ, CBNZ, PLD, and BLX unless its first operand is a
    register). }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional }
      if instr.condition<>C_None then
        exit;
      { the whole IT opcode range is excluded }
      if (instr.opcode>=A_IT) and (instr.opcode<=A_ITTTT) then
        exit;
      case instr.opcode of
        A_CBZ,
        A_CBNZ,
        A_PLD:
          exit;
        A_BLX:
          { only the register form of BLX is accepted }
          result:=instr.oper[0]^.typ=top_reg;
        else
          result:=true;
      end;
    end;
  { Field-by-field comparison of two memory references; true only when
    every compared field is identical. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers and scaling }
      if (r1.base<>r2.base) or (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { displacement and symbols }
      if (r1.offset<>r2.offset) or (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or (r1.refaddr<>r2.refaddr) then
        exit;
      { ARM specific parts of the reference }
      if (r1.signindex<>r2.signindex) or (r1.shiftimm<>r2.shiftimm) or
         (r1.shiftmode<>r2.shiftmode) then
        exit;
      Result:=r1.addressmode=r2.addressmode;
    end;
  { True when instr is an instruction whose opcode, condition and postfix
    are members of the given sets. An empty set acts as a wildcard for
    the respective property. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the ordinal is range checked before the set membership test }
      if (op<>[]) and
         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Overload for a single opcode: true when instr is an instruction with
    exactly opcode op whose condition and postfix are members of the given
    sets (an empty set matches anything). }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. They match when they have the same operand type
    and, for constants, registers, condition codes and references, the
    same payload. Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_reg:
            Result:=oper1.reg=oper2.reg;
          top_const:
            Result:=oper1.val=oper2.val;
          top_ref:
            Result:=RefsEqual(oper1.ref^, oper2.ref^);
          top_conditioncode:
            Result:=oper1.cc=oper2.cc;
          else
            Result:=false;
        end;
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Deletes movp from asml when it is an EQ-conditional instruction whose
    first operand is the register compared by cmpp and whose second
    operand equals the constant cmpp compares against. A comment entry is
    left in the list in its place.
    Both entries are accessed as taicpu without a type check, so the caller
    has to pass instructions. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpinstr, movinstr: taicpu;
    begin
      cmpinstr:=taicpu(cmpp);
      movinstr:=taicpu(movp);
      if movinstr.condition<>C_EQ then
        exit;
      if cmpinstr.oper[0]^.reg<>movinstr.oper[0]^.reg then
        exit;
      if cmpinstr.oper[1]^.val<>movinstr.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true when the instruction hp writes a new value into reg.

    hp may be nil or a non-instruction entry; the result is false then.

    Handled specially:
      - compare/branch/SWI/MSR/PLD: treated as writing no register at all
      - LDR/STR with pre-/post-indexed addressing: the base register is
        written back
      - UMLAL/UMULL/SMLAL/SMULL: operand 1 is written in addition to
        operand 0
      - LDM: every register of the register set is written
    For everything else operand 0 is regarded as the destination. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { A store only reads operand 0 (and its successor for STRD), so
              the generic "operand 0 is the destination" test below must
              not be applied to it; the write-back check above is the
              only way STR can change a register. }
            if p.opcode=A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor:
          MCR/MRC is currently not supported in FPC
          A_MRC:
            regLoadedWithNewValue :=
              (p.oper[2]^.typ = top_reg) and
              (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      { an instruction without operands has no operand 0 to inspect }
      if p.ops<1 then
        exit;

      case p.oper[0]^.typ of
        { the common case: operand 0 is the destination register }
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following operand 0 }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and
             (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { Conservative heuristic answering whether ref is known to be aligned to
    8 bytes. Only answers true on the EABI style ABIs listed below and
    only for references relative to R13 or to R11 used as frame pointer. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { R13 relative: non-negative offset that is a multiple of 8 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;

      { R11 relative: per the original author, R11 always holds a value of
        <qword align>+4 when used as frame pointer, hence the +4 }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true when the instruction hp reads reg through one of its
    operands. hp may be nil or a non-instruction entry, in which case the
    result is false.
    Operand 0 is skipped except for the opcodes listed below, for which
    the scan starts at operand 0. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      idx, first: longint;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);

      { for these instructions operand 0 has to be examined as well }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        first:=0
      else
        first:=1;

      for idx:=first to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_ref:
              Result:=
                (instr.oper[idx]^.ref^.base = reg) or
                (instr.oper[idx]^.ref^.index = reg);
            top_shifterop:
              Result:=instr.oper[idx]^.shifterop^.rs = reg;
            top_regset:
              Result:=(getsupreg(reg) in instr.oper[idx]^.regset^);
            top_reg:
              Result:=
                (instr.oper[idx]^.reg = reg) or
                { STRD also stores the register following operand 0 }
                ((idx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
          end;
          { stop at the first operand that reads reg }
          if Result then
            exit;
        end;
    end;
  { Checks whether aoffset is accepted as constant load/store offset.
    For the CPU types in cpu_thumb2 the accepted range is -255..4095.
    Otherwise |aoffset| must be below 256, or below 4096 when the postfix
    is PF_None or PF_B. }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if current_settings.cputype in cpu_thumb2 then
        begin
          result:=(aoffset>-256) and (aoffset<4096);
          exit;
        end;
      if abs(aoffset)<256 then
        result:=true
      else
        result:=(pf in [PF_None,PF_B]) and (abs(aoffset)<4096);
    end;
  { Decides whether reg is still in use after instruction p.
    AllUsedRegs is first updated with the list entries following p.
    The result is true only when
      - the register set marks reg as used, and
      - p itself does not load reg with a new value, and
      - there is no next instruction, or the next instruction reads reg,
        or the next instruction does not overwrite reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);

      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;

      { from here on p denotes the following instruction }
      if not GetNextInstruction(p,p) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,p) or
        not regLoadedWithNewValue(reg,p);
    end;
  { Advances from Current with GetNextInstruction until one of the stop
    conditions below holds and leaves the entry reached in Next.
    The result is that of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        { nothing left }
        if not Result then
          break;
        { not an instruction at all }
        if Next.typ<>ait_instruction then
          break;
        { the wanted register shows up }
        if RegInInstruction(reg,Next) then
          break;
        { change of program flow: call/jump or an instruction touching pc }
        if is_calljmp(taicpu(Next).opcode) then
          break;
        if RegInInstruction(NR_PC,Next) then
          break;
      until false;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug build: puts s as a comment entry in front of p in the assembler list. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    var
      note : tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug build: deliberately empty. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Tries to fold
        <op>  regA, ...        (p)
        mov   regB, regA       (movp)
    into
        <op>  regB, ...
    by retargeting operand 0 of p and deleting movp. Nothing happens unless
    every check below passes and a deallocation of regA is found after
    movp. optimizer is only used for the debug message. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      regalloc_entry,
      regdealloc_entry : tai_regalloc;
      prev : tai;
      producer, mov : taicpu;
    begin
      producer:=taicpu(p);
      mov:=taicpu(movp);

      { movp has to be a plain mov with the same condition as p ... }
      if not MatchInstruction(movp, A_MOV, [producer.condition], [PF_None]) then
        exit;
      { ... without a shifter operand ... }
      if mov.ops<>2 then
        exit;
      { ... copying the result register of p }
      if not MatchOperand(mov.oper[1]^, producer.oper[0]^.reg) then
        exit;
      { don't mess with moves to pc or lr }
      if (mov.oper[0]^.reg=NR_PC) or (mov.oper[0]^.reg=NR_R14) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(mov.oper[0]^.reg,p,movp) then
        exit;
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (producer.opcode=A_CBZ) or (producer.opcode=A_CBNZ) then
        exit;
      { MUL and MLA: oper[0] and oper[1] are not allowed to be the same before ARMv6 }
      if (producer.opcode in [A_MLA, A_MUL]) and
         (producer.oper[1]^.reg = mov.oper[0]^.reg) and
         (current_settings.cputype < cpu_armv6) then
        exit;

      regdealloc_entry:=FindRegDeAlloc(producer.oper[0]^.reg,tai(movp.Next));
      if not assigned(regdealloc_entry) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);

      { the old result register is not used anymore, try to find its
        allocation and remove it together with the deallocation; if no
        allocation is found the deallocation is moved right behind p }
      GetLastInstruction(p,prev);
      asml.Remove(regdealloc_entry);
      regalloc_entry:=FindRegAlloc(producer.oper[0]^.reg,tai(prev.Next));
      if assigned(regalloc_entry) then
        begin
          asml.Remove(regalloc_entry);
          regalloc_entry.free;
          regdealloc_entry.free;
        end
      else
        asml.InsertAfter(regdealloc_entry,p);

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,prev);
      regalloc_entry:=FindRegAlloc(mov.oper[0]^.reg,tai(prev.Next));
      if assigned(regalloc_entry) then
        begin
          asml.Remove(regalloc_entry);
          asml.InsertBefore(regalloc_entry,p);
          { adjust used regs }
          IncludeRegInUsedRegs(mov.oper[0]^.reg,UsedRegs);
        end;

      { finally retarget p and get rid of the mov }
      producer.loadreg(0,mov.oper[0]^.reg);
      asml.remove(movp);
      movp.free;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p is the ldr/str; its operand 1 is the memory reference. The add/sub is
    located with GetNextInstructionUsingReg on the base register and is
    removed from the list when the pattern applies.

    Returns true when the instruction pair was rewritten, false when
    nothing was changed.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if { operand 1 has to be a memory reference before .ref^ may be touched }
        (p.oper[1]^.typ=top_ref) and
        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
        (p.oper[1]^.ref^.index=NR_NO) and
        (p.oper[1]^.ref^.offset=0) and
        GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
        MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
        { operand 2 is inspected below, so it has to exist }
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
        (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
        (
         (taicpu(hp1).oper[2]^.typ=top_reg) or
         { valid offset? }
         ((taicpu(hp1).oper[2]^.typ=top_const) and
          ((abs(taicpu(hp1).oper[2]^.val)<256) or
           ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
          )
         )
        ) and
        { don't apply the optimization if the base register is loaded }
        (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
        { don't apply the optimization if the (new) index register is loaded;
          this only makes sense for a register operand: for a constant the
          reg field of the operand does not hold a register }
        (
         (taicpu(hp1).oper[2]^.typ<>top_reg) or
         ((p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
          not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1))
         )
        ) then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              { constant step: add becomes a positive, sub a negative offset }
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              { register step: the register becomes the index, the sign
                records whether it is added or subtracted }
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          { the add/sub is now part of the addressing mode }
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  395. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  396. var
  397. hp1,hp2: tai;
  398. i, i2: longint;
  399. TmpUsedRegs: TAllUsedRegs;
  400. tempop: tasmop;
  401. function IsPowerOf2(const value: DWord): boolean; inline;
  402. begin
  403. Result:=(value and (value - 1)) = 0;
  404. end;
  405. begin
  406. result := false;
  407. case p.typ of
  408. ait_instruction:
  409. begin
  410. {
  411. change
  412. <op> reg,x,y
  413. cmp reg,#0
  414. into
  415. <op>s reg,x,y
  416. }
  417. { this optimization can applied only to the currently enabled operations because
  418. the other operations do not update all flags and FPC does not track flag usage }
  419. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  420. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  421. GetNextInstruction(p, hp1) and
  422. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  423. (taicpu(hp1).oper[1]^.typ = top_const) and
  424. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  425. (taicpu(hp1).oper[1]^.val = 0) and
  426. GetNextInstruction(hp1, hp2) and
  427. { be careful here, following instructions could use other flags
  428. however after a jump fpc never depends on the value of flags }
  429. { All above instructions set Z and N according to the following
  430. Z := result = 0;
  431. N := result[31];
  432. EQ = Z=1; NE = Z=0;
  433. MI = N=1; PL = N=0; }
  434. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  435. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  436. begin
  437. DebugMsg('Peephole OpCmp2OpS done', p);
  438. taicpu(p).oppostfix:=PF_S;
  439. { move flag allocation if possible }
  440. GetLastInstruction(hp1, hp2);
  441. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  442. if assigned(hp2) then
  443. begin
  444. asml.Remove(hp2);
  445. asml.insertbefore(hp2, p);
  446. end;
  447. asml.remove(hp1);
  448. hp1.free;
  449. end
  450. else
  451. case taicpu(p).opcode of
  452. A_STR:
  453. begin
  454. { change
  455. str reg1,ref
  456. ldr reg2,ref
  457. into
  458. str reg1,ref
  459. mov reg2,reg1
  460. }
  461. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  462. (taicpu(p).oppostfix=PF_None) and
  463. GetNextInstruction(p,hp1) and
  464. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  465. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  466. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  467. begin
  468. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  469. begin
  470. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  471. asml.remove(hp1);
  472. hp1.free;
  473. end
  474. else
  475. begin
  476. taicpu(hp1).opcode:=A_MOV;
  477. taicpu(hp1).oppostfix:=PF_None;
  478. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  479. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  480. end;
  481. result := true;
  482. end
  483. { change
  484. str reg1,ref
  485. str reg2,ref
  486. into
  487. strd reg1,ref
  488. }
  489. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  490. (taicpu(p).oppostfix=PF_None) and
  491. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  492. GetNextInstruction(p,hp1) and
  493. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  494. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  495. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  496. { str ensures that either base or index contain no register, else ldr wouldn't
  497. use an offset either
  498. }
  499. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  500. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  501. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  502. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  503. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  504. begin
  505. DebugMsg('Peephole StrStr2Strd done', p);
  506. taicpu(p).oppostfix:=PF_D;
  507. asml.remove(hp1);
  508. hp1.free;
  509. end;
  510. LookForPostindexedPattern(taicpu(p));
  511. end;
  512. A_LDR:
  513. begin
  514. { change
  515. ldr reg1,ref
  516. ldr reg2,ref
  517. into ...
  518. }
  519. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  520. GetNextInstruction(p,hp1) and
  521. { ldrd is not allowed here }
  522. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  523. begin
  524. {
  525. ...
  526. ldr reg1,ref
  527. mov reg2,reg1
  528. }
  529. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  530. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  531. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  532. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  533. begin
  534. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  535. begin
  536. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  537. asml.remove(hp1);
  538. hp1.free;
  539. end
  540. else
  541. begin
  542. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  543. taicpu(hp1).opcode:=A_MOV;
  544. taicpu(hp1).oppostfix:=PF_None;
  545. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  546. end;
  547. result := true;
  548. end
  549. {
  550. ...
  551. ldrd reg1,ref
  552. }
  553. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  554. { ldrd does not allow any postfixes ... }
  555. (taicpu(p).oppostfix=PF_None) and
  556. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  557. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  558. { ldr ensures that either base or index contain no register, else ldr wouldn't
  559. use an offset either
  560. }
  561. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  562. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  563. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  564. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  565. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  566. begin
  567. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  568. taicpu(p).oppostfix:=PF_D;
  569. asml.remove(hp1);
  570. hp1.free;
  571. end;
  572. end;
  573. LookForPostindexedPattern(taicpu(p));
  574. { Remove superfluous mov after ldr
  575. changes
  576. ldr reg1, ref
  577. mov reg2, reg1
  578. to
  579. ldr reg2, ref
  580. conditions are:
  581. * no ldrd usage
  582. * reg1 must be released after mov
  583. * mov can not contain shifterops
  584. * ldr+mov have the same conditions
  585. * mov does not set flags
  586. }
  587. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  588. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  589. end;
  590. A_MOV:
  591. begin
  592. { fold
  593. mov reg1,reg0, shift imm1
  594. mov reg1,reg1, shift imm2
  595. }
  596. if (taicpu(p).ops=3) and
  597. (taicpu(p).oper[2]^.typ = top_shifterop) and
  598. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  599. getnextinstruction(p,hp1) and
  600. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  601. (taicpu(hp1).ops=3) and
  602. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  603. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  604. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  605. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  606. begin
  607. { fold
  608. mov reg1,reg0, lsl 16
  609. mov reg1,reg1, lsr 16
  610. strh reg1, ...
  611. dealloc reg1
  612. to
  613. strh reg1, ...
  614. dealloc reg1
  615. }
  616. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  617. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  618. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  619. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  620. getnextinstruction(hp1,hp2) and
  621. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  622. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  623. begin
  624. CopyUsedRegs(TmpUsedRegs);
  625. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  626. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  627. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  628. begin
  629. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  630. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  631. asml.remove(p);
  632. asml.remove(hp1);
  633. p.free;
  634. hp1.free;
  635. p:=hp2;
  636. end;
  637. ReleaseUsedRegs(TmpUsedRegs);
  638. end
  639. { fold
  640. mov reg1,reg0, shift imm1
  641. mov reg1,reg1, shift imm2
  642. to
  643. mov reg1,reg0, shift imm1+imm2
  644. }
  645. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  646. { an asr is of no use after a lsr, the asr can be folded into the lsr }
  647. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  648. begin
  649. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  650. { avoid overflows }
  651. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  652. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  653. SM_ROR:
  654. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  655. SM_ASR:
  656. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  657. SM_LSR,
  658. SM_LSL:
  659. begin
  660. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  661. InsertLLItem(p.previous, p.next, hp1);
  662. p.free;
  663. p:=hp1;
  664. end;
  665. else
  666. internalerror(2008072803);
  667. end;
  668. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  669. asml.remove(hp1);
  670. hp1.free;
  671. result := true;
  672. end
  673. { fold
  674. mov reg1,reg0, shift imm1
  675. mov reg1,reg1, shift imm2
  676. mov reg1,reg1, shift imm3 ...
  677. }
  678. else if getnextinstruction(hp1,hp2) and
  679. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  680. (taicpu(hp2).ops=3) and
  681. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  682. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  683. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  684. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  685. begin
  686. { mov reg1,reg0, lsl imm1
  687. mov reg1,reg1, lsr/asr imm2
  688. mov reg1,reg1, lsl imm3 ...
  689. if imm3<=imm1 and imm2>=imm3
  690. to
  691. mov reg1,reg0, lsl imm1
  692. mov reg1,reg1, lsr/asr imm2-imm3
  693. }
  694. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  695. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  696. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  697. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  698. begin
  699. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  700. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  701. asml.remove(hp2);
  702. hp2.free;
  703. result := true;
  704. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  705. begin
  706. asml.remove(hp1);
  707. hp1.free;
  708. end;
  709. end
  710. { mov reg1,reg0, lsr/asr imm1
  711. mov reg1,reg1, lsl imm2
  712. mov reg1,reg1, lsr/asr imm3 ...
  713. if imm3>=imm1 and imm2>=imm1
  714. to
  715. mov reg1,reg0, lsl imm2-imm1
  716. mov reg1,reg1, lsr/asr imm3 ...
  717. }
  718. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  719. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  720. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  721. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  722. begin
  723. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  724. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  725. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  726. asml.remove(p);
  727. p.free;
  728. p:=hp2;
  729. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  730. begin
  731. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  732. asml.remove(hp1);
  733. hp1.free;
  734. p:=hp2;
  735. end;
  736. result := true;
  737. end;
  738. end;
  739. end;
  740. { Change the common
  741. mov r0, r0, lsr #24
  742. and r0, r0, #255
  743. and remove the superfluous and
  744. This could be extended to handle more cases.
  745. }
  746. if (taicpu(p).ops=3) and
  747. (taicpu(p).oper[2]^.typ = top_shifterop) and
  748. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  749. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  750. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  751. getnextinstruction(p,hp1) and
  752. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  753. (taicpu(hp1).ops=3) and
  754. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  755. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  756. (taicpu(hp1).oper[2]^.typ = top_const) and
  757. { Check if the AND actually would only mask out bits being already zero because of the shift
  758. For LSR #25 and an AndConst of 255 that would go like this:
  759. 255 and ((2 shl (32-25))-1)
  760. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  761. LSR #25 and AndConst of 254:
  762. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  763. }
  764. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  765. begin
  766. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  767. asml.remove(hp1);
  768. hp1.free;
  769. end;
  770. {
  771. optimize
  772. mov rX, yyyy
  773. ....
  774. }
  775. if (taicpu(p).ops = 2) and
  776. GetNextInstruction(p,hp1) and
  777. (tai(hp1).typ = ait_instruction) then
  778. begin
  779. {
  780. This changes the very common
  781. mov r0, #0
  782. str r0, [...]
  783. mov r0, #0
  784. str r0, [...]
  785. and removes all superfluous mov instructions
  786. }
  787. if (taicpu(p).oper[1]^.typ = top_const) and
  788. (taicpu(hp1).opcode=A_STR) then
  789. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  790. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  791. GetNextInstruction(hp1, hp2) and
  792. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  793. (taicpu(hp2).ops = 2) and
  794. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  795. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  796. begin
  797. DebugMsg('Peephole MovStrMov done', hp2);
  798. GetNextInstruction(hp2,hp1);
  799. asml.remove(hp2);
  800. hp2.free;
  801. if not assigned(hp1) then break;
  802. end
  803. {
  804. This removes the first mov from
  805. mov rX,...
  806. mov rX,...
  807. }
  808. else if taicpu(hp1).opcode=A_MOV then
  809. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  810. (taicpu(hp1).ops = 2) and
  811. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  812. { don't remove the first mov if the second is a mov rX,rX }
  813. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  814. begin
  815. DebugMsg('Peephole MovMov done', p);
  816. asml.remove(p);
  817. p.free;
  818. p:=hp1;
  819. GetNextInstruction(hp1,hp1);
  820. if not assigned(hp1) then
  821. break;
  822. end;
  823. end;
  824. {
  825. change
  826. mov r1, r0
  827. add r1, r1, #1
  828. to
  829. add r1, r0, #1
  830. Todo: Make it work for mov+cmp too
  831. CAUTION! If this one is successful p might not be a mov instruction anymore!
  832. }
  833. if (taicpu(p).ops = 2) and
  834. (taicpu(p).oper[1]^.typ = top_reg) and
  835. (taicpu(p).oppostfix = PF_NONE) and
  836. GetNextInstruction(p, hp1) and
  837. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  838. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  839. [taicpu(p).condition], []) and
  840. {MOV and MVN might only have 2 ops}
  841. (taicpu(hp1).ops = 3) and
  842. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  843. (taicpu(hp1).oper[1]^.typ = top_reg) and
  844. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  845. begin
  846. { When we get here we still don't know if the registers match}
  847. for I:=1 to 2 do
  848. {
  849. If the first loop was successful p will be replaced with hp1.
  850. The checks will still be ok, because all required information
  851. will also be in hp1 then.
  852. }
  853. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  854. begin
  855. DebugMsg('Peephole RedundantMovProcess done', hp1);
  856. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  857. if p<>hp1 then
  858. begin
  859. asml.remove(p);
  860. p.free;
  861. p:=hp1;
  862. end;
  863. end;
  864. end;
  865. { This folds shifterops into following instructions
  866. mov r0, r1, lsl #8
  867. add r2, r3, r0
  868. to
  869. add r2, r3, r1, lsl #8
  870. CAUTION! If this one is successful p might not be a mov instruction anymore!
  871. }
  872. if (taicpu(p).opcode = A_MOV) and
  873. (taicpu(p).ops = 3) and
  874. (taicpu(p).oper[1]^.typ = top_reg) and
  875. (taicpu(p).oper[2]^.typ = top_shifterop) and
  876. (taicpu(p).oppostfix = PF_NONE) and
  877. GetNextInstruction(p, hp1) and
  878. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  879. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  880. A_CMP, A_CMN],
  881. [taicpu(p).condition], [PF_None]) and
  882. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  883. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  884. (
  885. {Only ONE of the two src operands is allowed to match}
  886. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  887. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  888. ) then
  889. begin
  890. CopyUsedRegs(TmpUsedRegs);
  891. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  892. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  893. I2:=0
  894. else
  895. I2:=1;
  896. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  897. for I:=I2 to taicpu(hp1).ops-1 do
  898. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  899. begin
  900. { If the parameter matched on the second op from the RIGHT
  901. we have to switch the parameters, this will not happen for CMP
  902. were we're only evaluating the most right parameter
  903. }
  904. if I <> taicpu(hp1).ops-1 then
  905. begin
  906. {The SUB operators need to be changed when we swap parameters}
  907. case taicpu(hp1).opcode of
  908. A_SUB: tempop:=A_RSB;
  909. A_SBC: tempop:=A_RSC;
  910. A_RSB: tempop:=A_SUB;
  911. A_RSC: tempop:=A_SBC;
  912. else tempop:=taicpu(hp1).opcode;
  913. end;
  914. if taicpu(hp1).ops = 3 then
  915. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  916. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  917. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  918. else
  919. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  920. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  921. taicpu(p).oper[2]^.shifterop^);
  922. end
  923. else
  924. if taicpu(hp1).ops = 3 then
  925. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  926. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  927. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  928. else
  929. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  930. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  931. taicpu(p).oper[2]^.shifterop^);
  932. asml.insertbefore(hp2, p);
  933. asml.remove(p);
  934. asml.remove(hp1);
  935. p.free;
  936. hp1.free;
  937. p:=hp2;
  938. GetNextInstruction(p,hp1);
  939. DebugMsg('Peephole FoldShiftProcess done', p);
  940. break;
  941. end;
  942. ReleaseUsedRegs(TmpUsedRegs);
  943. end;
  944. {
  945. Often we see shifts and then a superfluous mov to another register
  946. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  947. }
  948. if (taicpu(p).opcode = A_MOV) and
  949. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  950. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  951. end;
  952. A_ADD,
  953. A_ADC,
  954. A_RSB,
  955. A_RSC,
  956. A_SUB,
  957. A_SBC,
  958. A_AND,
  959. A_BIC,
  960. A_EOR,
  961. A_ORR,
  962. A_MLA,
  963. A_MUL:
  964. begin
  965. {
  966. optimize
  967. and reg2,reg1,const1
  968. ...
  969. }
  970. if (taicpu(p).opcode = A_AND) and
  971. (taicpu(p).oper[1]^.typ = top_reg) and
  972. (taicpu(p).oper[2]^.typ = top_const) then
  973. begin
  974. {
  975. change
  976. and reg2,reg1,const1
  977. and reg3,reg2,const2
  978. to
  979. and reg3,reg1,(const1 and const2)
  980. }
  981. if GetNextInstruction(p, hp1) and
  982. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  983. { either reg3 and reg2 are equal or reg2 is deallocated after the and }
  984. (MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) or
  985. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))) and
  986. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  987. (taicpu(hp1).oper[2]^.typ = top_const) then
  988. begin
  989. DebugMsg('Peephole AndAnd2And done', p);
  990. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  991. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  992. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  993. asml.remove(hp1);
  994. hp1.free;
  995. end
  996. {
  997. change
  998. and reg2,reg1,255
  999. strb reg2,[...]
  1000. dealloc reg2
  1001. to
  1002. strb reg1,[...]
  1003. }
  1004. else if (taicpu(p).oper[2]^.val = 255) and
  1005. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1006. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1007. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1008. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1009. { the reference in strb might not use reg2 }
  1010. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1011. { reg1 might not be modified inbetween }
  1012. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1013. begin
  1014. DebugMsg('Peephole AndStrb2Strb done', p);
  1015. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1016. asml.remove(p);
  1017. p.free;
  1018. p:=hp1;
  1019. end;
  1020. end;
  1021. {
  1022. change
  1023. add/sub reg2,reg1,const1
  1024. str/ldr reg3,[reg2,const2]
  1025. dealloc reg2
  1026. to
  1027. str/ldr reg3,[reg1,const2+/-const1]
  1028. }
  1029. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1030. (taicpu(p).oper[1]^.typ = top_reg) and
  1031. (taicpu(p).oper[2]^.typ = top_const) then
  1032. begin
  1033. hp1:=p;
  1034. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1035. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1036. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1037. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1038. { don't optimize if the register is stored/overwritten }
  1039. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1040. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1041. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1042. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1043. ldr postfix }
  1044. (((taicpu(p).opcode=A_ADD) and
  1045. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1046. ) or
  1047. ((taicpu(p).opcode=A_SUB) and
  1048. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1049. )
  1050. ) do
  1051. begin
  1052. { neither reg1 nor reg2 might be changed inbetween }
  1053. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1054. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1055. break;
  1056. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1057. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1058. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1059. begin
  1060. { remember last instruction }
  1061. hp2:=hp1;
  1062. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1063. hp1:=p;
  1064. { fix all ldr/str }
  1065. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1066. begin
  1067. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1068. if taicpu(p).opcode=A_ADD then
  1069. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1070. else
  1071. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1072. if hp1=hp2 then
  1073. break;
  1074. end;
  1075. GetNextInstruction(p,hp1);
  1076. asml.remove(p);
  1077. p.free;
  1078. p:=hp1;
  1079. break;
  1080. end;
  1081. end;
  1082. end;
  1083. {
  1084. change
  1085. add reg1, ...
  1086. mov reg2, reg1
  1087. to
  1088. add reg2, ...
  1089. }
  1090. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1091. begin
  1092. if (taicpu(p).ops=3) then
  1093. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1094. end;
  1095. end;
  1096. A_MVN:
  1097. begin
  1098. {
  1099. change
  1100. mvn reg2,reg1
  1101. and reg3,reg4,reg2
  1102. dealloc reg2
  1103. to
  1104. bic reg3,reg4,reg1
  1105. }
  1106. if (taicpu(p).oper[1]^.typ = top_reg) and
  1107. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1108. MatchInstruction(hp1,A_AND,[],[]) and
  1109. (((taicpu(hp1).ops=3) and
  1110. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1111. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1112. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1113. ((taicpu(hp1).ops=2) and
  1114. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1115. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1116. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1117. { reg1 might not be modified inbetween }
  1118. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1119. begin
  1120. DebugMsg('Peephole MvnAnd2Bic done', p);
  1121. taicpu(hp1).opcode:=A_BIC;
  1122. if taicpu(hp1).ops=3 then
  1123. begin
  1124. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1125. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1126. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1127. end
  1128. else
  1129. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1130. asml.remove(p);
  1131. p.free;
  1132. p:=hp1;
  1133. end;
  1134. end;
  1135. A_UXTB:
  1136. begin
  1137. {
  1138. change
  1139. uxtb reg2,reg1
  1140. strb reg2,[...]
  1141. dealloc reg2
  1142. to
  1143. strb reg1,[...]
  1144. }
  1145. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1146. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1147. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1148. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1149. { the reference in strb might not use reg2 }
  1150. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1151. { reg1 might not be modified inbetween }
  1152. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1153. begin
  1154. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1155. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1156. asml.remove(p);
  1157. p.free;
  1158. p:=hp1;
  1159. end
  1160. {
  1161. change
  1162. uxtb reg2,reg1
  1163. uxth reg3,reg2
  1164. dealloc reg2
  1165. to
  1166. uxtb reg3,reg1
  1167. }
  1168. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1169. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1170. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1171. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1172. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1173. { reg1 might not be modified inbetween }
  1174. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1175. begin
  1176. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1177. taicpu(hp1).opcode:=A_UXTB;
  1178. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1179. asml.remove(p);
  1180. p.free;
  1181. p:=hp1;
  1182. end
  1183. {
  1184. change
  1185. uxtb reg2,reg1
  1186. uxtb reg3,reg2
  1187. dealloc reg2
  1188. to
  1189. uxtb reg3,reg1
  1190. }
  1191. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1192. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1193. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1194. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1195. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1196. { reg1 might not be modified inbetween }
  1197. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1198. begin
  1199. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  1200. taicpu(hp1).opcode:=A_UXTB;
  1201. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1202. asml.remove(p);
  1203. p.free;
  1204. p:=hp1;
  1205. end
  1206. {
  1207. change
  1208. uxth reg2,reg1
  1209. uxth reg3,reg2
  1210. dealloc reg2
  1211. to
  1212. uxth reg3,reg1
  1213. }
  1214. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1215. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1216. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1217. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1218. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1219. { reg1 might not be modified inbetween }
  1220. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1221. begin
  1222. DebugMsg('Peephole UxthUxth2Uxth done', p);
  1223. taicpu(hp1).opcode:=A_UXTH;
  1224. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1225. asml.remove(p);
  1226. p.free;
  1227. p:=hp1;
  1228. end;
  1229. end;
  1230. A_UXTH:
  1231. begin
  1232. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1233. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1234. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1235. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1236. { the reference in strb might not use reg2 }
  1237. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1238. { reg1 might not be modified inbetween }
  1239. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1240. begin
  1241. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1242. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1243. asml.remove(p);
  1244. p.free;
  1245. p:=hp1;
  1246. end;
  1247. end;
  1248. A_CMP:
  1249. begin
  1250. {
  1251. change
  1252. cmp reg,const1
  1253. moveq reg,const1
  1254. movne reg,const2
  1255. to
  1256. cmp reg,const1
  1257. movne reg,const2
  1258. }
  1259. if (taicpu(p).oper[1]^.typ = top_const) and
  1260. GetNextInstruction(p, hp1) and
  1261. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1262. (taicpu(hp1).oper[1]^.typ = top_const) and
  1263. GetNextInstruction(hp1, hp2) and
  1264. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1265. (taicpu(hp1).oper[1]^.typ = top_const) then
  1266. begin
  1267. RemoveRedundantMove(p, hp1, asml);
  1268. RemoveRedundantMove(p, hp2, asml);
  1269. end;
  1270. end;
  1271. end;
  1272. end;
  1273. end;
  1274. end;
  { instructions modifying the CPSR can be only the last instruction

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is one of the opcodes listed below,
      - its first operand is the register NR_PC,
      - it carries the PF_S postfix.
    Returns false for anything that is not an instruction. }
  function MustBeLast(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      { only instructions can qualify }
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      if instr.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (instr.ops>=1) and
        (instr.oper[0]^.typ=top_reg) and
        (instr.oper[0]^.reg=NR_PC) then
        { first operand is the program counter }
        Result:=true
      else
        Result:=(instr.oppostfix=PF_S);
    end;
  { Walks the list from BlockStart up to BlockEnd. For every conditional
    branch (opcode A_B with a condition other than C_None) it tries to drop
    the branch and give the skipped instructions a condition instead.

      pattern 1                  pattern 2
          Bcc xxx                    Bcc xxx
          <block 1>                  <block 1>
        xxx:                         B   yyy
                                   xxx:
                                     <block 2>
                                   yyy:

    Pattern 1: block 1 (1..4 instructions) gets the inverse of cc and the
    Bcc is removed.  An instruction for which MustBeLast is true is accepted
    only as the final instruction of block 1.

    Pattern 2: block 1 (1..3 instructions, none of them MustBeLast) gets the
    inverse of cc, block 2 gets cc, and both branches are removed.  An
    instruction for which MustBeLast is true is accepted only as the final
    instruction of block 2.

    When a pattern is applied, p is moved to the first instruction of
    block 1 and the main loop continues from there. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                              Bxx xxx
                              <several instructions>
                            xxx:
                        }
                        { l counts the instructions of block 1, WasLast records
                          that the scan stopped behind a MustBeLast instruction }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 remembers the Bcc, p moves to the first
                                      instruction of block 1 }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized }
                              if not WasLast then
                                begin
                                  { check further for
                                        Bcc xxx
                                        <several instructions 1>
                                        B yyy
                                      xxx:
                                        <several instructions 2>
                                      yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { block 1 must not be empty: with l=0 the first
                                      instruction behind the Bcc is the jmp itself, and
                                      p would be left pointing at the jmp after it has
                                      been freed below }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      { skip hp1 to <several instructions 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          { same rule as for block 1 of pattern 1: a
                                            MustBeLast instruction ends the block, so the
                                            label check below fails if anything follows it }
                                          if MustBeLast(hp1) then
                                            begin
                                              GetNextInstruction(hp1, hp1);
                                              break;
                                            end
                                          else
                                            GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 remembers the Bcc, p moves to the first
                                            instruction of block 1 }
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several instructions 2>, or
                                            directly at yyy: if block 2 is empty; the
                                            condition is tested before the first write so
                                            that an empty block 2 changes nothing }
                                          while assigned(hp1) and
                                            CanBeCond(hp1) and
                                            (hp1.typ<>ait_label) do
                                            begin
                                              if hp1.typ=ait_instruction then
                                                taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            end;
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg occurs in p1.
    For an instruction with opcode A_BL the answer is always true,
    whatever Reg is; everything else is delegated to the inherited
    implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      { default: the BL case }
      Result:=true;
      { anything that is not a BL instruction is decided by the ancestor }
      if (p1.typ <> ait_instruction) or (taicpu(p1).opcode<>A_BL) then
        Result:=inherited RegInInstruction(Reg, p1);
    end;
  const
    { opcodes which write to memory, or which might do so }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { control transfers and breakpoint }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { str family and stm }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { remaining st* and sfm opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD
    ];
  { adjust the register live information when swapping the two instructions p and hp1,
    they must follow one after the other

    p    : the instruction that currently comes first
    hp1  : the instruction directly following p

    Only the live_start/live_end entries in cg.rg are updated here; the
    instructions themselves are not moved by this procedure. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { if the recorded live end of reg is hp1 and RegInInstruction reports reg
      for p, the live end is moved to p; NR_NO is ignored }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { if the recorded live start of reg is p and RegInInstruction reports reg
      for hp1, the live start is moved to hp1; NR_NO is ignored }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop adjusts live ends, so the shift register has to go
              through CheckLiveEnd like every other operand kind of hp1 }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Load scheduling before register allocation.

    Scans the block from BlockStart up to BlockEnd for the sequence

        p         any instruction
        ldr_ins   LDR/LDRB/LDRH/LDRSB/LDRSH
        use_ins   instruction that uses the register loaded by ldr_ins

    and, when all checks in the guard below succeed, reorders it to

        ldr_ins
        p         (plus the register deallocation markers that refer to
                   registers of p and were found next to it)
        use_ins

    so that p ends up between the load and the first use of the loaded
    register.

    The incoming value of p is ignored (it is reset to BlockStart and then
    serves as the scan cursor). The function always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;

  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }

    var
      ldr_ins,use_ins,scan_back,scan_fwd,moved_item : tai;
      movelist : TAsmList;
    begin
      result:=true;

      { collects p and its deallocation markers while they are unlinked }
      movelist:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
             GetNextInstruction(p,ldr_ins) and
             (ldr_ins.typ=ait_instruction) and
             (taicpu(ldr_ins).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
             { for now we don't reschedule if the previous instruction potentially changes a memory location }
             ( (not(taicpu(p).opcode in opcode_could_mem_write) and
                not(RegModifiedByInstruction(NR_PC,p))
               ) or
               ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
                ((taicpu(ldr_ins).oper[1]^.ref^.base=NR_PC) or
                 (assigned(taicpu(ldr_ins).oper[1]^.ref^.symboldata) and
                  (taicpu(ldr_ins).oper[1]^.ref^.offset=0)
                 )
                ) or
                { try to prove that the memory accesses don't overlap }
                ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                 (taicpu(p).oper[1]^.ref^.base=taicpu(ldr_ins).oper[1]^.ref^.base) and
                 (taicpu(p).oppostfix=PF_None) and
                 (taicpu(ldr_ins).oppostfix=PF_None) and
                 (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                 (taicpu(ldr_ins).oper[1]^.ref^.index=NR_NO) and
                 { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                 (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(ldr_ins).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(ldr_ins).oper[0]^.reg)]))
                )
               )
             ) and
             GetNextInstruction(ldr_ins,use_ins) and
             (use_ins.typ=ait_instruction) and
             { loaded register used by next instruction? }
             (RegInInstruction(taicpu(ldr_ins).oper[0]^.reg,use_ins)) and
             { loaded register not used by previous instruction? }
             not(RegInInstruction(taicpu(ldr_ins).oper[0]^.reg,p)) and
             { same condition? }
             (taicpu(p).condition=taicpu(ldr_ins).condition) and
             { first instruction might not change the register used as base }
             ((taicpu(ldr_ins).oper[1]^.ref^.base=NR_NO) or
              not(RegModifiedByInstruction(taicpu(ldr_ins).oper[1]^.ref^.base,p))
             ) and
             { first instruction might not change the register used as index }
             ((taicpu(ldr_ins).oper[1]^.ref^.index=NR_NO) or
              not(RegModifiedByInstruction(taicpu(ldr_ins).oper[1]^.ref^.index,p))
             ) then
            begin
              { remember both neighbours of p before it is unlinked }
              scan_back:=tai(p.Previous);
              scan_fwd:=tai(p.next);
              asml.Remove(p);

              { deallocation markers in front of p (up to the previous
                instruction) that name a register of p travel with p }
              while assigned(scan_back) and (scan_back.typ<>ait_instruction) do
                begin
                  moved_item:=scan_back;
                  scan_back:=tai(scan_back.Previous);
                  if (moved_item.typ=ait_regalloc) and
                     (tai_regalloc(moved_item).ratype=ra_dealloc) and
                     RegInInstruction(tai_regalloc(moved_item).reg,p) then
                    begin
                      asml.Remove(moved_item);
                      movelist.Concat(moved_item);
                    end;
                end;

              movelist.Concat(p);
              SwapRegLive(taicpu(p),taicpu(ldr_ins));

              { same for the deallocation markers behind p (up to the next
                instruction) }
              while assigned(scan_fwd) and (scan_fwd.typ<>ait_instruction) do
                begin
                  moved_item:=scan_fwd;
                  scan_fwd:=tai(scan_fwd.next);
                  if (moved_item.typ=ait_regalloc) and
                     (tai_regalloc(moved_item).ratype=ra_dealloc) and
                     RegInInstruction(tai_regalloc(moved_item).reg,p) then
                    begin
                      asml.Remove(moved_item);
                      movelist.Concat(moved_item);
                    end;
                end;

              { re-link: the load goes directly in front of its user, then
                everything collected in movelist is placed between them }
              asml.Remove(ldr_ins);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),use_ins);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(ldr_ins,use_ins);
              asml.InsertListBefore(use_ins,movelist);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            { nothing to reschedule: continue with the instruction found by
              GetNextInstruction }
            p:=ldr_ins
          else
            p:=tai(p.next);
        end;
      movelist.Free;
    end;
  { Shortens the IT instruction that precedes p by one slot.

    Walks backwards from p. l counts the instructions looked at, starting
    with 1 for the first instruction in front of p; the walk gives up after
    four instructions or at the start of the list. The first IT-family
    opcode found (A_IT..A_ITTTT) ends the walk:

      - a plain IT directly in front of p (l=1) is removed from the list and
        freed;
      - any other IT variant is replaced by the variant with its last
        condition letter dropped, but only if l equals the number of slots
        of that variant (ITx: 2, ITxy: 3, ITxyz: 4);
      - otherwise the IT instruction is left untouched.

    list  assembler list that contains p
    p     instruction whose slot is to be dropped from a preceding IT }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;

      while assigned(hp) and
            (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                 (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                     (l=1) then
                    begin
                      { the IT would cover nothing anymore: drop it. It was
                        previously only unlinked, which leaked the node. }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;

                  { only the nearest IT is ever considered; hp is not used
                    after this point }
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                      (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}

              inc(l);
            end;

          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific peephole rewrites for the instruction at p.

    Each branch below matches one instruction shape and rewrites it in
    place (or replaces it by a newly created instruction, in which case p
    is updated to point at the replacement). The flag-setting rewrites are
    only done while NR_DEFAULTFLAGS is not in UsedRegs; they bracket the
    instruction with an alloc/dealloc pair for the flags.
    NOTE(review): the rewrites presumably target the shorter 16-bit Thumb
    encodings - confirm against the encoder.

    Returns true if a rewrite was applied; if no rule matches, the result of
    the inherited PeepHoleOptPass1Cpu is returned. }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      { hp1/hp2 are only referenced by the disabled rules further down }
      hp1,hp2 : tai;
    begin
      { stm(fd|db) with pre-indexed SP, register set without r8..r13 and r15
        -> push <regset> }
      if (p.typ=ait_instruction) and
         MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
         ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced instruction like the sibling rules do;
            it used to be leaked here }
          p.Free;
          p:=hp;
          result:=true;
        end
      { str reg,[sp,#-4]! with reg in r0..r7 or r14 -> push (reg) }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_STR, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.ref^.offset=-4) and
         (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { ldm(fd|ia) on SP (reference marked AM_PREINDEXED), register set
        without r8..r14 -> pop <regset> }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
         ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { ldr reg,[sp],#4 with reg in r0..r7 or r15 -> pop (reg) }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
         (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.ref^.offset=4) and
         (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { mov rd,#imm with 0<=imm<256 -> movs rd,#imm }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_const) and
         (taicpu(p).oper[1]^.val >= 0) and
         (taicpu(p).oper[1]^.val < 256) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { mvn rd,rm -> mvns rd,rm }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_reg) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { add/sub rd,rd,#imm with 0<=imm<256 and rd<>sp -> adds/subs rd,#imm }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
         (taicpu(p).oper[2]^.typ=top_const) and
         (taicpu(p).oper[2]^.val >= 0) and
         (taicpu(p).oper[2]^.val < 256) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          { move the immediate from operand 2 into operand 1 }
          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end
      { add rd,rd,rm -> add rd,rm (flags are not touched here) }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (taicpu(p).oper[2]^.typ=top_reg) then
        begin
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          result:=true;
        end
      { <op> rd,rd,rm -> <op>s rd,rm for and/orr/eor/bic/lsl/lsr/asr/ror }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (taicpu(p).oper[2]^.typ=top_reg) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { <op>(s) rd,rn,rd -> <op>s rd,rn for and/orr/eor: operand 1 is kept
        and operand 2 (equal to rd) is dropped }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end
      { mov rd,rd,<lsl|lsr|asr|ror> <rs|#imm> -> <shift>s rd,<rs|#imm> }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
         (taicpu(p).ops=3) and
         (taicpu(p).oper[2]^.typ=top_shifterop) and
         (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;

          { operand 1 becomes the shift amount: register if one is given,
            the immediate otherwise }
          if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
            taicpu(p).loadreg(1, taicpu(p).oper[2]^.shifterop^.rs)
          else
            taicpu(p).loadconst(1, taicpu(p).oper[2]^.shifterop^.shiftimm);

          { the shifter operand is still readable in operand 2 }
          case taicpu(p).oper[2]^.shifterop^.shiftmode of
            SM_LSL: taicpu(p).opcode:=A_LSL;
            SM_LSR: taicpu(p).opcode:=A_LSR;
            SM_ASR: taicpu(p).opcode:=A_ASR;
            SM_ROR: taicpu(p).opcode:=A_ROR;
          end;
          result:=true;
        end
      { and rd,#255 / and rd,#65535 (any condition) -> uxtb/uxth rd,rd }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_AND], [], [PF_None]) and
         (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ=top_const) and
         ((taicpu(p).oper[1]^.val=255) or
          (taicpu(p).oper[1]^.val=65535)) then
        begin
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;

          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);

          result := true;
        end
      { and rd,rn,#255 / and rd,rn,#65535 (any condition) -> uxtb/uxth rd,rn }
      else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_AND], [], [PF_None]) and
         (taicpu(p).ops = 3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         ((taicpu(p).oper[2]^.val=255) or
          (taicpu(p).oper[2]^.val=65535)) then
        begin
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;

          taicpu(p).ops:=2;

          result := true;
        end
      {
        Turn
        mul reg0, z,w
        sub/add x, y, reg0
        dealloc reg0

        into

        mls/mla x,y,z,w
      }
      {else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
         (taicpu(p).ops=3) and
         (taicpu(p).oper[0]^.typ = top_reg) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(p).oper[2]^.typ = top_reg) and
         GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
         MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
         (((taicpu(hp1).ops=3) and
           (taicpu(hp1).oper[2]^.typ=top_reg) and
           (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
            (MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
             (taicpu(hp1).opcode=A_ADD)))) or
          ((taicpu(hp1).ops=2) and
           (taicpu(hp1).oper[1]^.typ=top_reg) and
           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
         assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
         not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then
        begin
          if taicpu(hp1).opcode=A_ADD then
            begin
              taicpu(hp1).opcode:=A_MLA;

              if taicpu(hp1).ops=3 then
                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
                  taicpu(hp1).loadreg(1,taicpu(hp1).oper[2]^.reg);

              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);

              DebugMsg('MulAdd2MLA done', p);

              taicpu(hp1).ops:=4;

              asml.remove(p);
              p.free;
              p:=hp1;
            end
          else
            begin
              taicpu(hp1).opcode:=A_MLS;

              if taicpu(hp1).ops=2 then
                taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);

              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);

              DebugMsg('MulSub2MLS done', p);

              taicpu(hp1).ops:=4;

              asml.remove(p);
              p.free;
              p:=hp1;
            end;

          result:=true;
        end}
      {else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_const) and
         (taicpu(p).oper[1]^.val=0) and
         GetNextInstruction(p,hp1) and
         (taicpu(hp1).opcode=A_B) and
         (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);

          taicpu(hp2).is_jmp := true;

          asml.InsertAfter(hp2, hp1);

          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;

          p := hp2;

          result := true;
        end}
      else
        Result := inherited PeepHoleOptPass1Cpu(p);
    end;
  { Thumb-2 peephole pass 2: replaces a conditional branch that merely
    skips a few instructions by an IT block.

    For every conditional B in the block (BlockStart up to BlockEnd) the
    instructions following it are counted while CanBeCond accepts them (an
    instruction for which MustBeLast holds is counted and ends the run).
    If FindLabel reports the branch target for the item after that run and
    the run has 1..4 instructions, then

      - every instruction of the run gets the inverse of the branch
        condition,
      - a preceding IT instruction is adjusted through DecrementPreceedingIT,
      - IT/ITT/ITTT/ITTTT with the inverse condition is inserted in place of
        the branch, and the branch is removed and freed.

    NOTE(review): the exact matching rule of FindLabel is defined outside
    this block - confirm it requires the label to follow the run directly. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      { UsedRegs, TmpUsedRegs: TRegSet; }

    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                               Bxx   xxx
                               <several instructions>
                            xxx:
                        }
                        l:=0;
                        GetNextInstruction(p, hp1);
                        { l may reach 5 here; that case is rejected by the
                          range check below }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                //hp1:=nil;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 keeps the branch; p moves on to the
                                      first instruction of the run }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }

                                    asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);

                                    { the branch itself may occupy a slot of
                                      an earlier IT instruction }
                                    DecrementPreceedingIT(asml, hp2);

                                    case l of
                                      1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                                      2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                                      3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                                      4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                                    end;

                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points at the next item to
                                      examine, so skip the advance below }
                                    continue;
                                  end;
                              end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Unit initialization: publish the classes implemented in this unit through
    the cpreregallocscheduler and casmoptimizer class variables. }
  begin
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
    casmoptimizer:=TCpuAsmOptimizer;
  end.