aoptcpu.pas 138 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  24. Type
  { ARM peephole optimizer built on top of the generic TAsmOptimizer }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { uses the same constructor as TAopObj }
    { pass 1 peephole optimizations for the list entry p; returns true if
      something was changed }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2;override;
    Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { tries to fold the register move movp into the preceding instruction p,
      so that p writes directly into the destination of movp; optimizer is
      only used as label in the emitted debug message. Returns true if movp
      was removed }
    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
    { steps from Current to the following list entries by means of
      GetNextInstruction and returns the entry found in Next.
      Unless cs_opt_level3 is enabled, the search stops at the very next entry.
      Otherwise it continues until an entry is reached which is no
      instruction, which uses reg, which is a call/jump or which modifies
      the program counter.
      The result is the result of the last GetNextInstruction call, so the
      caller still has to check what kind of entry Next is }
    Function GetNextInstructionUsingReg(Current: tai; Out Next: tai; reg: TRegister): Boolean;
    { searches forward from Current for an LDR/STR using a reference equal
      to ref (see RefsEqual) and returns it in Next.
      Returns false if the search had to be given up; with StopOnStore set,
      any STR/STM with another reference ends the search as well }
    Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    { outputs a debug message into the assembler file
      (only if DEBUG_AOPTCPU is defined, otherwise it does nothing) }
    procedure DebugMsg(const s: string; p: tai);
    { true if the instruction hp reads reg, either as register operand, as
      member of a register set, as shift register or as base/index of a
      reference }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { true if the instruction hp overwrites reg with a new value }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { add/sub reg1,reg1,x ... ldr/str regX,[reg1]  ->  pre-indexed ldr/str }
    function LookForPreindexedPattern(p: taicpu): boolean;
    { ldr/str regX,[reg1] ... add/sub reg1,reg1,x  ->  post-indexed ldr/str }
    function LookForPostindexedPattern(p: taicpu): boolean;
  End;
  { ARM instruction scheduler derived from TAsmScheduler; judging by its name
    it is meant to run before register allocation - the implementation is not
    part of the section documented here }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific scheduling step for the list entry p }
    function SchedulerPass1Cpu(var p: tai): boolean;override;
    { helper operating on the two instructions p and hp1; presumably fixes up
      the register live ranges after both have been swapped - TODO confirm
      against the implementation }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Thumb-2 flavour of the ARM peephole optimizer: inherits everything from
    TCpuAsmOptimizer and overrides the individual optimizer passes }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2;override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  End;
  56. function MustBeLast(p : tai) : boolean;
  57. Implementation
  58. uses
  59. cutils,verbose,globtype,globals,
  60. systems,
  61. cpuinfo,
  62. cgobj,procinfo,
  63. aasmbase,aasmdata;
  { Returns true if the list entry p is an instruction which is still
    unconditional and which may get a condition code attached.
    Never true when generating Thumb code. IT blocks, CBZ/CBNZ, PLD and BLX
    with a non-register operand are excluded. }
  function CanBeCond(p : tai) : boolean;
    var
      ins : taicpu;
    begin
      result:=false;
      if GenerateThumbCode or (p.typ<>ait_instruction) then
        exit;
      ins:=taicpu(p);
      { already conditional? }
      if ins.condition<>C_None then
        exit;
      { the IT instruction family }
      if (ins.opcode>=A_IT) and (ins.opcode<=A_ITTTT) then
        exit;
      if (ins.opcode=A_CBZ) or
         (ins.opcode=A_CBNZ) or
         (ins.opcode=A_PLD) then
        exit;
      { BLX is only acceptable in its register form }
      result:=(ins.opcode<>A_BLX) or
        (ins.oper[0]^.typ=top_reg);
    end;
  { Field-wise comparison of two references: offset, base, index, scale
    factor, symbol, relsymbol and refaddr as well as the ARM specific sign,
    shift and address mode fields must all be equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { plain address components }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbol information }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      { ARM specific parts }
      result:=
        (r1.signindex=r2.signindex) and
        (r1.shiftimm=r2.shiftimm) and
        (r1.addressmode=r2.addressmode) and
        (r1.shiftmode=r2.shiftmode);
    end;
  { Checks if instr is an instruction whose opcode, condition and postfix
    are members of the given sets. An empty set acts as wildcard.
    Opcodes with an ordinal value of 256 or more never match a non-empty
    opcode set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if (op<>[]) and
         not((ord(ins.opcode)<256) and (ins.opcode in op)) then
        exit;
      if (cond<>[]) and
         not(ins.condition in cond) then
        exit;
      result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Checks if instr is an instruction with exactly the opcode op whose
    condition and postfix are members of the given sets. An empty set acts
    as wildcard. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if ins.opcode<>op then
        exit;
      if (cond<>[]) and
         not(ins.condition in cond) then
        exit;
      result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Compares two operands. They match if they are of the same type and carry
    the same constant, register, condition code or (see RefsEqual) reference.
    Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          result:=(oper1.reg=oper2.reg);
        top_const:
          result:=(oper1.val=oper2.val);
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_conditioncode:
          result:=(oper1.cc=oper2.cc);
      end;
    end;
  { True if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Removes movp from asml if it is a "moveq" which writes the value into the
    register that cmpp just compared against that very value, i.e. the move
    cannot change anything. A comment is left in the list in its place.

    The operand types are not checked here: operand 0 of both instructions is
    read as register, operand 1 of both as constant.
    Returns true if movp was removed (and freed). }
  function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
    var
      cmp, mov: taicpu;
    begin
      cmp:=taicpu(cmpp);
      mov:=taicpu(movp);
      Result:=
        (mov.condition=C_EQ) and
        (cmp.oper[0]^.reg=mov.oper[0]^.reg) and
        (cmp.oper[1]^.val=mov.oper[1]^.val);
      if not Result then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { (Safe) heuristics to decide whether ref is known to be aligned to 8 bytes.
    Only the EABI style ABIs are considered. Accepted are
      - R13 relative references with a non-negative offset being a multiple of 8
      - R11 relative references with a non-positive offset, if R11 is the
        frame pointer of the current procedure; R11 always has a value of
        <qword align>+4, so offset+4 must be a multiple of 8 }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { stack pointer relative }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;
      { frame pointer relative }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Checks if aoffset can be used as constant offset of a load/store with
    the postfix pf.
      Thumb-2: -256 < aoffset < 4096, independent of the postfix
      else   : |aoffset| < 4096 for word/byte accesses (PF_None, PF_B),
               |aoffset| < 256 for all other postfixes }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if GenerateThumb2Code then
        begin
          result:=(aoffset>-256) and (aoffset<4096);
          exit;
        end;
      if pf in [PF_None,PF_B] then
        result:=abs(aoffset)<4096
      else
        result:=abs(aoffset)<256;
    end;
  { Returns true if the instruction hp reads the register reg.

    Normally operand 0 is the destination and therefore skipped; for stores,
    LDM/STM, PLD, compares/tests, branches and SMLAL/UMLAL operand 0 is
    checked as well. A register counts as read if it is used as register
    operand, is a member of a register set, is the shift register of a
    shifter operand or is the base or index of a reference. For STRD the
    register following operand 0 counts as read, too.
    Returns false if hp is nil or no instruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      ins: taicpu;
      firstop,
      opidx: longint;
    begin
      Result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      ins:=taicpu(hp);
      { for these instructions we have to start at oper[0] }
      if ins.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                        A_CMP, A_CMN, A_TST, A_TEQ,
                        A_B, A_BL, A_BX, A_BLX,
                        A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;
      for opidx:=firstop to ins.ops-1 do
        begin
          case ins.oper[opidx]^.typ of
            top_ref:
              Result:=
                (ins.oper[opidx]^.ref^.base=reg) or
                (ins.oper[opidx]^.ref^.index=reg);
            top_shifterop:
              Result:=(ins.oper[opidx]^.shifterop^.rs=reg);
            top_regset:
              Result:=(getsupreg(reg) in ins.oper[opidx]^.regset^);
            top_reg:
              Result:=(ins.oper[opidx]^.reg=reg) or
                { STRD stores the register following oper[0] as well }
                ((opidx=0) and (ins.opcode=A_STR) and (ins.oppostfix=PF_D) and
                 (getsupreg(ins.oper[0]^.reg)+1=getsupreg(reg)));
          end;
          { bail out as soon as we found something }
          if Result then
            exit;
        end;
    end;
  { Returns true if the instruction hp overwrites the register reg with a
    new value.

    - compares/tests, branches, SWI, MSR, PLD and STR never do
    - xMLAL/xMULL write their first two operands
    - LDM writes all registers of its register set (oper[1])
    - POP writes all registers of its register set (oper[0]) and the stack
      pointer
    - for all other instructions oper[0] decides: a register operand is the
      destination (LDRD also writes the register following it), a pre- or
      postindexed reference operand gets its base register written
    Returns false if hp is nil or no instruction. Instructions without any
    operand cannot write a register and are rejected before oper[0] is
    accessed. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { STR does not load into its first register.
          A post-/preindexed STR or LDR only modifies its base register,
          this is not considered as loading it with a new value, so the
          reference operand is ignored on purpose; LDR is handled by the
          generic oper[0] check below }
        A_STR:
          exit;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          Result :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { MCR/MRC (loads to oper2 from coprocessor) is currently not supported in FPC }
        { Loads to all register in the registerset }
        A_LDM:
          Result := (getsupreg(reg) in p.oper[1]^.regset^);
        A_POP:
          Result := (getsupreg(reg) in p.oper[0]^.regset^) or
                    (reg=NR_STACK_POINTER_REG);
      end;

      { nothing more to check if we already know the answer; without
        operands there is no oper[0] which could be inspected }
      if Result or (p.ops=0) then
        exit;

      case p.oper[0]^.typ of
        { the regular case: oper[0] is the destination register }
        top_reg:
          Result := (p.oper[0]^.reg = reg) or
            { LDRD }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          Result :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { Walks forward from Current and returns in Next the entry at which the
    search stopped. The search stops
      - if GetNextInstruction fails
      - after the first step, if cs_opt_level3 is not enabled
      - at an entry which is no instruction
      - at an instruction which uses reg, is a call/jump or modifies the
        program counter
    The result is the result of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if (not Result) or
             not(cs_opt_level3 in current_settings.optimizerswitches) or
             (Next.typ<>ait_instruction) then
            break;
          if RegInInstruction(reg,Next) or
             is_calljmp(taicpu(Next).opcode) or
             RegModifiedByInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Walks forward from Current looking for an LDR/STR (two operands) or
    LDRD/STRD (three operands) whose reference equals ref. If one is found,
    it is returned in Next and the result is true.

    The search is given up (result false) if GetNextInstruction fails, at
    an entry which is no instruction, after the first step if cs_opt_level3
    is not enabled, at a call/jump, at an instruction modifying the program
    counter and - if StopOnStore is set - at any STR/STM. }
  function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
    Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    var
      ins: taicpu;
      refidx: longint;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if not Result then
          break;
        if Next.typ<>ait_instruction then
          break;
        ins:=taicpu(Next);
        if ins.opcode in [A_LDR, A_STR] then
          begin
            { the reference is the last operand }
            case ins.ops of
              2:
                refidx:=1;
              3: { LDRD/STRD }
                refidx:=2;
              else
                refidx:=-1;
            end;
            if (refidx>=0) and
               (ins.oper[refidx]^.typ=top_ref) and
               RefsEqual(ins.oper[refidx]^.ref^,ref) then
              { we've found an instruction LDR or STR with the same reference,
                Result is still true here }
              exit;
          end;
      until not(cs_opt_level3 in current_settings.optimizerswitches) or
        is_calljmp(ins.opcode) or
        (StopOnStore and (ins.opcode in [A_STR, A_STM])) or
        RegModifiedByInstruction(NR_PC,Next);
      Result:=false;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: inserts the text s as an assembler comment into the
    instruction list, directly in front of the entry p }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$else DEBUG_AOPTCPU}
  { Variant used if DEBUG_AOPTCPU is not defined: intentionally empty, so the
    DebugMsg calls spread over the optimizer do not emit anything }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Tries to get rid of the move in
        <op>  reg0,...      (p)
        ...
        mov   reg2,reg0     (movp)
    by letting p write into reg2 directly. This is only done if reg0 is
    released after movp, i.e. a deallocation of reg0 is found behind it.
    The register allocation information of reg0 and reg2 is adjusted
    accordingly. optimizer is the name of the calling optimization, it is
    only used for the debug message.
    Returns true if movp has been removed (and freed). }
  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      srcreg,
      dstreg : tregister;
      regalloc,
      regdealloc : tai_regalloc;
      prevp : tai;
    begin
      Result:=false;
      { movp must be a plain mov with the same condition as p ... }
      if not MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) then
        exit;
      { ... without shifter operand ... }
      if taicpu(movp).ops<>2 then
        exit;
      { ... which copies the result of p }
      srcreg:=taicpu(p).oper[0]^.reg;
      if not MatchOperand(taicpu(movp).oper[1]^, srcreg) then
        exit;
      dstreg:=taicpu(movp).oper[0]^.reg;
      { don't mess with moves to pc or lr }
      if (dstreg=NR_PC) or (dstreg=NR_R14) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(dstreg,p,movp) then
        exit;
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (taicpu(p).opcode=A_CBZ) or (taicpu(p).opcode=A_CBNZ) then
        exit;
      { special requirement for MUL and MLA: oper[0] and oper[1] are not
        allowed to be the same before ARMv6 }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg=dstreg) and
         (current_settings.cputype<cpu_armv6) then
        exit;
      { Take care to only do this for instructions which REALLY load to the
        first register. Otherwise
          str reg0, [reg1]
          mov reg2, reg0
        would be optimized to
          str reg2, [reg1]
      }
      if not regLoadedWithNewValue(srcreg, p) then
        exit;

      regdealloc:=FindRegDeAlloc(srcreg,tai(movp.Next));
      if not assigned(regdealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { srcreg is not used anymore, try to find its allocation
        and remove it if possible }
      asml.Remove(regdealloc);
      regalloc:=FindRegAllocBackward(srcreg,tai(p.previous));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          regalloc.free;
          regdealloc.free;
        end
      else
        asml.InsertAfter(regdealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,prevp);
      regalloc:=FindRegAlloc(dstreg,tai(prevp.Next));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          asml.InsertBefore(regalloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstreg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstreg);
      asml.remove(movp);
      movp.free;
    end;
  {
    optimize
      add/sub reg1,reg1,regY/const
      ...
      ldr/str regX,[reg1]
    into
      ldr/str regX,[reg1, regY/const]!

    p is the add/sub. Only the ldr/str is changed here, p itself is left in
    the list; the result tells the caller whether the ldr/str has been
    turned into the pre-indexed form. ARM code only.
  }
  function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
    var
      memop: tai;
    begin
      result:=false;
      if not GenerateARMCode then
        exit;
      { add/sub reg1,reg1,<something> without shifter operand }
      if p.ops<>3 then
        exit;
      if not MatchOperand(p.oper[0]^, p.oper[1]^.reg) then
        exit;
      { next user of reg1 must be an unconditional ldr/str ... }
      if not GetNextInstructionUsingReg(p, memop, p.oper[0]^.reg) then
        exit;
      if RegModifiedBetween(p.oper[0]^.reg, p, memop) then
        exit;
      if not MatchInstruction(memop, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) then
        exit;
      { ... of the form ldr/str regX,[reg1] with regX<>reg1 }
      if (taicpu(memop).oper[1]^.ref^.addressmode<>AM_OFFSET) or
         (taicpu(memop).oper[1]^.ref^.base<>p.oper[0]^.reg) or
         (taicpu(memop).oper[0]^.reg=p.oper[0]^.reg) or
         (taicpu(memop).oper[1]^.ref^.offset<>0) or
         (taicpu(memop).oper[1]^.ref^.index<>NR_NO) then
        exit;
      { the increment must still be valid at the ldr/str and encodable }
      case p.oper[2]^.typ of
        top_reg:
          if RegModifiedBetween(p.oper[2]^.reg, p, memop) then
            exit;
        top_const:
          if not((abs(p.oper[2]^.val)<256) or
                 ((abs(p.oper[2]^.val)<4096) and
                  (taicpu(memop).oppostfix in [PF_None,PF_B]))) then
            exit;
        else
          exit;
      end;

      taicpu(memop).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
      case p.oper[2]^.typ of
        top_reg:
          begin
            taicpu(memop).oper[1]^.ref^.index:=p.oper[2]^.reg;
            if p.opcode=A_ADD then
              taicpu(memop).oper[1]^.ref^.signindex:=1
            else
              taicpu(memop).oper[1]^.ref^.signindex:=-1;
          end;
        else
          begin
            if p.opcode=A_ADD then
              taicpu(memop).oper[1]^.ref^.offset:=p.oper[2]^.val
            else
              taicpu(memop).oper[1]^.ref^.offset:=-p.oper[2]^.val;
          end;
      end;
      result:=true;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p is the ldr/str. If the pattern is found, p is turned into the
    post-indexed form, the add/sub is removed from the list and freed and
    the result is true. ARM code only.

    The add/sub must have exactly three operands: with a fourth (shifter)
    operand, e.g. add reg1,reg1,regY,lsl #2, the shift would get lost.
    The checks concerning regY are only carried out if the third operand
    really is a register.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
      base : tregister;
    begin
      Result:=false;
      if not GenerateARMCode then
        exit;
      { ldr/str regX,[reg1] }
      if not((p.oper[1]^.typ = top_ref) and
             (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
             (p.oper[1]^.ref^.index=NR_NO) and
             (p.oper[1]^.ref^.offset=0)) then
        exit;
      base:=p.oper[1]^.ref^.base;
      { don't apply the optimization if the base register is loaded }
      if p.oper[0]^.reg=base then
        exit;
      { add/sub reg1,reg1,<reg or const>
        we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
      if not(GetNextInstructionUsingReg(p, hp1, base) and
             MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
             { no shifter operand allowed, it cannot be carried over }
             (taicpu(hp1).ops=3) and
             MatchOperand(taicpu(hp1).oper[0]^, base) and
             MatchOperand(taicpu(hp1).oper[1]^, base)) then
        exit;
      if RegModifiedBetween(base,p,hp1) then
        exit;
      case taicpu(hp1).oper[2]^.typ of
        top_const:
          { valid offset? }
          if not((abs(taicpu(hp1).oper[2]^.val)<256) or
                 ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))) then
            exit;
        top_reg:
          { don't apply the optimization if the (new) index register is
            loaded by p or changed before the add/sub }
          if (p.oper[0]^.reg=taicpu(hp1).oper[2]^.reg) or
             RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1) then
            exit;
        else
          exit;
      end;

      DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
      p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
      if taicpu(hp1).oper[2]^.typ=top_const then
        begin
          if taicpu(hp1).opcode=A_ADD then
            p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
          else
            p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
        end
      else
        begin
          p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
          if taicpu(hp1).opcode=A_ADD then
            p.oper[1]^.ref^.signindex:=1
          else
            p.oper[1]^.ref^.signindex:=-1;
        end;
      asml.Remove(hp1);
      hp1.Free;
      Result:=true;
    end;
  492. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  493. var
  494. hp1,hp2,hp3,hp4: tai;
  495. i, i2: longint;
  496. TmpUsedRegs: TAllUsedRegs;
  497. tempop: tasmop;
  498. oldreg: tregister;
  499. dealloc: tai_regalloc;
  500. function IsPowerOf2(const value: DWord): boolean; inline;
  501. begin
  502. Result:=(value and (value - 1)) = 0;
  503. end;
  504. begin
  505. result := false;
  506. case p.typ of
  507. ait_instruction:
  508. begin
  509. {
  510. change
  511. <op> reg,x,y
  512. cmp reg,#0
  513. into
  514. <op>s reg,x,y
  515. }
  516. { this optimization can applied only to the currently enabled operations because
  517. the other operations do not update all flags and FPC does not track flag usage }
  518. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  519. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  520. GetNextInstruction(p, hp1) and
  521. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  522. (taicpu(hp1).oper[1]^.typ = top_const) and
  523. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  524. (taicpu(hp1).oper[1]^.val = 0) and
  525. GetNextInstruction(hp1, hp2) and
  526. { be careful here, following instructions could use other flags
  527. however after a jump fpc never depends on the value of flags }
  528. { All above instructions set Z and N according to the following
  529. Z := result = 0;
  530. N := result[31];
  531. EQ = Z=1; NE = Z=0;
  532. MI = N=1; PL = N=0; }
  533. (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
  534. { mov is also possible, but only if there is no shifter operand, it could be an rxx,
  535. we are too lazy to check if it is rxx or something else }
  536. (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and
  537. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  538. begin
  539. DebugMsg('Peephole OpCmp2OpS done', p);
  540. taicpu(p).oppostfix:=PF_S;
  541. { move flag allocation if possible }
  542. GetLastInstruction(hp1, hp2);
  543. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  544. if assigned(hp2) then
  545. begin
  546. asml.Remove(hp2);
  547. asml.insertbefore(hp2, p);
  548. end;
  549. asml.remove(hp1);
  550. hp1.free;
  551. Result:=true;
  552. end
  553. else
  554. case taicpu(p).opcode of
  555. A_STR:
  556. begin
  557. { change
  558. str reg1,ref
  559. ldr reg2,ref
  560. into
  561. str reg1,ref
  562. mov reg2,reg1
  563. }
  564. if (taicpu(p).oper[1]^.typ = top_ref) and
  565. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  566. (taicpu(p).oppostfix=PF_None) and
  567. (taicpu(p).condition=C_None) and
  568. GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
  569. MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
  570. (taicpu(hp1).oper[1]^.typ=top_ref) and
  571. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  572. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  573. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
  574. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
  575. begin
  576. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  577. begin
  578. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  579. asml.remove(hp1);
  580. hp1.free;
  581. end
  582. else
  583. begin
  584. taicpu(hp1).opcode:=A_MOV;
  585. taicpu(hp1).oppostfix:=PF_None;
  586. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  587. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  588. end;
  589. result := true;
  590. end
  591. { change
  592. str reg1,ref
  593. str reg2,ref
  594. into
  595. strd reg1,reg2,ref
  596. }
  597. else if (GenerateARMCode or GenerateThumb2Code) and
  598. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  599. (taicpu(p).oppostfix=PF_None) and
  600. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  601. GetNextInstruction(p,hp1) and
  602. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  603. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  604. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  605. { str ensures that either base or index contain no register, else ldr wouldn't
  606. use an offset either
  607. }
  608. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  609. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  610. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  611. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  612. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  613. begin
  614. DebugMsg('Peephole StrStr2Strd done', p);
  615. taicpu(p).oppostfix:=PF_D;
  616. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  617. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  618. taicpu(p).ops:=3;
  619. asml.remove(hp1);
  620. hp1.free;
  621. result:=true;
  622. end;
  623. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  624. end;
  625. A_LDR:
  626. begin
  627. { change
  628. ldr reg1,ref
  629. ldr reg2,ref
  630. into ...
  631. }
  632. if (taicpu(p).oper[1]^.typ = top_ref) and
  633. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  634. GetNextInstruction(p,hp1) and
  635. { ldrd is not allowed here }
  636. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  637. begin
  638. {
  639. ...
  640. ldr reg1,ref
  641. mov reg2,reg1
  642. }
  643. if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
  644. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  645. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  646. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  647. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  648. begin
  649. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  650. begin
  651. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  652. asml.remove(hp1);
  653. hp1.free;
  654. end
  655. else
  656. begin
  657. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  658. taicpu(hp1).opcode:=A_MOV;
  659. taicpu(hp1).oppostfix:=PF_None;
  660. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  661. end;
  662. result := true;
  663. end
  664. {
  665. ...
  666. ldrd reg1,reg1+1,ref
  667. }
  668. else if (GenerateARMCode or GenerateThumb2Code) and
  669. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  670. { ldrd does not allow any postfixes ... }
  671. (taicpu(p).oppostfix=PF_None) and
  672. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  673. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  674. { ldr ensures that either base or index contain no register, else ldr wouldn't
  675. use an offset either
  676. }
  677. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  678. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  679. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  680. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  681. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  682. begin
  683. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  684. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  685. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  686. taicpu(p).ops:=3;
  687. taicpu(p).oppostfix:=PF_D;
  688. asml.remove(hp1);
  689. hp1.free;
  690. result:=true;
  691. end;
  692. end;
  693. {
  694. Change
  695. ldrb dst1, [REF]
  696. and dst2, dst1, #255
  697. into
  698. ldrb dst2, [ref]
  699. }
  700. if not(GenerateThumbCode) and
  701. (taicpu(p).oppostfix=PF_B) and
  702. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  703. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
  704. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  705. (taicpu(hp1).oper[2]^.typ = top_const) and
  706. (taicpu(hp1).oper[2]^.val = $FF) and
  707. not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  708. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  709. begin
  710. DebugMsg('Peephole LdrbAnd2Ldrb done', p);
  711. taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
  712. asml.remove(hp1);
  713. hp1.free;
  714. result:=true;
  715. end;
  716. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  717. { Remove superfluous mov after ldr
  718. changes
  719. ldr reg1, ref
  720. mov reg2, reg1
  721. to
  722. ldr reg2, ref
  723. conditions are:
  724. * no ldrd usage
  725. * reg1 must be released after mov
  726. * mov can not contain shifterops
  727. * ldr+mov have the same conditions
  728. * mov does not set flags
  729. }
  730. if (taicpu(p).oppostfix<>PF_D) and
  731. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  732. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
  733. Result:=true;
  734. end;
  735. A_MOV:
  736. begin
  737. { fold
  738. mov reg1,reg0, shift imm1
  739. mov reg1,reg1, shift imm2
  740. }
  741. if (taicpu(p).ops=3) and
  742. (taicpu(p).oper[2]^.typ = top_shifterop) and
  743. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  744. getnextinstruction(p,hp1) and
  745. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  746. (taicpu(hp1).ops=3) and
  747. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  748. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  749. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  750. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  751. begin
  752. { fold
  753. mov reg1,reg0, lsl 16
  754. mov reg1,reg1, lsr 16
  755. strh reg1, ...
  756. dealloc reg1
  757. to
  758. strh reg1, ...
  759. dealloc reg1
  760. }
  761. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  762. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  763. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  764. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  765. getnextinstruction(hp1,hp2) and
  766. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  767. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  768. begin
  769. CopyUsedRegs(TmpUsedRegs);
  770. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  771. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  772. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  773. begin
  774. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  775. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  776. asml.remove(p);
  777. asml.remove(hp1);
  778. p.free;
  779. hp1.free;
  780. p:=hp2;
  781. Result:=true;
  782. end;
  783. ReleaseUsedRegs(TmpUsedRegs);
  784. end
  785. { fold
  786. mov reg1,reg0, shift imm1
  787. mov reg1,reg1, shift imm2
  788. to
  789. mov reg1,reg0, shift imm1+imm2
  790. }
  791. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  792. { an asr is of no use after a lsr, so the asr can be folded into the lsr }
  793. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  794. begin
  795. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  796. { avoid overflows }
  797. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  798. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  799. SM_ROR:
  800. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  801. SM_ASR:
  802. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  803. SM_LSR,
  804. SM_LSL:
  805. begin
  806. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  807. InsertLLItem(p.previous, p.next, hp2);
  808. p.free;
  809. p:=hp2;
  810. end;
  811. else
  812. internalerror(2008072803);
  813. end;
  814. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  815. asml.remove(hp1);
  816. hp1.free;
  817. result := true;
  818. end
  819. { fold
  820. mov reg1,reg0, shift imm1
  821. mov reg1,reg1, shift imm2
  822. mov reg1,reg1, shift imm3 ...
  823. mov reg2,reg1, shift imm3 ...
  824. }
  825. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  826. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  827. (taicpu(hp2).ops=3) and
  828. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  829. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  830. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  831. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  832. begin
  833. { mov reg1,reg0, lsl imm1
  834. mov reg1,reg1, lsr/asr imm2
  835. mov reg2,reg1, lsl imm3 ...
  836. to
  837. mov reg1,reg0, lsl imm1
  838. mov reg2,reg1, lsr/asr imm2-imm3
  839. if
  840. imm1>=imm2
  841. }
  842. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  843. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  844. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  845. begin
  846. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  847. begin
  848. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  849. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  850. begin
  851. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p);
  852. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  853. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  854. asml.remove(hp1);
  855. asml.remove(hp2);
  856. hp1.free;
  857. hp2.free;
  858. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  859. begin
  860. taicpu(p).freeop(1);
  861. taicpu(p).freeop(2);
  862. taicpu(p).loadconst(1,0);
  863. end;
  864. result := true;
  865. end;
  866. end
  867. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  868. begin
  869. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p);
  870. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  871. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  872. asml.remove(hp2);
  873. hp2.free;
  874. result := true;
  875. end;
  876. end
  877. { mov reg1,reg0, lsr/asr imm1
  878. mov reg1,reg1, lsl imm2
  879. mov reg1,reg1, lsr/asr imm3 ...
  880. if imm3>=imm1 and imm2>=imm1
  881. to
  882. mov reg1,reg0, lsl imm2-imm1
  883. mov reg1,reg1, lsr/asr imm3 ...
  884. }
  885. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  886. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  887. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  888. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  889. begin
  890. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  891. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  892. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  893. asml.remove(p);
  894. p.free;
  895. p:=hp2;
  896. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  897. begin
  898. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  899. asml.remove(hp1);
  900. hp1.free;
  901. p:=hp2;
  902. end;
  903. result := true;
  904. end;
  905. end;
  906. end;
  907. { Change the common
  908. mov r0, r0, lsr #xxx
  909. and r0, r0, #yyy/bic r0, r0, #xxx
  910. and remove the superfluous and/bic if possible
  911. This could be extended to handle more cases.
  912. }
  913. if (taicpu(p).ops=3) and
  914. (taicpu(p).oper[2]^.typ = top_shifterop) and
  915. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  916. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  917. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  918. (hp1.typ=ait_instruction) and
  919. (taicpu(hp1).ops>=1) and
  920. (taicpu(hp1).oper[0]^.typ=top_reg) and
  921. (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  922. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  923. begin
  924. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  925. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  926. (taicpu(hp1).ops=3) and
  927. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  928. (taicpu(hp1).oper[2]^.typ = top_const) and
  929. { Check if the AND actually would only mask out bits being already zero because of the shift
  930. }
  931. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hp1).oper[2]^.val) =
  932. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  933. begin
  934. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  935. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  936. asml.remove(hp1);
  937. hp1.free;
  938. result:=true;
  939. end
  940. else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  941. (taicpu(hp1).ops=3) and
  942. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  943. (taicpu(hp1).oper[2]^.typ = top_const) and
  944. { Check if the BIC actually would only mask out bits being already zero because of the shift }
  945. (taicpu(hp1).oper[2]^.val<>0) and
  946. (BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  947. begin
  948. DebugMsg('Peephole LsrBic2Lsr done', hp1);
  949. taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
  950. asml.remove(hp1);
  951. hp1.free;
  952. result:=true;
  953. end;
  954. end;
  955. { Change
  956. mov rx, ry, lsr/ror #xxx
  957. uxtb/uxth rz,rx/and rz,rx,0xFF
  958. dealloc rx
  959. to
  960. uxtb/uxth rz,ry,ror #xxx
  961. }
  962. if (taicpu(p).ops=3) and
  963. (taicpu(p).oper[2]^.typ = top_shifterop) and
  964. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  965. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  966. (GenerateThumb2Code) and
  967. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  968. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  969. begin
  970. if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  971. (taicpu(hp1).ops = 2) and
  972. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  973. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  974. begin
  975. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  976. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  977. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  978. taicpu(hp1).ops := 3;
  979. GetNextInstruction(p,hp1);
  980. asml.Remove(p);
  981. p.Free;
  982. p:=hp1;
  983. result:=true;
  984. exit;
  985. end
  986. else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  987. (taicpu(hp1).ops=2) and
  988. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  989. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  990. begin
  991. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  992. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  993. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  994. taicpu(hp1).ops := 3;
  995. GetNextInstruction(p,hp1);
  996. asml.Remove(p);
  997. p.Free;
  998. p:=hp1;
  999. result:=true;
  1000. exit;
  1001. end
  1002. else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1003. (taicpu(hp1).ops = 3) and
  1004. (taicpu(hp1).oper[2]^.typ = top_const) and
  1005. (taicpu(hp1).oper[2]^.val = $FF) and
  1006. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1007. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1008. begin
  1009. taicpu(hp1).ops := 3;
  1010. taicpu(hp1).opcode := A_UXTB;
  1011. taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1012. taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1013. taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1014. GetNextInstruction(p,hp1);
  1015. asml.Remove(p);
  1016. p.Free;
  1017. p:=hp1;
  1018. result:=true;
  1019. exit;
  1020. end;
  1021. end;
  1022. {
  1023. optimize
  1024. mov rX, yyyy
  1025. ....
  1026. }
  1027. if (taicpu(p).ops = 2) and
  1028. GetNextInstruction(p,hp1) and
  1029. (tai(hp1).typ = ait_instruction) then
  1030. begin
  1031. {
  1032. This changes the very common
  1033. mov r0, #0
  1034. str r0, [...]
  1035. mov r0, #0
  1036. str r0, [...]
  1037. and removes all superfluous mov instructions
  1038. }
  1039. if (taicpu(p).oper[1]^.typ = top_const) and
  1040. (taicpu(hp1).opcode=A_STR) then
  1041. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1042. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1043. GetNextInstruction(hp1, hp2) and
  1044. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1045. (taicpu(hp2).ops = 2) and
  1046. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1047. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1048. begin
  1049. DebugMsg('Peephole MovStrMov done', hp2);
  1050. GetNextInstruction(hp2,hp1);
  1051. asml.remove(hp2);
  1052. hp2.free;
  1053. result:=true;
  1054. if not assigned(hp1) then break;
  1055. end
  1056. {
  1057. This removes the first mov from
  1058. mov rX,...
  1059. mov rX,...
  1060. }
  1061. else if taicpu(hp1).opcode=A_MOV then
  1062. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1063. (taicpu(hp1).ops = 2) and
  1064. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  1065. { don't remove the first mov if the second is a mov rX,rX }
  1066. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  1067. begin
  1068. DebugMsg('Peephole MovMov done', p);
  1069. asml.remove(p);
  1070. p.free;
  1071. p:=hp1;
  1072. GetNextInstruction(hp1,hp1);
  1073. result:=true;
  1074. if not assigned(hp1) then
  1075. break;
  1076. end;
  1077. end;
  1078. {
  1079. change
  1080. mov r1, r0
  1081. add r1, r1, #1
  1082. to
  1083. add r1, r0, #1
  1084. Todo: Make it work for mov+cmp too
  1085. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1086. }
  1087. if (taicpu(p).ops = 2) and
  1088. (taicpu(p).oper[1]^.typ = top_reg) and
  1089. (taicpu(p).oppostfix = PF_NONE) and
  1090. GetNextInstruction(p, hp1) and
  1091. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1092. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  1093. [taicpu(p).condition], []) and
  1094. {MOV and MVN might only have 2 ops}
  1095. (taicpu(hp1).ops >= 2) and
  1096. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  1097. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1098. (
  1099. (taicpu(hp1).ops = 2) or
  1100. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  1101. ) then
  1102. begin
  1103. { When we get here we still don't know if the registers match}
  1104. for I:=1 to 2 do
  1105. {
  1106. If the first loop was successful p will be replaced with hp1.
  1107. The checks will still be ok, because all required information
  1108. will also be in hp1 then.
  1109. }
  1110. if (taicpu(hp1).ops > I) and
  1111. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) and
  1112. { prevent certain combinations on thumb(2), this is only a safe approximation }
  1113. (not(GenerateThumbCode or GenerateThumb2Code) or
  1114. ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
  1115. (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15))
  1116. ) then
  1117. begin
  1118. DebugMsg('Peephole RedundantMovProcess done', hp1);
  1119. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  1120. if p<>hp1 then
  1121. begin
  1122. asml.remove(p);
  1123. p.free;
  1124. p:=hp1;
  1125. Result:=true;
  1126. end;
  1127. end;
  1128. end;
  1129. { Fold the very common sequence
  1130. mov regA, regB
  1131. ldr* regA, [regA]
  1132. to
  1133. ldr* regA, [regB]
  1134. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1135. }
  1136. if (taicpu(p).opcode = A_MOV) and
  1137. (taicpu(p).ops = 2) and
  1138. (taicpu(p).oper[1]^.typ = top_reg) and
  1139. (taicpu(p).oppostfix = PF_NONE) and
  1140. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1141. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1142. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1143. { We can change the base register only when the instruction uses AM_OFFSET }
  1144. ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1145. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1146. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1147. ) and
  1148. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1149. // Make sure that Thumb code doesn't propagate a high register into a reference
  1150. ((GenerateThumbCode and
  1151. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)) or
  1152. (not GenerateThumbCode)) and
  1153. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1154. begin
  1155. DebugMsg('Peephole MovLdr2Ldr done', hp1);
  1156. if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1157. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1158. taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1159. if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1160. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1161. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
  1162. if Assigned(dealloc) then
  1163. begin
  1164. asml.remove(dealloc);
  1165. asml.InsertAfter(dealloc,hp1);
  1166. end;
  1167. GetNextInstruction(p, hp1);
  1168. asml.remove(p);
  1169. p.free;
  1170. p:=hp1;
  1171. result:=true;
  1172. end;
  1173. { This folds shifterops into following instructions
  1174. mov r0, r1, lsl #8
  1175. add r2, r3, r0
  1176. to
  1177. add r2, r3, r1, lsl #8
  1178. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1179. }
  1180. if (taicpu(p).opcode = A_MOV) and
  1181. (taicpu(p).ops = 3) and
  1182. (taicpu(p).oper[1]^.typ = top_reg) and
  1183. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1184. (taicpu(p).oppostfix = PF_NONE) and
  1185. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1186. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1187. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1188. A_CMP, A_CMN],
  1189. [taicpu(p).condition], [PF_None]) and
  1190. (not ((GenerateThumb2Code) and
  1191. (taicpu(hp1).opcode in [A_SBC]) and
  1192. (((taicpu(hp1).ops=3) and
  1193. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
  1194. ((taicpu(hp1).ops=2) and
  1195. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
  1196. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
  1197. (taicpu(hp1).ops >= 2) and
  1198. {Currently we can't fold into another shifterop}
  1199. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  1200. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1201. NR_DEFAULTFLAGS for modification}
  1202. (
  1203. {Everything is fine if we don't use RRX}
  1204. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1205. (
  1206. {If it is RRX, then check if we're just accessing the next instruction}
  1207. GetNextInstruction(p, hp2) and
  1208. (hp1 = hp2)
  1209. )
  1210. ) and
  1211. { reg1 must not be modified in between }
  1212. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1213. { The shifterop can contain a register, which must not be modified either }
  1214. (
  1215. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1216. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
  1217. ) and
  1218. (
  1219. {Only ONE of the two src operands is allowed to match}
  1220. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  1221. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  1222. ) then
  1223. begin
  1224. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  1225. I2:=0
  1226. else
  1227. I2:=1;
  1228. for I:=I2 to taicpu(hp1).ops-1 do
  1229. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  1230. begin
  1231. { If the parameter matched on the second op from the RIGHT
  1232. we have to switch the parameters, this will not happen for CMP
  1233. where we're only evaluating the rightmost parameter
  1234. }
  1235. if I <> taicpu(hp1).ops-1 then
  1236. begin
  1237. {The SUB operators need to be changed when we swap parameters}
  1238. case taicpu(hp1).opcode of
  1239. A_SUB: tempop:=A_RSB;
  1240. A_SBC: tempop:=A_RSC;
  1241. A_RSB: tempop:=A_SUB;
  1242. A_RSC: tempop:=A_SBC;
  1243. else tempop:=taicpu(hp1).opcode;
  1244. end;
  1245. if taicpu(hp1).ops = 3 then
  1246. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1247. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  1248. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1249. else
  1250. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1251. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1252. taicpu(p).oper[2]^.shifterop^);
  1253. end
  1254. else
  1255. if taicpu(hp1).ops = 3 then
  1256. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  1257. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  1258. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1259. else
  1260. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  1261. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1262. taicpu(p).oper[2]^.shifterop^);
  1263. asml.insertbefore(hp2, hp1);
  1264. GetNextInstruction(p, hp2);
  1265. asml.remove(p);
  1266. asml.remove(hp1);
  1267. p.free;
  1268. hp1.free;
  1269. p:=hp2;
  1270. DebugMsg('Peephole FoldShiftProcess done', p);
  1271. Result:=true;
  1272. break;
  1273. end;
  1274. end;
  1275. {
  1276. Fold
  1277. mov r1, r1, lsl #2
  1278. ldr/ldrb r0, [r0, r1]
  1279. to
  1280. ldr/ldrb r0, [r0, r1, lsl #2]
  1281. XXX: This still needs some work, as we quite often encounter something like
  1282. mov r1, r2, lsl #2
  1283. add r2, r3, #imm
  1284. ldr r0, [r2, r1]
  1285. which can't be folded because r2 is overwritten between the shift and the ldr.
  1286. We could try to shuffle the registers around and fold it into.
  1287. add r1, r3, #imm
  1288. ldr r0, [r1, r2, lsl #2]
  1289. }
  1290. if (not(GenerateThumbCode)) and
  1291. (taicpu(p).opcode = A_MOV) and
  1292. (taicpu(p).ops = 3) and
  1293. (taicpu(p).oper[1]^.typ = top_reg) and
  1294. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1295. { RRX is tough to handle, because it requires tracking the C-Flag,
  1296. it is also extremely unlikely to be emitted this way}
  1297. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1298. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1299. { thumb2 allows only lsl #0..#3 }
  1300. (not(GenerateThumb2Code) or
  1301. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1302. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1303. )
  1304. ) and
  1305. (taicpu(p).oppostfix = PF_NONE) and
  1306. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1307. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1308. (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1309. (GenerateThumb2Code and
  1310. MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1311. ) and
  1312. (
  1313. {If this is address by offset, one of the two registers can be used}
  1314. ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1315. (
  1316. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1317. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1318. )
  1319. ) or
  1320. {For post and preindexed only the index register can be used}
  1321. ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1322. (
  1323. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1324. (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1325. ) and
  1326. (not GenerateThumb2Code)
  1327. )
  1328. ) and
  1329. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1330. (taicpu(hp1).oper[1]^.ref^.index<>NR_NO) and
  1331. (taicpu(hp1).oper[1]^.ref^.base<>NR_NO) and
  1332. { Only fold if there isn't another shifterop already, and offset is zero. }
  1333. (taicpu(hp1).oper[1]^.ref^.offset = 0) and
  1334. (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
  1335. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  1336. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  1337. begin
  1338. { If the register we want to do the shift for resides in base, we need to swap that}
  1339. if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1340. taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
  1341. taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1342. taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1343. taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1344. DebugMsg('Peephole FoldShiftLdrStr done', hp1);
  1345. GetNextInstruction(p, hp1);
  1346. asml.remove(p);
  1347. p.free;
  1348. p:=hp1;
  1349. Result:=true;
  1350. end;
  1351. {
  1352. Often we see shifts and then a superfluous mov to another register
  1353. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1354. }
  1355. if (taicpu(p).opcode = A_MOV) and
  1356. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1357. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
  1358. Result:=true;
  1359. end;
  1360. A_ADD,
  1361. A_ADC,
  1362. A_RSB,
  1363. A_RSC,
  1364. A_SUB,
  1365. A_SBC,
  1366. A_AND,
  1367. A_BIC,
  1368. A_EOR,
  1369. A_ORR,
  1370. A_MLA,
  1371. A_MLS,
  1372. A_MUL:
  1373. begin
  1374. {
  1375. optimize
  1376. and reg2,reg1,const1
  1377. ...
  1378. }
  1379. if (taicpu(p).opcode = A_AND) and
  1380. (taicpu(p).ops>2) and
  1381. (taicpu(p).oper[1]^.typ = top_reg) and
  1382. (taicpu(p).oper[2]^.typ = top_const) then
  1383. begin
  1384. {
  1385. change
  1386. and reg2,reg1,const1
  1387. ...
  1388. and reg3,reg2,const2
  1389. to
  1390. and reg3,reg1,(const1 and const2)
  1391. }
  1392. if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1393. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1394. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1395. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1396. (taicpu(hp1).oper[2]^.typ = top_const) then
  1397. begin
  1398. if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
  1399. begin
  1400. DebugMsg('Peephole AndAnd2And done', p);
  1401. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1402. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1403. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1404. asml.remove(hp1);
  1405. hp1.free;
  1406. Result:=true;
  1407. end
  1408. else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1409. begin
  1410. DebugMsg('Peephole AndAnd2And done', hp1);
  1411. taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1412. taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
  1413. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1414. GetNextInstruction(p, hp1);
  1415. asml.remove(p);
  1416. p.free;
  1417. p:=hp1;
  1418. Result:=true;
  1419. end;
  1420. end
  1421. {
  1422. change
  1423. and reg2,reg1,$xxxxxxFF
  1424. strb reg2,[...]
  1425. dealloc reg2
  1426. to
  1427. strb reg1,[...]
  1428. }
  1429. else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
  1430. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1431. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1432. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1433. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1434. { the reference in strb might not use reg2 }
  1435. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1436. { reg1 might not be modified inbetween }
  1437. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1438. begin
  1439. DebugMsg('Peephole AndStrb2Strb done', p);
  1440. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1441. GetNextInstruction(p, hp1);
  1442. asml.remove(p);
  1443. p.free;
  1444. p:=hp1;
  1445. result:=true;
  1446. end
  1447. {
  1448. change
  1449. and reg2,reg1,255
  1450. uxtb/uxth reg3,reg2
  1451. dealloc reg2
  1452. to
  1453. and reg3,reg1,x
  1454. }
  1455. else if (taicpu(p).oper[2]^.val = $FF) and
  1456. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1457. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1458. MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
  1459. (taicpu(hp1).ops = 2) and
  1460. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1461. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1462. { reg1 might not be modified inbetween }
  1463. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1464. begin
  1465. DebugMsg('Peephole AndUxt2And done', p);
  1466. taicpu(hp1).opcode:=A_AND;
  1467. taicpu(hp1).ops:=3;
  1468. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1469. taicpu(hp1).loadconst(2,255);
  1470. GetNextInstruction(p,hp1);
  1471. asml.remove(p);
  1472. p.Free;
  1473. p:=hp1;
  1474. result:=true;
  1475. end
  1476. {
  1477. from
  1478. and reg1,reg0,2^n-1
  1479. mov reg2,reg1, lsl imm1
  1480. (mov reg3,reg2, lsr/asr imm1)
  1481. remove either the and or the lsl/xsr sequence if possible
  1482. }
  1483. else if cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
  1484. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1485. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1486. (taicpu(hp1).ops=3) and
  1487. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1488. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1489. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
  1490. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1491. RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
  1492. begin
  1493. {
  1494. and reg1,reg0,2^n-1
  1495. mov reg2,reg1, lsl imm1
  1496. mov reg3,reg2, lsr/asr imm1
  1497. =>
  1498. and reg1,reg0,2^n-1
  1499. if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
  1500. }
  1501. if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
  1502. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1503. (taicpu(hp2).ops=3) and
  1504. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1505. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1506. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
  1507. (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1508. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
  1509. RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
  1510. ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
  1511. ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1512. (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
  1513. begin
  1514. DebugMsg('Peephole AndLslXsr2And done', p);
  1515. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1516. asml.Remove(hp1);
  1517. asml.Remove(hp2);
  1518. hp1.free;
  1519. hp2.free;
  1520. result:=true;
  1521. end
  1522. {
  1523. and reg1,reg0,2^n-1
  1524. mov reg2,reg1, lsl imm1
  1525. =>
  1526. mov reg2,reg0, lsl imm1
  1527. if imm1>i
  1528. }
  1529. else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
  1530. not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
  1531. begin
  1532. DebugMsg('Peephole AndLsl2Lsl done', p);
  1533. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1534. GetNextInstruction(p, hp1);
  1535. asml.Remove(p);
  1536. p.free;
  1537. p:=hp1;
  1538. result:=true;
  1539. end
  1540. end;
  1541. end;
  1542. {
  1543. change
  1544. add/sub reg2,reg1,const1
  1545. str/ldr reg3,[reg2,const2]
  1546. dealloc reg2
  1547. to
  1548. str/ldr reg3,[reg1,const2+/-const1]
  1549. }
  1550. if (not GenerateThumbCode) and
  1551. (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1552. (taicpu(p).ops>2) and
  1553. (taicpu(p).oper[1]^.typ = top_reg) and
  1554. (taicpu(p).oper[2]^.typ = top_const) then
  1555. begin
  1556. hp1:=p;
  1557. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1558. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1559. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1560. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1561. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1562. { don't optimize if the register is stored/overwritten }
  1563. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1564. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1565. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1566. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1567. ldr postfix }
  1568. (((taicpu(p).opcode=A_ADD) and
  1569. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1570. ) or
  1571. ((taicpu(p).opcode=A_SUB) and
  1572. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1573. )
  1574. ) do
  1575. begin
  1576. { neither reg1 nor reg2 might be changed inbetween }
  1577. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1578. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1579. break;
  1580. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1581. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1582. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1583. begin
  1584. { remember last instruction }
  1585. hp2:=hp1;
  1586. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1587. hp1:=p;
  1588. { fix all ldr/str }
  1589. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1590. begin
  1591. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1592. if taicpu(p).opcode=A_ADD then
  1593. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1594. else
  1595. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1596. if hp1=hp2 then
  1597. break;
  1598. end;
  1599. GetNextInstruction(p,hp1);
  1600. asml.remove(p);
  1601. p.free;
  1602. p:=hp1;
  1603. result:=true;
  1604. break;
  1605. end;
  1606. end;
  1607. end;
  1608. {
  1609. change
  1610. add reg1, ...
  1611. mov reg2, reg1
  1612. to
  1613. add reg2, ...
  1614. }
  1615. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1616. (taicpu(p).ops>=3) and
  1617. RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
  1618. Result:=true;
  1619. if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  1620. LookForPreindexedPattern(taicpu(p)) then
  1621. begin
  1622. GetNextInstruction(p,hp1);
  1623. DebugMsg('Peephole Add/Sub to Preindexed done', p);
  1624. asml.remove(p);
  1625. p.free;
  1626. p:=hp1;
  1627. Result:=true;
  1628. end;
  1629. {
  1630. Turn
  1631. mul reg0, z,w
  1632. sub/add x, y, reg0
  1633. dealloc reg0
  1634. into
  1635. mls/mla x,z,w,y
  1636. }
  1637. if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
  1638. (taicpu(p).ops=3) and
  1639. (taicpu(p).oper[0]^.typ = top_reg) and
  1640. (taicpu(p).oper[1]^.typ = top_reg) and
  1641. (taicpu(p).oper[2]^.typ = top_reg) and
  1642. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1643. MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
  1644. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  1645. (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
  1646. (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
  1647. ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
  1648. // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
  1649. // TODO: A workaround would be to swap Rm and Rs
  1650. (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
  1651. (((taicpu(hp1).ops=3) and
  1652. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1653. ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  1654. (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
  1655. ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1656. (taicpu(hp1).opcode=A_ADD) and
  1657. (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
  1658. ((taicpu(hp1).ops=2) and
  1659. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1660. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1661. (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
  1662. begin
  1663. if taicpu(hp1).opcode=A_ADD then
  1664. begin
  1665. taicpu(hp1).opcode:=A_MLA;
  1666. if taicpu(hp1).ops=3 then
  1667. begin
  1668. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
  1669. oldreg:=taicpu(hp1).oper[2]^.reg
  1670. else
  1671. oldreg:=taicpu(hp1).oper[1]^.reg;
  1672. end
  1673. else
  1674. oldreg:=taicpu(hp1).oper[0]^.reg;
  1675. taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
  1676. taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
  1677. taicpu(hp1).loadreg(3,oldreg);
  1678. DebugMsg('MulAdd2MLA done', p);
  1679. taicpu(hp1).ops:=4;
  1680. asml.remove(p);
  1681. p.free;
  1682. p:=hp1;
  1683. end
  1684. else
  1685. begin
  1686. taicpu(hp1).opcode:=A_MLS;
  1687. taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
  1688. if taicpu(hp1).ops=2 then
  1689. taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
  1690. else
  1691. taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
  1692. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  1693. DebugMsg('MulSub2MLS done', p);
  1694. taicpu(hp1).ops:=4;
  1695. asml.remove(p);
  1696. p.free;
  1697. p:=hp1;
  1698. end;
  1699. result:=true;
  1700. end
  1701. end;
  1702. {$ifdef dummy}
  1703. A_MVN:
  1704. begin
  1705. {
  1706. change
  1707. mvn reg2,reg1
  1708. and reg3,reg4,reg2
  1709. dealloc reg2
  1710. to
  1711. bic reg3,reg4,reg1
  1712. }
  1713. if (taicpu(p).oper[1]^.typ = top_reg) and
  1714. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1715. MatchInstruction(hp1,A_AND,[],[]) and
  1716. (((taicpu(hp1).ops=3) and
  1717. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1718. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1719. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1720. ((taicpu(hp1).ops=2) and
  1721. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1722. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1723. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1724. { reg1 might not be modified inbetween }
  1725. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1726. begin
  1727. DebugMsg('Peephole MvnAnd2Bic done', p);
  1728. taicpu(hp1).opcode:=A_BIC;
  1729. if taicpu(hp1).ops=3 then
  1730. begin
  1731. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1732. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1733. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1734. end
  1735. else
  1736. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1737. GetNextInstruction(p, hp1);
  1738. asml.remove(p);
  1739. p.free;
  1740. p:=hp1;
  1741. end;
  1742. end;
  1743. {$endif dummy}
  1744. A_UXTB:
  1745. begin
  1746. {
  1747. change
  1748. uxtb reg2,reg1
  1749. strb reg2,[...]
  1750. dealloc reg2
  1751. to
  1752. strb reg1,[...]
  1753. }
  1754. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1755. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1756. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1757. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1758. { the reference in strb might not use reg2 }
  1759. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1760. { reg1 might not be modified inbetween }
  1761. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1762. begin
  1763. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1764. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1765. GetNextInstruction(p,hp2);
  1766. asml.remove(p);
  1767. p.free;
  1768. p:=hp2;
  1769. result:=true;
  1770. end
  1771. {
  1772. change
  1773. uxtb reg2,reg1
  1774. uxth reg3,reg2
  1775. dealloc reg2
  1776. to
  1777. uxtb reg3,reg1
  1778. }
  1779. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1780. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1781. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1782. (taicpu(hp1).ops = 2) and
  1783. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1784. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1785. { reg1 might not be modified inbetween }
  1786. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1787. begin
  1788. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1789. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1790. asml.remove(hp1);
  1791. hp1.free;
  1792. result:=true;
  1793. end
  1794. {
  1795. change
  1796. uxtb reg2,reg1
  1797. uxtb reg3,reg2
  1798. dealloc reg2
  1799. to
  1800. uxtb reg3,reg1
  1801. }
  1802. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1803. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1804. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1805. (taicpu(hp1).ops = 2) and
  1806. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1807. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1808. { reg1 might not be modified inbetween }
  1809. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1810. begin
  1811. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  1812. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1813. asml.remove(hp1);
  1814. hp1.free;
  1815. result:=true;
  1816. end
  1817. {
  1818. change
  1819. uxtb reg2,reg1
  1820. and reg3,reg2,#0x*FF
  1821. dealloc reg2
  1822. to
  1823. uxtb reg3,reg1
  1824. }
  1825. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1826. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1827. (taicpu(p).ops=2) and
  1828. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1829. (taicpu(hp1).ops=3) and
  1830. (taicpu(hp1).oper[2]^.typ=top_const) and
  1831. ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
  1832. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1833. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1834. { reg1 might not be modified inbetween }
  1835. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1836. begin
  1837. DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
  1838. taicpu(hp1).opcode:=A_UXTB;
  1839. taicpu(hp1).ops:=2;
  1840. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1841. GetNextInstruction(p,hp2);
  1842. asml.remove(p);
  1843. p.free;
  1844. p:=hp2;
  1845. result:=true;
  1846. end
  1847. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1848. RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
  1849. Result:=true;
  1850. end;
  1851. A_UXTH:
  1852. begin
  1853. {
  1854. change
  1855. uxth reg2,reg1
  1856. strh reg2,[...]
  1857. dealloc reg2
  1858. to
  1859. strh reg1,[...]
  1860. }
  1861. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1862. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1863. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1864. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1865. { the reference in strb might not use reg2 }
  1866. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1867. { reg1 might not be modified inbetween }
  1868. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1869. begin
  1870. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1871. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1872. GetNextInstruction(p, hp1);
  1873. asml.remove(p);
  1874. p.free;
  1875. p:=hp1;
  1876. result:=true;
  1877. end
  1878. {
  1879. change
  1880. uxth reg2,reg1
  1881. uxth reg3,reg2
  1882. dealloc reg2
  1883. to
  1884. uxth reg3,reg1
  1885. }
  1886. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1887. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1888. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1889. (taicpu(hp1).ops=2) and
  1890. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1891. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1892. { reg1 might not be modified inbetween }
  1893. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1894. begin
  1895. DebugMsg('Peephole UxthUxth2Uxth done', p);
  1896. taicpu(hp1).opcode:=A_UXTH;
  1897. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1898. GetNextInstruction(p, hp1);
  1899. asml.remove(p);
  1900. p.free;
  1901. p:=hp1;
  1902. result:=true;
  1903. end
  1904. {
  1905. change
  1906. uxth reg2,reg1
  1907. and reg3,reg2,#65535
  1908. dealloc reg2
  1909. to
  1910. uxth reg3,reg1
  1911. }
  1912. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1913. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1914. MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
  1915. (taicpu(hp1).ops=3) and
  1916. (taicpu(hp1).oper[2]^.typ=top_const) and
  1917. ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
  1918. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1919. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
  1920. { reg1 might not be modified inbetween }
  1921. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1922. begin
  1923. DebugMsg('Peephole UxthAndImm2Uxth done', p);
  1924. taicpu(hp1).opcode:=A_UXTH;
  1925. taicpu(hp1).ops:=2;
  1926. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1927. GetNextInstruction(p, hp1);
  1928. asml.remove(p);
  1929. p.free;
  1930. p:=hp1;
  1931. result:=true;
  1932. end
  1933. else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1934. RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
  1935. Result:=true;
  1936. end;
  1937. A_CMP:
  1938. begin
  1939. {
  1940. change
  1941. cmp reg,const1
  1942. moveq reg,const1
  1943. movne reg,const2
  1944. to
  1945. cmp reg,const1
  1946. movne reg,const2
  1947. }
  1948. if (taicpu(p).oper[1]^.typ = top_const) and
  1949. GetNextInstruction(p, hp1) and
  1950. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1951. (taicpu(hp1).oper[1]^.typ = top_const) and
  1952. GetNextInstruction(hp1, hp2) and
  1953. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1954. (taicpu(hp1).oper[1]^.typ = top_const) then
  1955. begin
  1956. Result:=RemoveRedundantMove(p, hp1, asml) or Result;
  1957. Result:=RemoveRedundantMove(p, hp2, asml) or Result;
  1958. end;
  1959. end;
  1960. A_STM:
  1961. begin
  1962. {
  1963. change
  1964. stmfd r13!,[r14]
  1965. sub r13,r13,#4
  1966. bl abc
  1967. add r13,r13,#4
  1968. ldmfd r13!,[r15]
  1969. into
  1970. b abc
  1971. }
  1972. if not(ts_thumb_interworking in current_settings.targetswitches) and
  1973. MatchInstruction(p, A_STM, [C_None], [PF_FD]) and
  1974. GetNextInstruction(p, hp1) and
  1975. GetNextInstruction(hp1, hp2) and
  1976. SkipEntryExitMarker(hp2, hp2) and
  1977. GetNextInstruction(hp2, hp3) and
  1978. SkipEntryExitMarker(hp3, hp3) and
  1979. GetNextInstruction(hp3, hp4) and
  1980. (taicpu(p).oper[0]^.typ = top_ref) and
  1981. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  1982. (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1983. (taicpu(p).oper[0]^.ref^.offset=0) and
  1984. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  1985. (taicpu(p).oper[1]^.typ = top_regset) and
  1986. (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
  1987. MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
  1988. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1989. (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
  1990. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
  1991. (taicpu(hp1).oper[2]^.typ = top_const) and
  1992. MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
  1993. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
  1994. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
  1995. MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
  1996. MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
  1997. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1998. MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
  1999. MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
  2000. (taicpu(hp4).oper[1]^.typ = top_regset) and
  2001. (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
  2002. begin
  2003. asml.Remove(p);
  2004. asml.Remove(hp1);
  2005. asml.Remove(hp3);
  2006. asml.Remove(hp4);
  2007. taicpu(hp2).opcode:=A_B;
  2008. p.free;
  2009. hp1.free;
  2010. hp3.free;
  2011. hp4.free;
  2012. p:=hp2;
  2013. DebugMsg('Peephole Bl2B done', p);
  2014. end;
  2015. end;
  2016. end;
  2017. end;
  2018. end;
  2019. end;
  { An instruction that modifies the CPSR may only be the last instruction.
    This function reports whether p is such an instruction: one of the listed
    call/compare/test opcodes, an instruction whose first operand is the
    register PC, or an instruction carrying the S postfix.
    Anything that is not an instruction yields false. }
  function MustBeLast(p : tai) : boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      { only real instructions qualify }
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);
      if insn.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if insn.oppostfix=PF_S then
        { S postfix }
        Result:=true
      else
        { first operand is the register PC }
        Result:=(insn.ops>=1) and
          (insn.oper[0]^.typ=top_reg) and
          (insn.oper[0]^.reg=NR_PC);
    end;
  { Second peephole pass over the current block (BlockStart up to BlockEnd).

    Only conditional branches (A_B with a condition other than C_None) are
    handled, and only when GenerateThumbCode is false. Two rewrites are tried:

      1)    Bcc xxx                   <1..4 instructions, condition not(cc)>
            <1..4 instructions>  =>
          xxx:                      xxx:

      2)    Bcc xxx                   <instructions 1, condition not(cc)>
            <instructions 1>          <instructions 2, condition cc>
            B yyy                =>
          xxx:                      xxx:
            <instructions 2>
          yyy:                      yyy:

    In both cases the removed branches decrement the reference count of their
    target label. After a successful rewrite p is set to the first instruction
    that followed the removed Bcc and the scan continues from there. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if (taicpu(p).condition<>C_None) and
                      not(GenerateThumbCode) then
                      begin
                         { check for
                                Bxx   xxx
                                <several instructions>
                             xxx:
                         }
                         l:=0;
                         WasLast:=False;
                         GetNextInstruction(p, hp1);
                         { count the instructions following the branch which can be
                           made conditional; l may reach 5, which disables rewrite 1 }
                         while assigned(hp1) and
                           (l<=4) and
                           CanBeCond(hp1) and
                           { stop on labels }
                           not(hp1.typ=ait_label) do
                           begin
                              inc(l);
                              if MustBeLast(hp1) then
                                begin
                                  { nothing may follow such an instruction in the
                                    conditional run, so counting stops behind it }
                                  WasLast:=True;
                                  GetNextInstruction(hp1,hp1);
                                  break;
                                end
                              else
                                GetNextInstruction(hp1,hp1);
                           end;
                         if assigned(hp1) then
                           begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      { hp2 remembers the Bcc, p moves on to the
                                        first skipped instruction }
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        if hp1.typ=ait_instruction then
                                          taicpu(hp1).condition:=condition;
                                        if MustBeLast(hp1) then
                                          begin
                                            GetNextInstruction(hp1,hp1);
                                            break;
                                          end
                                        else
                                          GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCond(hp1)) or
                                        (hp1.typ=ait_label);
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                { do not perform further optimizations if there is an
                                  instruction in block #1 which cannot be optimized.
                                }
                                if not WasLast then
                                begin
                                   { check further for
                                          Bcc   xxx
                                          <several instructions 1>
                                          B   yyy
                                  xxx:
                                          <several instructions 2>
                                  yyy:
                                   }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                     begin
                                       l:=0;
                                       { skip hp1 to <several moves 2> }
                                       GetNextInstruction(hp1, hp1);
                                       while assigned(hp1) and
                                         CanBeCond(hp1) do
                                         begin
                                           inc(l);
                                           GetNextInstruction(hp1, hp1);
                                         end;
                                       { hp1 points to yyy: }
                                       if assigned(hp1) and
                                         FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                         begin
                                            condition:=inverse_cond(taicpu(p).condition);
                                            GetNextInstruction(p,hp1);
                                            { hp3 remembers the Bcc, p moves on to
                                              the first instruction behind it }
                                            hp3:=p;
                                            p:=hp1;
                                            repeat
                                              if hp1.typ=ait_instruction then
                                                taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCond(hp1));
                                            { hp2 is still at jmp yyy }
                                            GetNextInstruction(hp2,hp1);
                                            { position hp1 on xxx: the same way the matching
                                              code above did, so that the walk over block 2
                                              starts at exactly the same place }
                                            FindLabel(tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol),hp1);
                                            condition:=inverse_cond(condition);
                                            GetNextInstruction(hp1,hp1);
                                            { hp1 is now at <several movs 2>, or already at
                                              yyy: (or nil) if block 2 is empty. Only real
                                              instructions may be accessed as taicpu, so the
                                              loop is guarded instead of running
                                              unconditionally once }
                                            while assigned(hp1) and
                                              CanBeCond(hp1) and
                                              (hp1.typ<>ait_label) do
                                              begin
                                                if hp1.typ=ait_instruction then
                                                  taicpu(hp1).condition:=condition;
                                                GetNextInstruction(hp1,hp1);
                                              end;
                                            {
                                              asml.remove(hp1.next)
                                              hp1.next.free;
                                              asml.remove(hp1);
                                              hp1.free;
                                            }
                                            { remove Bcc }
                                            tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp3);
                                            hp3.free;
                                            { remove jmp }
                                            tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp2);
                                            hp2.free;
                                            continue;
                                         end;
                                     end;
                                end;
                           end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { ARM specific refinement of the inherited RegInInstruction:
      - every BL instruction is reported as using Reg;
      - an LDR/STR carrying the D postfix is reported as using Reg when the
        super register number of its first operand plus one equals the super
        register number of Reg;
      - everything else is decided by the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      Result:=true;
      { BL }
      if (p1.typ = ait_instruction) and
        (taicpu(p1).opcode=A_BL) then
        exit;
      { LDR/STR with D postfix: also match the register following operand 0 }
      if MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
        (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
        exit;
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  const
    { Opcodes which do, or at least might, write to memory. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branch, call and breakpoint opcodes }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { single and multiple register stores }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { remaining store opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  { Adjust the register live information when swapping the two instructions p and hp1;
    they must follow one after the other (p first, hp1 second).

    For every register operand of p: if its live range starts at p and hp1 uses the
    register too, the start is moved to hp1.
    For every register operand of hp1: if its live range ends at hp1 and p uses the
    register too, the end is moved to p.

    Register operands are taken from plain registers, the base and index of
    references, the shift register of shifter operands and register sets. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { if the live range of reg ends at hp1 and p uses reg as well,
      make p the new end of the live range; NR_NO is ignored }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;


    { if the live range of reg starts at p and hp1 uses reg as well,
      make hp1 the new start of the live range; NR_NO is ignored }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
               if r in p.oper[i]^.regset^ then
                 CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop maintains live ends, so the shift register of hp1 has to
              go through CheckLiveEnd like all other operands of hp1 }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
               if r in hp1.oper[i]^.regset^ then
                 CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Scheduling pass run over the block BlockStart..BlockEnd.

    For every instruction p that is directly followed by a load
    (LDR/LDRB/LDRH/LDRSB/LDRSH) whose result is used by the instruction after
    the load, the load is moved in front of p, provided the checks below find
    no memory or register hazard. Register deallocations belonging to p and
    AT_ADDR labels in front of p travel together with p.

    p is overwritten with BlockStart on entry and is used as the iteration
    cursor. The function always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;

  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }

    var
      loadInstr,userInstr,scanBack,scanFwd,moved,anchor : tai;
      pending : TAsmList;

    { Memory part of the hazard check: true when prev is not a potential
      memory write (and does not modify PC), or when one of the two store
      special cases below applies. }
    function MemoryAllowsSwap(prev,load : taicpu) : boolean;
      begin
        { for now we don't reschedule if the previous instruction changes potentially a memory location }
        result:=not(prev.opcode in opcode_could_mem_write) and
                not(RegModifiedByInstruction(NR_PC,prev));
        if result then
          exit;
        { store followed by a load that is PC based or refers to symbol data at offset 0 }
        result:=(prev.opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
                ((load.oper[1]^.ref^.base=NR_PC) or
                 (assigned(load.oper[1]^.ref^.symboldata) and
                  (load.oper[1]^.ref^.offset=0)));
        if result then
          exit;
        { try to prove that the memory accesses don't overlap }
        result:=(prev.opcode in [A_STRB,A_STRH,A_STR]) and
                (prev.oper[1]^.typ = top_ref) and
                (prev.oper[1]^.ref^.base=load.oper[1]^.ref^.base) and
                (prev.oppostfix=PF_None) and
                (load.oppostfix=PF_None) and
                (prev.oper[1]^.ref^.index=NR_NO) and
                (load.oper[1]^.ref^.index=NR_NO) and
                { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                (abs(prev.oper[1]^.ref^.offset-load.oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(prev.oper[0]^.reg)],tcgsize2size[reg_cgsize(load.oper[0]^.reg)]));
      end;

    begin
      result:=true;
      pending:=TAsmList.create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
             GetNextInstruction(p,loadInstr) and
             (loadInstr.typ=ait_instruction) and
             (taicpu(loadInstr).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
             (taicpu(loadInstr).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
             MemoryAllowsSwap(taicpu(p),taicpu(loadInstr)) and
             GetNextInstruction(loadInstr,userInstr) and
             (userInstr.typ=ait_instruction) and
             { loaded register used by next instruction? }
             (RegInInstruction(taicpu(loadInstr).oper[0]^.reg,userInstr)) and
             { loaded register not used by previous instruction? }
             not(RegInInstruction(taicpu(loadInstr).oper[0]^.reg,p)) and
             { same condition? }
             (taicpu(p).condition=taicpu(loadInstr).condition) and
             { first instruction might not change the register used as base }
             ((taicpu(loadInstr).oper[1]^.ref^.base=NR_NO) or
              not(RegModifiedByInstruction(taicpu(loadInstr).oper[1]^.ref^.base,p))
             ) and
             { first instruction might not change the register used as index }
             ((taicpu(loadInstr).oper[1]^.ref^.index=NR_NO) or
              not(RegModifiedByInstruction(taicpu(loadInstr).oper[1]^.ref^.index,p))
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(loadInstr).oper[1]^.ref^.addressmode = AM_OFFSET) or
              not(instructionLoadsFromReg(taicpu(loadInstr).oper[1]^.ref^.base,p))) then
            begin
              { remember the neighbours of p before it is unlinked }
              scanBack:=tai(p.Previous);
              scanFwd:=tai(p.next);
              asml.Remove(p);
              { if there is a reg. dealloc instruction or address labels (e.g. for GOT-less PIC)
                associated with p, move it together with p }
              { before the instruction? }
              while assigned(scanBack) and (scanBack.typ<>ait_instruction) do
                begin
                  moved:=scanBack;
                  scanBack:=tai(scanBack.Previous);
                  if ( (moved.typ=ait_regalloc) and (tai_regalloc(moved).ratype in [ra_dealloc]) and
                       RegInInstruction(tai_regalloc(moved).reg,p) )
                     or ( (moved.typ=ait_label) and (tai_label(moved).labsym.typ=AT_ADDR) ) then
                    begin
                      asml.Remove(moved);
                      pending.Concat(moved);
                    end;
                end;

              pending.Concat(p);
              SwapRegLive(taicpu(p),taicpu(loadInstr));

              { after the instruction? }
              while assigned(scanFwd) and (scanFwd.typ<>ait_instruction) do
                begin
                  moved:=scanFwd;
                  scanFwd:=tai(scanFwd.next);
                  if (moved.typ=ait_regalloc) and (tai_regalloc(moved).ratype in [ra_dealloc]) and
                     RegInInstruction(tai_regalloc(moved).reg,p) then
                    begin
                      asml.Remove(moved);
                      pending.Concat(moved);
                    end;
                end;

              asml.Remove(loadInstr);
              { if there are address labels associated with userInstr, those must
                stay with userInstr (e.g. for GOT-less PIC) }
              anchor:=userInstr;
              while assigned(userInstr.previous) and
                    (tai(userInstr.previous).typ<>ait_instruction) do
                begin
                  userInstr:=tai(userInstr.previous);
                  if (userInstr.typ=ait_label) and
                     (tai_label(userInstr).labsym.typ=AT_ADDR) then
                    anchor:=userInstr;
                end;

{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),anchor);
{$endif DEBUG_PREREGSCHEDULER}
              { resulting order: load, then p with its companions, then anchor }
              asml.InsertBefore(loadInstr,anchor);
              asml.InsertListBefore(anchor,pending);
              p:=tai(p.next);
            end
          else if p.typ=ait_instruction then
            p:=loadInstr
          else
            p:=tai(p.next);
        end;
      pending.Free;
    end;
  { Shortens the IT instruction found in front of p by one slot.

    Walks backwards from p over at most four instructions (non-instruction
    entries are skipped and not counted). The search stops at the first
    instruction whose opcode lies in A_IT..A_ITTTT:
      - a plain IT directly in front of p (distance 1) is removed from list
        and freed;
      - otherwise, if the distance equals the number of slots of that IT
        variant, its opcode is replaced by the variant without the last slot;
      - in every other case the IT instruction is left unchanged.

    list : assembler list containing p and the instructions in front of it
    p    : entry in front of which the search starts

    NOTE(review): the range test relies on A_IT..A_ITTTT being contiguous in
    the opcode enumeration - confirm against the opcode table. }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;

      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    begin
                      { the IT would become empty: unlink it and release it,
                        a bare Remove would leave the object allocated }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      A_ITE:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;

                  { hp must not be touched past this point, it may have been freed }
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                      (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}

              inc(l);
            end;

          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific additions to peephole pass 1.

    The inherited pass is tried first; only if it reports no change the
    following rewrites are attempted on p:
      Stm2Push   - unconditional STM (FD/DB postfix), pre-indexed, index = SP,
                   register set without r8..r13 and r15  ->  PUSH {regset}
      Ldm2Pop    - unconditional LDM (FD/IA postfix), pre-indexed, index = SP,
                   register set without r8..r14          ->  POP {regset}
      AndR2Uxt   - two operand AND with constant 255/65535   ->  UXTB/UXTH
      AndRR2Uxt  - three operand AND with constant 255/65535 ->  UXTB/UXTH

    When an instruction is replaced, the old one is removed from the list and
    freed, and p is set to the replacement.

    Returns true if the inherited pass or one of the rewrites changed
    something, false otherwise. }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      //hp1,hp2 : tai;
    begin
      result:=false;
      if inherited PeepHoleOptPass1Cpu(p) then
        result:=true
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          DebugMsg('Peephole Stm2Push done', p);

          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced STM, same as the Ldm2Pop case does for the LDM }
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          DebugMsg('Peephole Str2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          DebugMsg('Peephole Ldm2Pop done', p);

          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          DebugMsg('Peephole Ldr2Pop done', p);
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end}
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          DebugMsg('Peephole AndR2Uxt done', p);

          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;

          { two operand form: the destination is also the source }
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);

          result := true;
        end
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          DebugMsg('Peephole AndRR2Uxt done', p);

          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;

          { the constant operand is dropped by reducing the operand count }
          taicpu(p).ops:=2;

          result := true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
          taicpu(hp2).is_jmp := true;
          asml.InsertAfter(hp2, hp1);
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;
          p := hp2;
          result := true;
        end}
    end;
  { Second peephole pass for Thumb-2: collapses a short conditional forward
    branch into an IT block.

    Pattern looked for:
        Bxx   label
        <1..4 instructions accepted by CanBeCond>
      label:
    The skipped instructions get the inverse condition of the branch, an
    IT/ITT/ITTT/ITTTT with that condition is inserted in place of the branch,
    the label's reference count is decremented and the branch is freed. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p,cur,branch : tai;
      count : longint;
      wasLast : boolean;
      invCond : tasmcond;
    begin
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
             (taicpu(p).opcode=A_B) and
             (taicpu(p).condition<>C_None) then
            begin
              { count the instructions that could be made conditional }
              count:=0;
              GetNextInstruction(p,cur);
              while assigned(cur) and
                    (count<=4) and
                    CanBeCond(cur) and
                    { stop on labels }
                    (cur.typ<>ait_label) do
                begin
                  inc(count);
                  { MustBeLast has to be asked before cur is advanced }
                  wasLast:=MustBeLast(cur);
                  GetNextInstruction(cur,cur);
                  if wasLast then
                    break;
                end;

              if assigned(cur) and
                 FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),cur) and
                 (count>0) and
                 (count<=4) then
                begin
                  invCond:=inverse_cond(taicpu(p).condition);
                  branch:=p;
                  GetNextInstruction(p,cur);
                  { scanning resumes at the first instruction after the branch }
                  p:=cur;
                  repeat
                    if cur.typ=ait_instruction then
                      taicpu(cur).condition:=invCond;
                    wasLast:=MustBeLast(cur);
                    GetNextInstruction(cur,cur);
                  until wasLast or
                        not(assigned(cur)) or
                        not(CanBeCond(cur)) or
                        (cur.typ=ait_label);
                  { wait with removing else GetNextInstruction could
                    ignore the label if it was the only usage in the
                    jump moved away }
                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), branch);

                  DecrementPreceedingIT(asml, branch);

                  case count of
                    1: asml.InsertAfter(taicpu.op_cond(A_IT,invCond), branch);
                    2: asml.InsertAfter(taicpu.op_cond(A_ITT,invCond), branch);
                    3: asml.InsertAfter(taicpu.op_cond(A_ITTT,invCond), branch);
                    4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,invCond), branch);
                  end;

                  tasmlabel(taicpu(branch).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(branch);
                  branch.free;
                  { p already points at the next entry to inspect }
                  continue;
                end;
            end;
          p:=tai(p.next);
        end;
    end;
  { Post peephole rewrites for Thumb-2 applied to a single instruction p.

    Most cases turn an unconditional instruction without postfix into its
    flag-setting (PF_S) form and/or reduce it to a two operand form. The
    flag-setting rewrites are only done while NR_DEFAULTFLAGS is not in
    UsedRegs; they bracket p with an alloc/dealloc of the flags register.

    Returns true if p was changed, false otherwise. }
  function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    var
      insn : taicpu;

    { True while the flags register is not marked as used. }
    function FlagsUnused : boolean;
      begin
        result:=not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
      end;

    { Surrounds instr with alloc/dealloc of the flags register and marks the
      flags as used. }
    procedure ReserveFlags(instr : tai);
      begin
        asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,instr), instr);
        asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,instr), instr);
        IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
      end;

    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);

      if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
         (insn.oper[1]^.typ=top_const) and
         (insn.oper[1]^.val >= 0) and
         (insn.oper[1]^.val < 256) and
         FlagsUnused then
        begin
          DebugMsg('Peephole Mov2Movs done', p);
          ReserveFlags(p);
          insn.oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
         (insn.oper[1]^.typ=top_reg) and
         FlagsUnused then
        begin
          DebugMsg('Peephole Mvn2Mvns done', p);
          ReserveFlags(p);
          insn.oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
         (insn.ops = 3) and
         (insn.oper[2]^.typ=top_const) and
         (insn.oper[2]^.val=0) and
         FlagsUnused then
        begin
          DebugMsg('Peephole Rsb2Rsbs done', p);
          ReserveFlags(p);
          insn.oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
         (insn.ops = 3) and
         MatchOperand(insn.oper[0]^, insn.oper[1]^) and
         (not MatchOperand(insn.oper[0]^, NR_STACK_POINTER_REG)) and
         (insn.oper[2]^.typ=top_const) and
         (insn.oper[2]^.val >= 0) and
         (insn.oper[2]^.val < 256) and
         FlagsUnused then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          ReserveFlags(p);
          { op rd,rd,#imm -> ops rd,#imm }
          insn.loadconst(1,insn.oper[2]^.val);
          insn.oppostfix:=PF_S;
          insn.ops := 2;
          result:=true;
        end
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
         (insn.ops = 2) and
         (insn.oper[1]^.typ=top_reg) and
         (not MatchOperand(insn.oper[0]^, NR_STACK_POINTER_REG)) and
         (not MatchOperand(insn.oper[1]^, NR_STACK_POINTER_REG)) and
         FlagsUnused then
        begin
          DebugMsg('Peephole AddSub2*s done', p);
          ReserveFlags(p);
          insn.oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
         (insn.ops = 3) and
         MatchOperand(insn.oper[0]^, insn.oper[1]^) and
         (insn.oper[2]^.typ=top_reg) then
        begin
          DebugMsg('Peephole AddRRR2AddRR done', p);
          { add rd,rd,rm -> add rd,rm; the flags are not touched here }
          insn.ops := 2;
          insn.loadreg(1,insn.oper[2]^.reg);
          result:=true;
        end
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
         (insn.ops = 3) and
         MatchOperand(insn.oper[0]^, insn.oper[1]^) and
         (insn.oper[2]^.typ=top_reg) and
         FlagsUnused then
        begin
          DebugMsg('Peephole opXXY2opsXY done', p);
          ReserveFlags(p);
          insn.ops := 2;
          insn.loadreg(1,insn.oper[2]^.reg);
          insn.oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
         (insn.ops = 3) and
         MatchOperand(insn.oper[0]^, insn.oper[1]^) and
         (insn.oper[2]^.typ in [top_reg,top_const]) then
        begin
          DebugMsg('Peephole opXXY2opXY done', p);
          { already flag-setting: only the operand count is reduced }
          insn.ops := 2;
          if insn.oper[2]^.typ=top_reg then
            insn.loadreg(1,insn.oper[2]^.reg)
          else
            insn.loadconst(1,insn.oper[2]^.val);
          result:=true;
        end
      else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
         (insn.ops = 3) and
         MatchOperand(insn.oper[0]^, insn.oper[2]^) and
         FlagsUnused then
        begin
          DebugMsg('Peephole opXYX2opsXY done', p);
          ReserveFlags(p);
          { op rd,rn,rd -> ops rd,rn: operand 1 is kept, operand 2 is dropped }
          insn.oppostfix:=PF_S;
          insn.ops := 2;
          result:=true;
        end
      else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
         (insn.ops=3) and
         (insn.oper[2]^.typ=top_shifterop) and
         (insn.oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
         //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         FlagsUnused then
        begin
          DebugMsg('Peephole Mov2Shift done', p);
          ReserveFlags(p);
          insn.oppostfix:=PF_S;
          { the shift mode of the shifter operand selects the new opcode }
          case insn.oper[2]^.shifterop^.shiftmode of
            SM_LSL: insn.opcode:=A_LSL;
            SM_LSR: insn.opcode:=A_LSR;
            SM_ASR: insn.opcode:=A_ASR;
            SM_ROR: insn.opcode:=A_ROR;
          end;
          { the shifter operand is replaced by its shift register or, if there is none, its immediate }
          if insn.oper[2]^.shifterop^.rs<>NR_NO then
            insn.loadreg(2, insn.oper[2]^.shifterop^.rs)
          else
            insn.loadconst(2, insn.oper[2]^.shifterop^.shiftimm);
          result:=true;
        end;
    end;
  begin
    { unit initialization: assign the classes implemented in this unit to the
      two global class variables }
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
    casmoptimizer:=TCpuAsmOptimizer;
  end.