aoptcpu.pas
{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the ARM optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
Unit aoptcpu;

{$i fpcdefs.inc}

{ $define DEBUG_PREREGSCHEDULER}
{ $define DEBUG_AOPTCPU}

Interface

uses
  cgbase, cgutils, cpubase, aasmtai,
  aasmcpu,
  aopt, aoptobj, aoptarm;

Type

  { TCpuAsmOptimizer: peephole optimizer for the ARM target }
  TCpuAsmOptimizer = class(TARMAsmOptimizer)
    { Can't be done in some cases due to the limited range of jumps }
    function CanDoJumpOpts: Boolean; override;
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    function RemoveSuperfluousVMov(const p : tai; movp : tai; const optimizer : string) : boolean;
    { gets the next tai object after current that contains info relevant
      to the optimizer in p1 which used the given register or does a
      change in program flow.
      If there is none, it returns false and
      sets p1 to nil }
    Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { With these routines, there's optimisation code that's general for all ARM platforms }
    function OptPass1And(var p: tai): Boolean; override;
    function OptPass1LDR(var p: tai): Boolean; override;
    function OptPass1STR(var p: tai): Boolean; override;
  protected
    function LookForPreindexedPattern(p: taicpu): boolean;
    function LookForPostindexedPattern(p: taicpu): boolean;
    { Individual optimisation routines }
    function OptPass1DataCheckMov(var p: tai): Boolean;
    function OptPass1ADDSUB(var p: tai): Boolean;
    function OptPass1CMP(var p: tai): Boolean;
    function OptPass1STM(var p: tai): Boolean;
    function OptPass1MOV(var p: tai): Boolean;
    function OptPass1MUL(var p: tai): Boolean;
    function OptPass1MVN(var p: tai): Boolean;
    function OptPass1VMov(var p: tai): Boolean;
    function OptPass1VOp(var p: tai): Boolean;
    function OptPass1Push(var p: tai): Boolean;
    function OptPass2Bcc(var p: tai): Boolean;
    function OptPass2STM(var p: tai): Boolean;
    function OptPass2STR(var p: tai): Boolean;
  End;

  { instruction scheduler run before register allocation }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean;override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;

  { Thumb-2 specific peephole optimizer, built on the generic ARM one }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2;override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  protected
    function OptPass1AndThumb2(var p : tai) : boolean;
    function OptPass1LDM(var p : tai) : boolean;
    function OptPass1STM(var p : tai) : boolean;
  End;

function MustBeLast(p : tai) : boolean;

Implementation

uses
  cutils,verbose,globtype,globals,
  systems,
  cpuinfo,
  cgobj,procinfo,
  aasmbase,aasmdata,
  aoptutils;

{ Range check must be disabled explicitly as conversions between signed and unsigned
  32-bit values are done without explicit typecasts }
{$R-}
  95. function CanBeCond(p : tai) : boolean;
  96. begin
  97. result:=
  98. not(GenerateThumbCode) and
  99. (p.typ=ait_instruction) and
  100. (taicpu(p).condition=C_None) and
  101. ((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
  102. (taicpu(p).opcode<>A_CBZ) and
  103. (taicpu(p).opcode<>A_CBNZ) and
  104. (taicpu(p).opcode<>A_PLD) and
  105. (((taicpu(p).opcode<>A_BLX) and
  106. { BL may need to be converted into BLX by the linker -- could possibly
  107. be allowed in case it's to a local symbol of which we know that it
  108. uses the same instruction set as the current one }
  109. (taicpu(p).opcode<>A_BL)) or
  110. (taicpu(p).oper[0]^.typ=top_reg));
  111. end;
  112. function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
  113. begin
  114. Result:=false;
  115. if (taicpu(movp).condition = C_EQ) and
  116. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  117. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  118. begin
  119. asml.insertafter(tai_comment.Create(strpnew('Peephole Optimization: CmpMovMov - Removed redundant moveq')), movp);
  120. asml.remove(movp);
  121. movp.free;
  122. Result:=true;
  123. end;
  124. end;
  125. function AlignedToQWord(const ref : treference) : boolean;
  126. begin
  127. { (safe) heuristics to ensure alignment }
  128. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  129. (((ref.offset>=0) and
  130. ((ref.offset mod 8)=0) and
  131. ((ref.base=NR_R13) or
  132. (ref.index=NR_R13))
  133. ) or
  134. ((ref.offset<=0) and
  135. { when using NR_R11, it has always a value of <qword align>+4 }
  136. ((abs(ref.offset+4) mod 8)=0) and
  137. (current_procinfo.framepointer=NR_R11) and
  138. ((ref.base=NR_R11) or
  139. (ref.index=NR_R11))
  140. )
  141. );
  142. end;
  143. function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
  144. begin
  145. if GenerateThumb2Code then
  146. result := (aoffset<4096) and (aoffset>-256)
  147. else
  148. result := ((pf in [PF_None,PF_B]) and
  149. (abs(aoffset)<4096)) or
  150. (abs(aoffset)<256);
  151. end;
  152. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  153. var
  154. p: taicpu;
  155. i: longint;
  156. begin
  157. instructionLoadsFromReg := false;
  158. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  159. exit;
  160. p:=taicpu(hp);
  161. i:=1;
  162. {For these instructions we have to start on oper[0]}
  163. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  164. A_CMP, A_CMN, A_TST, A_TEQ,
  165. A_B, A_BL, A_BX, A_BLX,
  166. A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then i:=0;
  167. while(i<p.ops) do
  168. begin
  169. case p.oper[I]^.typ of
  170. top_reg:
  171. instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
  172. { STRD }
  173. ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
  174. top_regset:
  175. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  176. top_shifterop:
  177. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  178. top_ref:
  179. instructionLoadsFromReg :=
  180. (p.oper[I]^.ref^.base = reg) or
  181. (p.oper[I]^.ref^.index = reg);
  182. else
  183. ;
  184. end;
  185. if (i=0) and (p.opcode in [A_LDM,A_VLDM]) then
  186. exit;
  187. if instructionLoadsFromReg then
  188. exit; {Bailout if we found something}
  189. Inc(I);
  190. end;
  191. end;
  192. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  193. var
  194. p: taicpu;
  195. begin
  196. Result := false;
  197. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  198. exit;
  199. p := taicpu(hp);
  200. case p.opcode of
  201. { These operands do not write into a register at all }
  202. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD,
  203. A_VCMP:
  204. exit;
  205. {Take care of post/preincremented store and loads, they will change their base register}
  206. A_STR, A_LDR:
  207. begin
  208. Result := false;
  209. { actually, this does not apply here because post-/preindexed does not mean that a register
  210. is loaded with a new value, it is only modified
  211. (taicpu(p).oper[1]^.typ=top_ref) and
  212. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  213. (taicpu(p).oper[1]^.ref^.base = reg);
  214. }
  215. { STR does not load into it's first register }
  216. if p.opcode = A_STR then
  217. exit;
  218. end;
  219. A_VSTR:
  220. begin
  221. Result := false;
  222. exit;
  223. end;
  224. { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
  225. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  226. Result :=
  227. (p.oper[1]^.typ = top_reg) and
  228. (p.oper[1]^.reg = reg);
  229. {Loads to oper2 from coprocessor}
  230. {
  231. MCR/MRC is currently not supported in FPC
  232. A_MRC:
  233. Result :=
  234. (p.oper[2]^.typ = top_reg) and
  235. (p.oper[2]^.reg = reg);
  236. }
  237. {Loads to all register in the registerset}
  238. A_LDM, A_VLDM:
  239. Result := (getsupreg(reg) in p.oper[1]^.regset^);
  240. A_POP:
  241. Result := (getsupreg(reg) in p.oper[0]^.regset^) or
  242. (reg=NR_STACK_POINTER_REG);
  243. else
  244. ;
  245. end;
  246. if Result then
  247. exit;
  248. case p.oper[0]^.typ of
  249. {This is the case}
  250. top_reg:
  251. Result := (p.oper[0]^.reg = reg) or
  252. { LDRD }
  253. (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
  254. {LDM/STM might write a new value to their index register}
  255. top_ref:
  256. Result :=
  257. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  258. (taicpu(p).oper[0]^.ref^.base = reg);
  259. else
  260. ;
  261. end;
  262. end;
  263. function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai; out
  264. Next: tai; const ref: TReference; StopOnStore: Boolean): Boolean;
  265. begin
  266. Next:=Current;
  267. repeat
  268. Result:=GetNextInstruction(Next,Next);
  269. if Result and
  270. (Next.typ=ait_instruction) and
  271. (taicpu(Next).opcode in [A_LDR, A_STR]) and
  272. (
  273. ((taicpu(Next).ops = 2) and
  274. (taicpu(Next).oper[1]^.typ = top_ref) and
  275. RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
  276. ((taicpu(Next).ops = 3) and { LDRD/STRD }
  277. (taicpu(Next).oper[2]^.typ = top_ref) and
  278. RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
  279. ) then
  280. {We've found an instruction LDR or STR with the same reference}
  281. exit;
  282. until not(Result) or
  283. (Next.typ<>ait_instruction) or
  284. not(cs_opt_level3 in current_settings.optimizerswitches) or
  285. is_calljmp(taicpu(Next).opcode) or
  286. (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or
  287. RegModifiedByInstruction(NR_PC,Next);
  288. Result:=false;
  289. end;
{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';

  { inserts s as an assembler comment directly in front of p }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  const
    SPeepholeOptimization = '';

  { debug messages are compiled out in non-debug builds }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
  304. function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
  305. begin
  306. { Cannot perform these jump optimisations if the ARM architecture has 16-bit thumb codes }
  307. Result := not (
  308. (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
  309. );
  310. end;
  311. function TCpuAsmOptimizer.RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string):boolean;
  312. var
  313. alloc,
  314. dealloc : tai_regalloc;
  315. hp1 : tai;
  316. begin
  317. Result:=false;
  318. if ((MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  319. ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR))
  320. ) or
  321. (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
  322. (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
  323. ) and
  324. (taicpu(movp).ops=2) and
  325. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  326. { the destination register of the mov might not be used beween p and movp }
  327. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  328. { Take care to only do this for instructions which REALLY load to the first register.
  329. Otherwise
  330. vstr reg0, [reg1]
  331. vmov reg2, reg0
  332. will be optimized to
  333. vstr reg2, [reg1]
  334. }
  335. regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  336. begin
  337. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  338. if assigned(dealloc) then
  339. begin
  340. DebugMsg(SPeepholeOptimization + optimizer + ' removed superfluous vmov', movp);
  341. result:=true;
  342. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  343. and remove it if possible }
  344. asml.Remove(dealloc);
  345. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  346. if assigned(alloc) then
  347. begin
  348. asml.Remove(alloc);
  349. alloc.free;
  350. dealloc.free;
  351. end
  352. else
  353. asml.InsertAfter(dealloc,p);
  354. { try to move the allocation of the target register }
  355. GetLastInstruction(movp,hp1);
  356. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  357. if assigned(alloc) then
  358. begin
  359. asml.Remove(alloc);
  360. asml.InsertBefore(alloc,p);
  361. { adjust used regs }
  362. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  363. end;
  364. { change
  365. vldr reg0,[reg1]
  366. vmov reg2,reg0
  367. into
  368. ldr reg2,[reg1]
  369. if reg2 is an int register
  370. }
  371. if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
  372. taicpu(p).opcode:=A_LDR;
  373. { finally get rid of the mov }
  374. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  375. asml.remove(movp);
  376. movp.free;
  377. end;
  378. end;
  379. end;
  380. {
  381. optimize
  382. add/sub reg1,reg1,regY/const
  383. ...
  384. ldr/str regX,[reg1]
  385. into
  386. ldr/str regX,[reg1, regY/const]!
  387. }
  388. function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
  389. var
  390. hp1: tai;
  391. begin
  392. if GenerateARMCode and
  393. (p.ops=3) and
  394. MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
  395. GetNextInstructionUsingReg(p, hp1, p.oper[0]^.reg) and
  396. (not RegModifiedBetween(p.oper[0]^.reg, p, hp1)) and
  397. MatchInstruction(hp1, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) and
  398. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  399. (taicpu(hp1).oper[1]^.ref^.base=p.oper[0]^.reg) and
  400. (taicpu(hp1).oper[0]^.reg<>p.oper[0]^.reg) and
  401. (taicpu(hp1).oper[1]^.ref^.offset=0) and
  402. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  403. (((p.oper[2]^.typ=top_reg) and
  404. (not RegModifiedBetween(p.oper[2]^.reg, p, hp1))) or
  405. ((p.oper[2]^.typ=top_const) and
  406. ((abs(p.oper[2]^.val) < 256) or
  407. ((abs(p.oper[2]^.val) < 4096) and
  408. (taicpu(hp1).oppostfix in [PF_None,PF_B]))))) then
  409. begin
  410. taicpu(hp1).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
  411. if p.oper[2]^.typ=top_reg then
  412. begin
  413. taicpu(hp1).oper[1]^.ref^.index:=p.oper[2]^.reg;
  414. if p.opcode=A_ADD then
  415. taicpu(hp1).oper[1]^.ref^.signindex:=1
  416. else
  417. taicpu(hp1).oper[1]^.ref^.signindex:=-1;
  418. end
  419. else
  420. begin
  421. if p.opcode=A_ADD then
  422. taicpu(hp1).oper[1]^.ref^.offset:=p.oper[2]^.val
  423. else
  424. taicpu(hp1).oper[1]^.ref^.offset:=-p.oper[2]^.val;
  425. end;
  426. result:=true;
  427. end
  428. else
  429. result:=false;
  430. end;
  431. {
  432. optimize
  433. ldr/str regX,[reg1]
  434. ...
  435. add/sub reg1,reg1,regY/const
  436. into
  437. ldr/str regX,[reg1], regY/const
  438. }
  439. function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
  440. var
  441. hp1 : tai;
  442. begin
  443. Result:=false;
  444. if (p.oper[1]^.typ = top_ref) and
  445. (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
  446. (p.oper[1]^.ref^.index=NR_NO) and
  447. (p.oper[1]^.ref^.offset=0) and
  448. GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
  449. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  450. MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
  451. (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
  452. (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
  453. (
  454. (taicpu(hp1).oper[2]^.typ=top_reg) or
  455. { valid offset? }
  456. ((taicpu(hp1).oper[2]^.typ=top_const) and
  457. ((abs(taicpu(hp1).oper[2]^.val)<256) or
  458. ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
  459. )
  460. )
  461. ) and
  462. { don't apply the optimization if the base register is loaded }
  463. (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
  464. not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
  465. { don't apply the optimization if the (new) index register is loaded }
  466. (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
  467. not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
  468. GenerateARMCode then
  469. begin
  470. DebugMsg(SPeepholeOptimization + 'Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
  471. p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
  472. if taicpu(hp1).oper[2]^.typ=top_const then
  473. begin
  474. if taicpu(hp1).opcode=A_ADD then
  475. p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
  476. else
  477. p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
  478. end
  479. else
  480. begin
  481. p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
  482. if taicpu(hp1).opcode=A_ADD then
  483. p.oper[1]^.ref^.signindex:=1
  484. else
  485. p.oper[1]^.ref^.signindex:=-1;
  486. end;
  487. asml.Remove(hp1);
  488. hp1.Free;
  489. Result:=true;
  490. end;
  491. end;
function TCpuAsmOptimizer.OptPass1ADDSUB(var p: tai): Boolean;
  var
    hp1,hp2: tai;
    sign: Integer;
    newvalue: TCGInt;
    b: byte;
  begin
    { First try to fold a trailing "mov reg2,reg1" into the add/sub itself. }
    Result := OptPass1DataCheckMov(p);
    {
      change
      add/sub reg2,reg1,const1
      str/ldr reg3,[reg2,const2]
      dealloc reg2
      to
      str/ldr reg3,[reg1,const2+/-const1]
    }
    if (not GenerateThumbCode) and
       (taicpu(p).ops>2) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       (taicpu(p).oper[2]^.typ = top_const) then
      begin
        hp1:=p;
        { Walk forward over every ldr/str that addresses memory through reg2;
          each one must keep an encodable offset after folding the constant. }
        while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
          { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
          MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
          (taicpu(hp1).oper[1]^.typ = top_ref) and
          (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
          { don't optimize if the register is stored/overwritten }
          (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
          { new offset must be valid: either in the range of 8 or 12 bit, depending on the
            ldr postfix }
          (((taicpu(p).opcode=A_ADD) and
            isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
           ) or
           ((taicpu(p).opcode=A_SUB) and
            isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
           )
          ) do
          begin
            { neither reg1 nor reg2 might be changed inbetween }
            if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
              RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
              break;
            { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
            if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
              assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
              begin
                { remember last instruction }
                hp2:=hp1;
                DebugMsg(SPeepholeOptimization + 'Add/SubLdr2Ldr done', p);
                hp1:=p;
                { fix all ldr/str: rebase each one on reg1 and fold the
                  constant into its offset, up to and including hp2 }
                while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
                  begin
                    taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
                    if taicpu(p).opcode=A_ADD then
                      inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
                    else
                      dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
                    if hp1=hp2 then
                      break;
                  end;
                RemoveCurrentP(p);
                result:=true;
                Exit;
              end;
          end;
      end;
    {
      optimize
      add/sub rx,ry,const1
      add/sub rx,rx,const2
      into
      add/sub rx,ry,const1+/-const2
      check if the first operation has no postfix and condition
    }
    if MatchInstruction(p,[A_ADD,A_SUB],[C_None],[PF_None]) and
      MatchOptype(taicpu(p),top_reg,top_reg,top_const) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
      MatchOptype(taicpu(hp1),top_reg,top_reg,top_const) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
      begin
        { opposite opcodes mean the second constant is subtracted }
        sign:=1;
        if (taicpu(p).opcode=A_SUB) xor (taicpu(hp1).opcode=A_SUB) then
          sign:=-1;
        newvalue:=taicpu(p).oper[2]^.val+sign*taicpu(hp1).oper[2]^.val;
        { merged constant must still be encodable as an immediate }
        if (not(GenerateThumbCode) and is_shifter_const(newvalue,b)) or
          (GenerateThumbCode and is_thumb_imm(newvalue)) then
          begin
            DebugMsg(SPeepholeOptimization + 'Merge Add/Sub done', p);
            taicpu(p).oper[2]^.val:=newvalue;
            RemoveInstruction(hp1);
            Result:=true;
            { adding/subtracting 0 makes the remaining instruction superfluous }
            if newvalue=0 then
              begin
                RemoveCurrentP(p);
                Exit;
              end;
          end;
      end;
    { Finally, try to fold the add/sub into a pre-indexed addressing mode of a
      following load/store. }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_None) and
      LookForPreindexedPattern(taicpu(p)) then
      begin
        DebugMsg(SPeepholeOptimization + 'Add/Sub to Preindexed done', p);
        RemoveCurrentP(p);
        Result:=true;
        Exit;
      end;
  end;
function TCpuAsmOptimizer.OptPass1MUL(var p: tai): Boolean;
  var
    hp1: tai;
    oldreg: tregister;
  begin
    { First try to fold a trailing "mov reg2,reg1" into the mul itself. }
    Result := OptPass1DataCheckMov(p);
    {
      Turn
      mul reg0, z,w
      sub/add x, y, reg0
      dealloc reg0
      into
      mls/mla x,z,w,y
    }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_None) and
      (taicpu(p).ops=3) and
      (taicpu(p).oper[0]^.typ = top_reg) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (taicpu(p).oper[2]^.typ = top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
      { the multiplier operands must still hold the same values at hp1 }
      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
      (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
      { MLA exists on all targeted CPUs from ARMv4 on, MLS only on the listed ones }
      (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
       ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
      // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
      // TODO: A workaround would be to swap Rm and Rs
      (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
      { the mul result may feed a 3-operand add/sub (as second operand or,
        for add only, as first source operand) or a 2-operand add/sub }
      (((taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_reg) and
        ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
          (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
         ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
          (taicpu(hp1).opcode=A_ADD) and
          (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
       ((taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
      { reg0 must die at hp1, otherwise it would still need its value }
      (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
      begin
        if taicpu(hp1).opcode=A_ADD then
          begin
            taicpu(hp1).opcode:=A_MLA;
            { pick the accumulator operand (the add operand that is NOT the
              mul result) before loadreg overwrites the operands }
            if taicpu(hp1).ops=3 then
              begin
                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
                  oldreg:=taicpu(hp1).oper[2]^.reg
                else
                  oldreg:=taicpu(hp1).oper[1]^.reg;
              end
            else
              oldreg:=taicpu(hp1).oper[0]^.reg;
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
            taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
            taicpu(hp1).loadreg(3,oldreg);
            DebugMsg(SPeepholeOptimization + 'MulAdd2MLA done', p);
          end
        else
          begin
            taicpu(hp1).opcode:=A_MLS;
            { MLS x,z,w,y computes y-z*w; move the minuend into operand 3 first }
            taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
            if taicpu(hp1).ops=2 then
              taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
            else
              taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'MulSub2MLS done', p);
            { keep all three source registers allocated over the moved range }
            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
            AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
            AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
          end;
        taicpu(hp1).ops:=4;
        RemoveCurrentP(p);
        Result := True;
        Exit;
      end
  end;
  684. function TCpuAsmOptimizer.OptPass1And(var p: tai): Boolean;
  685. begin
  686. Result := OptPass1DataCheckMov(p);
  687. Result := inherited OptPass1And(p) or Result;
  688. end;
  689. function TCpuAsmOptimizer.OptPass1DataCheckMov(var p: tai): Boolean;
  690. var
  691. hp1: tai;
  692. begin
  693. {
  694. change
  695. op reg1, ...
  696. mov reg2, reg1
  697. to
  698. op reg2, ...
  699. }
  700. Result := (taicpu(p).ops >= 3) and
  701. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  702. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  703. end;
function TCpuAsmOptimizer.OptPass1CMP(var p: tai): Boolean;
  var
    hp1, hp2, hp_last: tai;
    MovRem1, MovRem2: Boolean;
  begin
    Result := False;
    { These optimizations can be applied only to the currently enabled operations because
      the other operations do not update all flags and FPC does not track flag usage }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oper[1]^.typ = top_const) and
      GetNextInstruction(p, hp1) then
      begin
        {
          change
          cmp reg,const1
          moveq reg,const1
          movne reg,const2
          to
          cmp reg,const1
          movne reg,const2
        }
        if MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
          (taicpu(hp1).oper[1]^.typ = top_const) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
          (taicpu(hp2).oper[1]^.typ = top_const) then
          begin
            { try to drop each conditional mov; both may turn out redundant }
            MovRem1 := RemoveRedundantMove(p, hp1, asml);
            MovRem2 := RemoveRedundantMove(p, hp2, asml);
            Result:= MovRem1 or MovRem2;
            { Make sure that hp1 is still the next instruction after p }
            if MovRem1 then
              if MovRem2 then
                begin
                  if not GetNextInstruction(p, hp1) then
                    Exit;
                end
              else
                hp1 := hp2;
          end;
        {
          change
          <op> reg,x,y
          cmp reg,#0
          into
          <op>s reg,x,y
        }
        if (taicpu(p).oppostfix = PF_None) and
          (taicpu(p).oper[1]^.val = 0) and
          { be careful here, following instructions could use other flags
            however after a jump fpc never depends on the value of flags }
          { All above instructions set Z and N according to the following
            Z := result = 0;
            N := result[31];
            EQ = Z=1; NE = Z=0;
            MI = N=1; PL = N=0; }
          (MatchInstruction(hp1, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
           { mov is also possible, but only if there is no shifter operand, it could be an rxx,
             we are too lazy to check if it is rxx or something else }
           (MatchInstruction(hp1, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp1).ops=2))) and
          GetLastInstruction(p, hp_last) and
          MatchInstruction(hp_last, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,
            A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
          (
            { mlas is only allowed in arm mode }
            (taicpu(hp_last).opcode<>A_MLA) or
            (current_settings.instructionset<>is_thumb)
          ) and
          { the preceding operation must write the compared register }
          (taicpu(hp_last).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
          { the flags must die after the conditional instruction }
          assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp1.Next))) then
          begin
            DebugMsg(SPeepholeOptimization + 'OpCmp2OpS done', hp_last);
            taicpu(hp_last).oppostfix:=PF_S;
            { move flag allocation if possible }
            hp1:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp_last.Next));
            if assigned(hp1) then
              begin
                asml.Remove(hp1);
                asml.insertbefore(hp1, hp_last);
              end;
            RemoveCurrentP(p);
            Result:=true;
          end;
      end;
  end;
function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  var
    hp1: tai;
  begin
    { generic LDR optimisations of the base optimizer go first }
    Result := inherited OptPass1LDR(p);
    if Result then
      Exit;
    { change
      ldr reg1,ref
      ldr reg2,ref
      into ...
    }
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      GetNextInstruction(p,hp1) and
      { ldrd is not allowed here }
      MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
      begin
        {
          ...
          ldr reg1,ref
          mov reg2,reg1
        }
        if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
          RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
          { reg1 must not be part of the second reference, else the first
            load changes the address the second one reads }
          (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
          begin
            if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
              begin
                { both loads target the same register: drop the second load }
                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldr done', hp1);
                asml.remove(hp1);
                hp1.free;
              end
            else
              begin
                { different target: replace the second load by a register move }
                DebugMsg(SPeepholeOptimization + 'LdrLdr2LdrMov done', hp1);
                taicpu(hp1).opcode:=A_MOV;
                taicpu(hp1).oppostfix:=PF_None;
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
              end;
            result := true;
          end
        {
          ...
          ldrd reg1,reg1+1,ref
        }
        else if (GenerateARMCode or GenerateThumb2Code) and
          (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
          { ldrd does not allow any postfixes ... }
          (taicpu(p).oppostfix=PF_None) and
          { ldrd needs an even first register, followed by its successor }
          not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
          (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
          { ldr ensures that either base or index contain no register, else ldr wouldn't
            use an offset either
          }
          (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
          (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
          AlignedToQWord(taicpu(p).oper[1]^.ref^) then
          begin
            DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldrd done', p);
            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
            taicpu(p).ops:=3;
            taicpu(p).oppostfix:=PF_D;
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
    {
      Change
      ldrb dst1, [REF]
      and dst2, dst1, #255
      into
      ldrb dst2, [ref]
    }
    if not(GenerateThumbCode) and
      (taicpu(p).oppostfix=PF_B) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
      (taicpu(hp1).oper[2]^.typ = top_const) and
      { ldrb already zero-extends, so the $FF mask is superfluous }
      (taicpu(hp1).oper[2]^.val = $FF) and
      not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'LdrbAnd2Ldrb done', p);
        taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
        asml.remove(hp1);
        hp1.free;
        result:=true;
      end;
    { try to fold a following base-register add/sub into a post-indexed
      addressing mode }
    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
    { Remove superfluous mov after ldr
      changes
      ldr reg1, ref
      mov reg2, reg1
      to
      ldr reg2, ref
      conditions are:
        * no ldrd usage
        * reg1 must be released after mov
        * mov can not contain shifterops
        * ldr+mov have the same conditions
        * mov does not set flags
    }
    if (taicpu(p).oppostfix<>PF_D) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
      Result:=true;
  end;
function TCpuAsmOptimizer.OptPass1STM(var p: tai): Boolean;
  var
    hp1, hp2, hp3, hp4: tai;
  begin
    Result := False;
    {
      change
      stmfd r13!,[r14]
      sub r13,r13,#4
      bl abc
      add r13,r13,#4
      ldmfd r13!,[r15]
      into
      b abc
    }
    { not applicable with interworking: the final branch must not need a
      mode switch that bl/bx would have provided }
    if not(ts_thumb_interworking in current_settings.targetswitches) and
      (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_FD) and
      { p must be exactly "stmfd sp!,[lr]" }
      (taicpu(p).oper[0]^.typ = top_ref) and
      (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.offset=0) and
      (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
      (taicpu(p).oper[1]^.typ = top_regset) and
      (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
      { hp1 must be "sub sp,sp,#const" }
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
      (taicpu(hp1).oper[0]^.typ = top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[2]^.typ = top_const) and
      { hp2 must be the call itself }
      GetNextInstruction(hp1, hp2) and
      SkipEntryExitMarker(hp2, hp2) and
      MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      { hp3 must undo the sub with the identical constant }
      GetNextInstruction(hp2, hp3) and
      SkipEntryExitMarker(hp3, hp3) and
      MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
      MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
      { hp4 must be "ldmfd sp!,[pc]", i.e. the return }
      GetNextInstruction(hp3, hp4) and
      MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
      (taicpu(hp4).oper[1]^.typ = top_regset) and
      (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
      begin
        { drop the stack bookkeeping and turn the call into a tail jump }
        asml.Remove(hp1);
        asml.Remove(hp3);
        asml.Remove(hp4);
        taicpu(hp2).opcode:=A_B;
        hp1.free;
        hp3.free;
        hp4.free;
        RemoveCurrentp(p, hp2);
        DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
        Result := True;
      end;
  end;
function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  var
    hp1: tai;
  begin
    { generic STR optimisations of the base optimizer go first }
    Result := inherited OptPass1STR(p);
    if Result then
      Exit;
    { Common conditions }
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      (taicpu(p).oppostfix=PF_None) then
      begin
        { change
          str reg1,ref
          ldr reg2,ref
          into
          str reg1,ref
          mov reg2,reg1
        }
        if (taicpu(p).condition=C_None) and
          GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
          MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
          (taicpu(hp1).oper[1]^.typ=top_ref) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
          { neither the stored value nor the address registers may change
            between the store and the load }
          not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
          ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
          ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
          begin
            if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
              begin
                { load of the value just stored into the same register: drop it }
                DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 1 done', hp1);
                asml.remove(hp1);
                hp1.free;
              end
            else
              begin
                { otherwise replace the reload by a register move }
                taicpu(hp1).opcode:=A_MOV;
                taicpu(hp1).oppostfix:=PF_None;
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 2 done', hp1);
              end;
            result := True;
          end
        { change
          str reg1,ref
          str reg2,ref
          into
          strd reg1,reg2,ref
        }
        else if (GenerateARMCode or GenerateThumb2Code) and
          (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
          { strd needs an even first register and an 8-bit offset on an
            8-byte aligned reference }
          not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
          (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
          AlignedToQWord(taicpu(p).oper[1]^.ref^) and
          GetNextInstruction(p,hp1) and
          MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
          (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
          { str ensures that either base or index contain no register, else ldr wouldn't
            use an offset either
          }
          (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) then
          begin
            DebugMsg(SPeepholeOptimization + 'StrStr2Strd done', p);
            taicpu(p).oppostfix:=PF_D;
            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
            taicpu(p).ops:=3;
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
    { try to fold a following base-register add/sub into a post-indexed
      addressing mode }
    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  end;
  1039. function TCpuAsmOptimizer.OptPass1MOV(var p: tai): Boolean;
  1040. var
  1041. hp1, hpfar1, hp2: tai;
  1042. i, i2: longint;
  1043. tempop: tasmop;
  1044. dealloc: tai_regalloc;
  1045. begin
  1046. Result := False;
  1047. hp1 := nil;
  1048. { fold
  1049. mov reg1,reg0, shift imm1
  1050. mov reg1,reg1, shift imm2
  1051. }
  1052. if (taicpu(p).ops=3) and
  1053. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1054. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1055. getnextinstruction(p,hp1) and
  1056. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1057. (taicpu(hp1).ops=3) and
  1058. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  1059. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1060. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1061. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  1062. begin
  1063. { fold
  1064. mov reg1,reg0, lsl 16
  1065. mov reg1,reg1, lsr 16
  1066. strh reg1, ...
  1067. dealloc reg1
  1068. to
  1069. strh reg1, ...
  1070. dealloc reg1
  1071. }
  1072. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1073. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  1074. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  1075. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  1076. getnextinstruction(hp1,hp2) and
  1077. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  1078. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  1079. begin
  1080. TransferUsedRegs(TmpUsedRegs);
  1081. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1082. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1083. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  1084. begin
  1085. DebugMsg(SPeepholeOptimization + 'Removed superfluous 16 Bit zero extension', hp1);
  1086. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  1087. asml.remove(hp1);
  1088. hp1.free;
  1089. RemoveCurrentP(p, hp2);
  1090. Result:=true;
  1091. Exit;
  1092. end;
  1093. end
  1094. { fold
  1095. mov reg1,reg0, shift imm1
  1096. mov reg1,reg1, shift imm2
  1097. to
  1098. mov reg1,reg0, shift imm1+imm2
  1099. }
  1100. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
1101. { an asr has no effect after an lsr, so the asr can be folded into the lsr }
  1102. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  1103. begin
  1104. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1105. { avoid overflows }
  1106. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  1107. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  1108. SM_ROR:
  1109. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  1110. SM_ASR:
  1111. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  1112. SM_LSR,
  1113. SM_LSL:
  1114. begin
  1115. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  1116. InsertLLItem(p.previous, p.next, hp2);
  1117. p.free;
  1118. p:=hp2;
  1119. end;
  1120. else
  1121. internalerror(2008072803);
  1122. end;
  1123. DebugMsg(SPeepholeOptimization + 'ShiftShift2Shift 1 done', p);
  1124. asml.remove(hp1);
  1125. hp1.free;
  1126. hp1 := nil;
  1127. result := true;
  1128. end
  1129. { fold
  1130. mov reg1,reg0, shift imm1
  1131. mov reg1,reg1, shift imm2
  1132. mov reg1,reg1, shift imm3 ...
  1133. mov reg2,reg1, shift imm3 ...
  1134. }
  1135. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  1136. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1137. (taicpu(hp2).ops=3) and
  1138. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1139. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  1140. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1141. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  1142. begin
  1143. { mov reg1,reg0, lsl imm1
  1144. mov reg1,reg1, lsr/asr imm2
  1145. mov reg2,reg1, lsl imm3 ...
  1146. to
  1147. mov reg1,reg0, lsl imm1
  1148. mov reg2,reg1, lsr/asr imm2-imm3
  1149. if
  1150. imm1>=imm2
  1151. }
  1152. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1153. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1154. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1155. begin
  1156. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1157. begin
  1158. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  1159. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1160. begin
  1161. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1a done', p);
  1162. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1163. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1164. asml.remove(hp1);
  1165. asml.remove(hp2);
  1166. hp1.free;
  1167. hp2.free;
  1168. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  1169. begin
  1170. taicpu(p).freeop(1);
  1171. taicpu(p).freeop(2);
  1172. taicpu(p).loadconst(1,0);
  1173. end;
  1174. result := true;
  1175. Exit;
  1176. end;
  1177. end
  1178. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1179. begin
  1180. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1b done', p);
  1181. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  1182. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1183. asml.remove(hp2);
  1184. hp2.free;
  1185. result := true;
  1186. Exit;
  1187. end;
  1188. end
  1189. { mov reg1,reg0, lsr/asr imm1
  1190. mov reg1,reg1, lsl imm2
  1191. mov reg1,reg1, lsr/asr imm3 ...
  1192. if imm3>=imm1 and imm2>=imm1
  1193. to
  1194. mov reg1,reg0, lsl imm2-imm1
  1195. mov reg1,reg1, lsr/asr imm3 ...
  1196. }
  1197. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1198. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1199. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  1200. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1201. begin
  1202. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  1203. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1204. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 2 done', p);
  1205. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  1206. begin
  1207. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  1208. asml.remove(hp1);
  1209. hp1.free;
  1210. end;
  1211. RemoveCurrentp(p);
  1212. result := true;
  1213. Exit;
  1214. end;
  1215. end;
  1216. end;
  1217. { All the optimisations from this point on require GetNextInstructionUsingReg
  1218. to return True }
  1219. while (
  1220. GetNextInstructionUsingReg(p, hpfar1, taicpu(p).oper[0]^.reg) and
  1221. (hpfar1.typ = ait_instruction)
  1222. ) do
  1223. begin
  1224. { Change the common
  1225. mov r0, r0, lsr #xxx
  1226. and r0, r0, #yyy/bic r0, r0, #xxx
  1227. and remove the superfluous and/bic if possible
  1228. This could be extended to handle more cases.
  1229. }
  1230. { Change
  1231. mov rx, ry, lsr/ror #xxx
  1232. uxtb/uxth rz,rx/and rz,rx,0xFF
  1233. dealloc rx
  1234. to
  1235. uxtb/uxth rz,ry,ror #xxx
  1236. }
  1237. if (GenerateThumb2Code) and
  1238. (taicpu(p).ops=3) and
  1239. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1240. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1241. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  1242. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1243. begin
  1244. if MatchInstruction(hpfar1, A_UXTB, [C_None], [PF_None]) and
  1245. (taicpu(hpfar1).ops = 2) and
  1246. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1247. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1248. begin
  1249. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1250. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1251. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1252. taicpu(hpfar1).ops := 3;
  1253. if not Assigned(hp1) then
  1254. GetNextInstruction(p,hp1);
  1255. RemoveCurrentP(p, hp1);
  1256. result:=true;
  1257. exit;
  1258. end
  1259. else if MatchInstruction(hpfar1, A_UXTH, [C_None], [PF_None]) and
  1260. (taicpu(hpfar1).ops=2) and
  1261. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  1262. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1263. begin
  1264. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1265. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1266. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1267. taicpu(hpfar1).ops := 3;
  1268. if not Assigned(hp1) then
  1269. GetNextInstruction(p,hp1);
  1270. RemoveCurrentP(p, hp1);
  1271. result:=true;
  1272. exit;
  1273. end
  1274. else if MatchInstruction(hpfar1, A_AND, [C_None], [PF_None]) and
  1275. (taicpu(hpfar1).ops = 3) and
  1276. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1277. (taicpu(hpfar1).oper[2]^.val = $FF) and
  1278. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1279. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1280. begin
  1281. taicpu(hpfar1).ops := 3;
  1282. taicpu(hpfar1).opcode := A_UXTB;
  1283. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1284. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1285. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1286. if not Assigned(hp1) then
  1287. GetNextInstruction(p,hp1);
  1288. RemoveCurrentP(p, hp1);
  1289. result:=true;
  1290. exit;
  1291. end;
  1292. end;
1293. { 2-operand mov optimisations }
  1294. if (taicpu(p).ops = 2) then
  1295. begin
  1296. {
  1297. This removes the mul from
  1298. mov rX,0
  1299. ...
  1300. mul ...,rX,...
  1301. }
  1302. if (taicpu(p).oper[1]^.typ = top_const) then
  1303. begin
  1304. (* if false and
  1305. (taicpu(p).oper[1]^.val=0) and
  1306. MatchInstruction(hpfar1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1307. (((taicpu(hpfar1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^)) or
  1308. ((taicpu(hpfar1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[2]^))) then
  1309. begin
  1310. TransferUsedRegs(TmpUsedRegs);
  1311. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1312. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1313. DebugMsg(SPeepholeOptimization + 'MovMUL/MLA2Mov0 done', p);
  1314. if taicpu(hpfar1).opcode=A_MUL then
  1315. taicpu(hpfar1).loadconst(1,0)
  1316. else
  1317. taicpu(hpfar1).loadreg(1,taicpu(hpfar1).oper[3]^.reg);
  1318. taicpu(hpfar1).ops:=2;
  1319. taicpu(hpfar1).opcode:=A_MOV;
  1320. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1321. RemoveCurrentP(p);
  1322. Result:=true;
  1323. exit;
  1324. end
  1325. else*) if (taicpu(p).oper[1]^.val=0) and
  1326. MatchInstruction(hpfar1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1327. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[3]^) then
  1328. begin
  1329. TransferUsedRegs(TmpUsedRegs);
  1330. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1331. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1332. DebugMsg(SPeepholeOptimization + 'MovMLA2MUL 1 done', p);
  1333. taicpu(hpfar1).ops:=3;
  1334. taicpu(hpfar1).opcode:=A_MUL;
  1335. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1336. begin
  1337. RemoveCurrentP(p);
  1338. Result:=true;
  1339. end;
  1340. exit;
  1341. end
  1342. {
  1343. This changes the very common
  1344. mov r0, #0
  1345. str r0, [...]
  1346. mov r0, #0
  1347. str r0, [...]
  1348. and removes all superfluous mov instructions
  1349. }
  1350. else if (taicpu(hpfar1).opcode=A_STR) then
  1351. begin
  1352. hp1 := hpfar1;
  1353. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1354. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1355. GetNextInstruction(hp1, hp2) and
  1356. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1357. (taicpu(hp2).ops = 2) and
  1358. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1359. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1360. begin
  1361. DebugMsg(SPeepholeOptimization + 'MovStrMov done', hp2);
  1362. GetNextInstruction(hp2,hp1);
  1363. asml.remove(hp2);
  1364. hp2.free;
  1365. result:=true;
  1366. if not assigned(hp1) then break;
  1367. end;
  1368. if Result then
  1369. Exit;
  1370. end;
  1371. end;
  1372. {
  1373. This removes the first mov from
  1374. mov rX,...
  1375. mov rX,...
  1376. }
  1377. if taicpu(hpfar1).opcode=A_MOV then
  1378. begin
  1379. hp1 := p;
  1380. while MatchInstruction(hpfar1, A_MOV, [taicpu(hp1).condition], [taicpu(hp1).oppostfix]) and
  1381. (taicpu(hpfar1).ops = 2) and
  1382. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1383. { don't remove the first mov if the second is a mov rX,rX }
  1384. not(MatchOperand(taicpu(hpfar1).oper[0]^, taicpu(hpfar1).oper[1]^)) do
  1385. begin
  1386. { Defer removing the first p until after the while loop }
  1387. if p <> hp1 then
  1388. begin
  1389. DebugMsg(SPeepholeOptimization + 'MovMov done', hp1);
  1390. asml.remove(hp1);
  1391. hp1.free;
  1392. end;
  1393. hp1:=hpfar1;
  1394. GetNextInstruction(hpfar1,hpfar1);
  1395. result:=true;
  1396. if not assigned(hpfar1) then
  1397. Break;
  1398. end;
  1399. if Result then
  1400. begin
  1401. DebugMsg(SPeepholeOptimization + 'MovMov done', p);
  1402. RemoveCurrentp(p);
  1403. Exit;
  1404. end;
  1405. end;
  1406. if RedundantMovProcess(p,hpfar1) then
  1407. begin
  1408. Result:=true;
  1409. { p might not point at a mov anymore }
  1410. exit;
  1411. end;
  1412. { If hpfar1 is nil after the call to RedundantMovProcess, it is
  1413. because it would have become a dangling pointer, so reinitialise it. }
  1414. if not Assigned(hpfar1) then
  1415. Continue;
  1416. { Fold the very common sequence
  1417. mov regA, regB
  1418. ldr* regA, [regA]
  1419. to
  1420. ldr* regA, [regB]
  1421. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1422. }
  1423. if
  1424. // Make sure that Thumb code doesn't propagate a high register into a reference
  1425. (
  1426. (
  1427. GenerateThumbCode and
  1428. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)
  1429. ) or (not GenerateThumbCode)
  1430. ) and
  1431. (taicpu(p).oper[1]^.typ = top_reg) and
  1432. (taicpu(p).oppostfix = PF_NONE) and
  1433. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1434. (taicpu(hpfar1).oper[1]^.typ = top_ref) and
  1435. { We can change the base register only when the instruction uses AM_OFFSET }
  1436. ((taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1437. ((taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1438. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1439. ) and
  1440. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1441. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1442. begin
  1443. DebugMsg(SPeepholeOptimization + 'MovLdr2Ldr done', hpfar1);
  1444. if (taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1445. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1446. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1447. if taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1448. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1449. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
  1450. if Assigned(dealloc) then
  1451. begin
  1452. asml.remove(dealloc);
  1453. asml.InsertAfter(dealloc,hpfar1);
  1454. end;
  1455. if (not Assigned(hp1)) or (p=hp1) then
  1456. GetNextInstruction(p, hp1);
  1457. RemoveCurrentP(p, hp1);
  1458. result:=true;
  1459. Exit;
  1460. end
  1461. end
{ 3-operand mov optimisations }
  1463. else if (taicpu(p).ops = 3) then
  1464. begin
  1465. if (taicpu(p).oper[2]^.typ = top_shifterop) and
  1466. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1467. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  1468. (taicpu(hpfar1).ops>=1) and
  1469. (taicpu(hpfar1).oper[0]^.typ=top_reg) and
  1470. (not RegModifiedBetween(taicpu(hpfar1).oper[0]^.reg, p, hpfar1)) and
  1471. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1472. begin
  1473. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  1474. MatchInstruction(hpfar1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1475. (taicpu(hpfar1).ops=3) and
  1476. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1477. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1478. { Check if the AND actually would only mask out bits being already zero because of the shift
  1479. }
  1480. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hpfar1).oper[2]^.val) =
  1481. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  1482. begin
  1483. DebugMsg(SPeepholeOptimization + 'LsrAnd2Lsr done', hpfar1);
  1484. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1485. asml.remove(hpfar1);
  1486. hpfar1.free;
  1487. result:=true;
  1488. Exit;
  1489. end
  1490. else if MatchInstruction(hpfar1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1491. (taicpu(hpfar1).ops=3) and
  1492. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1493. (taicpu(hpfar1).oper[2]^.typ = top_const) and
{ Check if the BIC actually would only mask out bits being already zero because of the shift }
  1495. (taicpu(hpfar1).oper[2]^.val<>0) and
  1496. (BsfDWord(taicpu(hpfar1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1497. begin
  1498. DebugMsg(SPeepholeOptimization + 'LsrBic2Lsr done', hpfar1);
  1499. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1500. asml.remove(hpfar1);
  1501. hpfar1.free;
  1502. result:=true;
  1503. Exit;
  1504. end;
  1505. end;
  1506. { This folds shifterops into following instructions
  1507. mov r0, r1, lsl #8
  1508. add r2, r3, r0
  1509. to
  1510. add r2, r3, r1, lsl #8
  1511. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1512. }
  1513. if (taicpu(p).oper[1]^.typ = top_reg) and
  1514. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1515. (taicpu(p).oppostfix = PF_NONE) and
  1516. MatchInstruction(hpfar1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1517. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1518. A_CMP, A_CMN],
  1519. [taicpu(p).condition], [PF_None]) and
  1520. (not ((GenerateThumb2Code) and
  1521. (taicpu(hpfar1).opcode in [A_SBC]) and
  1522. (((taicpu(hpfar1).ops=3) and
  1523. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^.reg)) or
  1524. ((taicpu(hpfar1).ops=2) and
  1525. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^.reg))))) and
  1526. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) and
  1527. (taicpu(hpfar1).ops >= 2) and
  1528. {Currently we can't fold into another shifterop}
  1529. (taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^.typ = top_reg) and
  1530. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1531. NR_DEFAULTFLAGS for modification}
  1532. (
  1533. {Everything is fine if we don't use RRX}
  1534. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1535. (
  1536. {If it is RRX, then check if we're just accessing the next instruction}
  1537. Assigned(hp1) and
  1538. (hpfar1 = hp1)
  1539. )
  1540. ) and
  1541. { reg1 might not be modified inbetween }
  1542. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1543. { The shifterop can contain a register, might not be modified}
  1544. (
  1545. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1546. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hpfar1))
  1547. ) and
  1548. (
  1549. {Only ONE of the two src operands is allowed to match}
  1550. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-2]^) xor
  1551. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^)
  1552. ) then
  1553. begin
  1554. if taicpu(hpfar1).opcode in [A_TST, A_TEQ, A_CMN] then
  1555. I2:=0
  1556. else
  1557. I2:=1;
  1558. for I:=I2 to taicpu(hpfar1).ops-1 do
  1559. if MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[I]^.reg) then
  1560. begin
  1561. { If the parameter matched on the second op from the RIGHT
  1562. we have to switch the parameters, this will not happen for CMP
where we're only evaluating the rightmost parameter
  1564. }
  1565. if I <> taicpu(hpfar1).ops-1 then
  1566. begin
  1567. {The SUB operators need to be changed when we swap parameters}
  1568. case taicpu(hpfar1).opcode of
  1569. A_SUB: tempop:=A_RSB;
  1570. A_SBC: tempop:=A_RSC;
  1571. A_RSB: tempop:=A_SUB;
  1572. A_RSC: tempop:=A_SBC;
  1573. else tempop:=taicpu(hpfar1).opcode;
  1574. end;
  1575. if taicpu(hpfar1).ops = 3 then
  1576. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1577. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[2]^.reg,
  1578. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1579. else
  1580. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1581. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1582. taicpu(p).oper[2]^.shifterop^);
  1583. end
  1584. else
  1585. if taicpu(hpfar1).ops = 3 then
  1586. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1587. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[1]^.reg,
  1588. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1589. else
  1590. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1591. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1592. taicpu(p).oper[2]^.shifterop^);
  1593. if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
  1594. AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hpfar1,UsedRegs);
  1595. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hpfar1,UsedRegs);
  1596. asml.insertbefore(hp2, hpfar1);
  1597. asml.remove(hpfar1);
  1598. hpfar1.free;
  1599. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  1600. if not Assigned(hp1) then
  1601. GetNextInstruction(p, hp1)
  1602. else if hp1 = hpfar1 then
  1603. { If hp1 = hpfar1, then it's a dangling pointer }
  1604. hp1 := hp2;
  1605. RemoveCurrentP(p, hp1);
  1606. Result:=true;
  1607. Exit;
  1608. end;
  1609. end;
  1610. {
  1611. Fold
  1612. mov r1, r1, lsl #2
  1613. ldr/ldrb r0, [r0, r1]
  1614. to
  1615. ldr/ldrb r0, [r0, r1, lsl #2]
  1616. XXX: This still needs some work, as we quite often encounter something like
  1617. mov r1, r2, lsl #2
  1618. add r2, r3, #imm
  1619. ldr r0, [r2, r1]
  1620. which can't be folded because r2 is overwritten between the shift and the ldr.
  1621. We could try to shuffle the registers around and fold it into.
  1622. add r1, r3, #imm
  1623. ldr r0, [r1, r2, lsl #2]
  1624. }
  1625. if (not(GenerateThumbCode)) and
  1626. { thumb2 allows only lsl #0..#3 }
  1627. (not(GenerateThumb2Code) or
  1628. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1629. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1630. )
  1631. ) and
  1632. (taicpu(p).oper[1]^.typ = top_reg) and
  1633. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1634. { RRX is tough to handle, because it requires tracking the C-Flag,
it is also extremely unlikely to be emitted this way}
  1636. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1637. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1638. (taicpu(p).oppostfix = PF_NONE) and
  1639. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1640. (MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1641. (GenerateThumb2Code and
  1642. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1643. ) and
  1644. (
  1645. {If this is address by offset, one of the two registers can be used}
  1646. ((taicpu(hpfar1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1647. (
  1648. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1649. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1650. )
  1651. ) or
  1652. {For post and preindexed only the index register can be used}
  1653. ((taicpu(hpfar1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1654. (
  1655. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1656. (taicpu(hpfar1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1657. ) and
  1658. (not GenerateThumb2Code)
  1659. )
  1660. ) and
  1661. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1662. (taicpu(hpfar1).oper[1]^.ref^.index<>NR_NO) and
  1663. (taicpu(hpfar1).oper[1]^.ref^.base<>NR_NO) and
  1664. { Only fold if there isn't another shifterop already, and offset is zero. }
  1665. (taicpu(hpfar1).oper[1]^.ref^.offset = 0) and
  1666. (taicpu(hpfar1).oper[1]^.ref^.shiftmode = SM_None) and
  1667. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1668. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1669. begin
  1670. { If the register we want to do the shift for resides in base, we need to swap that}
  1671. if (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1672. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(hpfar1).oper[1]^.ref^.index;
  1673. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1674. taicpu(hpfar1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1675. taicpu(hpfar1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1676. DebugMsg(SPeepholeOptimization + 'FoldShiftLdrStr done', hpfar1);
  1677. RemoveCurrentP(p);
  1678. Result:=true;
  1679. Exit;
  1680. end;
  1681. end;
  1682. {
  1683. Often we see shifts and then a superfluous mov to another register
  1684. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1685. }
  1686. if RemoveSuperfluousMove(p, hpfar1, 'MovMov2Mov') then
  1687. Result:=true;
  1688. Exit;
  1689. end;
  1690. end;
  { Rewrites MVN followed by an AND on the inverted register into a single
    BIC, provided the inverted register dies at the AND and its source is
    not modified in between:
      change
        mvn reg2,reg1
        and reg3,reg4,reg2
        dealloc reg2
      to
        bic reg3,reg4,reg1 }
  function TCpuAsmOptimizer.OptPass1MVN(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      Result := False;
      if (taicpu(p).oper[1]^.typ = top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,A_AND,[],[]) and
        { the AND may consume the inverted register as either source
          operand (3-op form) or as its single source (2-op form) }
        (((taicpu(hp1).ops=3) and
          (taicpu(hp1).oper[2]^.typ=top_reg) and
          (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
         ((taicpu(hp1).ops=2) and
          (taicpu(hp1).oper[1]^.typ=top_reg) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
        { the MVN destination must die right after the AND, otherwise it is
          still needed and the MVN cannot be removed }
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { reg1 might not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg(SPeepholeOptimization + 'MvnAnd2Bic done', p);
          taicpu(hp1).opcode:=A_BIC;
          if taicpu(hp1).ops=3 then
            begin
              { BIC only complements its *last* operand, so make sure the
                inverted register ends up there }
              if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
                taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
              { replace it with the un-inverted source of the MVN }
              taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
            end
          else
            taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          RemoveCurrentp(p);
          Result := True;
          Exit;
        end;
    end;
  { Removes a redundant reverse copy:
      vmov reg0,reg1,reg2
      vmov reg1,reg2,reg0
    becomes just the first vmov.  The second instruction only copies the
    same values back, so it can be dropped regardless of whether reg0 or
    reg2 is the VFP register. }
  function TCpuAsmOptimizer.OptPass1VMov(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      Result := False;
      { only the three-operand form is of interest here }
      if taicpu(p).ops <> 3 then
        exit;
      { keep deleting matching follow-up vmovs for as long as we find them }
      while GetNextInstruction(p, hp1) and
        MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
        (taicpu(hp1).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
        MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
        MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) do
        begin
          DebugMsg(SPeepholeOptimization + 'VMovVMov2VMov done', p);
          asml.Remove(hp1);
          hp1.Free;
          { NOTE(review): Result is deliberately left False here, matching
            the previous behaviour - confirm whether removal should flag
            the pass as having changed code. }
        end;
    end;
  { If the register written by this VFP operation is next used by a vmov,
    try to fold that move away via RemoveSuperfluousVMov. }
  function TCpuAsmOptimizer.OptPass1VOp(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      Result := False;
      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
        Result := RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp');
    end;
  { Folds the return sequence
      push {r14}
      pop  {r15}
    into a single instruction: "bx lr" when the CPU supports BX, otherwise
    "mov pc,lr".  Both perform the same return without going through the
    stack. }
  function TCpuAsmOptimizer.OptPass1Push(var p: tai): Boolean;
    var
      hp1: tai;
    begin
      Result:=false;
      if (taicpu(p).oper[0]^.regset^=[RS_R14]) and
        GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,A_POP,[C_None],[PF_None]) and
        (taicpu(hp1).oper[0]^.regset^=[RS_R15]) then
        begin
          if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
            begin
              { use the shared SPeepholeOptimization prefix for consistency
                with every other message in this unit (the old code spelled
                the prefix out as a literal) }
              DebugMsg(SPeepholeOptimization + 'PushPop2Mov done', p);
              { turn the PUSH into "mov pc,lr" }
              taicpu(p).ops:=2;
              taicpu(p).loadreg(1, NR_R14);
              taicpu(p).loadreg(0, NR_R15);
              taicpu(p).opcode:=A_MOV;
            end
          else
            begin
              DebugMsg(SPeepholeOptimization + 'PushPop2Bx done', p);
              { turn the PUSH into "bx lr" }
              taicpu(p).loadreg(0, NR_R14);
              taicpu(p).opcode:=A_BX;
            end;
          RemoveInstruction(hp1);
          Result:=true;
          Exit;
        end;
    end;
  { Converts short conditionally-skipped blocks into conditionally executed
    instructions (ARM mode only):
      1. "Bcc xxx; <few instructions>; xxx:"  ->  instructions get the
         inverted condition, the branch is removed (Bcc2Cond).
      2. "Bcc xxx; <block1>; B yyy; xxx: <block2>; yyy:"  ->  block1 gets
         the inverted condition, block2 the original one, and both branches
         are removed (BccB2Cond). }
  function TCpuAsmOptimizer.OptPass2Bcc(var p: tai): Boolean;
    var
      hp1,hp2,hp3,after_p: tai;
      l : longint;
      WasLast: boolean;
      Label_X, Label_Y: TASmLabel;

    { Applies newcond to the instructions starting at this_hp, stopping at
      labels, unconditional branches, instructions that cannot be made
      conditional, or after an instruction that must be last. }
    procedure ConvertInstructins(this_hp: tai; newcond: tasmcond);
      var
        next_hp: tai;
      begin
        repeat
          if this_hp.typ=ait_instruction then
            taicpu(this_hp).condition := newcond;
          GetNextInstruction(this_hp, next_hp);
          if MustBeLast(this_hp) then
            Break;
          this_hp := next_hp
        until not(assigned(this_hp)) or
          not(CanBeCond(this_hp)) or
          { NOTE(review): this clause tests the *outer* variable hp1 rather
            than this_hp/next_hp - looks like a copy/paste slip; confirm
            against upstream before touching it }
          ((hp1.typ = ait_instruction) and (taicpu(hp1).opcode = A_B)) or
          (this_hp.typ = ait_label);
      end;

    begin
      Result := False;
      { Thumb has no conditional execution of arbitrary instructions }
      if (taicpu(p).condition<>C_None) and
        not(GenerateThumbCode) then
        begin
          { check for
            Bxx xxx
            <several instructions>
            xxx:
          }
          Label_X := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
          l:=0;
          WasLast:=False;
          GetNextInstruction(p, hp1);
          after_p := hp1;
          { walk over at most 5 conditionalisable instructions }
          while assigned(hp1) and
            (l<=4) and
            CanBeCond(hp1) and
            { stop on labels }
            not(hp1.typ=ait_label) and
            { avoid that we cannot recognize the case BccB2Cond }
            not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
            begin
              inc(l);
              if MustBeLast(hp1) then
                begin
                  WasLast:=True;
                  GetNextInstruction(hp1,hp1);
                  break;
                end
              else
                GetNextInstruction(hp1,hp1);
            end;
          if assigned(hp1) then
            begin
              if FindLabel(Label_X, hp1) then
                begin
                  { case 1: the branch target follows immediately }
                  if (l<=4) and (l>0) then
                    begin
                      ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
                      DebugMsg(SPeepholeOptimization + 'Bcc2Cond done', p);
                      { wait with removing else GetNextInstruction could
                        ignore the label if it was the only usage in the
                        jump moved away }
                      Label_X.decrefs;
                      RemoveCurrentP(p, after_p);
                      Result := True;
                      Exit;
                    end;
                end
              else
                { do not perform further optimizations if there is an instruction
                  in block #1 which cannot be optimized.
                }
                if not WasLast then
                  begin
                    { check further for
                      Bcc xxx
                      <several instructions 1>
                      B yyy
                      xxx:
                      <several instructions 2>
                      yyy:
                    }
                    { hp2 points to jmp yyy }
                    hp2:=hp1;
                    { skip hp2 to xxx }
                    if assigned(hp2) and
                      (l<=3) and
                      (hp2.typ=ait_instruction) and
                      (taicpu(hp2).is_jmp) and
                      (taicpu(hp2).condition=C_None) and
                      { real label and jump, no further references to the
                        label are allowed }
                      (Label_X.getrefs = 1) and
                      FindLabel(Label_X, hp1) then
                      begin
                        Label_Y := TAsmLabel(taicpu(hp2).oper[0]^.ref^.symbol);
                        l:=0;
                        { skip hp1 and hp3 to <several moves 2> }
                        GetNextInstruction(hp1, hp1);
                        hp3 := hp1;
                        { measure the size of the second block }
                        while assigned(hp1) and
                          CanBeCond(hp1) and
                          (l<=3) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                GetNextInstruction(hp1, hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1, hp1);
                          end;
                        { hp1 points to yyy: }
                        if assigned(hp1) and
                          FindLabel(Label_Y, hp1) then
                          begin
                            { conditionalise both blocks with opposite conditions }
                            ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
                            ConvertInstructins(hp3, taicpu(p).condition);
                            DebugMsg(SPeepholeOptimization + 'BccB2Cond done', after_p);
                            { remove B }
                            Label_Y.decrefs;
                            RemoveInstruction(hp2);
                            { remove Bcc }
                            Label_X.decrefs;
                            RemoveCurrentP(p, after_p);
                            Result := True;
                            Exit;
                          end;
                      end;
                  end;
            end;
        end;
    end;
  { Merges two adjacent word stores of consecutive registers (offsets 0 and
    +/-4 off the same base) into a single STM/STMDA instruction. }
  function TCpuAsmOptimizer.OptPass2STR(var p: tai): Boolean;
    var
      hp1: tai;
      Postfix: TOpPostfix;
      OpcodeStr: shortstring;
    begin
      Result := False;
      { Try to merge two STRs into an STM instruction }
      if not(GenerateThumbCode) and (taicpu(p).oper[1]^.typ = top_ref) and
        (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) and
        { exactly one of base/index may be in use }
        (
          (taicpu(p).oper[1]^.ref^.base = NR_NO) or
          (taicpu(p).oper[1]^.ref^.index = NR_NO)
        ) and
        (taicpu(p).oppostfix = PF_None) and
        (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) then
        begin
          hp1 := p;
          { scan following STR instructions for a partner store }
          while GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
            (taicpu(hp1).opcode = A_STR) do
            if (taicpu(hp1).condition = taicpu(p).condition) and
              (taicpu(hp1).oppostfix = PF_None) and
              (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
              (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
              (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[1]^.ref^.base) and
              (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[1]^.ref^.index) and
              { one store at offset 0, the other at +/-4, with the
                higher-numbered register at the other slot }
              (
                (
                  (taicpu(p).oper[1]^.ref^.offset = 0) and
                  (getsupreg(taicpu(hp1).oper[0]^.reg) > getsupreg(taicpu(p).oper[0]^.reg)) and
                  (abs(taicpu(hp1).oper[1]^.ref^.offset) = 4)
                ) or (
                  (taicpu(hp1).oper[1]^.ref^.offset = 0) and
                  (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) and
                  (abs(taicpu(p).oper[1]^.ref^.offset) = 4)
                )
              ) then
              begin
                { STM stores the lowest-numbered register at the lowest
                  address; if the register order and address order disagree,
                  use decrement-after addressing }
                if (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) xor
                  (taicpu(hp1).oper[1]^.ref^.offset < taicpu(p).oper[1]^.ref^.offset) then
                  begin
                    Postfix := PF_DA;
                    OpcodeStr := 'DA';
                  end
                else
                  begin
                    Postfix := PF_None;
                    OpcodeStr := '';
                  end;
                taicpu(hp1).oper[1]^.ref^.offset := 0;
                { STM takes its address register in "index" }
                if taicpu(hp1).oper[1]^.ref^.index = NR_NO then
                  begin
                    taicpu(hp1).oper[1]^.ref^.index := taicpu(hp1).oper[1]^.ref^.base;
                    taicpu(hp1).oper[1]^.ref^.base := NR_NO;
                  end;
                taicpu(p).opcode := A_STM;
                taicpu(p).loadregset(1, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg), getsupreg(taicpu(hp1).oper[0]^.reg)]);
                taicpu(p).loadref(0, taicpu(hp1).oper[1]^.ref^);
                taicpu(p).oppostfix := Postfix;
                RemoveInstruction(hp1);
                DebugMsg(SPeepholeOptimization + 'Merging stores: STR/STR -> STM' + OpcodeStr, p);
                Result := True;
                Exit;
              end;
        end;
    end;
  { Absorbs a following STR into an existing STM when the stored register
    is higher than every register already in the set and its offset is the
    next slot of the STM's address progression. }
  function TCpuAsmOptimizer.OptPass2STM(var p: tai): Boolean;
    var
      hp1: tai;
      CorrectOffset:ASizeInt;
      i, LastReg: TSuperRegister;
      Postfix: TOpPostfix;
      OpcodeStr: shortstring;
    begin
      Result := False;
      { See if STM/STR can be merged into a single STM }
      if (taicpu(p).oper[0]^.ref^.addressmode = AM_OFFSET) then
        begin
          { determine the highest register in the set and the offset of the
            first free slot (4 bytes per stored register) }
          CorrectOffset := 0;
          LastReg := RS_NO;
          for i in taicpu(p).oper[1]^.regset^ do
            begin
              LastReg := i;
              Inc(CorrectOffset, 4);
            end;
          { This while loop effectively does a Selection Sort on any STR
            instructions that follow }
          hp1 := p;
          while (LastReg < maxcpuregister) and
            GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
            (taicpu(hp1).opcode = A_STR) do
            if (taicpu(hp1).condition = taicpu(p).condition) and
              (taicpu(hp1).oppostfix = PF_None) and
              (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
              (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
              { STR must address off the same register the STM uses }
              (
                (
                  (taicpu(p).oper[1]^.ref^.base = NR_NO) and
                  (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.ref^.index)
                ) or (
                  (taicpu(p).oper[1]^.ref^.index = NR_NO) and
                  (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.ref^.base)
                )
              ) and
              { Next register must be later in the set }
              (getsupreg(taicpu(hp1).oper[0]^.reg) > LastReg) and
              { the STR's offset must be exactly the STM's next slot,
                negative for decrement-after }
              (
                (
                  (taicpu(p).oppostfix = PF_None) and
                  (taicpu(hp1).oper[1]^.ref^.offset = CorrectOffset)
                ) or (
                  (taicpu(p).oppostfix = PF_DA) and
                  (taicpu(hp1).oper[1]^.ref^.offset = -CorrectOffset)
                )
              ) then
              begin
                { Increment the reference values ready for the next STR instruction to find }
                LastReg := getsupreg(taicpu(hp1).oper[0]^.reg);
                Inc(CorrectOffset, 4);
                if (taicpu(p).oppostfix = PF_DA) then
                  OpcodeStr := 'DA'
                else
                  OpcodeStr := '';
                Include(taicpu(p).oper[1]^.regset^, LastReg);
                DebugMsg(SPeepholeOptimization + 'Merging stores: STM' + OpcodeStr + '/STR -> STM' + OpcodeStr, hp1);
                RemoveInstruction(hp1);
                Result := True;
                { See if we can find another one to merge }
                hp1 := p;
                Continue;
              end;
        end;
    end;
  { First-pass peephole dispatcher: routes each instruction to the
    OptPass1* handler for its opcode; returns True when a handler
    changed the code.  (Also normalises the stray "REsult"/"OPtPass1MVN"
    casings for consistency - Pascal is case-insensitive, so behaviour
    is unchanged.) }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ = ait_instruction then
        begin
          case taicpu(p).opcode of
            A_CMP:
              Result := OptPass1CMP(p);
            A_STR:
              Result := OptPass1STR(p);
            A_LDR:
              Result := OptPass1LDR(p);
            A_MOV:
              Result := OptPass1MOV(p);
            A_AND:
              Result := OptPass1And(p);
            A_ADD,
            A_SUB:
              Result := OptPass1ADDSUB(p);
            A_MUL:
              Result := OptPass1MUL(p);
            { the remaining data-processing instructions share the
              generic "check for a following mov" optimisation }
            A_ADC,
            A_RSB,
            A_RSC,
            A_SBC,
            A_BIC,
            A_EOR,
            A_ORR,
            A_MLA,
            A_MLS,
            A_QADD,A_QADD16,A_QADD8,
            A_QSUB,A_QSUB16,A_QSUB8,
            A_QDADD,A_QDSUB,A_QASX,A_QSAX,
            A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
            A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
            A_PKHTB,A_PKHBT,
            A_SMUAD,A_SMUSD:
              Result := OptPass1DataCheckMov(p);
  {$ifdef dummy}
            A_MVN:
              Result := OptPass1MVN(p);
  {$endif dummy}
            A_UXTB:
              Result := OptPass1UXTB(p);
            A_UXTH:
              Result := OptPass1UXTH(p);
            A_SXTB:
              Result := OptPass1SXTB(p);
            A_SXTH:
              Result := OptPass1SXTH(p);
            A_STM:
              Result := OptPass1STM(p);
            A_VMOV:
              Result := OptPass1VMov(p);
            A_VLDR,
            A_VADD,
            A_VMUL,
            A_VDIV,
            A_VSUB,
            A_VSQRT,
            A_VNEG,
            A_VCVT,
            A_VABS:
              Result := OptPass1VOp(p);
            A_PUSH:
              Result := OptPass1Push(p);
            else
              ;
          end;
        end;
    end;
  { Second-pass peephole dispatcher: routes each instruction to its
    OptPass2* handler; returns True when a handler changed the code. }
  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
    begin
      result := False;
      { only instructions are of interest }
      if p.typ <> ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_B:
          Result := OptPass2Bcc(p);
        A_STM:
          Result := OptPass2STM(p);
        A_STR:
          Result := OptPass2STR(p);
        else
          ;
      end;
    end;
  { instructions modifying the CPSR can be only the last instruction }
  function MustBeLast(p : tai) : boolean;
    begin
      if p.typ<>ait_instruction then
        exit(false);
      { flag-writing opcodes, writes to PC, or an S-suffixed instruction }
      Result:=
        (taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
        ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
        (taicpu(p).oppostfix=PF_S);
    end;
  { Extends the generic register-usage test with two ARM specifics:
    calls are treated as using every register, and a doubleword
    load/store (LDR/STR with the D postfix) also touches the register
    following its first operand. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
        exit(true);
      if MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
        (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
        exit(true);
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  const
    { set of opcode which might or do write to memory }
    { branches are included since the code reached by them may write
      memory; used by the scheduler below to avoid moving loads across
      potential stores }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
      A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  { adjust the register live information when swapping the two instructions p and hp1,
    they must follow one after the other }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { If reg's live range currently ends at hp1 but p also uses it, the
      swap makes p the last user: move live_end back to p. }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { If reg's live range currently starts at p but hp1 also uses it, the
      swap makes hp1 the first user: move live_start forward to hp1. }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }
      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
          else
            ;
        end;
      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { was CheckLiveStart: copy/paste bug - this loop adjusts live
              *ends* for registers read by hp1, and the shifter register
              is exactly such a read }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
          else
            ;
        end;
    end;
{ Pre-register-allocation scheduler: tries to move a load (hp1) in front of
  the immediately preceding instruction (p) so the load's latency is hidden
  from the instruction (hp2) that consumes the loaded register. }
function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
{ TODO : schedule also forward }
{ TODO : schedule distance > 1 }

  { returns true if p might be a load of a pc relative tls offset }
  function PossibleTLSLoad(const p: tai) : boolean;
    begin
      Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
        (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
        (taicpu(p).oper[1]^.ref^.index=NR_PC)));
    end;

  var
    hp1,hp2,hp3,hp4,hp5,insertpos : tai;
    list : TAsmList;
  begin
    { this pass always reports success }
    result:=true;
    { temporary holding list for p and the regalloc/label tais moved with it }
    list:=TAsmList.create;
    p:=BlockStart;
    while p<>BlockEnd Do
      begin
        { pattern: p (any instruction) directly followed by hp1 (a load),
          whose result is used by hp2 — swap p and hp1 when provably safe }
        if (p.typ=ait_instruction) and
          GetNextInstruction(p,hp1) and
          (hp1.typ=ait_instruction) and
          (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
          (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
          { for now we don't reschedule if the previous instruction changes potentially a memory location }
          ( (not(taicpu(p).opcode in opcode_could_mem_write) and
             not(RegModifiedByInstruction(NR_PC,p))
            ) or
            ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
             ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
              (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
              (taicpu(hp1).oper[1]^.ref^.offset=0)
              )
             ) or
             { try to prove that the memory accesses don't overlapp }
             ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
              (taicpu(p).oper[1]^.typ = top_ref) and
              (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
              (taicpu(p).oppostfix=PF_None) and
              (taicpu(hp1).oppostfix=PF_None) and
              (taicpu(p).oper[1]^.ref^.index=NR_NO) and
              (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
              { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
              (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
             )
            )
          ) and
          GetNextInstruction(hp1,hp2) and
          (hp2.typ=ait_instruction) and
          { loaded register used by next instruction?

            if we ever support labels (they could be skipped in theory) here, the gnu2 tls general-dynamic code could get broken (the ldr before
            the bl may not be scheduled away from the bl) and it needs to be taken care of this case
          }
          (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
          { loaded register not used by previous instruction? }
          not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
          { same condition? }
          (taicpu(p).condition=taicpu(hp1).condition) and
          { first instruction might not change the register used as base }
          ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
           not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
          ) and
          { first instruction might not change the register used as index }
          ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
           not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
          ) and
          { if we modify the basereg AND the first instruction used that reg, we can not schedule }
          ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
           not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
          not(PossibleTLSLoad(p)) and
          not(PossibleTLSLoad(hp1)) then
          begin
            hp3:=tai(p.Previous);
            hp5:=tai(p.next);
            asml.Remove(p);
            { if there is a reg. alloc/dealloc/sync instructions or address labels (e.g. for GOT-less PIC)
              associated with p, move it together with p }

            { before the instruction? }
            { find reg allocs,deallocs and PIC labels }
            while assigned(hp3) and (hp3.typ<>ait_instruction) do
              begin
                if ( (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_alloc, ra_dealloc]) and
                  RegInInstruction(tai_regalloc(hp3).reg,p) )
                  or ( (hp3.typ=ait_label) and (tai_label(hp3).labsym.typ=AT_ADDR) )
                then
                  begin
                    hp4:=hp3;
                    hp3:=tai(hp3.Previous);
                    asml.Remove(hp4);
                    { Insert prepends, so walking backwards keeps the
                      original relative order of the collected tais }
                    list.Insert(hp4);
                  end
                else
                  hp3:=tai(hp3.Previous);
              end;
            list.Concat(p);
            { p and hp1 change places: fix the recorded live ranges }
            SwapRegLive(taicpu(p),taicpu(hp1));
            { after the instruction? }
            { find reg deallocs and reg syncs }
            while assigned(hp5) and (hp5.typ<>ait_instruction) do
              begin
                if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc, ra_sync]) and
                  RegInInstruction(tai_regalloc(hp5).reg,p) then
                  begin
                    hp4:=hp5;
                    hp5:=tai(hp5.next);
                    asml.Remove(hp4);
                    list.Concat(hp4);
                  end
                else
                  hp5:=tai(hp5.Next);
              end;
            asml.Remove(hp1);
            { if there are address labels associated with hp2, those must
              stay with hp2 (e.g. for GOT-less PIC) }
            insertpos:=hp2;
            while assigned(hp2.previous) and
              (tai(hp2.previous).typ<>ait_instruction) do
              begin
                hp2:=tai(hp2.previous);
                if (hp2.typ=ait_label) and
                  (tai_label(hp2).labsym.typ=AT_ADDR) then
                  insertpos:=hp2;
              end;
{$ifdef DEBUG_PREREGSCHEDULER}
            asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
{$endif DEBUG_PREREGSCHEDULER}
            { re-insert: first the load (hp1), then p plus its collected tais }
            asml.InsertBefore(hp1,insertpos);
            asml.InsertListBefore(insertpos,list);
            p:=tai(p.next);
          end
        else if p.typ=ait_instruction then
          p:=hp1
        else
          p:=tai(p.next);
      end;
    list.Free;
  end;
  2391. procedure DecrementPreceedingIT(list: TAsmList; p: tai);
  2392. var
  2393. hp : tai;
  2394. l : longint;
  2395. begin
  2396. hp := tai(p.Previous);
  2397. l := 1;
  2398. while assigned(hp) and
  2399. (l <= 4) do
  2400. begin
  2401. if hp.typ=ait_instruction then
  2402. begin
  2403. if (taicpu(hp).opcode>=A_IT) and
  2404. (taicpu(hp).opcode <= A_ITTTT) then
  2405. begin
  2406. if (taicpu(hp).opcode = A_IT) and
  2407. (l=1) then
  2408. list.Remove(hp)
  2409. else
  2410. case taicpu(hp).opcode of
  2411. A_ITE:
  2412. if l=2 then taicpu(hp).opcode := A_IT;
  2413. A_ITT:
  2414. if l=2 then taicpu(hp).opcode := A_IT;
  2415. A_ITEE:
  2416. if l=3 then taicpu(hp).opcode := A_ITE;
  2417. A_ITTE:
  2418. if l=3 then taicpu(hp).opcode := A_ITT;
  2419. A_ITET:
  2420. if l=3 then taicpu(hp).opcode := A_ITE;
  2421. A_ITTT:
  2422. if l=3 then taicpu(hp).opcode := A_ITT;
  2423. A_ITEEE:
  2424. if l=4 then taicpu(hp).opcode := A_ITEE;
  2425. A_ITTEE:
  2426. if l=4 then taicpu(hp).opcode := A_ITTE;
  2427. A_ITETE:
  2428. if l=4 then taicpu(hp).opcode := A_ITET;
  2429. A_ITTTE:
  2430. if l=4 then taicpu(hp).opcode := A_ITTT;
  2431. A_ITEET:
  2432. if l=4 then taicpu(hp).opcode := A_ITEE;
  2433. A_ITTET:
  2434. if l=4 then taicpu(hp).opcode := A_ITTE;
  2435. A_ITETT:
  2436. if l=4 then taicpu(hp).opcode := A_ITET;
  2437. A_ITTTT:
  2438. begin
  2439. if l=4 then taicpu(hp).opcode := A_ITTT;
  2440. end
  2441. else
  2442. ;
  2443. end;
  2444. break;
  2445. end;
  2446. {else if (taicpu(hp).condition<>taicpu(p).condition) or
  2447. (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
  2448. break;}
  2449. inc(l);
  2450. end;
  2451. hp := tai(hp.Previous);
  2452. end;
  2453. end;
  2454. function TCpuThumb2AsmOptimizer.OptPass1STM(var p: tai): boolean;
  2455. var
  2456. hp : taicpu;
  2457. begin
  2458. result:=false;
  2459. if MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
  2460. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2461. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2462. ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
  2463. begin
  2464. DebugMsg('Peephole Stm2Push done', p);
  2465. hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  2466. AsmL.InsertAfter(hp, p);
  2467. asml.Remove(p);
  2468. p:=hp;
  2469. result:=true;
  2470. end;
  2471. end;
  2472. function TCpuThumb2AsmOptimizer.OptPass1LDM(var p: tai): boolean;
  2473. var
  2474. hp : taicpu;
  2475. begin
  2476. result:=false;
  2477. if MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
  2478. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2479. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2480. ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
  2481. begin
  2482. DebugMsg('Peephole Ldm2Pop done', p);
  2483. hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  2484. asml.InsertBefore(hp, p);
  2485. asml.Remove(p);
  2486. p.Free;
  2487. p:=hp;
  2488. result:=true;
  2489. end;
  2490. end;
  2491. function TCpuThumb2AsmOptimizer.OptPass1AndThumb2(var p : tai) : boolean;
  2492. begin
  2493. result:=false;
  2494. if MatchInstruction(p, [A_AND], [], [PF_None]) and
  2495. (taicpu(p).ops = 2) and
  2496. (taicpu(p).oper[1]^.typ=top_const) and
  2497. ((taicpu(p).oper[1]^.val=255) or
  2498. (taicpu(p).oper[1]^.val=65535)) then
  2499. begin
  2500. DebugMsg('Peephole AndR2Uxt done', p);
  2501. if taicpu(p).oper[1]^.val=255 then
  2502. taicpu(p).opcode:=A_UXTB
  2503. else
  2504. taicpu(p).opcode:=A_UXTH;
  2505. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  2506. result := true;
  2507. end
  2508. else if MatchInstruction(p, [A_AND], [], [PF_None]) and
  2509. (taicpu(p).ops = 3) and
  2510. (taicpu(p).oper[2]^.typ=top_const) and
  2511. ((taicpu(p).oper[2]^.val=255) or
  2512. (taicpu(p).oper[2]^.val=65535)) then
  2513. begin
  2514. DebugMsg('Peephole AndRR2Uxt done', p);
  2515. if taicpu(p).oper[2]^.val=255 then
  2516. taicpu(p).opcode:=A_UXTB
  2517. else
  2518. taicpu(p).opcode:=A_UXTH;
  2519. taicpu(p).ops:=2;
  2520. result := true;
  2521. end;
  2522. end;
  2523. function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  2524. begin
  2525. result:=false;
  2526. if inherited PeepHoleOptPass1Cpu(p) then
  2527. result:=true
  2528. else if p.typ=ait_instruction then
  2529. case taicpu(p).opcode of
  2530. A_STM:
  2531. result:=OptPass1STM(p);
  2532. A_LDM:
  2533. result:=OptPass1LDM(p);
  2534. A_AND:
  2535. result:=OptPass1AndThumb2(p);
  2536. else
  2537. ;
  2538. end;
  2539. end;
{ Pass 2: collapse a short conditional branch over up to 4 conditionalisable
  instructions into an IT/ITT/ITTT/ITTTT block with the inverted condition,
  removing the branch and its label reference. }
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  var
    p,hp1,hp2: tai;
    l : longint;
    condition : tasmcond;
    { UsedRegs, TmpUsedRegs: TRegSet; }
  begin
    p := BlockStart;
    { UsedRegs := []; }
    while (p <> BlockEnd) Do
      begin
        { UpdateUsedRegs(UsedRegs, tai(p.next)); }
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
                A_B:
                  if taicpu(p).condition<>C_None then
                    begin
                      { check for
                             Bxx   xxx
                             <several instructions>
                          xxx:
                      }
                      { count how many instructions could be executed
                        conditionally instead of being branched over; an IT
                        block covers at most 4 instructions }
                      l:=0;
                      GetNextInstruction(p, hp1);
                      while assigned(hp1) and
                        (l<=4) and
                        CanBeCond(hp1) and
                        { stop on labels }
                        not(hp1.typ=ait_label) do
                        begin
                          inc(l);
                          if MustBeLast(hp1) then
                            begin
                              //hp1:=nil;
                              GetNextInstruction(hp1,hp1);
                              break;
                            end
                          else
                            GetNextInstruction(hp1,hp1);
                        end;
                      if assigned(hp1) then
                        begin
                          { only collapse if the branch really targets the
                            label right after the counted instructions }
                          if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                            begin
                              if (l<=4) and (l>0) then
                                begin
                                  { conditionalise the skipped instructions
                                    with the inverse of the branch condition }
                                  condition:=inverse_cond(taicpu(p).condition);
                                  hp2:=p;
                                  GetNextInstruction(p,hp1);
                                  p:=hp1;
                                  repeat
                                    if hp1.typ=ait_instruction then
                                      taicpu(hp1).condition:=condition;
                                    if MustBeLast(hp1) then
                                      begin
                                        GetNextInstruction(hp1,hp1);
                                        break;
                                      end
                                    else
                                      GetNextInstruction(hp1,hp1);
                                  until not(assigned(hp1)) or
                                    not(CanBeCond(hp1)) or
                                    (hp1.typ=ait_label);
                                  { wait with removing else GetNextInstruction could
                                    ignore the label if it was the only usage in the
                                    jump moved away }
                                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
                                  { the removed branch may itself have sat inside
                                    an IT block: shrink that block by one slot }
                                  DecrementPreceedingIT(asml, hp2);
                                  { emit the IT* instruction matching the number
                                    of conditionalised instructions }
                                  case l of
                                    1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                                    2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                                    3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                                    4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                                  end;
                                  { drop the branch and its label reference }
                                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                  asml.remove(hp2);
                                  hp2.free;
                                  continue;
                                end;
                            end;
                        end;
                    end;
                else
                  ;
              end;
            end;
          else
            ;
        end;
        p := tai(p.next)
      end;
  end;
{ Post-pass peepholes: rewrite instructions into forms that have shorter
  Thumb-2 (16-bit) encodings, typically the flag-setting (S) two-operand
  forms. All flag-setting rewrites require the flags to be dead
  (not RegInUsedRegs(NR_DEFAULTFLAGS,...)) and mark the new flag definition
  with an alloc/dealloc pair. }
function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
  begin
    result:=false;
    if p.typ = ait_instruction then
      begin
        { mov reg,#0..255 -> movs reg,#imm8 (16-bit encoding) }
        if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
          (taicpu(p).oper[1]^.typ=top_const) and
          (taicpu(p).oper[1]^.val >= 0) and
          (taicpu(p).oper[1]^.val < 256) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Mov2Movs done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { mvn reg,reg -> mvns reg,reg }
        else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
          (taicpu(p).oper[1]^.typ=top_reg) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Mvn2Mvns done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { rsb reg1,reg2,#0 -> rsbs (negate with 16-bit encoding) }
        else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          (taicpu(p).oper[2]^.typ=top_const) and
          (taicpu(p).oper[2]^.val=0) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Rsb2Rsbs done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { add/sub reg,reg,#0..255 -> adds/subs reg,#imm8 (2-operand S form) }
        else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
          (taicpu(p).oper[2]^.typ=top_const) and
          (taicpu(p).oper[2]^.val >= 0) and
          (taicpu(p).oper[2]^.val < 256) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole AddSub2*s done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
            taicpu(p).oppostfix:=PF_S;
            taicpu(p).ops := 2;
            result:=true;
          end
        { 2-operand add/sub reg,reg -> adds/subs (sp excluded on both sides) }
        else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
          (taicpu(p).ops = 2) and
          (taicpu(p).oper[1]^.typ=top_reg) and
          (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
          (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole AddSub2*s done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { add reg1,reg1,reg2 -> add reg1,reg2 (no flags needed) }
        else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (taicpu(p).oper[2]^.typ=top_reg) then
          begin
            DebugMsg('Peephole AddRRR2AddRR done', p);
            taicpu(p).ops := 2;
            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
            result:=true;
          end
        { op reg1,reg1,reg2 -> ops reg1,reg2 for logical/shift ops }
        else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (taicpu(p).oper[2]^.typ=top_reg) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole opXXY2opsXY done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).ops := 2;
            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { ops reg1,reg1,op2 -> ops reg1,op2: already flag-setting, just
          shrink to the 2-operand form }
        else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
          begin
            DebugMsg('Peephole opXXY2opXY done', p);
            taicpu(p).ops := 2;
            if taicpu(p).oper[2]^.typ=top_reg then
              taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
            else
              taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
            result:=true;
          end
        { op reg1,reg2,reg1 -> ops reg1,reg2 for the commutative and/orr/eor }
        else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole opXYX2opsXY done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            taicpu(p).ops := 2;
            result:=true;
          end
        { mov reg1,reg2,<shift> -> lsls/lsrs/asrs/rors reg1,reg2,... }
        else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
          (taicpu(p).ops=3) and
          (taicpu(p).oper[2]^.typ=top_shifterop) and
          (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
          //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Mov2Shift done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            case taicpu(p).oper[2]^.shifterop^.shiftmode of
              SM_LSL: taicpu(p).opcode:=A_LSL;
              SM_LSR: taicpu(p).opcode:=A_LSR;
              SM_ASR: taicpu(p).opcode:=A_ASR;
              SM_ROR: taicpu(p).opcode:=A_ROR;
              else
                internalerror(2019050912);
            end;
            { replace the shifter operand with the shift-count register or
              immediate, matching the dedicated shift instruction's form }
            if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
              taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
            else
              taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
            result:=true;
          end
      end;
  end;
{ unit initialization: register this CPU's peephole optimizer and
  pre-register-allocation scheduler classes with the generic optimizer driver }
begin
  casmoptimizer:=TCpuAsmOptimizer;
  cpreregallocscheduler:=TCpuPreRegallocScheduler;
End.