aoptcpu.pas 118 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai,
  25. aasmcpu,
  26. aopt, aoptobj, aoptarm;
Type

  { TCpuAsmOptimizer

    ARM-specific peephole optimizer; extends the generic ARM optimizer base
    class with ARM32-specific pass-1/pass-2 transformations. }

  TCpuAsmOptimizer = class(TARMAsmOptimizer)
    { Can't be done in some cases due to the limited range of jumps }
    function CanDoJumpOpts: Boolean; override;

    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    function RemoveSuperfluousVMov(const p : tai; movp : tai; const optimizer : string) : boolean;

    { gets the next tai object after current that contains info relevant
      to the optimizer in p1 which used the given register or does a
      change in program flow.
      If there is none, it returns false and
      sets p1 to nil }
    Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);

    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;

    { With these routines, there's optimisation code that's general for all ARM platforms }
    function OptPass1And(var p: tai): Boolean; override;
    function OptPass1LDR(var p: tai): Boolean; override;
    function OptPass1STR(var p: tai): Boolean; override;
  protected
    { combine add/sub with a following ldr/str into pre-/postindexed addressing }
    function LookForPreindexedPattern(p: taicpu): boolean;
    function LookForPostindexedPattern(p: taicpu): boolean;

    { Individual optimisation routines, dispatched per opcode from the pass drivers }
    function OptPass1DataCheckMov(var p: tai): Boolean;
    function OptPass1ADDSUB(var p: tai): Boolean;
    function OptPass1CMP(var p: tai): Boolean;
    function OptPass1STM(var p: tai): Boolean;
    function OptPass1MOV(var p: tai): Boolean;
    function OptPass1MUL(var p: tai): Boolean;
    function OptPass1MVN(var p: tai): Boolean;
    function OptPass1VMov(var p: tai): Boolean;
    function OptPass1VOp(var p: tai): Boolean;
    function OptPass1Push(var p: tai): Boolean;
    function OptPass2Bcc(var p: tai): Boolean;
    function OptPass2STM(var p: tai): Boolean;
    function OptPass2STR(var p: tai): Boolean;
  End;

  { instruction scheduler run before register allocation }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean;override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;

  { Thumb-2 specialization of the ARM optimizer }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2;override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  protected
    function OptPass1AndThumb2(var p : tai) : boolean;
    function OptPass1LDM(var p : tai) : boolean;
    function OptPass1STM(var p : tai) : boolean;
  End;

function MustBeLast(p : tai) : boolean;
  84. Implementation
  85. uses
  86. cutils,verbose,globtype,globals,
  87. systems,
  88. cpuinfo,
  89. cgobj,procinfo,
  90. aasmbase,aasmdata,
  91. aoptutils;
  92. { Range check must be disabled explicitly as conversions between signed and unsigned
  93. 32-bit values are done without explicit typecasts }
  94. {$R-}
  95. function CanBeCond(p : tai) : boolean;
  96. begin
  97. result:=
  98. not(GenerateThumbCode) and
  99. (p.typ=ait_instruction) and
  100. (taicpu(p).condition=C_None) and
  101. ((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
  102. (taicpu(p).opcode<>A_CBZ) and
  103. (taicpu(p).opcode<>A_CBNZ) and
  104. (taicpu(p).opcode<>A_PLD) and
  105. (((taicpu(p).opcode<>A_BLX) and
  106. { BL may need to be converted into BLX by the linker -- could possibly
  107. be allowed in case it's to a local symbol of which we know that it
  108. uses the same instruction set as the current one }
  109. (taicpu(p).opcode<>A_BL)) or
  110. (taicpu(p).oper[0]^.typ=top_reg));
  111. end;
  112. function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
  113. begin
  114. Result:=false;
  115. if (taicpu(movp).condition = C_EQ) and
  116. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  117. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  118. begin
  119. asml.insertafter(tai_comment.Create(strpnew('Peephole Optimization: CmpMovMov - Removed redundant moveq')), movp);
  120. asml.remove(movp);
  121. movp.free;
  122. Result:=true;
  123. end;
  124. end;
  125. function AlignedToQWord(const ref : treference) : boolean;
  126. begin
  127. { (safe) heuristics to ensure alignment }
  128. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  129. (((ref.offset>=0) and
  130. ((ref.offset mod 8)=0) and
  131. ((ref.base=NR_R13) or
  132. (ref.index=NR_R13))
  133. ) or
  134. ((ref.offset<=0) and
  135. { when using NR_R11, it has always a value of <qword align>+4 }
  136. ((abs(ref.offset+4) mod 8)=0) and
  137. (current_procinfo.framepointer=NR_R11) and
  138. ((ref.base=NR_R11) or
  139. (ref.index=NR_R11))
  140. )
  141. );
  142. end;
  143. function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
  144. begin
  145. if GenerateThumb2Code then
  146. result := (aoffset<4096) and (aoffset>-256)
  147. else
  148. result := ((pf in [PF_None,PF_B]) and
  149. (abs(aoffset)<4096)) or
  150. (abs(aoffset)<256);
  151. end;
{ Returns true if instruction hp reads register reg (directly, via a register
  set, via a shifter operand, or as part of a memory reference). }
function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  var
    p: taicpu;
    i: longint;
  begin
    instructionLoadsFromReg := false;
    if not (assigned(hp) and (hp.typ = ait_instruction)) then
      exit;
    p:=taicpu(hp);
    { oper[0] is normally the destination, so start reading at oper[1] ... }
    i:=1;
    {For these instructions we have to start on oper[0]}
    if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                     A_CMP, A_CMN, A_TST, A_TEQ,
                     A_B, A_BL, A_BX, A_BLX,
                     A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then i:=0;
    while(i<p.ops) do
      begin
        case p.oper[I]^.typ of
          top_reg:
            instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
              { STRD stores reg and reg+1, so the first operand implicitly reads reg+1 as well }
              { STRD }
              ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
          top_regset:
            instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
          top_shifterop:
            instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
          top_ref:
            { a memory operand reads both its base and index registers }
            instructionLoadsFromReg :=
              (p.oper[I]^.ref^.base = reg) or
              (p.oper[I]^.ref^.index = reg);
          else
            ;
        end;
        { for LDM/VLDM only the first operand (the base register) is read;
          the register set is written, so stop after checking oper[0] }
        if (i=0) and (p.opcode in [A_LDM,A_VLDM]) then
          exit;
        if instructionLoadsFromReg then
          exit; {Bailout if we found something}
        Inc(I);
      end;
  end;
{ Returns true if instruction hp overwrites reg with a new value (as opposed
  to merely reading or modifying it, e.g. as a pre-/postindexed base). }
function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  var
    p: taicpu;
  begin
    Result := false;
    if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
      exit;
    p := taicpu(hp);
    { first handle the opcodes whose destination is not simply oper[0] }
    case p.opcode of
      { These operands do not write into a register at all }
      A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD,
      A_VCMP:
        exit;
      {Take care of post/preincremented store and loads, they will change their base register}
      A_STR, A_LDR:
        begin
          Result := false;
          { actually, this does not apply here because post-/preindexed does not mean that a register
            is loaded with a new value, it is only modified
            (taicpu(p).oper[1]^.typ=top_ref) and
            (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (taicpu(p).oper[1]^.ref^.base = reg);
          }
          { STR does not load into its first register }
          if p.opcode = A_STR then
            exit;
          { LDR falls through to the oper[0] check below }
        end;
      A_VSTR:
        begin
          Result := false;
          exit;
        end;
      { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
      A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
        Result :=
          (p.oper[1]^.typ = top_reg) and
          (p.oper[1]^.reg = reg);
      {Loads to oper2 from coprocessor}
      {
      MCR/MRC is currently not supported in FPC
      A_MRC:
        Result :=
          (p.oper[2]^.typ = top_reg) and
          (p.oper[2]^.reg = reg);
      }
      {Loads to all register in the registerset}
      A_LDM, A_VLDM:
        Result := (getsupreg(reg) in p.oper[1]^.regset^);
      A_POP:
        Result := (getsupreg(reg) in p.oper[0]^.regset^) or
          (reg=NR_STACK_POINTER_REG);
      else
        ;
    end;
    if Result then
      exit;
    { generic case: oper[0] is the destination }
    case p.oper[0]^.typ of
      {This is the case}
      top_reg:
        Result := (p.oper[0]^.reg = reg) or
          { LDRD also writes reg+1 }
          { LDRD }
          (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
      {LDM/STM might write a new value to their index register}
      top_ref:
        Result :=
          (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
          (taicpu(p).oper[0]^.ref^.base = reg);
      else
        ;
    end;
  end;
{ Scans forward from Current for the next LDR/STR whose memory operand equals
  ref.  Returns true with Next set to that instruction; returns false when the
  search is aborted (flow change, PC modified, optionally any store, or when
  not at -O3). }
function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai; out
  Next: tai; const ref: TReference; StopOnStore: Boolean): Boolean;
  begin
    Next:=Current;
    repeat
      Result:=GetNextInstruction(Next,Next);
      if Result and
         (Next.typ=ait_instruction) and
         (taicpu(Next).opcode in [A_LDR, A_STR]) and
         (
          ((taicpu(Next).ops = 2) and
           (taicpu(Next).oper[1]^.typ = top_ref) and
           RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
          ((taicpu(Next).ops = 3) and { LDRD/STRD }
           (taicpu(Next).oper[2]^.typ = top_ref) and
           RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
         ) then
        {We've found an instruction LDR or STR with the same reference}
        exit;
    { abort conditions: end of list, non-instruction, below -O3, a call/jump,
      an (optional) intervening store, or anything that modifies PC }
    until not(Result) or
          (Next.typ<>ait_instruction) or
          not(cs_opt_level3 in current_settings.optimizerswitches) or
          is_calljmp(taicpu(Next).opcode) or
          (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or
          RegModifiedByInstruction(NR_PC,Next);
    Result:=false;
  end;
{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';

  { emits s as an assembler comment directly before p }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  const
    SPeepholeOptimization = '';

  { no-op when DEBUG_AOPTCPU is disabled }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
  304. function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
  305. begin
  306. { Cannot perform these jump optimisations if the ARM architecture has 16-bit thumb codes }
  307. Result := not (
  308. (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
  309. );
  310. end;
{ If movp is a VMOV that just copies the result of p (a VFP load/convert/op)
  into another register and the intermediate register dies there, fold the
  destination of the VMOV into p and delete the VMOV.  optimizer names the
  calling optimisation for the debug message.  Returns true on success. }
function TCpuAsmOptimizer.RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string):boolean;
  var
    alloc,
    dealloc : tai_regalloc;
    hp1 : tai;
  begin
    Result:=false;
    { movp must be a VMOV matching p's condition, and its postfix must agree
      with what p produces (same postfix, or F64/F32 when p converts to
      double/single precision respectively) }
    if ((MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
         ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR))
        ) or
        (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
        (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
       ) and
       (taicpu(movp).ops=2) and
       MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
       { the destination register of the mov might not be used between p and movp }
       not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
       { Take care to only do this for instructions which REALLY load to the first register.
         Otherwise
           vstr reg0, [reg1]
           vmov reg2, reg0
         will be optimized to
           vstr reg2, [reg1]
       }
       regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
      begin
        { the intermediate register must die right after the mov }
        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
        if assigned(dealloc) then
          begin
            DebugMsg(SPeepholeOptimization + optimizer + ' removed superfluous vmov', movp);
            result:=true;
            { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
              and remove it if possible }
            asml.Remove(dealloc);
            alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
            if assigned(alloc) then
              begin
                { both alloc and dealloc can go: the register is dead entirely }
                asml.Remove(alloc);
                alloc.free;
                dealloc.free;
              end
            else
              { no alloc found: keep the dealloc, but move it up to just after p }
              asml.InsertAfter(dealloc,p);
            { try to move the allocation of the target register }
            GetLastInstruction(movp,hp1);
            alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
            if assigned(alloc) then
              begin
                asml.Remove(alloc);
                asml.InsertBefore(alloc,p);
                { adjust used regs }
                IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
              end;
            { change
                vldr reg0,[reg1]
                vmov reg2,reg0
              into
                ldr reg2,[reg1]
              if reg2 is an int register
            }
            if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
              taicpu(p).opcode:=A_LDR;
            { finally get rid of the mov }
            taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
            asml.remove(movp);
            movp.free;
          end;
      end;
  end;
{
  optimize
    add/sub reg1,reg1,regY/const
    ...
    ldr/str regX,[reg1]
  into
    ldr/str regX,[reg1, regY/const]!
}
{ Returns true and rewrites the load/store when the pattern matched; the
  add/sub itself is removed by the caller (ARM code only — preindexed forms
  are unavailable in Thumb). }
function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
  var
    hp1: tai;
  begin
    if GenerateARMCode and
       (p.ops=3) and
       { add/sub must modify its own first source register (reg1,reg1,...) }
       MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
       GetNextInstructionUsingReg(p, hp1, p.oper[0]^.reg) and
       (not RegModifiedBetween(p.oper[0]^.reg, p, hp1)) and
       MatchInstruction(hp1, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) and
       { the memory operand must be a bare [reg1] with no index/offset }
       (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
       (taicpu(hp1).oper[1]^.ref^.base=p.oper[0]^.reg) and
       (taicpu(hp1).oper[0]^.reg<>p.oper[0]^.reg) and
       (taicpu(hp1).oper[1]^.ref^.offset=0) and
       (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
       { the added value: either an unmodified register, or a constant that
         fits the immediate field (8 bit, or 12 bit for plain/byte access) }
       (((p.oper[2]^.typ=top_reg) and
         (not RegModifiedBetween(p.oper[2]^.reg, p, hp1))) or
        ((p.oper[2]^.typ=top_const) and
         ((abs(p.oper[2]^.val) < 256) or
          ((abs(p.oper[2]^.val) < 4096) and
           (taicpu(hp1).oppostfix in [PF_None,PF_B]))))) then
      begin
        taicpu(hp1).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
        if p.oper[2]^.typ=top_reg then
          begin
            taicpu(hp1).oper[1]^.ref^.index:=p.oper[2]^.reg;
            { SUB becomes a negative index }
            if p.opcode=A_ADD then
              taicpu(hp1).oper[1]^.ref^.signindex:=1
            else
              taicpu(hp1).oper[1]^.ref^.signindex:=-1;
          end
        else
          begin
            if p.opcode=A_ADD then
              taicpu(hp1).oper[1]^.ref^.offset:=p.oper[2]^.val
            else
              taicpu(hp1).oper[1]^.ref^.offset:=-p.oper[2]^.val;
          end;
        result:=true;
      end
    else
      result:=false;
  end;
{
  optimize
    ldr/str regX,[reg1]
    ...
    add/sub reg1,reg1,regY/const
  into
    ldr/str regX,[reg1], regY/const
}
{ Returns true and removes the add/sub when the pattern matched (ARM code
  only — postindexed forms are unavailable in Thumb). }
function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    { memory operand must be a bare [reg1] with no index/offset }
    if (p.oper[1]^.typ = top_ref) and
       (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
       (p.oper[1]^.ref^.index=NR_NO) and
       (p.oper[1]^.ref^.offset=0) and
       GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
       { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
       MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
       { the add/sub must modify exactly the base register (reg1,reg1,...) }
       (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
       (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
       (
        (taicpu(hp1).oper[2]^.typ=top_reg) or
        { valid offset? }
        ((taicpu(hp1).oper[2]^.typ=top_const) and
         ((abs(taicpu(hp1).oper[2]^.val)<256) or
          ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
         )
        )
       ) and
       { don't apply the optimization if the base register is loaded }
       (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
       not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
       { don't apply the optimization if the (new) index register is loaded }
       (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
       not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
       GenerateARMCode then
      begin
        DebugMsg(SPeepholeOptimization + 'Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
        p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
        if taicpu(hp1).oper[2]^.typ=top_const then
          begin
            { SUB becomes a negative offset }
            if taicpu(hp1).opcode=A_ADD then
              p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
            else
              p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
          end
        else
          begin
            p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
            if taicpu(hp1).opcode=A_ADD then
              p.oper[1]^.ref^.signindex:=1
            else
              p.oper[1]^.ref^.signindex:=-1;
          end;
        { the add/sub is now folded into the load/store }
        asml.Remove(hp1);
        hp1.Free;
        Result:=true;
      end;
  end;
{ Pass-1 peephole handler for A_ADD/A_SUB.
  Applies, in order:
    * OptPass1DataCheckMov: folds a following "mov reg2,reg1" into the data op;
    * folds an add/sub of a constant into the offsets of subsequent ldr/str
      instructions that use the result register as base;
    * merges two consecutive add/sub-with-constant into one instruction
      (degrading to a plain mov, or removing it entirely, when the merged
      constant is zero);
    * LookForPreindexedPattern: turns add/sub + memory access into a
      pre-indexed access.
  Returns True if the instruction stream was changed; p may be removed
  and advanced by RemoveCurrentP. }
function TCpuAsmOptimizer.OptPass1ADDSUB(var p: tai): Boolean;
  var
    hp1,hp2: tai;      { scan cursors into the instruction list }
    sign: Integer;     { +1/-1: relative sign of the second constant }
    newvalue: TCGInt;  { merged immediate of the two add/sub }
    b: byte;           { scratch for is_shifter_const }
  begin
    Result := OptPass1DataCheckMov(p);
    {
      change
      add/sub reg2,reg1,const1
      str/ldr reg3,[reg2,const2]
      dealloc reg2
      to
      str/ldr reg3,[reg1,const2+/-const1]
    }
    if (not GenerateThumbCode) and
       (taicpu(p).ops>2) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       (taicpu(p).oper[2]^.typ = top_const) then
      begin
        hp1:=p;
        { first scan: check that every use of reg2 up to its death/overwrite
          is a plain offset ldr/str whose adjusted offset stays encodable }
        while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
          { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
          MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
          (taicpu(hp1).oper[1]^.typ = top_ref) and
          (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
          { don't optimize if the register is stored/overwritten }
          (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
          { new offset must be valid: either in the range of 8 or 12 bit, depend on the
            ldr postfix }
          (((taicpu(p).opcode=A_ADD) and
            isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
           ) or
           ((taicpu(p).opcode=A_SUB) and
            isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
           )
          ) do
          begin
            { neither reg1 nor reg2 may be changed in between }
            if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
              RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
              break;
            { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
            if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
              assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
              begin
                { remember last instruction }
                hp2:=hp1;
                DebugMsg(SPeepholeOptimization + 'Add/SubLdr2Ldr done', p);
                hp1:=p;
                { second scan: rewrite base register and offset of all the
                  ldr/str instructions validated above, stopping at hp2 }
                while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
                  begin
                    taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
                    if taicpu(p).opcode=A_ADD then
                      inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
                    else
                      dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
                    if hp1=hp2 then
                      break;
                  end;
                RemoveCurrentP(p);
                result:=true;
                Exit;
              end;
          end;
      end;
    {
      optimize
      add/sub rx,ry,const1
      add/sub rx,rx,const2
      into
      add/sub rx,ry,const1+/-const
      or
      mov rx,ry if const1+/-const=0
      or
      remove it, if rx=ry and const1+/-const=0
      check if the first operation has no postfix and condition
    }
    if MatchInstruction(p,[A_ADD,A_SUB],[C_None],[PF_None]) and
      MatchOptype(taicpu(p),top_reg,top_reg,top_const) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
      MatchOptype(taicpu(hp1),top_reg,top_reg,top_const) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
      begin
        { sign is -1 when exactly one of the two instructions is a SUB }
        sign:=1;
        if (taicpu(p).opcode=A_SUB) xor (taicpu(hp1).opcode=A_SUB) then
          sign:=-1;
        newvalue:=taicpu(p).oper[2]^.val+sign*taicpu(hp1).oper[2]^.val;
        { the merged constant must be encodable in the target encoding }
        if (not(GenerateThumbCode) and is_shifter_const(newvalue,b)) or
          (GenerateThumbCode and is_thumb_imm(newvalue)) then
          begin
            DebugMsg(SPeepholeOptimization + 'Merge Add/Sub done', p);
            taicpu(p).oper[2]^.val:=newvalue;
            RemoveInstruction(hp1);
            Result:=true;
            if newvalue=0 then
              begin
                { add/sub rx,rx,#0 can go away entirely; otherwise a mov remains }
                if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                  RemoveCurrentP(p)
                else
                  begin
                    taicpu(p).opcode:=A_MOV;
                    taicpu(p).ops:=2;
                  end;
                Exit;
              end;
          end;
      end;
    { finally, try to fold the add/sub into a pre-indexed load/store }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_None) and
      LookForPreindexedPattern(taicpu(p)) then
      begin
        DebugMsg(SPeepholeOptimization + 'Add/Sub to Preindexed done', p);
        RemoveCurrentP(p);
        Result:=true;
        Exit;
      end;
  end;
{ Pass-1 peephole handler for A_MUL.
  After trying OptPass1DataCheckMov, fuses
      mul reg0, z, w
      add/sub x, y, reg0     (reg0 dead afterwards)
  into a single mla/mls instruction.  ADD fusion requires >= ARMv4,
  SUB fusion (MLS) only the listed ARMv6T2/v7 variants; for CPUs up to
  ARMv6 the Rd=Rm MLA form is avoided (see inline comment).
  Returns True and removes p when the fusion succeeds. }
function TCpuAsmOptimizer.OptPass1MUL(var p: tai): Boolean;
  var
    hp1: tai;           { the following add/sub instruction }
    oldreg: tregister;  { the accumulator operand (y) of the add }
  begin
    Result := OptPass1DataCheckMov(p);
    {
      Turn
      mul reg0, z,w
      sub/add x, y, reg0
      dealloc reg0
      into
      mls/mla x,z,w,y
    }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_None) and
      (taicpu(p).ops=3) and
      (taicpu(p).oper[0]^.typ = top_reg) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (taicpu(p).oper[2]^.typ = top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
      { the multiplicand registers must still hold the same values at hp1 }
      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
      (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
      (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
       ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
      // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
      // TODO: A workaround would be to swap Rm and Rs
      (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
      { the mul result may appear as either source of a 3-operand add
        (only the last operand for sub), or as the source of a 2-operand form }
      (((taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_reg) and
        ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
          (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
         ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
           (taicpu(hp1).opcode=A_ADD) and
           (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
       ((taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
      { reg0 must not be live after hp1 }
      (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
      begin
        if taicpu(hp1).opcode=A_ADD then
          begin
            taicpu(hp1).opcode:=A_MLA;
            { pick the accumulator: whichever add operand is NOT the mul result }
            if taicpu(hp1).ops=3 then
              begin
                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
                  oldreg:=taicpu(hp1).oper[2]^.reg
                else
                  oldreg:=taicpu(hp1).oper[1]^.reg;
              end
            else
              oldreg:=taicpu(hp1).oper[0]^.reg;
            { rebuild as mla x,z,w,y }
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
            taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
            taicpu(hp1).loadreg(3,oldreg);
            DebugMsg(SPeepholeOptimization + 'MulAdd2MLA done', p);
          end
        else
          begin
            { rebuild as mls x,z,w,y; operand 3 must be set before operand 1
              is overwritten below }
            taicpu(hp1).opcode:=A_MLS;
            taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
            if taicpu(hp1).ops=2 then
              taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
            else
              taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'MulSub2MLS done', p);
            { extend the live ranges of the source registers up to hp1 }
            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
            AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
            AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
          end;
        taicpu(hp1).ops:=4;
        RemoveCurrentP(p);
        Result := True;
        Exit;
      end
  end;
  694. function TCpuAsmOptimizer.OptPass1And(var p: tai): Boolean;
  695. begin
  696. Result := OptPass1DataCheckMov(p);
  697. Result := inherited OptPass1And(p) or Result;
  698. end;
  699. function TCpuAsmOptimizer.OptPass1DataCheckMov(var p: tai): Boolean;
  700. var
  701. hp1: tai;
  702. begin
  703. {
  704. change
  705. op reg1, ...
  706. mov reg2, reg1
  707. to
  708. op reg2, ...
  709. }
  710. Result := (taicpu(p).ops >= 3) and
  711. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  712. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  713. end;
{ Pass-1 peephole handler for A_CMP (with a constant second operand).
  Applies:
    * removal of a conditional mov that just rewrites the compared
      constant back into the register (RemoveRedundantMove);
    * OpCmp2OpS: drops a "cmp reg,#0" by giving the preceding data
      instruction an S suffix, when the only consumers of the flags are
      EQ/NE/MI/PL uses (these depend solely on Z and N, which the S form
      sets the same way as the cmp).
  Returns True when the instruction stream was changed. }
function TCpuAsmOptimizer.OptPass1CMP(var p: tai): Boolean;
  var
    hp1, hp2, hp_last: tai;   { following movs / preceding data instruction }
    MovRem1, MovRem2: Boolean; { which of the two movs got removed }
  begin
    Result := False;
    { These optimizations can be applied only to the currently enabled operations because
      the other operations do not update all flags and FPC does not track flag usage }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oper[1]^.typ = top_const) and
      GetNextInstruction(p, hp1) then
      begin
        {
          change
          cmp reg,const1
          moveq reg,const1
          movne reg,const2
          to
          cmp reg,const1
          movne reg,const2
        }
        if MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
          (taicpu(hp1).oper[1]^.typ = top_const) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
          (taicpu(hp2).oper[1]^.typ = top_const) then
          begin
            MovRem1 := RemoveRedundantMove(p, hp1, asml);
            MovRem2 := RemoveRedundantMove(p, hp2, asml);
            Result:= MovRem1 or MovRem2;
            { Make sure that hp1 is still the next instruction after p }
            if MovRem1 then
              if MovRem2 then
                begin
                  if not GetNextInstruction(p, hp1) then
                    Exit;
                end
              else
                hp1 := hp2;
          end;
        {
          change
          <op> reg,x,y
          cmp reg,#0
          into
          <op>s reg,x,y
        }
        if (taicpu(p).oppostfix = PF_None) and
          (taicpu(p).oper[1]^.val = 0) and
          { be careful here, following instructions could use other flags
            however after a jump fpc never depends on the value of flags }
          { All above instructions set Z and N according to the following
            Z := result = 0;
            N := result[31];
            EQ = Z=1; NE = Z=0;
            MI = N=1; PL = N=0; }
          (MatchInstruction(hp1, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
           { mov is also possible, but only if there is no shifter operand, it could be an rxx,
             we are too lazy to check if it is rxx or something else }
           (MatchInstruction(hp1, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp1).ops=2))) and
          GetLastInstruction(p, hp_last) and
          MatchInstruction(hp_last, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,
            A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
          (
            { mlas is only allowed in arm mode }
            (taicpu(hp_last).opcode<>A_MLA) or
            (current_settings.instructionset<>is_thumb)
          ) and
          (taicpu(hp_last).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
          { the flags must die after the conditional use at hp1 }
          assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp1.Next))) then
          begin
            DebugMsg(SPeepholeOptimization + 'OpCmp2OpS done', hp_last);
            taicpu(hp_last).oppostfix:=PF_S;
            { move flag allocation if possible: the flags are now set by
              hp_last, so the allocation marker must precede it }
            hp1:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp_last.Next));
            if assigned(hp1) then
              begin
                asml.Remove(hp1);
                asml.insertbefore(hp1, hp_last);
              end;
            RemoveCurrentP(p);
            Result:=true;
          end;
      end;
  end;
{ Pass-1 peephole handler for A_LDR.
  After the inherited optimisations, applies:
    * LdrLdr2Ldr / LdrLdr2LdrMov: a second load from the same reference
      becomes redundant or a register move;
    * LdrLdr2Ldrd: two adjacent word loads of consecutive registers from
      consecutive, qword-aligned addresses become one ldrd (requires EDSP);
    * LdrbAnd2Ldrb: "ldrb dst1 / and dst2,dst1,#255" keeps only the ldrb,
      retargeted to dst2 (the and is a no-op after a byte load);
    * LookForPostindexedPattern: folds a following add/sub of the base
      register into a post-indexed load;
    * LdrMov2Ldr: folds a trailing "mov reg2,reg1" into the load.
  Returns True when the instruction stream was changed. }
function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  var
    hp1: tai;  { the instruction following/using the load result }
  begin
    Result := inherited OptPass1LDR(p);
    if Result then
      Exit;
    { change
      ldr reg1,ref
      ldr reg2,ref
      into ...
    }
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      GetNextInstruction(p,hp1) and
      { ldrd is not allowed here }
      MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
      begin
        {
          ...
          ldr reg1,ref
          mov reg2,reg1
        }
        if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
          RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
          { the first load must not have changed the address registers }
          (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
          begin
            if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
              begin
                { same destination: the second load is fully redundant }
                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldr done', hp1);
                asml.remove(hp1);
                hp1.free;
              end
            else
              begin
                { different destination: replace the reload by a register move }
                DebugMsg(SPeepholeOptimization + 'LdrLdr2LdrMov done', hp1);
                taicpu(hp1).opcode:=A_MOV;
                taicpu(hp1).oppostfix:=PF_None;
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
              end;
            result := true;
          end
        {
          ...
          ldrd reg1,reg1+1,ref
        }
        else if (GenerateARMCode or GenerateThumb2Code) and
          (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
          { ldrd does not allow any postfixes ... }
          (taicpu(p).oppostfix=PF_None) and
          { ldrd needs an even-numbered first register }
          not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
          (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
          { ldr ensures that either base or index contain no register, else ldr wouldn't
            use an offset either
          }
          (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
          { ldrd only encodes an 8-bit offset }
          (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
          AlignedToQWord(taicpu(p).oper[1]^.ref^) then
          begin
            DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldrd done', p);
            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
            taicpu(p).ops:=3;
            taicpu(p).oppostfix:=PF_D;
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
    {
      Change
        ldrb dst1, [REF]
        and  dst2, dst1, #255
      into
        ldrb dst2, [ref]
    }
    if not(GenerateThumbCode) and
      (taicpu(p).oppostfix=PF_B) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
      (taicpu(hp1).oper[2]^.typ = top_const) and
      (taicpu(hp1).oper[2]^.val = $FF) and
      { dst2 must not be read between the ldrb and the and }
      not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'LdrbAnd2Ldrb done', p);
        taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
        asml.remove(hp1);
        hp1.free;
        result:=true;
      end;
    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
    { Remove superfluous mov after ldr
      changes
      ldr reg1, ref
      mov reg2, reg1
      to
      ldr reg2, ref
      conditions are:
        * no ldrd usage
        * reg1 must be released after mov
        * mov can not contain shifterops
        * ldr+mov have the same conditions
        * mov does not set flags
    }
    if (taicpu(p).oppostfix<>PF_D) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
      Result:=true;
  end;
{ Pass-1 peephole handler for A_STM.
  Recognises the full call-frame idiom
      stmfd r13!,[r14]
      sub   r13,r13,#imm
      bl    abc
      add   r13,r13,#imm
      ldmfd r13!,[r15]
  and replaces it by a plain tail branch "b abc".  This is only done
  when thumb interworking is off (the return via pc-load would otherwise
  be needed for state switching).  Returns True on success. }
function TCpuAsmOptimizer.OptPass1STM(var p: tai): Boolean;
  var
    hp1, hp2, hp3, hp4: tai;  { sub / bl / add / ldmfd of the pattern }
  begin
    Result := False;
    {
      change
      stmfd r13!,[r14]
      sub r13,r13,#4
      bl abc
      add r13,r13,#4
      ldmfd r13!,[r15]
      into
      b abc
    }
    if not(ts_thumb_interworking in current_settings.targetswitches) and
      (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_FD) and
      { stmfd r13!,[r14]: push exactly the link register }
      (taicpu(p).oper[0]^.typ = top_ref) and
      (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.offset=0) and
      (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
      (taicpu(p).oper[1]^.typ = top_regset) and
      (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
      { sub r13,r13,#const }
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
      (taicpu(hp1).oper[0]^.typ = top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[2]^.typ = top_const) and
      { bl/blx abc }
      GetNextInstruction(hp1, hp2) and
      SkipEntryExitMarker(hp2, hp2) and
      MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      { add r13,r13,#const with the same constant as the sub }
      GetNextInstruction(hp2, hp3) and
      SkipEntryExitMarker(hp3, hp3) and
      MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
      MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
      { ldmfd r13!,[r15]: pop straight into pc }
      GetNextInstruction(hp3, hp4) and
      MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
      (taicpu(hp4).oper[1]^.typ = top_regset) and
      (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
      begin
        { drop the frame bookkeeping and turn the call into a branch }
        asml.Remove(hp1);
        asml.Remove(hp3);
        asml.Remove(hp4);
        taicpu(hp2).opcode:=A_B;
        hp1.free;
        hp3.free;
        hp4.free;
        RemoveCurrentp(p, hp2);
        DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
        Result := True;
      end;
  end;
{ Pass-1 peephole handler for A_STR.
  After the inherited optimisations, applies (for plain offset stores
  without postfix):
    * StrLdr2StrMov: a later load from the same reference is replaced by
      a register move (or removed when it targets the stored register);
    * StrStr2Strd: two adjacent word stores of consecutive registers to
      consecutive, qword-aligned addresses become one strd (needs EDSP);
    * LookForPostindexedPattern: folds a following add/sub of the base
      register into a post-indexed store.
  Returns True when the instruction stream was changed. }
function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  var
    hp1: tai;  { the matching ldr/str of the pattern }
  begin
    Result := inherited OptPass1STR(p);
    if Result then
      Exit;
    { Common conditions }
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      (taicpu(p).oppostfix=PF_None) then
      begin
        { change
          str reg1,ref
          ldr reg2,ref
          into
          str reg1,ref
          mov reg2,reg1
        }
        if (taicpu(p).condition=C_None) and
          GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
          MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
          (taicpu(hp1).oper[1]^.typ=top_ref) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
          { neither the stored value nor the address registers may have
            changed between the str and the ldr }
          not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
          ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
          ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
          begin
            if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
              begin
                { reload into the same register: the ldr is redundant }
                DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 1 done', hp1);
                asml.remove(hp1);
                hp1.free;
              end
            else
              begin
                { different register: replace the reload by a register move }
                taicpu(hp1).opcode:=A_MOV;
                taicpu(hp1).oppostfix:=PF_None;
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 2 done', hp1);
              end;
            result := True;
          end
        { change
          str reg1,ref
          str reg2,ref
          into
          strd reg1,reg2,ref
        }
        else if (GenerateARMCode or GenerateThumb2Code) and
          (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
          { strd needs an even-numbered first register and an 8-bit offset }
          not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
          (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
          AlignedToQWord(taicpu(p).oper[1]^.ref^) and
          GetNextInstruction(p,hp1) and
          MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
          (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
          { str ensures that either base or index contain no register, else ldr wouldn't
            use an offset either
          }
          (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) then
          begin
            DebugMsg(SPeepholeOptimization + 'StrStr2Strd done', p);
            taicpu(p).oppostfix:=PF_D;
            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
            taicpu(p).ops:=3;
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  end;
  1049. function TCpuAsmOptimizer.OptPass1MOV(var p: tai): Boolean;
  1050. var
  1051. hp1, hpfar1, hp2: tai;
  1052. i, i2: longint;
  1053. tempop: tasmop;
  1054. dealloc: tai_regalloc;
  1055. begin
  1056. Result := False;
  1057. hp1 := nil;
  1058. { fold
  1059. mov reg1,reg0, shift imm1
  1060. mov reg1,reg1, shift imm2
  1061. }
  1062. if (taicpu(p).ops=3) and
  1063. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1064. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1065. getnextinstruction(p,hp1) and
  1066. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1067. (taicpu(hp1).ops=3) and
  1068. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  1069. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1070. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1071. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  1072. begin
  1073. { fold
  1074. mov reg1,reg0, lsl 16
  1075. mov reg1,reg1, lsr 16
  1076. strh reg1, ...
  1077. dealloc reg1
  1078. to
  1079. strh reg1, ...
  1080. dealloc reg1
  1081. }
  1082. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1083. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  1084. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  1085. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  1086. getnextinstruction(hp1,hp2) and
  1087. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  1088. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  1089. begin
  1090. TransferUsedRegs(TmpUsedRegs);
  1091. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1092. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1093. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  1094. begin
  1095. DebugMsg(SPeepholeOptimization + 'Removed superfluous 16 Bit zero extension', hp1);
  1096. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  1097. asml.remove(hp1);
  1098. hp1.free;
  1099. RemoveCurrentP(p, hp2);
  1100. Result:=true;
  1101. Exit;
  1102. end;
  1103. end
  1104. { fold
  1105. mov reg1,reg0, shift imm1
  1106. mov reg1,reg1, shift imm2
  1107. to
  1108. mov reg1,reg0, shift imm1+imm2
  1109. }
  1110. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  1111. { asr makes no use after a lsr, the asr can be foled into the lsr }
  1112. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  1113. begin
  1114. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1115. { avoid overflows }
  1116. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  1117. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  1118. SM_ROR:
  1119. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  1120. SM_ASR:
  1121. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  1122. SM_LSR,
  1123. SM_LSL:
  1124. begin
  1125. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  1126. InsertLLItem(p.previous, p.next, hp2);
  1127. p.free;
  1128. p:=hp2;
  1129. end;
  1130. else
  1131. internalerror(2008072803);
  1132. end;
  1133. DebugMsg(SPeepholeOptimization + 'ShiftShift2Shift 1 done', p);
  1134. asml.remove(hp1);
  1135. hp1.free;
  1136. hp1 := nil;
  1137. result := true;
  1138. end
  1139. { fold
  1140. mov reg1,reg0, shift imm1
  1141. mov reg1,reg1, shift imm2
  1142. mov reg1,reg1, shift imm3 ...
  1143. mov reg2,reg1, shift imm3 ...
  1144. }
  1145. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  1146. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1147. (taicpu(hp2).ops=3) and
  1148. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1149. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  1150. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1151. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  1152. begin
  1153. { mov reg1,reg0, lsl imm1
  1154. mov reg1,reg1, lsr/asr imm2
  1155. mov reg2,reg1, lsl imm3 ...
  1156. to
  1157. mov reg1,reg0, lsl imm1
  1158. mov reg2,reg1, lsr/asr imm2-imm3
  1159. if
  1160. imm1>=imm2
  1161. }
  1162. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1163. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1164. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1165. begin
  1166. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1167. begin
  1168. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  1169. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1170. begin
  1171. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1a done', p);
  1172. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1173. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1174. asml.remove(hp1);
  1175. asml.remove(hp2);
  1176. hp1.free;
  1177. hp2.free;
  1178. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  1179. begin
  1180. taicpu(p).freeop(1);
  1181. taicpu(p).freeop(2);
  1182. taicpu(p).loadconst(1,0);
  1183. end;
  1184. result := true;
  1185. Exit;
  1186. end;
  1187. end
  1188. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1189. begin
  1190. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1b done', p);
  1191. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  1192. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1193. asml.remove(hp2);
  1194. hp2.free;
  1195. result := true;
  1196. Exit;
  1197. end;
  1198. end
  1199. { mov reg1,reg0, lsr/asr imm1
  1200. mov reg1,reg1, lsl imm2
  1201. mov reg1,reg1, lsr/asr imm3 ...
  1202. if imm3>=imm1 and imm2>=imm1
  1203. to
  1204. mov reg1,reg0, lsl imm2-imm1
  1205. mov reg1,reg1, lsr/asr imm3 ...
  1206. }
  1207. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1208. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1209. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  1210. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1211. begin
  1212. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  1213. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1214. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 2 done', p);
  1215. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  1216. begin
  1217. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  1218. asml.remove(hp1);
  1219. hp1.free;
  1220. end;
  1221. RemoveCurrentp(p);
  1222. result := true;
  1223. Exit;
  1224. end;
  1225. end;
  1226. end;
  1227. { All the optimisations from this point on require GetNextInstructionUsingReg
  1228. to return True }
  1229. while (
  1230. GetNextInstructionUsingReg(p, hpfar1, taicpu(p).oper[0]^.reg) and
  1231. (hpfar1.typ = ait_instruction)
  1232. ) do
  1233. begin
  1234. { Change the common
  1235. mov r0, r0, lsr #xxx
  1236. and r0, r0, #yyy/bic r0, r0, #xxx
  1237. and remove the superfluous and/bic if possible
  1238. This could be extended to handle more cases.
  1239. }
  1240. { Change
  1241. mov rx, ry, lsr/ror #xxx
  1242. uxtb/uxth rz,rx/and rz,rx,0xFF
  1243. dealloc rx
  1244. to
  1245. uxtb/uxth rz,ry,ror #xxx
  1246. }
  1247. if (GenerateThumb2Code) and
  1248. (taicpu(p).ops=3) and
  1249. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1250. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1251. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  1252. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1253. begin
  1254. if MatchInstruction(hpfar1, A_UXTB, [C_None], [PF_None]) and
  1255. (taicpu(hpfar1).ops = 2) and
  1256. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1257. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1258. begin
  1259. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1260. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1261. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1262. taicpu(hpfar1).ops := 3;
  1263. if not Assigned(hp1) then
  1264. GetNextInstruction(p,hp1);
  1265. RemoveCurrentP(p, hp1);
  1266. result:=true;
  1267. exit;
  1268. end
  1269. else if MatchInstruction(hpfar1, A_UXTH, [C_None], [PF_None]) and
  1270. (taicpu(hpfar1).ops=2) and
  1271. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  1272. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1273. begin
  1274. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1275. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1276. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1277. taicpu(hpfar1).ops := 3;
  1278. if not Assigned(hp1) then
  1279. GetNextInstruction(p,hp1);
  1280. RemoveCurrentP(p, hp1);
  1281. result:=true;
  1282. exit;
  1283. end
  1284. else if MatchInstruction(hpfar1, A_AND, [C_None], [PF_None]) and
  1285. (taicpu(hpfar1).ops = 3) and
  1286. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1287. (taicpu(hpfar1).oper[2]^.val = $FF) and
  1288. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1289. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1290. begin
  1291. taicpu(hpfar1).ops := 3;
  1292. taicpu(hpfar1).opcode := A_UXTB;
  1293. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1294. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1295. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1296. if not Assigned(hp1) then
  1297. GetNextInstruction(p,hp1);
  1298. RemoveCurrentP(p, hp1);
  1299. result:=true;
  1300. exit;
  1301. end;
  1302. end;
  1303. { 2-operald mov optimisations }
  1304. if (taicpu(p).ops = 2) then
  1305. begin
  1306. {
  1307. This removes the mul from
  1308. mov rX,0
  1309. ...
  1310. mul ...,rX,...
  1311. }
  1312. if (taicpu(p).oper[1]^.typ = top_const) then
  1313. begin
  1314. (* if false and
  1315. (taicpu(p).oper[1]^.val=0) and
  1316. MatchInstruction(hpfar1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1317. (((taicpu(hpfar1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^)) or
  1318. ((taicpu(hpfar1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[2]^))) then
  1319. begin
  1320. TransferUsedRegs(TmpUsedRegs);
  1321. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1322. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1323. DebugMsg(SPeepholeOptimization + 'MovMUL/MLA2Mov0 done', p);
  1324. if taicpu(hpfar1).opcode=A_MUL then
  1325. taicpu(hpfar1).loadconst(1,0)
  1326. else
  1327. taicpu(hpfar1).loadreg(1,taicpu(hpfar1).oper[3]^.reg);
  1328. taicpu(hpfar1).ops:=2;
  1329. taicpu(hpfar1).opcode:=A_MOV;
  1330. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1331. RemoveCurrentP(p);
  1332. Result:=true;
  1333. exit;
  1334. end
  1335. else*) if (taicpu(p).oper[1]^.val=0) and
  1336. MatchInstruction(hpfar1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1337. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[3]^) then
  1338. begin
  1339. TransferUsedRegs(TmpUsedRegs);
  1340. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1341. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1342. DebugMsg(SPeepholeOptimization + 'MovMLA2MUL 1 done', p);
  1343. taicpu(hpfar1).ops:=3;
  1344. taicpu(hpfar1).opcode:=A_MUL;
  1345. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1346. begin
  1347. RemoveCurrentP(p);
  1348. Result:=true;
  1349. end;
  1350. exit;
  1351. end
  1352. {
  1353. This changes the very common
  1354. mov r0, #0
  1355. str r0, [...]
  1356. mov r0, #0
  1357. str r0, [...]
  1358. and removes all superfluous mov instructions
  1359. }
  1360. else if (taicpu(hpfar1).opcode=A_STR) then
  1361. begin
  1362. hp1 := hpfar1;
  1363. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1364. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1365. GetNextInstruction(hp1, hp2) and
  1366. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1367. (taicpu(hp2).ops = 2) and
  1368. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1369. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1370. begin
  1371. DebugMsg(SPeepholeOptimization + 'MovStrMov done', hp2);
  1372. GetNextInstruction(hp2,hp1);
  1373. asml.remove(hp2);
  1374. hp2.free;
  1375. result:=true;
  1376. if not assigned(hp1) then break;
  1377. end;
  1378. if Result then
  1379. Exit;
  1380. end;
  1381. end;
  1382. {
  1383. This removes the first mov from
  1384. mov rX,...
  1385. mov rX,...
  1386. }
  1387. if taicpu(hpfar1).opcode=A_MOV then
  1388. begin
  1389. hp1 := p;
  1390. while MatchInstruction(hpfar1, A_MOV, [taicpu(hp1).condition], [taicpu(hp1).oppostfix]) and
  1391. (taicpu(hpfar1).ops = 2) and
  1392. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1393. { don't remove the first mov if the second is a mov rX,rX }
  1394. not(MatchOperand(taicpu(hpfar1).oper[0]^, taicpu(hpfar1).oper[1]^)) do
  1395. begin
  1396. { Defer removing the first p until after the while loop }
  1397. if p <> hp1 then
  1398. begin
  1399. DebugMsg(SPeepholeOptimization + 'MovMov done', hp1);
  1400. asml.remove(hp1);
  1401. hp1.free;
  1402. end;
  1403. hp1:=hpfar1;
  1404. GetNextInstruction(hpfar1,hpfar1);
  1405. result:=true;
  1406. if not assigned(hpfar1) then
  1407. Break;
  1408. end;
  1409. if Result then
  1410. begin
  1411. DebugMsg(SPeepholeOptimization + 'MovMov done', p);
  1412. RemoveCurrentp(p);
  1413. Exit;
  1414. end;
  1415. end;
  1416. if RedundantMovProcess(p,hpfar1) then
  1417. begin
  1418. Result:=true;
  1419. { p might not point at a mov anymore }
  1420. exit;
  1421. end;
  1422. { If hpfar1 is nil after the call to RedundantMovProcess, it is
  1423. because it would have become a dangling pointer, so reinitialise it. }
  1424. if not Assigned(hpfar1) then
  1425. Continue;
  1426. { Fold the very common sequence
  1427. mov regA, regB
  1428. ldr* regA, [regA]
  1429. to
  1430. ldr* regA, [regB]
  1431. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1432. }
  1433. if
  1434. // Make sure that Thumb code doesn't propagate a high register into a reference
  1435. (
  1436. (
  1437. GenerateThumbCode and
  1438. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)
  1439. ) or (not GenerateThumbCode)
  1440. ) and
  1441. (taicpu(p).oper[1]^.typ = top_reg) and
  1442. (taicpu(p).oppostfix = PF_NONE) and
  1443. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1444. (taicpu(hpfar1).oper[1]^.typ = top_ref) and
  1445. { We can change the base register only when the instruction uses AM_OFFSET }
  1446. ((taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1447. ((taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1448. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1449. ) and
  1450. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1451. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1452. begin
  1453. DebugMsg(SPeepholeOptimization + 'MovLdr2Ldr done', hpfar1);
  1454. if (taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1455. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1456. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1457. if taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1458. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1459. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
  1460. if Assigned(dealloc) then
  1461. begin
  1462. asml.remove(dealloc);
  1463. asml.InsertAfter(dealloc,hpfar1);
  1464. end;
  1465. if (not Assigned(hp1)) or (p=hp1) then
  1466. GetNextInstruction(p, hp1);
  1467. RemoveCurrentP(p, hp1);
  1468. result:=true;
  1469. Exit;
  1470. end
  1471. end
  1472. { 3-operald mov optimisations }
  1473. else if (taicpu(p).ops = 3) then
  1474. begin
  1475. if (taicpu(p).oper[2]^.typ = top_shifterop) and
  1476. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1477. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  1478. (taicpu(hpfar1).ops>=1) and
  1479. (taicpu(hpfar1).oper[0]^.typ=top_reg) and
  1480. (not RegModifiedBetween(taicpu(hpfar1).oper[0]^.reg, p, hpfar1)) and
  1481. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1482. begin
  1483. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  1484. MatchInstruction(hpfar1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1485. (taicpu(hpfar1).ops=3) and
  1486. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1487. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1488. { Check if the AND actually would only mask out bits being already zero because of the shift
  1489. }
  1490. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hpfar1).oper[2]^.val) =
  1491. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  1492. begin
  1493. DebugMsg(SPeepholeOptimization + 'LsrAnd2Lsr done', hpfar1);
  1494. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1495. asml.remove(hpfar1);
  1496. hpfar1.free;
  1497. result:=true;
  1498. Exit;
  1499. end
  1500. else if MatchInstruction(hpfar1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1501. (taicpu(hpfar1).ops=3) and
  1502. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1503. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1504. { Check if the BIC actually would only mask out bits beeing already zero because of the shift }
  1505. (taicpu(hpfar1).oper[2]^.val<>0) and
  1506. (BsfDWord(taicpu(hpfar1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1507. begin
  1508. DebugMsg(SPeepholeOptimization + 'LsrBic2Lsr done', hpfar1);
  1509. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1510. asml.remove(hpfar1);
  1511. hpfar1.free;
  1512. result:=true;
  1513. Exit;
  1514. end;
  1515. end;
  1516. { This folds shifterops into following instructions
  1517. mov r0, r1, lsl #8
  1518. add r2, r3, r0
  1519. to
  1520. add r2, r3, r1, lsl #8
  1521. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1522. }
  1523. if (taicpu(p).oper[1]^.typ = top_reg) and
  1524. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1525. (taicpu(p).oppostfix = PF_NONE) and
  1526. MatchInstruction(hpfar1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1527. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1528. A_CMP, A_CMN],
  1529. [taicpu(p).condition], [PF_None]) and
  1530. (not ((GenerateThumb2Code) and
  1531. (taicpu(hpfar1).opcode in [A_SBC]) and
  1532. (((taicpu(hpfar1).ops=3) and
  1533. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^.reg)) or
  1534. ((taicpu(hpfar1).ops=2) and
  1535. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^.reg))))) and
  1536. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) and
  1537. (taicpu(hpfar1).ops >= 2) and
  1538. {Currently we can't fold into another shifterop}
  1539. (taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^.typ = top_reg) and
  1540. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1541. NR_DEFAULTFLAGS for modification}
  1542. (
  1543. {Everything is fine if we don't use RRX}
  1544. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1545. (
  1546. {If it is RRX, then check if we're just accessing the next instruction}
  1547. Assigned(hp1) and
  1548. (hpfar1 = hp1)
  1549. )
  1550. ) and
  1551. { reg1 might not be modified inbetween }
  1552. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1553. { The shifterop can contain a register, might not be modified}
  1554. (
  1555. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1556. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hpfar1))
  1557. ) and
  1558. (
  1559. {Only ONE of the two src operands is allowed to match}
  1560. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-2]^) xor
  1561. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^)
  1562. ) then
  1563. begin
  1564. if taicpu(hpfar1).opcode in [A_TST, A_TEQ, A_CMN] then
  1565. I2:=0
  1566. else
  1567. I2:=1;
  1568. for I:=I2 to taicpu(hpfar1).ops-1 do
  1569. if MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[I]^.reg) then
  1570. begin
  1571. { If the parameter matched on the second op from the RIGHT
  1572. we have to switch the parameters, this will not happen for CMP
  1573. were we're only evaluating the most right parameter
  1574. }
  1575. if I <> taicpu(hpfar1).ops-1 then
  1576. begin
  1577. {The SUB operators need to be changed when we swap parameters}
  1578. case taicpu(hpfar1).opcode of
  1579. A_SUB: tempop:=A_RSB;
  1580. A_SBC: tempop:=A_RSC;
  1581. A_RSB: tempop:=A_SUB;
  1582. A_RSC: tempop:=A_SBC;
  1583. else tempop:=taicpu(hpfar1).opcode;
  1584. end;
  1585. if taicpu(hpfar1).ops = 3 then
  1586. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1587. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[2]^.reg,
  1588. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1589. else
  1590. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1591. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1592. taicpu(p).oper[2]^.shifterop^);
  1593. end
  1594. else
  1595. if taicpu(hpfar1).ops = 3 then
  1596. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1597. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[1]^.reg,
  1598. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1599. else
  1600. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1601. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1602. taicpu(p).oper[2]^.shifterop^);
  1603. if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
  1604. AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hpfar1,UsedRegs);
  1605. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hpfar1,UsedRegs);
  1606. asml.insertbefore(hp2, hpfar1);
  1607. asml.remove(hpfar1);
  1608. hpfar1.free;
  1609. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  1610. if not Assigned(hp1) then
  1611. GetNextInstruction(p, hp1)
  1612. else if hp1 = hpfar1 then
  1613. { If hp1 = hpfar1, then it's a dangling pointer }
  1614. hp1 := hp2;
  1615. RemoveCurrentP(p, hp1);
  1616. Result:=true;
  1617. Exit;
  1618. end;
  1619. end;
  1620. {
  1621. Fold
  1622. mov r1, r1, lsl #2
  1623. ldr/ldrb r0, [r0, r1]
  1624. to
  1625. ldr/ldrb r0, [r0, r1, lsl #2]
  1626. XXX: This still needs some work, as we quite often encounter something like
  1627. mov r1, r2, lsl #2
  1628. add r2, r3, #imm
  1629. ldr r0, [r2, r1]
  1630. which can't be folded because r2 is overwritten between the shift and the ldr.
  1631. We could try to shuffle the registers around and fold it into.
  1632. add r1, r3, #imm
  1633. ldr r0, [r1, r2, lsl #2]
  1634. }
  1635. if (not(GenerateThumbCode)) and
  1636. { thumb2 allows only lsl #0..#3 }
  1637. (not(GenerateThumb2Code) or
  1638. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1639. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1640. )
  1641. ) and
  1642. (taicpu(p).oper[1]^.typ = top_reg) and
  1643. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1644. { RRX is tough to handle, because it requires tracking the C-Flag,
  1645. it is also extremly unlikely to be emitted this way}
  1646. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1647. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1648. (taicpu(p).oppostfix = PF_NONE) and
  1649. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1650. (MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1651. (GenerateThumb2Code and
  1652. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1653. ) and
  1654. (
  1655. {If this is address by offset, one of the two registers can be used}
  1656. ((taicpu(hpfar1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1657. (
  1658. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1659. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1660. )
  1661. ) or
  1662. {For post and preindexed only the index register can be used}
  1663. ((taicpu(hpfar1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1664. (
  1665. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1666. (taicpu(hpfar1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1667. ) and
  1668. (not GenerateThumb2Code)
  1669. )
  1670. ) and
  1671. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1672. (taicpu(hpfar1).oper[1]^.ref^.index<>NR_NO) and
  1673. (taicpu(hpfar1).oper[1]^.ref^.base<>NR_NO) and
  1674. { Only fold if there isn't another shifterop already, and offset is zero. }
  1675. (taicpu(hpfar1).oper[1]^.ref^.offset = 0) and
  1676. (taicpu(hpfar1).oper[1]^.ref^.shiftmode = SM_None) and
  1677. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1678. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1679. begin
  1680. { If the register we want to do the shift for resides in base, we need to swap that}
  1681. if (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1682. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(hpfar1).oper[1]^.ref^.index;
  1683. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1684. taicpu(hpfar1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1685. taicpu(hpfar1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1686. DebugMsg(SPeepholeOptimization + 'FoldShiftLdrStr done', hpfar1);
  1687. RemoveCurrentP(p);
  1688. Result:=true;
  1689. Exit;
  1690. end;
  1691. end;
  1692. {
  1693. Often we see shifts and then a superfluous mov to another register
  1694. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1695. }
  1696. if RemoveSuperfluousMove(p, hpfar1, 'MovMov2Mov') then
  1697. Result:=true;
  1698. Exit;
  1699. end;
  1700. end;
{ Pass-1 peephole for MVN: fold an inverted register into a following AND,
  turning the MVN+AND pair into a single BIC.  Returns True (and removes p)
  when the transformation was applied. }
function TCpuAsmOptimizer.OptPass1MVN(var p: tai): Boolean;
  var
    hp1: tai;
  begin
    {
      change
      mvn reg2,reg1
      and reg3,reg4,reg2
      dealloc reg2
      to
      bic reg3,reg4,reg1
    }
    Result := False;
    if (taicpu(p).oper[1]^.typ = top_reg) and
      { find the next instruction that uses the MVN destination (reg2) }
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,A_AND,[],[]) and
      { the AND must consume reg2 as one of its source operands, either in
        its 3-operand or its 2-operand form }
      (((taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_reg) and
        (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
       ((taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
      { reg2 must be deallocated right after the AND, i.e. dead afterwards,
        otherwise the MVN result is still needed and p cannot be removed }
      assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
      { reg1 might not be modified inbetween }
      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'MvnAnd2Bic done', p);
        taicpu(hp1).opcode:=A_BIC;
        if taicpu(hp1).ops=3 then
          begin
            { BIC inverts only its LAST operand, so the inverted register
              must end up in operand 2 }
            if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
              taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
            taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
          end
        else
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
        RemoveCurrentp(p);
        Result := True;
        Exit;
      end;
  end;
  1743. function TCpuAsmOptimizer.OptPass1VMov(var p: tai): Boolean;
  1744. var
  1745. hp1: tai;
  1746. begin
  1747. {
  1748. change
  1749. vmov reg0,reg1,reg2
  1750. vmov reg1,reg2,reg0
  1751. into
  1752. vmov reg0,reg1,reg2
  1753. can be applied regardless if reg0 or reg2 is the vfp register
  1754. }
  1755. Result := False;
  1756. if (taicpu(p).ops = 3) then
  1757. while GetNextInstruction(p, hp1) and
  1758. MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1759. (taicpu(hp1).ops = 3) and
  1760. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
  1761. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
  1762. MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) do
  1763. begin
  1764. asml.Remove(hp1);
  1765. hp1.free;
  1766. DebugMsg(SPeepholeOptimization + 'VMovVMov2VMov done', p);
  1767. { Can we do it again? }
  1768. end;
  1769. end;
  1770. function TCpuAsmOptimizer.OptPass1VOp(var p: tai): Boolean;
  1771. var
  1772. hp1: tai;
  1773. begin
  1774. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1775. RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp');
  1776. end;
  1777. function TCpuAsmOptimizer.OptPass1Push(var p: tai): Boolean;
  1778. var
  1779. hp1: tai;
  1780. begin
  1781. Result:=false;
  1782. if (taicpu(p).oper[0]^.regset^=[RS_R14]) and
  1783. GetNextInstruction(p,hp1) and
  1784. MatchInstruction(hp1,A_POP,[C_None],[PF_None]) and
  1785. (taicpu(hp1).oper[0]^.regset^=[RS_R15]) then
  1786. begin
  1787. if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
  1788. begin
  1789. DebugMsg('Peephole Optimization: PushPop2Mov done', p);
  1790. taicpu(p).ops:=2;
  1791. taicpu(p).loadreg(1, NR_R14);
  1792. taicpu(p).loadreg(0, NR_R15);
  1793. taicpu(p).opcode:=A_MOV;
  1794. end
  1795. else
  1796. begin
  1797. DebugMsg('Peephole Optimization: PushPop2Bx done', p);
  1798. taicpu(p).loadreg(0, NR_R14);
  1799. taicpu(p).opcode:=A_BX;
  1800. end;
  1801. RemoveInstruction(hp1);
  1802. Result:=true;
  1803. Exit;
  1804. end;
  1805. end;
  1806. function TCpuAsmOptimizer.OptPass2Bcc(var p: tai): Boolean;
  1807. var
  1808. hp1,hp2,hp3,after_p: tai;
  1809. l : longint;
  1810. WasLast: boolean;
  1811. Label_X, Label_Y: TASmLabel;
  1812. procedure ConvertInstructins(this_hp: tai; newcond: tasmcond);
  1813. var
  1814. next_hp: tai;
  1815. begin
  1816. repeat
  1817. if this_hp.typ=ait_instruction then
  1818. taicpu(this_hp).condition := newcond;
  1819. GetNextInstruction(this_hp, next_hp);
  1820. if MustBeLast(this_hp) then
  1821. Break;
  1822. this_hp := next_hp
  1823. until not(assigned(this_hp)) or
  1824. not(CanBeCond(this_hp)) or
  1825. ((hp1.typ = ait_instruction) and (taicpu(hp1).opcode = A_B)) or
  1826. (this_hp.typ = ait_label);
  1827. end;
  1828. begin
  1829. Result := False;
  1830. if (taicpu(p).condition<>C_None) and
  1831. not(GenerateThumbCode) then
  1832. begin
  1833. { check for
  1834. Bxx xxx
  1835. <several instructions>
  1836. xxx:
  1837. }
  1838. Label_X := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  1839. l:=0;
  1840. WasLast:=False;
  1841. GetNextInstruction(p, hp1);
  1842. after_p := hp1;
  1843. while assigned(hp1) and
  1844. (l<=4) and
  1845. CanBeCond(hp1) and
  1846. { stop on labels }
  1847. not(hp1.typ=ait_label) and
  1848. { avoid that we cannot recognize the case BccB2Cond }
  1849. not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
  1850. begin
  1851. inc(l);
  1852. if MustBeLast(hp1) then
  1853. begin
  1854. WasLast:=True;
  1855. GetNextInstruction(hp1,hp1);
  1856. break;
  1857. end
  1858. else
  1859. GetNextInstruction(hp1,hp1);
  1860. end;
  1861. if assigned(hp1) then
  1862. begin
  1863. if FindLabel(Label_X, hp1) then
  1864. begin
  1865. if (l<=4) and (l>0) then
  1866. begin
  1867. ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
  1868. DebugMsg(SPeepholeOptimization + 'Bcc2Cond done', p);
  1869. { wait with removing else GetNextInstruction could
  1870. ignore the label if it was the only usage in the
  1871. jump moved away }
  1872. Label_X.decrefs;
  1873. RemoveCurrentP(p, after_p);
  1874. Result := True;
  1875. Exit;
  1876. end;
  1877. end
  1878. else
  1879. { do not perform further optimizations if there is an instruction
  1880. in block #1 which cannot be optimized.
  1881. }
  1882. if not WasLast then
  1883. begin
  1884. { check further for
  1885. Bcc xxx
  1886. <several instructions 1>
  1887. B yyy
  1888. xxx:
  1889. <several instructions 2>
  1890. yyy:
  1891. }
  1892. { hp2 points to jmp yyy }
  1893. hp2:=hp1;
  1894. { skip hp2 to xxx }
  1895. if assigned(hp2) and
  1896. (l<=3) and
  1897. (hp2.typ=ait_instruction) and
  1898. (taicpu(hp2).is_jmp) and
  1899. (taicpu(hp2).condition=C_None) and
  1900. GetNextInstruction(hp2, hp1) and
  1901. { real label and jump, no further references to the
  1902. label are allowed }
  1903. (Label_X.getrefs = 1) and
  1904. FindLabel(Label_X, hp1) then
  1905. begin
  1906. Label_Y := TAsmLabel(taicpu(hp2).oper[0]^.ref^.symbol);
  1907. l:=0;
  1908. { skip hp1 and hp3 to <several moves 2> }
  1909. GetNextInstruction(hp1, hp1);
  1910. hp3 := hp1;
  1911. while assigned(hp1) and
  1912. CanBeCond(hp1) and
  1913. (l<=3) do
  1914. begin
  1915. inc(l);
  1916. if MustBeLast(hp1) then
  1917. begin
  1918. GetNextInstruction(hp1, hp1);
  1919. break;
  1920. end
  1921. else
  1922. GetNextInstruction(hp1, hp1);
  1923. end;
  1924. { hp1 points to yyy: }
  1925. if assigned(hp1) and
  1926. FindLabel(Label_Y, hp1) then
  1927. begin
  1928. ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
  1929. ConvertInstructins(hp3, taicpu(p).condition);
  1930. DebugMsg(SPeepholeOptimization + 'BccB2Cond done', after_p);
  1931. { remove B }
  1932. Label_Y.decrefs;
  1933. RemoveInstruction(hp2);
  1934. { remove Bcc }
  1935. Label_X.decrefs;
  1936. RemoveCurrentP(p, after_p);
  1937. Result := True;
  1938. Exit;
  1939. end;
  1940. end;
  1941. end;
  1942. end;
  1943. end;
  1944. end;
{ Pass-2 peephole for STR: merges two adjacent word stores through the same
  base into a single STM/STMDA.  Returns True when the merge was applied. }
function TCpuAsmOptimizer.OptPass2STR(var p: tai): Boolean;
  var
    hp1: tai;
    Postfix: TOpPostfix;
    OpcodeStr: shortstring;
  begin
    Result := False;
    { Try to merge two STRs into an STM instruction }
    if not(GenerateThumbCode) and (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) and
      { exactly one address register may be in use }
      (
        (taicpu(p).oper[1]^.ref^.base = NR_NO) or
        (taicpu(p).oper[1]^.ref^.index = NR_NO)
      ) and
      (taicpu(p).oppostfix = PF_None) and
      (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) then
      begin
        hp1 := p;
        { scan the run of STR instructions that follows for a partner store }
        while GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
          (taicpu(hp1).opcode = A_STR) do
          if (taicpu(hp1).condition = taicpu(p).condition) and
            (taicpu(hp1).oppostfix = PF_None) and
            (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
            (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
            (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[1]^.ref^.base) and
            (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[1]^.ref^.index) and
            { The two stores must cover adjacent words: one at offset 0, the
              other at +/-4, with the register numbers ordered accordingly
              (STM stores the lowest-numbered register at the lowest address). }
            (
              (
                (taicpu(p).oper[1]^.ref^.offset = 0) and
                (getsupreg(taicpu(hp1).oper[0]^.reg) > getsupreg(taicpu(p).oper[0]^.reg)) and
                (abs(taicpu(hp1).oper[1]^.ref^.offset) = 4)
              ) or (
                (taicpu(hp1).oper[1]^.ref^.offset = 0) and
                (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) and
                (abs(taicpu(p).oper[1]^.ref^.offset) = 4)
              )
            ) then
            begin
              { register order opposite to address order -> descending form }
              if (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) xor
                (taicpu(hp1).oper[1]^.ref^.offset < taicpu(p).oper[1]^.ref^.offset) then
                begin
                  Postfix := PF_DA;
                  OpcodeStr := 'DA';
                end
              else
                begin
                  Postfix := PF_None;
                  OpcodeStr := '';
                end;
              taicpu(hp1).oper[1]^.ref^.offset := 0;
              { NOTE(review): the base register is moved into the index field
                here, apparently the form the STM operand loader expects --
                confirm against loadref/A_STM handling elsewhere }
              if taicpu(hp1).oper[1]^.ref^.index = NR_NO then
                begin
                  taicpu(hp1).oper[1]^.ref^.index := taicpu(hp1).oper[1]^.ref^.base;
                  taicpu(hp1).oper[1]^.ref^.base := NR_NO;
                end;
              taicpu(p).opcode := A_STM;
              taicpu(p).loadregset(1, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg), getsupreg(taicpu(hp1).oper[0]^.reg)]);
              taicpu(p).loadref(0, taicpu(hp1).oper[1]^.ref^);
              taicpu(p).oppostfix := Postfix;
              RemoveInstruction(hp1);
              DebugMsg(SPeepholeOptimization + 'Merging stores: STR/STR -> STM' + OpcodeStr, p);
              Result := True;
              Exit;
            end;
      end;
  end;
  2011. function TCpuAsmOptimizer.OptPass2STM(var p: tai): Boolean;
  2012. var
  2013. hp1: tai;
  2014. CorrectOffset:ASizeInt;
  2015. i, LastReg: TSuperRegister;
  2016. Postfix: TOpPostfix;
  2017. OpcodeStr: shortstring;
  2018. begin
  2019. Result := False;
  2020. { See if STM/STR can be merged into a single STM }
  2021. if (taicpu(p).oper[0]^.ref^.addressmode = AM_OFFSET) then
  2022. begin
  2023. CorrectOffset := 0;
  2024. LastReg := RS_NO;
  2025. for i in taicpu(p).oper[1]^.regset^ do
  2026. begin
  2027. LastReg := i;
  2028. Inc(CorrectOffset, 4);
  2029. end;
  2030. { This while loop effectively doea a Selection Sort on any STR
  2031. instructions that follow }
  2032. hp1 := p;
  2033. while (LastReg < maxcpuregister) and
  2034. GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
  2035. (taicpu(hp1).opcode = A_STR) do
  2036. if (taicpu(hp1).condition = taicpu(p).condition) and
  2037. (taicpu(hp1).oppostfix = PF_None) and
  2038. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  2039. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  2040. (
  2041. (
  2042. (taicpu(p).oper[1]^.ref^.base = NR_NO) and
  2043. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.ref^.index)
  2044. ) or (
  2045. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  2046. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.ref^.base)
  2047. )
  2048. ) and
  2049. { Next register must be later in the set }
  2050. (getsupreg(taicpu(hp1).oper[0]^.reg) > LastReg) and
  2051. (
  2052. (
  2053. (taicpu(p).oppostfix = PF_None) and
  2054. (taicpu(hp1).oper[1]^.ref^.offset = CorrectOffset)
  2055. ) or (
  2056. (taicpu(p).oppostfix = PF_DA) and
  2057. (taicpu(hp1).oper[1]^.ref^.offset = -CorrectOffset)
  2058. )
  2059. ) then
  2060. begin
  2061. { Increment the reference values ready for the next STR instruction to find }
  2062. LastReg := getsupreg(taicpu(hp1).oper[0]^.reg);
  2063. Inc(CorrectOffset, 4);
  2064. if (taicpu(p).oppostfix = PF_DA) then
  2065. OpcodeStr := 'DA'
  2066. else
  2067. OpcodeStr := '';
  2068. Include(taicpu(p).oper[1]^.regset^, LastReg);
  2069. DebugMsg(SPeepholeOptimization + 'Merging stores: STM' + OpcodeStr + '/STR -> STM' + OpcodeStr, hp1);
  2070. RemoveInstruction(hp1);
  2071. Result := True;
  2072. { See if we can find another one to merge }
  2073. hp1 := p;
  2074. Continue;
  2075. end;
  2076. end;
  2077. end;
  2078. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  2079. begin
  2080. result := false;
  2081. if p.typ = ait_instruction then
  2082. begin
  2083. case taicpu(p).opcode of
  2084. A_CMP:
  2085. Result := OptPass1CMP(p);
  2086. A_STR:
  2087. Result := OptPass1STR(p);
  2088. A_LDR:
  2089. Result := OptPass1LDR(p);
  2090. A_MOV:
  2091. Result := OptPass1MOV(p);
  2092. A_AND:
  2093. Result := OptPass1And(p);
  2094. A_ADD,
  2095. A_SUB:
  2096. Result := OptPass1ADDSUB(p);
  2097. A_MUL:
  2098. REsult := OptPass1MUL(p);
  2099. A_ADC,
  2100. A_RSB,
  2101. A_RSC,
  2102. A_SBC,
  2103. A_BIC,
  2104. A_EOR,
  2105. A_ORR,
  2106. A_MLA,
  2107. A_MLS,
  2108. A_QADD,A_QADD16,A_QADD8,
  2109. A_QSUB,A_QSUB16,A_QSUB8,
  2110. A_QDADD,A_QDSUB,A_QASX,A_QSAX,
  2111. A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
  2112. A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
  2113. A_PKHTB,A_PKHBT,
  2114. A_SMUAD,A_SMUSD:
  2115. Result := OptPass1DataCheckMov(p);
  2116. {$ifdef dummy}
  2117. A_MVN:
  2118. Result := OPtPass1MVN(p);
  2119. {$endif dummy}
  2120. A_UXTB:
  2121. Result := OptPass1UXTB(p);
  2122. A_UXTH:
  2123. Result := OptPass1UXTH(p);
  2124. A_SXTB:
  2125. Result := OptPass1SXTB(p);
  2126. A_SXTH:
  2127. Result := OptPass1SXTH(p);
  2128. A_STM:
  2129. Result := OptPass1STM(p);
  2130. A_VMOV:
  2131. Result := OptPass1VMov(p);
  2132. A_VLDR,
  2133. A_VADD,
  2134. A_VMUL,
  2135. A_VDIV,
  2136. A_VSUB,
  2137. A_VSQRT,
  2138. A_VNEG,
  2139. A_VCVT,
  2140. A_VABS:
  2141. Result := OptPass1VOp(p);
  2142. A_PUSH:
  2143. Result := OptPass1Push(p);
  2144. else
  2145. ;
  2146. end;
  2147. end;
  2148. end;
  2149. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  2150. begin
  2151. result := False;
  2152. if p.typ = ait_instruction then
  2153. begin
  2154. case taicpu(p).opcode of
  2155. A_B:
  2156. Result := OptPass2Bcc(p);
  2157. A_STM:
  2158. Result := OptPass2STM(p);
  2159. A_STR:
  2160. Result := OptPass2STR(p);
  2161. else
  2162. ;
  2163. end;
  2164. end;
  2165. end;
  2166. { instructions modifying the CPSR can be only the last instruction }
  2167. function MustBeLast(p : tai) : boolean;
  2168. begin
  2169. Result:=(p.typ=ait_instruction) and
  2170. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  2171. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  2172. (taicpu(p).oppostfix=PF_S));
  2173. end;
  2174. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  2175. begin
  2176. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  2177. Result:=true
  2178. else If MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
  2179. (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
  2180. Result:=true
  2181. else
  2182. Result:=inherited RegInInstruction(Reg, p1);
  2183. end;
const
  { set of opcode which might or do write to memory }
  { branch/call opcodes are listed too, presumably because the code they
    transfer control to may itself store -- TODO confirm }
  { TODO : extend armins.dat to contain r/w info }
  opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
    A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  2189. { adjust the register live information when swapping the two instructions p and hp1,
  2190. they must follow one after the other }
  2191. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  2192. procedure CheckLiveEnd(reg : tregister);
  2193. var
  2194. supreg : TSuperRegister;
  2195. regtype : TRegisterType;
  2196. begin
  2197. if reg=NR_NO then
  2198. exit;
  2199. regtype:=getregtype(reg);
  2200. supreg:=getsupreg(reg);
  2201. if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
  2202. RegInInstruction(reg,p) then
  2203. cg.rg[regtype].live_end[supreg]:=p;
  2204. end;
  2205. procedure CheckLiveStart(reg : TRegister);
  2206. var
  2207. supreg : TSuperRegister;
  2208. regtype : TRegisterType;
  2209. begin
  2210. if reg=NR_NO then
  2211. exit;
  2212. regtype:=getregtype(reg);
  2213. supreg:=getsupreg(reg);
  2214. if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
  2215. RegInInstruction(reg,hp1) then
  2216. cg.rg[regtype].live_start[supreg]:=hp1;
  2217. end;
  2218. var
  2219. i : longint;
  2220. r : TSuperRegister;
  2221. begin
  2222. { assumption: p is directly followed by hp1 }
  2223. { if live of any reg used by p starts at p and hp1 uses this register then
  2224. set live start to hp1 }
  2225. for i:=0 to p.ops-1 do
  2226. case p.oper[i]^.typ of
  2227. Top_Reg:
  2228. CheckLiveStart(p.oper[i]^.reg);
  2229. Top_Ref:
  2230. begin
  2231. CheckLiveStart(p.oper[i]^.ref^.base);
  2232. CheckLiveStart(p.oper[i]^.ref^.index);
  2233. end;
  2234. Top_Shifterop:
  2235. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  2236. Top_RegSet:
  2237. for r:=RS_R0 to RS_R15 do
  2238. if r in p.oper[i]^.regset^ then
  2239. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2240. else
  2241. ;
  2242. end;
  2243. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  2244. set live end to p }
  2245. for i:=0 to hp1.ops-1 do
  2246. case hp1.oper[i]^.typ of
  2247. Top_Reg:
  2248. CheckLiveEnd(hp1.oper[i]^.reg);
  2249. Top_Ref:
  2250. begin
  2251. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  2252. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  2253. end;
  2254. Top_Shifterop:
  2255. CheckLiveStart(hp1.oper[i]^.shifterop^.rs);
  2256. Top_RegSet:
  2257. for r:=RS_R0 to RS_R15 do
  2258. if r in hp1.oper[i]^.regset^ then
  2259. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2260. else
  2261. ;
  2262. end;
  2263. end;
function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
{ Pre-register-allocation scheduler pass: tries to move a load (ldr and
  byte/halfword variants) one instruction earlier, past the preceding
  instruction, so that the load latency is hidden from the instruction that
  consumes the loaded register. Register alloc/dealloc markers and address
  labels travel together with the moved instructions. Always returns true. }
{ TODO : schedule also forward }
{ TODO : schedule distance > 1 }

  { returns true if p might be a load of a pc relative tls offset }
  function PossibleTLSLoad(const p: tai) : boolean;
    begin
      Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
        (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
        (taicpu(p).oper[1]^.ref^.index=NR_PC)));
    end;

  var
    hp1,hp2,hp3,hp4,hp5,insertpos : tai;
    list : TAsmList;
  begin
    result:=true;
    { temporary holding area for p plus its associated regalloc/label tais
      while they are detached from asml }
    list:=TAsmList.create;
    p:=BlockStart;
    while p<>BlockEnd Do
      begin
        { pattern: p (any instruction) directly followed by hp1 (a load)
          directly followed by hp2 (consumer of the loaded register);
          if all safety conditions hold, swap p and hp1 }
        if (p.typ=ait_instruction) and
          GetNextInstruction(p,hp1) and
          (hp1.typ=ait_instruction) and
          (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
          (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
          { for now we don't reschedule if the previous instruction changes potentially a memory location }
          ( (not(taicpu(p).opcode in opcode_could_mem_write) and
             not(RegModifiedByInstruction(NR_PC,p))
            ) or
            ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
             ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
              (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
              (taicpu(hp1).oper[1]^.ref^.offset=0)
              )
             ) or
             { try to prove that the memory accesses don't overlapp }
             ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
              (taicpu(p).oper[1]^.typ = top_ref) and
              (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
              (taicpu(p).oppostfix=PF_None) and
              (taicpu(hp1).oppostfix=PF_None) and
              (taicpu(p).oper[1]^.ref^.index=NR_NO) and
              (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
              { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
              (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
             )
            )
          ) and
          GetNextInstruction(hp1,hp2) and
          (hp2.typ=ait_instruction) and
          { loaded register used by next instruction?
            if we ever support labels (they could be skipped in theory) here, the gnu2 tls general-dynamic code could get broken (the ldr before
            the bl may not be scheduled away from the bl) and it needs to be taken care of this case
          }
          (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
          { loaded register not used by previous instruction? }
          not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
          { same condition? }
          (taicpu(p).condition=taicpu(hp1).condition) and
          { first instruction might not change the register used as base }
          ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
           not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
          ) and
          { first instruction might not change the register used as index }
          ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
           not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
          ) and
          { if we modify the basereg AND the first instruction used that reg, we can not schedule }
          ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
           not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
          not(PossibleTLSLoad(p)) and
          not(PossibleTLSLoad(hp1)) then
          begin
            hp3:=tai(p.Previous);
            hp5:=tai(p.next);
            asml.Remove(p);
            { if there is a reg. alloc/dealloc/sync instructions or address labels (e.g. for GOT-less PIC)
              associated with p, move it together with p }
            { before the instruction? }
            { find reg allocs,deallocs and PIC labels }
            while assigned(hp3) and (hp3.typ<>ait_instruction) do
              begin
                if ( (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_alloc, ra_dealloc]) and
                  RegInInstruction(tai_regalloc(hp3).reg,p) )
                  or ( (hp3.typ=ait_label) and (tai_label(hp3).labsym.typ=AT_ADDR) )
                then
                  begin
                    hp4:=hp3;
                    hp3:=tai(hp3.Previous);
                    asml.Remove(hp4);
                    { Insert keeps the original relative order since we walk
                      backwards while inserting at the list head }
                    list.Insert(hp4);
                  end
                else
                  hp3:=tai(hp3.Previous);
              end;
            list.Concat(p);
            { keep the register allocator's live range info consistent with
              the new instruction order }
            SwapRegLive(taicpu(p),taicpu(hp1));
            { after the instruction? }
            { find reg deallocs and reg syncs }
            while assigned(hp5) and (hp5.typ<>ait_instruction) do
              begin
                if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc, ra_sync]) and
                  RegInInstruction(tai_regalloc(hp5).reg,p) then
                  begin
                    hp4:=hp5;
                    hp5:=tai(hp5.next);
                    asml.Remove(hp4);
                    list.Concat(hp4);
                  end
                else
                  hp5:=tai(hp5.Next);
              end;
            asml.Remove(hp1);
            { if there are address labels associated with hp2, those must
              stay with hp2 (e.g. for GOT-less PIC) }
            insertpos:=hp2;
            while assigned(hp2.previous) and
              (tai(hp2.previous).typ<>ait_instruction) do
              begin
                hp2:=tai(hp2.previous);
                if (hp2.typ=ait_label) and
                  (tai_label(hp2).labsym.typ=AT_ADDR) then
                  insertpos:=hp2;
              end;
{$ifdef DEBUG_PREREGSCHEDULER}
            asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
{$endif DEBUG_PREREGSCHEDULER}
            { reinsert in swapped order: first the load, then p with its
              attached regallocs/labels }
            asml.InsertBefore(hp1,insertpos);
            asml.InsertListBefore(insertpos,list);
            p:=tai(p.next);
          end
        else if p.typ=ait_instruction then
          p:=hp1
        else
          p:=tai(p.next);
      end;
    list.Free;
  end;
  2401. procedure DecrementPreceedingIT(list: TAsmList; p: tai);
  2402. var
  2403. hp : tai;
  2404. l : longint;
  2405. begin
  2406. hp := tai(p.Previous);
  2407. l := 1;
  2408. while assigned(hp) and
  2409. (l <= 4) do
  2410. begin
  2411. if hp.typ=ait_instruction then
  2412. begin
  2413. if (taicpu(hp).opcode>=A_IT) and
  2414. (taicpu(hp).opcode <= A_ITTTT) then
  2415. begin
  2416. if (taicpu(hp).opcode = A_IT) and
  2417. (l=1) then
  2418. list.Remove(hp)
  2419. else
  2420. case taicpu(hp).opcode of
  2421. A_ITE:
  2422. if l=2 then taicpu(hp).opcode := A_IT;
  2423. A_ITT:
  2424. if l=2 then taicpu(hp).opcode := A_IT;
  2425. A_ITEE:
  2426. if l=3 then taicpu(hp).opcode := A_ITE;
  2427. A_ITTE:
  2428. if l=3 then taicpu(hp).opcode := A_ITT;
  2429. A_ITET:
  2430. if l=3 then taicpu(hp).opcode := A_ITE;
  2431. A_ITTT:
  2432. if l=3 then taicpu(hp).opcode := A_ITT;
  2433. A_ITEEE:
  2434. if l=4 then taicpu(hp).opcode := A_ITEE;
  2435. A_ITTEE:
  2436. if l=4 then taicpu(hp).opcode := A_ITTE;
  2437. A_ITETE:
  2438. if l=4 then taicpu(hp).opcode := A_ITET;
  2439. A_ITTTE:
  2440. if l=4 then taicpu(hp).opcode := A_ITTT;
  2441. A_ITEET:
  2442. if l=4 then taicpu(hp).opcode := A_ITEE;
  2443. A_ITTET:
  2444. if l=4 then taicpu(hp).opcode := A_ITTE;
  2445. A_ITETT:
  2446. if l=4 then taicpu(hp).opcode := A_ITET;
  2447. A_ITTTT:
  2448. begin
  2449. if l=4 then taicpu(hp).opcode := A_ITTT;
  2450. end
  2451. else
  2452. ;
  2453. end;
  2454. break;
  2455. end;
  2456. {else if (taicpu(hp).condition<>taicpu(p).condition) or
  2457. (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
  2458. break;}
  2459. inc(l);
  2460. end;
  2461. hp := tai(hp.Previous);
  2462. end;
  2463. end;
  2464. function TCpuThumb2AsmOptimizer.OptPass1STM(var p: tai): boolean;
  2465. var
  2466. hp : taicpu;
  2467. begin
  2468. result:=false;
  2469. if MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
  2470. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2471. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2472. ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
  2473. begin
  2474. DebugMsg('Peephole Stm2Push done', p);
  2475. hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  2476. AsmL.InsertAfter(hp, p);
  2477. asml.Remove(p);
  2478. p:=hp;
  2479. result:=true;
  2480. end;
  2481. end;
  2482. function TCpuThumb2AsmOptimizer.OptPass1LDM(var p: tai): boolean;
  2483. var
  2484. hp : taicpu;
  2485. begin
  2486. result:=false;
  2487. if MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
  2488. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2489. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2490. ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
  2491. begin
  2492. DebugMsg('Peephole Ldm2Pop done', p);
  2493. hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  2494. asml.InsertBefore(hp, p);
  2495. asml.Remove(p);
  2496. p.Free;
  2497. p:=hp;
  2498. result:=true;
  2499. end;
  2500. end;
  2501. function TCpuThumb2AsmOptimizer.OptPass1AndThumb2(var p : tai) : boolean;
  2502. begin
  2503. result:=false;
  2504. if MatchInstruction(p, [A_AND], [], [PF_None]) and
  2505. (taicpu(p).ops = 2) and
  2506. (taicpu(p).oper[1]^.typ=top_const) and
  2507. ((taicpu(p).oper[1]^.val=255) or
  2508. (taicpu(p).oper[1]^.val=65535)) then
  2509. begin
  2510. DebugMsg('Peephole AndR2Uxt done', p);
  2511. if taicpu(p).oper[1]^.val=255 then
  2512. taicpu(p).opcode:=A_UXTB
  2513. else
  2514. taicpu(p).opcode:=A_UXTH;
  2515. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  2516. result := true;
  2517. end
  2518. else if MatchInstruction(p, [A_AND], [], [PF_None]) and
  2519. (taicpu(p).ops = 3) and
  2520. (taicpu(p).oper[2]^.typ=top_const) and
  2521. ((taicpu(p).oper[2]^.val=255) or
  2522. (taicpu(p).oper[2]^.val=65535)) then
  2523. begin
  2524. DebugMsg('Peephole AndRR2Uxt done', p);
  2525. if taicpu(p).oper[2]^.val=255 then
  2526. taicpu(p).opcode:=A_UXTB
  2527. else
  2528. taicpu(p).opcode:=A_UXTH;
  2529. taicpu(p).ops:=2;
  2530. result := true;
  2531. end;
  2532. end;
  2533. function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  2534. begin
  2535. result:=false;
  2536. if inherited PeepHoleOptPass1Cpu(p) then
  2537. result:=true
  2538. else if p.typ=ait_instruction then
  2539. case taicpu(p).opcode of
  2540. A_STM:
  2541. result:=OptPass1STM(p);
  2542. A_LDM:
  2543. result:=OptPass1LDM(p);
  2544. A_AND:
  2545. result:=OptPass1AndThumb2(p);
  2546. else
  2547. ;
  2548. end;
  2549. end;
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
{ Second peephole pass: collapse a conditional branch that skips at most
  four conditionalizable instructions into an IT/ITT/ITTT/ITTTT block with
  the inverted condition, then remove the branch. }
  var
    p,hp1,hp2: tai;
    l : longint;
    condition : tasmcond;
    { UsedRegs, TmpUsedRegs: TRegSet; }
  begin
    p := BlockStart;
    { UsedRegs := []; }
    while (p <> BlockEnd) Do
      begin
        { UpdateUsedRegs(UsedRegs, tai(p.next)); }
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
                A_B:
                  if taicpu(p).condition<>C_None then
                    begin
                      { check for
                        Bxx xxx
                        <several instructions>
                        xxx:
                      }
                      { count how many conditionalizable instructions follow
                        the branch; an IT block covers at most 4 }
                      l:=0;
                      GetNextInstruction(p, hp1);
                      while assigned(hp1) and
                        (l<=4) and
                        CanBeCond(hp1) and
                        { stop on labels }
                        not(hp1.typ=ait_label) do
                        begin
                          inc(l);
                          if MustBeLast(hp1) then
                            begin
                              //hp1:=nil;
                              GetNextInstruction(hp1,hp1);
                              break;
                            end
                          else
                            GetNextInstruction(hp1,hp1);
                        end;
                      if assigned(hp1) then
                        begin
                          { the skipped run must end exactly at the branch target }
                          if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                            begin
                              if (l<=4) and (l>0) then
                                begin
                                  { the skipped instructions execute when the
                                    branch is NOT taken, hence the inverse cond }
                                  condition:=inverse_cond(taicpu(p).condition);
                                  hp2:=p;
                                  GetNextInstruction(p,hp1);
                                  p:=hp1;
                                  repeat
                                    if hp1.typ=ait_instruction then
                                      taicpu(hp1).condition:=condition;
                                    if MustBeLast(hp1) then
                                      begin
                                        GetNextInstruction(hp1,hp1);
                                        break;
                                      end
                                    else
                                      GetNextInstruction(hp1,hp1);
                                  until not(assigned(hp1)) or
                                    not(CanBeCond(hp1)) or
                                    (hp1.typ=ait_label);
                                  { wait with removing else GetNextInstruction could
                                    ignore the label if it was the only usage in the
                                    jump moved away }
                                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
                                  { the branch being removed may itself sit in a
                                    preceding IT block; shorten that block }
                                  DecrementPreceedingIT(asml, hp2);
                                  case l of
                                    1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                                    2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                                    3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                                    4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                                  end;
                                  { now drop the label reference and delete the branch }
                                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                  asml.remove(hp2);
                                  hp2.free;
                                  continue;
                                end;
                            end;
                        end;
                    end;
                else
                  ;
              end;
            end;
          else
            ;
        end;
        p := tai(p.next)
      end;
  end;
function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
{ Post peephole pass: where the flags are not live, prefer the flag-setting
  (PF_S) and two-operand instruction forms — presumably because those map to
  the shorter 16-bit Thumb-2 encodings (NOTE(review): encoding-size rationale
  inferred from the rewrites, confirm against the ARM ARM).
  Returns true if p was rewritten; branch order below is significant. }
  begin
    result:=false;
    if p.typ = ait_instruction then
      begin
        { mov rX,#imm8 -> movs rX,#imm8 (flags must be dead) }
        if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
          (taicpu(p).oper[1]^.typ=top_const) and
          (taicpu(p).oper[1]^.val >= 0) and
          (taicpu(p).oper[1]^.val < 256) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Mov2Movs done', p);
            { mark the flags as defined/clobbered around p }
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { mvn rX,rY -> mvns rX,rY }
        else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
          (taicpu(p).oper[1]^.typ=top_reg) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Mvn2Mvns done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { rsb rX,rY,#0 -> rsbs rX,rY,#0 (negation) }
        else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          (taicpu(p).oper[2]^.typ=top_const) and
          (taicpu(p).oper[2]^.val=0) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Rsb2Rsbs done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { add/sub rX,rX,#imm8 -> adds/subs rX,#imm8 (2-operand form) }
        else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
          (taicpu(p).oper[2]^.typ=top_const) and
          (taicpu(p).oper[2]^.val >= 0) and
          (taicpu(p).oper[2]^.val < 256) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole AddSub2*s done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
            taicpu(p).oppostfix:=PF_S;
            taicpu(p).ops := 2;
            result:=true;
          end
        { add/sub rX,rY -> adds/subs rX,rY }
        else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
          (taicpu(p).ops = 2) and
          (taicpu(p).oper[1]^.typ=top_reg) and
          (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
          (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole AddSub2*s done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { add rX,rX,rY -> add rX,rY (flags untouched, no S needed) }
        else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (taicpu(p).oper[2]^.typ=top_reg) then
          begin
            DebugMsg('Peephole AddRRR2AddRR done', p);
            taicpu(p).ops := 2;
            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
            result:=true;
          end
        { op rX,rX,rY -> ops rX,rY for logical/shift ops }
        else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (taicpu(p).oper[2]^.typ=top_reg) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole opXXY2opsXY done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).ops := 2;
            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
            taicpu(p).oppostfix:=PF_S;
            result:=true;
          end
        { ops rX,rX,Y -> ops rX,Y (already flag-setting, just shorten) }
        else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
          begin
            DebugMsg('Peephole opXXY2opXY done', p);
            taicpu(p).ops := 2;
            if taicpu(p).oper[2]^.typ=top_reg then
              taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
            else
              taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
            result:=true;
          end
        { op rX,rY,rX -> ops rX,rY for commutative ops }
        else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
          (taicpu(p).ops = 3) and
          MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole opXYX2opsXY done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            taicpu(p).ops := 2;
            result:=true;
          end
        { mov rX,rY,<shift> -> lsls/lsrs/asrs/rors rX,rY,... }
        else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
          (taicpu(p).ops=3) and
          (taicpu(p).oper[2]^.typ=top_shifterop) and
          (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
          //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
          (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
          begin
            DebugMsg('Peephole Mov2Shift done', p);
            asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
            asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
            IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
            taicpu(p).oppostfix:=PF_S;
            case taicpu(p).oper[2]^.shifterop^.shiftmode of
              SM_LSL: taicpu(p).opcode:=A_LSL;
              SM_LSR: taicpu(p).opcode:=A_LSR;
              SM_ASR: taicpu(p).opcode:=A_ASR;
              SM_ROR: taicpu(p).opcode:=A_ROR;
              else
                internalerror(2019050912);
            end;
            { replace the shifter operand by the shift amount: register if
              register-shifted, otherwise the immediate }
            if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
              taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
            else
              taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
            result:=true;
          end
      end;
  end;
{ unit initialization: register this CPU's peephole optimizer and
  pre-regalloc scheduler classes with the generic optimizer framework }
begin
  casmoptimizer:=TCpuAsmOptimizer;
  cpreregallocscheduler:=TCpuPreRegallocScheduler;
End.