aoptcpu.pas 115 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai,
  25. aasmcpu,
  26. aopt, aoptobj, aoptarm;
Type
  { ARM-specific peephole optimizer; inherits the cross-platform ARM
    optimisations from TARMAsmOptimizer }
  TCpuAsmOptimizer = class(TARMAsmOptimizer)
    { Can't be done in some cases due to the limited range of jumps }
    function CanDoJumpOpts: Boolean; override;
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { removes a vmov that only copies the result of p into another register }
    function RemoveSuperfluousVMov(const p : tai; movp : tai; const optimizer : string) : boolean;
    { gets the next tai object after current that contains info relevant
      to the optimizer in p1 which used the given register or does a
      change in program flow.
      If there is none, it returns false and
      sets p1 to nil }
    Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { With these routines, there's optimisation code that's general for all ARM platforms }
    function OptPass1And(var p: tai): Boolean; override;
    function OptPass1LDR(var p: tai): Boolean; override;
    function OptPass1STR(var p: tai): Boolean; override;
  protected
    { folds a preceding add/sub into a load/store as a pre-indexed reference }
    function LookForPreindexedPattern(p: taicpu): boolean;
    { folds a following add/sub into a load/store as a post-indexed reference }
    function LookForPostindexedPattern(p: taicpu): boolean;
    { Individual optimisation routines }
    function OptPass1DataCheckMov(var p: tai): Boolean;
    function OptPass1ADDSUB(var p: tai): Boolean;
    function OptPass1CMP(var p: tai): Boolean;
    function OptPass1STM(var p: tai): Boolean;
    function OptPass1MOV(var p: tai): Boolean;
    function OptPass1MUL(var p: tai): Boolean;
    function OptPass1MVN(var p: tai): Boolean;
    function OptPass1VMov(var p: tai): Boolean;
    function OptPass1VOp(var p: tai): Boolean;
    function OptPass2Bcc(var p: tai): Boolean;
    function OptPass2STM(var p: tai): Boolean;
    function OptPass2STR(var p: tai): Boolean;
  End;

  { instruction scheduler run before register allocation }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean;override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;

  { Thumb-2 specific optimizer built on top of the generic ARM one }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { uses the same constructor as TAopObj }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2;override;
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  protected
    function OptPass1AndThumb2(var p : tai) : boolean;
    function OptPass1LDM(var p : tai) : boolean;
    function OptPass1STM(var p : tai) : boolean;
  End;

{ returns true if p must stay the last instruction of its block }
function MustBeLast(p : tai) : boolean;
  82. Implementation
  83. uses
  84. cutils,verbose,globtype,globals,
  85. systems,
  86. cpuinfo,
  87. cgobj,procinfo,
  88. aasmbase,aasmdata;
  89. { Range check must be disabled explicitly as conversions between signed and unsigned
  90. 32-bit values are done without explicit typecasts }
  91. {$R-}
  92. function CanBeCond(p : tai) : boolean;
  93. begin
  94. result:=
  95. not(GenerateThumbCode) and
  96. (p.typ=ait_instruction) and
  97. (taicpu(p).condition=C_None) and
  98. ((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
  99. (taicpu(p).opcode<>A_CBZ) and
  100. (taicpu(p).opcode<>A_CBNZ) and
  101. (taicpu(p).opcode<>A_PLD) and
  102. (((taicpu(p).opcode<>A_BLX) and
  103. { BL may need to be converted into BLX by the linker -- could possibly
  104. be allowed in case it's to a local symbol of which we know that it
  105. uses the same instruction set as the current one }
  106. (taicpu(p).opcode<>A_BL)) or
  107. (taicpu(p).oper[0]^.typ=top_reg));
  108. end;
  109. function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
  110. begin
  111. Result:=false;
  112. if (taicpu(movp).condition = C_EQ) and
  113. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  114. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  115. begin
  116. asml.insertafter(tai_comment.Create(strpnew('Peephole Optimization: CmpMovMov - Removed redundant moveq')), movp);
  117. asml.remove(movp);
  118. movp.free;
  119. Result:=true;
  120. end;
  121. end;
  122. function AlignedToQWord(const ref : treference) : boolean;
  123. begin
  124. { (safe) heuristics to ensure alignment }
  125. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  126. (((ref.offset>=0) and
  127. ((ref.offset mod 8)=0) and
  128. ((ref.base=NR_R13) or
  129. (ref.index=NR_R13))
  130. ) or
  131. ((ref.offset<=0) and
  132. { when using NR_R11, it has always a value of <qword align>+4 }
  133. ((abs(ref.offset+4) mod 8)=0) and
  134. (current_procinfo.framepointer=NR_R11) and
  135. ((ref.base=NR_R11) or
  136. (ref.index=NR_R11))
  137. )
  138. );
  139. end;
  140. function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
  141. begin
  142. if GenerateThumb2Code then
  143. result := (aoffset<4096) and (aoffset>-256)
  144. else
  145. result := ((pf in [PF_None,PF_B]) and
  146. (abs(aoffset)<4096)) or
  147. (abs(aoffset)<256);
  148. end;
  { Returns true if the instruction hp reads register reg: as a source
    operand, as part of a memory reference (base/index), as a member of a
    register set, or as a register-specified shift amount. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      instructionLoadsFromReg := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);
      { oper[0] is normally the destination, so start scanning at oper[1] }
      i:=1;
      {For these instructions we have to start on oper[0]}
      if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                       A_CMP, A_CMN, A_TST, A_TEQ,
                       A_B, A_BL, A_BX, A_BLX,
                       A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then
        i:=0;
      while(i<p.ops) do
        begin
          case p.oper[I]^.typ of
            top_reg:
              instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
                { STRD also stores the register with the next supreg number }
                { STRD }
                ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
            top_shifterop:
              { register-specified shift amount is read }
              instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
            top_ref:
              { base and index of a memory reference are always read }
              instructionLoadsFromReg :=
                (p.oper[I]^.ref^.base = reg) or
                (p.oper[I]^.ref^.index = reg);
            else
              ;
          end;
          { LDM/VLDM only read oper[0] (the base); the register set that
            follows is written, not read, so stop after checking oper[0] }
          if (i=0) and (p.opcode in [A_LDM,A_VLDM]) then
            exit;
          if instructionLoadsFromReg then
            exit; {Bailout if we found something}
          Inc(I);
        end;
    end;
  { Returns true if the instruction hp overwrites register reg with a new
    value, i.e. any value reg held before hp is dead afterwards. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD,
        A_VCMP:
          exit;
        {Take care of post/preincremented store and loads, they will change their base register}
        A_STR, A_LDR:
          begin
            Result := false;
            { actually, this does not apply here because post-/preindexed does not mean that a register
              is loaded with a new value, it is only modified
              (taicpu(p).oper[1]^.typ=top_ref) and
              (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (taicpu(p).oper[1]^.ref^.base = reg);
            }
            { STR does not load into it's first register }
            if p.opcode = A_STR then
              exit;
          end;
        A_VSTR:
          begin
            { stores never overwrite a register }
            Result := false;
            exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          Result :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        {Loads to oper2 from coprocessor}
        {
        MCR/MRC is currently not supported in FPC
        A_MRC:
          Result :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        {Loads to all register in the registerset}
        A_LDM, A_VLDM:
          Result := (getsupreg(reg) in p.oper[1]^.regset^);
        A_POP:
          { POP writes every register in its set and always updates SP }
          Result := (getsupreg(reg) in p.oper[0]^.regset^) or
                    (reg=NR_STACK_POINTER_REG);
        else
          ;
      end;
      if Result then
        exit;
      { fall through: check the destination operand used by most
        data-processing and load instructions }
      case p.oper[0]^.typ of
        {This is the case}
        top_reg:
          Result := (p.oper[0]^.reg = reg) or
            { LDRD loads the register pair oper[0], oper[0]+1 }
            { LDRD }
            (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
        {LDM/STM might write a new value to their index register}
        top_ref:
          Result :=
            (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (taicpu(p).oper[0]^.ref^.base = reg);
        else
          ;
      end;
    end;
  { Searches forward from Current for the next LDR/STR that accesses memory
    through exactly the same reference ref.  The search aborts at non-
    instructions, calls/jumps, PC modifications and - if StopOnStore is
    true - at any store (which might clobber the referenced memory).
    Returns true with Next set to the match, false otherwise (Next then
    points at the instruction the search stopped on). }
  function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
    Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
    begin
      Next:=Current;
      repeat
        Result:=GetNextInstruction(Next,Next);
        if Result and
          (Next.typ=ait_instruction) and
          (taicpu(Next).opcode in [A_LDR, A_STR]) and
          (
           ((taicpu(Next).ops = 2) and
            (taicpu(Next).oper[1]^.typ = top_ref) and
            RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
           ((taicpu(Next).ops = 3) and { LDRD/STRD }
            (taicpu(Next).oper[2]^.typ = top_ref) and
            RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
          ) then
          {We've found an instruction LDR or STR with the same reference}
          exit;
      until not(Result) or
            (Next.typ<>ait_instruction) or
            { searching across several instructions is only done at -O3 }
            not(cs_opt_level3 in current_settings.optimizerswitches) or
            is_calljmp(taicpu(Next).opcode) or
            (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or
            RegModifiedByInstruction(NR_PC,Next);
      Result:=false;
    end;
{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';

  { Inserts s as an assembler comment directly before p so the applied
    optimisation is visible in the generated assembler output }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  const
    SPeepholeOptimization = '';

  { no-op when debugging is disabled; inlined away }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
  301. function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
  302. begin
  303. { Cannot perform these jump optimisations if the ARM architecture has 16-bit thumb codes }
  304. Result := not (
  305. (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
  306. );
  307. end;
  { If movp is a vmov that merely copies the result register of p into
    another register (of a compatible precision), retarget p to write the
    vmov's destination directly and delete the vmov.  A VLDR whose result
    ends up in an integer register is turned into a plain LDR.  The
    register alloc/dealloc markers are moved along.  optimizer names the
    calling optimisation for the debug message.  Returns true on removal. }
  function TCpuAsmOptimizer.RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      Result:=false;
      if ((MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR))
          ) or
          { conversions producing a double may be followed by a vmov.f64 }
          (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
          { conversions producing a single may be followed by a vmov.f32 }
          (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
         ) and
         (taicpu(movp).ops=2) and
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { the destination register of the mov might not be used beween p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         { Take care to only do this for instructions which REALLY load to the first register.
           Otherwise
             vstr reg0, [reg1]
             vmov reg2, reg0
           will be optimized to
             vstr reg2, [reg1]
         }
         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
        begin
          { only safe when the old result register dies right after movp }
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg(SPeepholeOptimization + optimizer + ' removed superfluous vmov', movp);
              result:=true;
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              asml.Remove(dealloc);
              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { no allocation found: keep the lifetime markers balanced by
                  re-inserting the dealloc right after p }
                asml.InsertAfter(dealloc,p);
              { try to move the allocation of the target register }
              GetLastInstruction(movp,hp1);
              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;
              { change
                  vldr reg0,[reg1]
                  vmov reg2,reg0
                into
                  ldr reg2,[reg1]
                if reg2 is an int register
              }
              if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
                taicpu(p).opcode:=A_LDR;
              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
{
  optimize
  add/sub reg1,reg1,regY/const
  ...
  ldr/str regX,[reg1]
  into
  ldr/str regX,[reg1, regY/const]!
}
{ Returns true (and rewrites the later ldr/str to a pre-indexed form) when
  the add/sub p can be folded into it; the caller removes p afterwards. }
function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
  var
    hp1: tai;
  begin
    if GenerateARMCode and
       (p.ops=3) and
       { destination and first source must be the same register }
       MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
       GetNextInstructionUsingReg(p, hp1, p.oper[0]^.reg) and
       (not RegModifiedBetween(p.oper[0]^.reg, p, hp1)) and
       MatchInstruction(hp1, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) and
       (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
       (taicpu(hp1).oper[1]^.ref^.base=p.oper[0]^.reg) and
       { the transferred register must not be the base register itself }
       (taicpu(hp1).oper[0]^.reg<>p.oper[0]^.reg) and
       (taicpu(hp1).oper[1]^.ref^.offset=0) and
       (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
       { a register offset must stay unmodified; a constant offset must fit
         the 8- or 12-bit immediate range of the load/store }
       (((p.oper[2]^.typ=top_reg) and
         (not RegModifiedBetween(p.oper[2]^.reg, p, hp1))) or
        ((p.oper[2]^.typ=top_const) and
         ((abs(p.oper[2]^.val) < 256) or
          ((abs(p.oper[2]^.val) < 4096) and
           (taicpu(hp1).oppostfix in [PF_None,PF_B]))))) then
      begin
        taicpu(hp1).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
        if p.oper[2]^.typ=top_reg then
          begin
            taicpu(hp1).oper[1]^.ref^.index:=p.oper[2]^.reg;
            { SUB subtracts the index, encoded via a negative sign }
            if p.opcode=A_ADD then
              taicpu(hp1).oper[1]^.ref^.signindex:=1
            else
              taicpu(hp1).oper[1]^.ref^.signindex:=-1;
          end
        else
          begin
            if p.opcode=A_ADD then
              taicpu(hp1).oper[1]^.ref^.offset:=p.oper[2]^.val
            else
              taicpu(hp1).oper[1]^.ref^.offset:=-p.oper[2]^.val;
          end;
        result:=true;
      end
    else
      result:=false;
  end;
{
  optimize
  ldr/str regX,[reg1]
  ...
  add/sub reg1,reg1,regY/const
  into
  ldr/str regX,[reg1], regY/const
}
{ Returns true (and rewrites p to a post-indexed form, deleting the later
  add/sub) when the base-register update can be folded into p. }
function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    if (p.oper[1]^.typ = top_ref) and
       (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
       (p.oper[1]^.ref^.index=NR_NO) and
       (p.oper[1]^.ref^.offset=0) and
       GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
       { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
       MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
       { the add/sub must update the base register in place }
       (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
       (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
       (
        (taicpu(hp1).oper[2]^.typ=top_reg) or
        { valid offset? }
        ((taicpu(hp1).oper[2]^.typ=top_const) and
         ((abs(taicpu(hp1).oper[2]^.val)<256) or
          ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
         )
        )
       ) and
       { don't apply the optimization if the base register is loaded }
       (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
       not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
       { don't apply the optimization if the (new) index register is loaded }
       (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
       not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
       GenerateARMCode then
      begin
        DebugMsg(SPeepholeOptimization + 'Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
        p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
        if taicpu(hp1).oper[2]^.typ=top_const then
          begin
            if taicpu(hp1).opcode=A_ADD then
              p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
            else
              p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
          end
        else
          begin
            p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
            { SUB subtracts the index, encoded via a negative sign }
            if taicpu(hp1).opcode=A_ADD then
              p.oper[1]^.ref^.signindex:=1
            else
              p.oper[1]^.ref^.signindex:=-1;
          end;
        { the add/sub is now folded into p and can go }
        asml.Remove(hp1);
        hp1.Free;
        Result:=true;
      end;
  end;
{ Pass-1 optimisations for ADD/SUB: fold a constant add/sub of an address
  register into the offsets of all following loads/stores that use it, or
  turn the add/sub into a pre-indexed load/store.  Returns true if p was
  removed/changed. }
function TCpuAsmOptimizer.OptPass1ADDSUB(var p: tai): Boolean;
  var
    hp1,hp2: tai;
  begin
    Result := OptPass1DataCheckMov(p);
    {
      change
      add/sub reg2,reg1,const1
      str/ldr reg3,[reg2,const2]
      dealloc reg2
      to
      str/ldr reg3,[reg1,const2+/-const1]
    }
    if (not GenerateThumbCode) and
       (taicpu(p).ops>2) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       (taicpu(p).oper[2]^.typ = top_const) then
      begin
        hp1:=p;
        { first, walk forward to check that EVERY use of reg2 up to its death
          is a foldable ldr/str; hp1 ends on the last such instruction }
        while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
          { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
          MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
          (taicpu(hp1).oper[1]^.typ = top_ref) and
          (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
          { don't optimize if the register is stored/overwritten }
          (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
          { new offset must be valid: either in the range of 8 or 12 bit, depend on the
            ldr postfix }
          (((taicpu(p).opcode=A_ADD) and
           isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
           ) or
           ((taicpu(p).opcode=A_SUB) and
            isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
           )
          ) do
          begin
            { neither reg1 nor reg2 might be changed inbetween }
            if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
              RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
              break;
            { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
            if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
              assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
              begin
                { remember last instruction }
                hp2:=hp1;
                DebugMsg(SPeepholeOptimization + 'Add/SubLdr2Ldr done', p);
                hp1:=p;
                { fix all ldr/str }
                while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
                  begin
                    { rebase on reg1 and fold the constant into the offset }
                    taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
                    if taicpu(p).opcode=A_ADD then
                      inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
                    else
                      dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
                    if hp1=hp2 then
                      break;
                  end;
                RemoveCurrentP(p);
                result:=true;
                Exit;
              end;
          end;
      end;
    { otherwise, try to turn the add/sub into a pre-indexed load/store }
    if (taicpu(p).condition = C_None) and
       (taicpu(p).oppostfix = PF_None) and
       LookForPreindexedPattern(taicpu(p)) then
      begin
        DebugMsg(SPeepholeOptimization + 'Add/Sub to Preindexed done', p);
        RemoveCurrentP(p);
        Result:=true;
        Exit;
      end;
  end;
function TCpuAsmOptimizer.OptPass1MUL(var p: tai): Boolean;
  { Pass-1 peephole handler for MUL: first runs the generic "fold a
    following mov into the data op" check, then tries to merge the MUL
    with a later ADD/SUB (that consumes the product) into a single
    MLA/MLS instruction. }
  var
    hp1: tai;         { the ADD/SUB instruction that uses the product }
    oldreg: tregister; { the accumulator operand of the ADD, saved before
                         the operands are overwritten below }
  begin
    Result := OptPass1DataCheckMov(p);
    {
      Turn
      mul reg0, z,w
      sub/add x, y, reg0
      dealloc reg0
      into
      mls/mla x,z,w,y
    }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_None) and
      (taicpu(p).ops=3) and
      (taicpu(p).oper[0]^.typ = top_reg) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (taicpu(p).oper[2]^.typ = top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
      { the multiplication inputs must still hold the same values at hp1 }
      (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
      (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
      { ADD->MLA needs at least ARMv4; SUB->MLS only exists on the listed
        ARMv6T2/ARMv7 variants }
      (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
       ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
      // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
      // TODO: A workaround would be to swap Rm and Rs
      (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
      { the product register must appear as exactly one source of hp1;
        for SUB it may only be the subtrahend (operand 2) since
        MLS computes acc - Rn*Rm, not Rn*Rm - acc }
      (((taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_reg) and
        ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
          (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
         ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
          (taicpu(hp1).opcode=A_ADD) and
          (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
       ((taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
      { the product register must die at hp1, otherwise it is still needed }
      (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
      begin
        if taicpu(hp1).opcode=A_ADD then
          begin
            taicpu(hp1).opcode:=A_MLA;
            { pick whichever source of the ADD is NOT the product:
              it becomes the accumulator (operand 3) of the MLA }
            if taicpu(hp1).ops=3 then
              begin
                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
                  oldreg:=taicpu(hp1).oper[2]^.reg
                else
                  oldreg:=taicpu(hp1).oper[1]^.reg;
              end
            else
              { 2-operand form "add x,reg0": x is both dest and accumulator }
              oldreg:=taicpu(hp1).oper[0]^.reg;
            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
            taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
            taicpu(hp1).loadreg(3,oldreg);
            DebugMsg(SPeepholeOptimization + 'MulAdd2MLA done', p);
          end
        else
          begin
            taicpu(hp1).opcode:=A_MLS;
            { NOTE: operand order matters here - the SUB's minuend
              (operand 1) is copied into operand 3 BEFORE operand 1 is
              overwritten with the multiplier below }
            taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
            if taicpu(hp1).ops=2 then
              taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
            else
              taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'MulSub2MLS done', p);
            { the MLS sources must stay allocated from the (removed) mul
              up to the new instruction }
            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
            AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
            AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
          end;
        taicpu(hp1).ops:=4;
        RemoveCurrentP(p);
        Result := True;
        Exit;
      end
  end;
  644. function TCpuAsmOptimizer.OptPass1And(var p: tai): Boolean;
  645. begin
  646. Result := OptPass1DataCheckMov(p);
  647. Result := inherited OptPass1And(p) or Result;
  648. end;
  649. function TCpuAsmOptimizer.OptPass1DataCheckMov(var p: tai): Boolean;
  650. var
  651. hp1: tai;
  652. begin
  653. {
  654. change
  655. op reg1, ...
  656. mov reg2, reg1
  657. to
  658. op reg2, ...
  659. }
  660. Result := (taicpu(p).ops >= 3) and
  661. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  662. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  663. end;
function TCpuAsmOptimizer.OptPass1CMP(var p: tai): Boolean;
  { Pass-1 peephole handler for CMP against a constant. Two patterns:
    1) removes a conditional mov made redundant by the compared constant;
    2) folds "op reg,...; cmp reg,#0" into "op{s} reg,..." when only the
       Z/N flags are consumed afterwards. }
  var
    hp1, hp2, hp_last: tai;
    MovRem1, MovRem2: Boolean; { whether the first/second cond. mov was removed }
  begin
    Result := False;
    { These optimizations can be applied only to the currently enabled operations because
      the other operations do not update all flags and FPC does not track flag usage }
    if (taicpu(p).condition = C_None) and
      (taicpu(p).oper[1]^.typ = top_const) and
      GetNextInstruction(p, hp1) then
      begin
        {
          change
          cmp reg,const1
          moveq reg,const1
          movne reg,const2
          to
          cmp reg,const1
          movne reg,const2
        }
        if MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
          (taicpu(hp1).oper[1]^.typ = top_const) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
          (taicpu(hp2).oper[1]^.typ = top_const) then
          begin
            MovRem1 := RemoveRedundantMove(p, hp1, asml);
            MovRem2 := RemoveRedundantMove(p, hp2, asml);
            Result:= MovRem1 or MovRem2;
            { Make sure that hp1 is still the next instruction after p }
            if MovRem1 then
              if MovRem2 then
                begin
                  { both movs gone - refetch the successor of p }
                  if not GetNextInstruction(p, hp1) then
                    Exit;
                end
              else
                { only the first mov gone - hp2 is now next after p }
                hp1 := hp2;
          end;
        {
          change
          <op> reg,x,y
          cmp reg,#0
          into
          <op>s reg,x,y
        }
        if (taicpu(p).oppostfix = PF_None) and
          (taicpu(p).oper[1]^.val = 0) and
          { be careful here, following instructions could use other flags
            however after a jump fpc never depends on the value of flags }
          { All above instructions set Z and N according to the following
            Z := result = 0;
            N := result[31];
            EQ = Z=1; NE = Z=0;
            MI = N=1; PL = N=0; }
          (MatchInstruction(hp1, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
           { mov is also possible, but only if there is no shifter operand, it could be an rxx,
             we are too lazy to check if it is rxx or something else }
           (MatchInstruction(hp1, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp1).ops=2))) and
          GetLastInstruction(p, hp_last) and
          MatchInstruction(hp_last, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,
            A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
          (
            { mlas is only allowed in arm mode }
            (taicpu(hp_last).opcode<>A_MLA) or
            (current_settings.instructionset<>is_thumb)
          ) and
          { the preceding op must write the register the cmp tests }
          (taicpu(hp_last).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
          { flags must be dead after the flag consumer hp1 }
          assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp1.Next))) then
          begin
            DebugMsg(SPeepholeOptimization + 'OpCmp2OpS done', hp_last);
            taicpu(hp_last).oppostfix:=PF_S;
            { move flag allocation if possible }
            hp1:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp_last.Next));
            if assigned(hp1) then
              begin
                asml.Remove(hp1);
                asml.insertbefore(hp1, hp_last);
              end;
            RemoveCurrentP(p);
            Result:=true;
          end;
      end;
  end;
function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  { Pass-1 peephole handler for LDR. After the generic ancestor pass it
    tries, in order: merging two loads from the same/adjacent addresses
    (ldr+ldr -> ldr / ldr+mov / ldrd), folding "ldrb + and #255",
    post-indexed address-mode recognition, and removal of a superfluous
    trailing mov. }
  var
    hp1: tai;
  begin
    Result := inherited OptPass1LDR(p);
    if Result then
      Exit;
    { change
      ldr reg1,ref
      ldr reg2,ref
      into ...
    }
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      GetNextInstruction(p,hp1) and
      { ldrd is not allowed here }
      MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
      begin
        {
          ...
          ldr reg1,ref
          mov reg2,reg1
        }
        if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
          RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
          { the first load must not clobber a register used in the
            second load's address }
          (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
          begin
            if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
              begin
                { identical destination: the second load is redundant }
                DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldr done', hp1);
                asml.remove(hp1);
                hp1.free;
              end
            else
              begin
                { different destination: replace the reload by a register copy }
                DebugMsg(SPeepholeOptimization + 'LdrLdr2LdrMov done', hp1);
                taicpu(hp1).opcode:=A_MOV;
                taicpu(hp1).oppostfix:=PF_None;
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
              end;
            result := true;
          end
        {
          ...
          ldrd reg1,reg1+1,ref
        }
        else if (GenerateARMCode or GenerateThumb2Code) and
          (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
          { ldrd does not allow any postfixes ... }
          (taicpu(p).oppostfix=PF_None) and
          { ldrd needs an even first register and the consecutive odd one }
          not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
          (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
          { ldr ensures that either base or index contain no register, else ldr wouldn't
            use an offset either
          }
          (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
          (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
          AlignedToQWord(taicpu(p).oper[1]^.ref^) then
          begin
            DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldrd done', p);
            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
            taicpu(p).ops:=3;
            taicpu(p).oppostfix:=PF_D;
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
    {
      Change
      ldrb dst1, [REF]
      and dst2, dst1, #255
      into
      ldrb dst2, [ref]
    }
    if not(GenerateThumbCode) and
      (taicpu(p).oppostfix=PF_B) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
      (taicpu(hp1).oper[2]^.typ = top_const) and
      { the and must be a plain zero-extension of the loaded byte }
      (taicpu(hp1).oper[2]^.val = $FF) and
      not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
      RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'LdrbAnd2Ldrb done', p);
        taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
        asml.remove(hp1);
        hp1.free;
        result:=true;
      end;
    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
    { Remove superfluous mov after ldr
      changes
      ldr reg1, ref
      mov reg2, reg1
      to
      ldr reg2, ref
      conditions are:
        * no ldrd usage
        * reg1 must be released after mov
        * mov can not contain shifterops
        * ldr+mov have the same conditions
        * mov does not set flags
    }
    if (taicpu(p).oppostfix<>PF_D) and
      GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
      RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
      Result:=true;
  end;
function TCpuAsmOptimizer.OptPass1STM(var p: tai): Boolean;
  { Pass-1 peephole handler for STM. Recognises the minimal
    save-LR / adjust-SP / call / undo sequence shown below and replaces
    the whole thing with a plain branch, i.e. it turns the call into a
    tail call. Disabled under thumb interworking since the branch would
    lose the interworking return. }
  var
    hp1, hp2, hp3, hp4: tai; { sub / bl / add / ldm of the matched sequence }
  begin
    Result := False;
    {
      change
      stmfd r13!,[r14]
      sub r13,r13,#4
      bl abc
      add r13,r13,#4
      ldmfd r13!,[r15]
      into
      b abc
    }
    if not(ts_thumb_interworking in current_settings.targetswitches) and
      { p must be exactly "stmfd r13!,[r14]" }
      (taicpu(p).condition = C_None) and
      (taicpu(p).oppostfix = PF_FD) and
      (taicpu(p).oper[0]^.typ = top_ref) and
      (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.offset=0) and
      (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
      (taicpu(p).oper[1]^.typ = top_regset) and
      (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
      { followed by "sub r13,r13,#const" }
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
      (taicpu(hp1).oper[0]^.typ = top_reg) and
      (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(hp1).oper[2]^.typ = top_const) and
      { then the call itself (entry/exit markers may intervene) }
      GetNextInstruction(hp1, hp2) and
      SkipEntryExitMarker(hp2, hp2) and
      MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
      (taicpu(hp2).oper[0]^.typ = top_ref) and
      { then "add r13,r13,#const" undoing the sub with the same constant }
      GetNextInstruction(hp2, hp3) and
      SkipEntryExitMarker(hp3, hp3) and
      MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
      MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
      MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
      { and finally "ldmfd r13!,[r15]" returning via pc }
      GetNextInstruction(hp3, hp4) and
      MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
      (taicpu(hp4).oper[1]^.typ = top_regset) and
      (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
      begin
        { drop the stack bookkeeping and the return load, keep only the
          call instruction, rewritten as a plain branch }
        asml.Remove(hp1);
        asml.Remove(hp3);
        asml.Remove(hp4);
        taicpu(hp2).opcode:=A_B;
        hp1.free;
        hp3.free;
        hp4.free;
        RemoveCurrentp(p, hp2);
        DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
        Result := True;
      end;
  end;
function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  { Pass-1 peephole handler for STR. After the generic ancestor pass it
    tries: forwarding a store to a following load of the same location
    (str+ldr -> str+mov), merging two adjacent stores into strd, and
    post-indexed address-mode recognition. }
  var
    hp1: tai;
  begin
    Result := inherited OptPass1STR(p);
    if Result then
      Exit;
    { Common conditions }
    if (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
      (taicpu(p).oppostfix=PF_None) then
      begin
        { change
          str reg1,ref
          ldr reg2,ref
          into
          str reg1,ref
          mov reg2,reg1
        }
        if (taicpu(p).condition=C_None) and
          GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
          MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
          (taicpu(hp1).oper[1]^.typ=top_ref) and
          (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
          { the stored value and the registers forming the address must
            still be unchanged when the load is reached }
          not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
          ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
          ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
          begin
            if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
              begin
                { same register: the reload is a no-op, drop it }
                DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 1 done', hp1);
                asml.remove(hp1);
                hp1.free;
              end
            else
              begin
                { different register: replace the load by a register copy }
                taicpu(hp1).opcode:=A_MOV;
                taicpu(hp1).oppostfix:=PF_None;
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 2 done', hp1);
              end;
            result := True;
          end
        { change
          str reg1,ref
          str reg2,ref
          into
          strd reg1,reg2,ref
        }
        else if (GenerateARMCode or GenerateThumb2Code) and
          (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
          { strd needs an even first register, an in-range offset and an
            8-byte aligned reference }
          not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
          (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
          AlignedToQWord(taicpu(p).oper[1]^.ref^) and
          GetNextInstruction(p,hp1) and
          MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
          (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
          { str ensures that either base or index contain no register, else ldr wouldn't
            use an offset either
          }
          (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
          (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
          (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) then
          begin
            DebugMsg(SPeepholeOptimization + 'StrStr2Strd done', p);
            taicpu(p).oppostfix:=PF_D;
            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
            taicpu(p).ops:=3;
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
      end;
    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  end;
  999. function TCpuAsmOptimizer.OptPass1MOV(var p: tai): Boolean;
  1000. var
  1001. hp1, hpfar1, hp2: tai;
  1002. i, i2: longint;
  1003. tempop: tasmop;
  1004. dealloc: tai_regalloc;
  1005. begin
  1006. Result := False;
  1007. hp1 := nil;
  1008. { fold
  1009. mov reg1,reg0, shift imm1
  1010. mov reg1,reg1, shift imm2
  1011. }
  1012. if (taicpu(p).ops=3) and
  1013. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1014. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1015. getnextinstruction(p,hp1) and
  1016. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1017. (taicpu(hp1).ops=3) and
  1018. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  1019. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1020. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1021. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  1022. begin
  1023. { fold
  1024. mov reg1,reg0, lsl 16
  1025. mov reg1,reg1, lsr 16
  1026. strh reg1, ...
  1027. dealloc reg1
  1028. to
  1029. strh reg1, ...
  1030. dealloc reg1
  1031. }
  1032. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1033. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  1034. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  1035. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  1036. getnextinstruction(hp1,hp2) and
  1037. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  1038. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  1039. begin
  1040. TransferUsedRegs(TmpUsedRegs);
  1041. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1042. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1043. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  1044. begin
  1045. DebugMsg(SPeepholeOptimization + 'Removed superfluous 16 Bit zero extension', hp1);
  1046. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  1047. asml.remove(hp1);
  1048. hp1.free;
  1049. RemoveCurrentP(p, hp2);
  1050. Result:=true;
  1051. Exit;
  1052. end;
  1053. end
  1054. { fold
  1055. mov reg1,reg0, shift imm1
  1056. mov reg1,reg1, shift imm2
  1057. to
  1058. mov reg1,reg0, shift imm1+imm2
  1059. }
  1060. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  1061. { asr makes no use after a lsr, the asr can be foled into the lsr }
  1062. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  1063. begin
  1064. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1065. { avoid overflows }
  1066. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  1067. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  1068. SM_ROR:
  1069. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  1070. SM_ASR:
  1071. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  1072. SM_LSR,
  1073. SM_LSL:
  1074. begin
  1075. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  1076. InsertLLItem(p.previous, p.next, hp2);
  1077. p.free;
  1078. p:=hp2;
  1079. end;
  1080. else
  1081. internalerror(2008072803);
  1082. end;
  1083. DebugMsg(SPeepholeOptimization + 'ShiftShift2Shift 1 done', p);
  1084. asml.remove(hp1);
  1085. hp1.free;
  1086. hp1 := nil;
  1087. result := true;
  1088. end
  1089. { fold
  1090. mov reg1,reg0, shift imm1
  1091. mov reg1,reg1, shift imm2
  1092. mov reg1,reg1, shift imm3 ...
  1093. mov reg2,reg1, shift imm3 ...
  1094. }
  1095. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  1096. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1097. (taicpu(hp2).ops=3) and
  1098. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1099. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  1100. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1101. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  1102. begin
  1103. { mov reg1,reg0, lsl imm1
  1104. mov reg1,reg1, lsr/asr imm2
  1105. mov reg2,reg1, lsl imm3 ...
  1106. to
  1107. mov reg1,reg0, lsl imm1
  1108. mov reg2,reg1, lsr/asr imm2-imm3
  1109. if
  1110. imm1>=imm2
  1111. }
  1112. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1113. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1114. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1115. begin
  1116. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1117. begin
  1118. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  1119. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1120. begin
  1121. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1a done', p);
  1122. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1123. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1124. asml.remove(hp1);
  1125. asml.remove(hp2);
  1126. hp1.free;
  1127. hp2.free;
  1128. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  1129. begin
  1130. taicpu(p).freeop(1);
  1131. taicpu(p).freeop(2);
  1132. taicpu(p).loadconst(1,0);
  1133. end;
  1134. result := true;
  1135. Exit;
  1136. end;
  1137. end
  1138. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1139. begin
  1140. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1b done', p);
  1141. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  1142. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1143. asml.remove(hp2);
  1144. hp2.free;
  1145. result := true;
  1146. Exit;
  1147. end;
  1148. end
  1149. { mov reg1,reg0, lsr/asr imm1
  1150. mov reg1,reg1, lsl imm2
  1151. mov reg1,reg1, lsr/asr imm3 ...
  1152. if imm3>=imm1 and imm2>=imm1
  1153. to
  1154. mov reg1,reg0, lsl imm2-imm1
  1155. mov reg1,reg1, lsr/asr imm3 ...
  1156. }
  1157. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1158. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1159. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  1160. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1161. begin
  1162. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  1163. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1164. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 2 done', p);
  1165. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  1166. begin
  1167. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  1168. asml.remove(hp1);
  1169. hp1.free;
  1170. end;
  1171. RemoveCurrentp(p);
  1172. result := true;
  1173. Exit;
  1174. end;
  1175. end;
  1176. end;
  1177. { All the optimisations from this point on require GetNextInstructionUsingReg
  1178. to return True }
  1179. while (
  1180. GetNextInstructionUsingReg(p, hpfar1, taicpu(p).oper[0]^.reg) and
  1181. (hpfar1.typ = ait_instruction)
  1182. ) do
  1183. begin
  1184. { Change the common
  1185. mov r0, r0, lsr #xxx
  1186. and r0, r0, #yyy/bic r0, r0, #xxx
  1187. and remove the superfluous and/bic if possible
  1188. This could be extended to handle more cases.
  1189. }
  1190. { Change
  1191. mov rx, ry, lsr/ror #xxx
  1192. uxtb/uxth rz,rx/and rz,rx,0xFF
  1193. dealloc rx
  1194. to
  1195. uxtb/uxth rz,ry,ror #xxx
  1196. }
  1197. if (GenerateThumb2Code) and
  1198. (taicpu(p).ops=3) and
  1199. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1200. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1201. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  1202. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1203. begin
  1204. if MatchInstruction(hpfar1, A_UXTB, [C_None], [PF_None]) and
  1205. (taicpu(hpfar1).ops = 2) and
  1206. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1207. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1208. begin
  1209. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1210. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1211. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1212. taicpu(hpfar1).ops := 3;
  1213. if not Assigned(hp1) then
  1214. GetNextInstruction(p,hp1);
  1215. RemoveCurrentP(p, hp1);
  1216. result:=true;
  1217. exit;
  1218. end
  1219. else if MatchInstruction(hpfar1, A_UXTH, [C_None], [PF_None]) and
  1220. (taicpu(hpfar1).ops=2) and
  1221. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  1222. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1223. begin
  1224. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1225. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1226. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1227. taicpu(hpfar1).ops := 3;
  1228. if not Assigned(hp1) then
  1229. GetNextInstruction(p,hp1);
  1230. RemoveCurrentP(p, hp1);
  1231. result:=true;
  1232. exit;
  1233. end
  1234. else if MatchInstruction(hpfar1, A_AND, [C_None], [PF_None]) and
  1235. (taicpu(hpfar1).ops = 3) and
  1236. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1237. (taicpu(hpfar1).oper[2]^.val = $FF) and
  1238. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1239. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1240. begin
  1241. taicpu(hpfar1).ops := 3;
  1242. taicpu(hpfar1).opcode := A_UXTB;
  1243. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1244. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1245. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1246. if not Assigned(hp1) then
  1247. GetNextInstruction(p,hp1);
  1248. RemoveCurrentP(p, hp1);
  1249. result:=true;
  1250. exit;
  1251. end;
  1252. end;
  1253. { 2-operald mov optimisations }
  1254. if (taicpu(p).ops = 2) then
  1255. begin
  1256. {
  1257. This removes the mul from
  1258. mov rX,0
  1259. ...
  1260. mul ...,rX,...
  1261. }
  1262. if (taicpu(p).oper[1]^.typ = top_const) then
  1263. begin
  1264. (* if false and
  1265. (taicpu(p).oper[1]^.val=0) and
  1266. MatchInstruction(hpfar1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1267. (((taicpu(hpfar1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^)) or
  1268. ((taicpu(hpfar1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[2]^))) then
  1269. begin
  1270. TransferUsedRegs(TmpUsedRegs);
  1271. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1272. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1273. DebugMsg(SPeepholeOptimization + 'MovMUL/MLA2Mov0 done', p);
  1274. if taicpu(hpfar1).opcode=A_MUL then
  1275. taicpu(hpfar1).loadconst(1,0)
  1276. else
  1277. taicpu(hpfar1).loadreg(1,taicpu(hpfar1).oper[3]^.reg);
  1278. taicpu(hpfar1).ops:=2;
  1279. taicpu(hpfar1).opcode:=A_MOV;
  1280. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1281. RemoveCurrentP(p);
  1282. Result:=true;
  1283. exit;
  1284. end
  1285. else*) if (taicpu(p).oper[1]^.val=0) and
  1286. MatchInstruction(hpfar1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1287. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[3]^) then
  1288. begin
  1289. TransferUsedRegs(TmpUsedRegs);
  1290. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1291. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1292. DebugMsg(SPeepholeOptimization + 'MovMLA2MUL 1 done', p);
  1293. taicpu(hpfar1).ops:=3;
  1294. taicpu(hpfar1).opcode:=A_MUL;
  1295. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1296. begin
  1297. RemoveCurrentP(p);
  1298. Result:=true;
  1299. end;
  1300. exit;
  1301. end
  1302. {
  1303. This changes the very common
  1304. mov r0, #0
  1305. str r0, [...]
  1306. mov r0, #0
  1307. str r0, [...]
  1308. and removes all superfluous mov instructions
  1309. }
  1310. else if (taicpu(hpfar1).opcode=A_STR) then
  1311. begin
  1312. hp1 := hpfar1;
  1313. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1314. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1315. GetNextInstruction(hp1, hp2) and
  1316. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1317. (taicpu(hp2).ops = 2) and
  1318. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1319. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1320. begin
  1321. DebugMsg(SPeepholeOptimization + 'MovStrMov done', hp2);
  1322. GetNextInstruction(hp2,hp1);
  1323. asml.remove(hp2);
  1324. hp2.free;
  1325. result:=true;
  1326. if not assigned(hp1) then break;
  1327. end;
  1328. if Result then
  1329. Exit;
  1330. end;
  1331. end;
  1332. {
  1333. This removes the first mov from
  1334. mov rX,...
  1335. mov rX,...
  1336. }
  1337. if taicpu(hpfar1).opcode=A_MOV then
  1338. begin
  1339. hp1 := p;
  1340. while MatchInstruction(hpfar1, A_MOV, [taicpu(hp1).condition], [taicpu(hp1).oppostfix]) and
  1341. (taicpu(hpfar1).ops = 2) and
  1342. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1343. { don't remove the first mov if the second is a mov rX,rX }
  1344. not(MatchOperand(taicpu(hpfar1).oper[0]^, taicpu(hpfar1).oper[1]^)) do
  1345. begin
  1346. { Defer removing the first p until after the while loop }
  1347. if p <> hp1 then
  1348. begin
  1349. DebugMsg(SPeepholeOptimization + 'MovMov done', hp1);
  1350. asml.remove(hp1);
  1351. hp1.free;
  1352. end;
  1353. hp1:=hpfar1;
  1354. GetNextInstruction(hpfar1,hpfar1);
  1355. result:=true;
  1356. if not assigned(hpfar1) then
  1357. Break;
  1358. end;
  1359. if Result then
  1360. begin
  1361. DebugMsg(SPeepholeOptimization + 'MovMov done', p);
  1362. RemoveCurrentp(p);
  1363. Exit;
  1364. end;
  1365. end;
  1366. if RedundantMovProcess(p,hpfar1) then
  1367. begin
  1368. Result:=true;
  1369. { p might not point at a mov anymore }
  1370. exit;
  1371. end;
  1372. { If hpfar1 is nil after the call to RedundantMovProcess, it is
  1373. because it would have become a dangling pointer, so reinitialise it. }
  1374. if not Assigned(hpfar1) then
  1375. Continue;
  1376. { Fold the very common sequence
  1377. mov regA, regB
  1378. ldr* regA, [regA]
  1379. to
  1380. ldr* regA, [regB]
  1381. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1382. }
  1383. if
  1384. // Make sure that Thumb code doesn't propagate a high register into a reference
  1385. (
  1386. (
  1387. GenerateThumbCode and
  1388. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)
  1389. ) or (not GenerateThumbCode)
  1390. ) and
  1391. (taicpu(p).oper[1]^.typ = top_reg) and
  1392. (taicpu(p).oppostfix = PF_NONE) and
  1393. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1394. (taicpu(hpfar1).oper[1]^.typ = top_ref) and
  1395. { We can change the base register only when the instruction uses AM_OFFSET }
  1396. ((taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1397. ((taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1398. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1399. ) and
  1400. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1401. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1402. begin
  1403. DebugMsg(SPeepholeOptimization + 'MovLdr2Ldr done', hpfar1);
  1404. if (taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1405. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1406. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1407. if taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1408. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1409. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
  1410. if Assigned(dealloc) then
  1411. begin
  1412. asml.remove(dealloc);
  1413. asml.InsertAfter(dealloc,hpfar1);
  1414. end;
  1415. if (not Assigned(hp1)) or (p=hp1) then
  1416. GetNextInstruction(p, hp1);
  1417. RemoveCurrentP(p, hp1);
  1418. result:=true;
  1419. Exit;
  1420. end
  1421. end
  1422. { 3-operald mov optimisations }
  1423. else if (taicpu(p).ops = 3) then
  1424. begin
  1425. if (taicpu(p).oper[2]^.typ = top_shifterop) and
  1426. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1427. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  1428. (taicpu(hpfar1).ops>=1) and
  1429. (taicpu(hpfar1).oper[0]^.typ=top_reg) and
  1430. (not RegModifiedBetween(taicpu(hpfar1).oper[0]^.reg, p, hpfar1)) and
  1431. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1432. begin
  1433. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  1434. MatchInstruction(hpfar1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1435. (taicpu(hpfar1).ops=3) and
  1436. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1437. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1438. { Check if the AND actually would only mask out bits being already zero because of the shift
  1439. }
  1440. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hpfar1).oper[2]^.val) =
  1441. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  1442. begin
  1443. DebugMsg(SPeepholeOptimization + 'LsrAnd2Lsr done', hpfar1);
  1444. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1445. asml.remove(hpfar1);
  1446. hpfar1.free;
  1447. result:=true;
  1448. Exit;
  1449. end
  1450. else if MatchInstruction(hpfar1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1451. (taicpu(hpfar1).ops=3) and
  1452. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1453. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1454. { Check if the BIC actually would only mask out bits beeing already zero because of the shift }
  1455. (taicpu(hpfar1).oper[2]^.val<>0) and
  1456. (BsfDWord(taicpu(hpfar1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1457. begin
  1458. DebugMsg(SPeepholeOptimization + 'LsrBic2Lsr done', hpfar1);
  1459. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1460. asml.remove(hpfar1);
  1461. hpfar1.free;
  1462. result:=true;
  1463. Exit;
  1464. end;
  1465. end;
  1466. { This folds shifterops into following instructions
  1467. mov r0, r1, lsl #8
  1468. add r2, r3, r0
  1469. to
  1470. add r2, r3, r1, lsl #8
  1471. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1472. }
  1473. if (taicpu(p).oper[1]^.typ = top_reg) and
  1474. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1475. (taicpu(p).oppostfix = PF_NONE) and
  1476. MatchInstruction(hpfar1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1477. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1478. A_CMP, A_CMN],
  1479. [taicpu(p).condition], [PF_None]) and
  1480. (not ((GenerateThumb2Code) and
  1481. (taicpu(hpfar1).opcode in [A_SBC]) and
  1482. (((taicpu(hpfar1).ops=3) and
  1483. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^.reg)) or
  1484. ((taicpu(hpfar1).ops=2) and
  1485. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^.reg))))) and
  1486. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) and
  1487. (taicpu(hpfar1).ops >= 2) and
  1488. {Currently we can't fold into another shifterop}
  1489. (taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^.typ = top_reg) and
  1490. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1491. NR_DEFAULTFLAGS for modification}
  1492. (
  1493. {Everything is fine if we don't use RRX}
  1494. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1495. (
  1496. {If it is RRX, then check if we're just accessing the next instruction}
  1497. Assigned(hp1) and
  1498. (hpfar1 = hp1)
  1499. )
  1500. ) and
  1501. { reg1 might not be modified inbetween }
  1502. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1503. { The shifterop can contain a register, might not be modified}
  1504. (
  1505. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1506. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hpfar1))
  1507. ) and
  1508. (
  1509. {Only ONE of the two src operands is allowed to match}
  1510. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-2]^) xor
  1511. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^)
  1512. ) then
  1513. begin
  1514. if taicpu(hpfar1).opcode in [A_TST, A_TEQ, A_CMN] then
  1515. I2:=0
  1516. else
  1517. I2:=1;
  1518. for I:=I2 to taicpu(hpfar1).ops-1 do
  1519. if MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[I]^.reg) then
  1520. begin
  1521. { If the parameter matched on the second op from the RIGHT
  1522. we have to switch the parameters, this will not happen for CMP
  1523. were we're only evaluating the most right parameter
  1524. }
  1525. if I <> taicpu(hpfar1).ops-1 then
  1526. begin
  1527. {The SUB operators need to be changed when we swap parameters}
  1528. case taicpu(hpfar1).opcode of
  1529. A_SUB: tempop:=A_RSB;
  1530. A_SBC: tempop:=A_RSC;
  1531. A_RSB: tempop:=A_SUB;
  1532. A_RSC: tempop:=A_SBC;
  1533. else tempop:=taicpu(hpfar1).opcode;
  1534. end;
  1535. if taicpu(hpfar1).ops = 3 then
  1536. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1537. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[2]^.reg,
  1538. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1539. else
  1540. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1541. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1542. taicpu(p).oper[2]^.shifterop^);
  1543. end
  1544. else
  1545. if taicpu(hpfar1).ops = 3 then
  1546. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1547. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[1]^.reg,
  1548. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1549. else
  1550. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1551. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1552. taicpu(p).oper[2]^.shifterop^);
  1553. if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
  1554. AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hpfar1,UsedRegs);
  1555. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hpfar1,UsedRegs);
  1556. asml.insertbefore(hp2, hpfar1);
  1557. asml.remove(hpfar1);
  1558. hpfar1.free;
  1559. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  1560. if not Assigned(hp1) then
  1561. GetNextInstruction(p, hp1)
  1562. else if hp1 = hpfar1 then
  1563. { If hp1 = hpfar1, then it's a dangling pointer }
  1564. hp1 := hp2;
  1565. RemoveCurrentP(p, hp1);
  1566. Result:=true;
  1567. Exit;
  1568. end;
  1569. end;
  1570. {
  1571. Fold
  1572. mov r1, r1, lsl #2
  1573. ldr/ldrb r0, [r0, r1]
  1574. to
  1575. ldr/ldrb r0, [r0, r1, lsl #2]
  1576. XXX: This still needs some work, as we quite often encounter something like
  1577. mov r1, r2, lsl #2
  1578. add r2, r3, #imm
  1579. ldr r0, [r2, r1]
  1580. which can't be folded because r2 is overwritten between the shift and the ldr.
  1581. We could try to shuffle the registers around and fold it into.
  1582. add r1, r3, #imm
  1583. ldr r0, [r1, r2, lsl #2]
  1584. }
  1585. if (not(GenerateThumbCode)) and
  1586. { thumb2 allows only lsl #0..#3 }
  1587. (not(GenerateThumb2Code) or
  1588. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1589. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1590. )
  1591. ) and
  1592. (taicpu(p).oper[1]^.typ = top_reg) and
  1593. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1594. { RRX is tough to handle, because it requires tracking the C-Flag,
  1595. it is also extremly unlikely to be emitted this way}
  1596. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1597. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1598. (taicpu(p).oppostfix = PF_NONE) and
  1599. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1600. (MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1601. (GenerateThumb2Code and
  1602. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1603. ) and
  1604. (
  1605. {If this is address by offset, one of the two registers can be used}
  1606. ((taicpu(hpfar1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1607. (
  1608. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1609. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1610. )
  1611. ) or
  1612. {For post and preindexed only the index register can be used}
  1613. ((taicpu(hpfar1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1614. (
  1615. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1616. (taicpu(hpfar1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1617. ) and
  1618. (not GenerateThumb2Code)
  1619. )
  1620. ) and
  1621. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1622. (taicpu(hpfar1).oper[1]^.ref^.index<>NR_NO) and
  1623. (taicpu(hpfar1).oper[1]^.ref^.base<>NR_NO) and
  1624. { Only fold if there isn't another shifterop already, and offset is zero. }
  1625. (taicpu(hpfar1).oper[1]^.ref^.offset = 0) and
  1626. (taicpu(hpfar1).oper[1]^.ref^.shiftmode = SM_None) and
  1627. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1628. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1629. begin
  1630. { If the register we want to do the shift for resides in base, we need to swap that}
  1631. if (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1632. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(hpfar1).oper[1]^.ref^.index;
  1633. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1634. taicpu(hpfar1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1635. taicpu(hpfar1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1636. DebugMsg(SPeepholeOptimization + 'FoldShiftLdrStr done', hpfar1);
  1637. RemoveCurrentP(p);
  1638. Result:=true;
  1639. Exit;
  1640. end;
  1641. end;
  1642. {
  1643. Often we see shifts and then a superfluous mov to another register
  1644. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  1645. }
  1646. if RemoveSuperfluousMove(p, hpfar1, 'MovMov2Mov') then
  1647. Result:=true;
  1648. Exit;
  1649. end;
  1650. end;
function TCpuAsmOptimizer.OptPass1MVN(var p: tai): Boolean;
  var
    hp1: tai;
  begin
    {
      change
      mvn reg2,reg1
      and reg3,reg4,reg2
      dealloc reg2
      to
      bic reg3,reg4,reg1
    }
    Result := False;
    { The MVN source must be a plain register so it can be substituted into
      the BIC directly. }
    if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
      MatchInstruction(hp1,A_AND,[],[]) and
      { The inverted register (reg2) must appear as a register operand of
        the AND: either source of the 3-op form, or the second operand of
        the 2-op form. }
      (((taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_reg) and
        (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
       ((taicpu(hp1).ops=2) and
        (taicpu(hp1).oper[1]^.typ=top_reg) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
      { reg2 must die right after the AND, otherwise it is still needed }
      assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
      { reg1 might not be modified inbetween }
      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
      begin
        DebugMsg(SPeepholeOptimization + 'MvnAnd2Bic done', p);
        taicpu(hp1).opcode:=A_BIC;
        if taicpu(hp1).ops=3 then
          begin
            { BIC only inverts its LAST operand, so the inverted register
              must end up in slot 2; swap the sources if needed. }
            if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
              taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
            taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
          end
        else
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
        RemoveCurrentp(p);
        Result := True;
        Exit;
      end;
  end;
  1693. function TCpuAsmOptimizer.OptPass1VMov(var p: tai): Boolean;
  1694. var
  1695. hp1: tai;
  1696. begin
  1697. {
  1698. change
  1699. vmov reg0,reg1,reg2
  1700. vmov reg1,reg2,reg0
  1701. into
  1702. vmov reg0,reg1,reg2
  1703. can be applied regardless if reg0 or reg2 is the vfp register
  1704. }
  1705. Result := False;
  1706. if (taicpu(p).ops = 3) then
  1707. while GetNextInstruction(p, hp1) and
  1708. MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1709. (taicpu(hp1).ops = 3) and
  1710. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
  1711. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
  1712. MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) do
  1713. begin
  1714. asml.Remove(hp1);
  1715. hp1.free;
  1716. DebugMsg(SPeepholeOptimization + 'VMovVMov2VMov done', p);
  1717. { Can we do it again? }
  1718. end;
  1719. end;
  1720. function TCpuAsmOptimizer.OptPass1VOp(var p: tai): Boolean;
  1721. var
  1722. hp1: tai;
  1723. begin
  1724. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1725. RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp');
  1726. end;
  1727. function TCpuAsmOptimizer.OptPass2Bcc(var p: tai): Boolean;
  1728. var
  1729. hp1,hp2,hp3,after_p: tai;
  1730. l : longint;
  1731. WasLast: boolean;
  1732. Label_X, Label_Y: TASmLabel;
  1733. procedure ConvertInstructins(this_hp: tai; newcond: tasmcond);
  1734. var
  1735. next_hp: tai;
  1736. begin
  1737. repeat
  1738. if this_hp.typ=ait_instruction then
  1739. taicpu(this_hp).condition := newcond;
  1740. GetNextInstruction(this_hp, next_hp);
  1741. if MustBeLast(this_hp) then
  1742. Break;
  1743. this_hp := next_hp
  1744. until not(assigned(this_hp)) or
  1745. not(CanBeCond(this_hp)) or
  1746. ((hp1.typ = ait_instruction) and (taicpu(hp1).opcode = A_B)) or
  1747. (this_hp.typ = ait_label);
  1748. end;
  1749. begin
  1750. Result := False;
  1751. if (taicpu(p).condition<>C_None) and
  1752. not(GenerateThumbCode) then
  1753. begin
  1754. { check for
  1755. Bxx xxx
  1756. <several instructions>
  1757. xxx:
  1758. }
  1759. Label_X := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  1760. l:=0;
  1761. WasLast:=False;
  1762. GetNextInstruction(p, hp1);
  1763. after_p := hp1;
  1764. while assigned(hp1) and
  1765. (l<=4) and
  1766. CanBeCond(hp1) and
  1767. { stop on labels }
  1768. not(hp1.typ=ait_label) and
  1769. { avoid that we cannot recognize the case BccB2Cond }
  1770. not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
  1771. begin
  1772. inc(l);
  1773. if MustBeLast(hp1) then
  1774. begin
  1775. WasLast:=True;
  1776. GetNextInstruction(hp1,hp1);
  1777. break;
  1778. end
  1779. else
  1780. GetNextInstruction(hp1,hp1);
  1781. end;
  1782. if assigned(hp1) then
  1783. begin
  1784. if FindLabel(Label_X, hp1) then
  1785. begin
  1786. if (l<=4) and (l>0) then
  1787. begin
  1788. ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
  1789. DebugMsg(SPeepholeOptimization + 'Bcc2Cond done', p);
  1790. { wait with removing else GetNextInstruction could
  1791. ignore the label if it was the only usage in the
  1792. jump moved away }
  1793. Label_X.decrefs;
  1794. RemoveCurrentP(p, after_p);
  1795. Result := True;
  1796. Exit;
  1797. end;
  1798. end
  1799. else
  1800. { do not perform further optimizations if there is an instruction
  1801. in block #1 which cannot be optimized.
  1802. }
  1803. if not WasLast then
  1804. begin
  1805. { check further for
  1806. Bcc xxx
  1807. <several instructions 1>
  1808. B yyy
  1809. xxx:
  1810. <several instructions 2>
  1811. yyy:
  1812. }
  1813. { hp2 points to jmp yyy }
  1814. hp2:=hp1;
  1815. { skip hp2 to xxx }
  1816. if assigned(hp2) and
  1817. (l<=3) and
  1818. (hp2.typ=ait_instruction) and
  1819. (taicpu(hp2).is_jmp) and
  1820. (taicpu(hp2).condition=C_None) and
  1821. GetNextInstruction(hp2, hp1) and
  1822. { real label and jump, no further references to the
  1823. label are allowed }
  1824. (Label_X.getrefs = 1) and
  1825. FindLabel(Label_X, hp1) then
  1826. begin
  1827. Label_Y := TAsmLabel(taicpu(hp2).oper[0]^.ref^.symbol);
  1828. l:=0;
  1829. { skip hp1 and hp3 to <several moves 2> }
  1830. GetNextInstruction(hp1, hp1);
  1831. hp3 := hp1;
  1832. while assigned(hp1) and
  1833. CanBeCond(hp1) and
  1834. (l<=3) do
  1835. begin
  1836. inc(l);
  1837. if MustBeLast(hp1) then
  1838. begin
  1839. GetNextInstruction(hp1, hp1);
  1840. break;
  1841. end
  1842. else
  1843. GetNextInstruction(hp1, hp1);
  1844. end;
  1845. { hp1 points to yyy: }
  1846. if assigned(hp1) and
  1847. FindLabel(Label_Y, hp1) then
  1848. begin
  1849. ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
  1850. ConvertInstructins(hp3, taicpu(p).condition);
  1851. DebugMsg(SPeepholeOptimization + 'BccB2Cond done', after_p);
  1852. { remove B }
  1853. Label_Y.decrefs;
  1854. RemoveInstruction(hp2);
  1855. { remove Bcc }
  1856. Label_X.decrefs;
  1857. RemoveCurrentP(p, after_p);
  1858. Result := True;
  1859. Exit;
  1860. end;
  1861. end;
  1862. end;
  1863. end;
  1864. end;
  1865. end;
function TCpuAsmOptimizer.OptPass2STR(var p: tai): Boolean;
  var
    hp1: tai;
    Postfix: TOpPostfix;
    OpcodeStr: shortstring;
  begin
    Result := False;
    { Try to merge two STRs into an STM instruction }
    { p must be an unconditional-postfix, offset-addressed store of an
      integer register through a single address register. }
    if not(GenerateThumbCode) and (taicpu(p).oper[1]^.typ = top_ref) and
      (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) and
      (
        (taicpu(p).oper[1]^.ref^.base = NR_NO) or
        (taicpu(p).oper[1]^.ref^.index = NR_NO)
      ) and
      (taicpu(p).oppostfix = PF_None) and
      (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) then
      begin
        hp1 := p;
        { Scan forward over the following run of STR instructions looking
          for a partner store. }
        while GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
          (taicpu(hp1).opcode = A_STR) do
          if (taicpu(hp1).condition = taicpu(p).condition) and
            (taicpu(hp1).oppostfix = PF_None) and
            (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
            (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
            (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[1]^.ref^.base) and
            (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[1]^.ref^.index) and
            (
              { One store must be at offset 0 and the other 4 bytes away,
                with register numbers increasing with the offset test below. }
              (
                (taicpu(p).oper[1]^.ref^.offset = 0) and
                (getsupreg(taicpu(hp1).oper[0]^.reg) > getsupreg(taicpu(p).oper[0]^.reg)) and
                (abs(taicpu(hp1).oper[1]^.ref^.offset) = 4)
              ) or (
                (taicpu(hp1).oper[1]^.ref^.offset = 0) and
                (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) and
                (abs(taicpu(p).oper[1]^.ref^.offset) = 4)
              )
            ) then
            begin
              { Use the descending form (STMDA) when the lower-numbered
                register is stored at the higher address. }
              if (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) xor
                (taicpu(hp1).oper[1]^.ref^.offset < taicpu(p).oper[1]^.ref^.offset) then
                begin
                  Postfix := PF_DA;
                  OpcodeStr := 'DA';
                end
              else
                begin
                  Postfix := PF_None;
                  OpcodeStr := '';
                end;
              taicpu(hp1).oper[1]^.ref^.offset := 0;
              { Normalise the reference so the address register sits in the
                index slot before it is copied into the STM. }
              if taicpu(hp1).oper[1]^.ref^.index = NR_NO then
                begin
                  taicpu(hp1).oper[1]^.ref^.index := taicpu(hp1).oper[1]^.ref^.base;
                  taicpu(hp1).oper[1]^.ref^.base := NR_NO;
                end;
              { Rewrite p in place: oper[0] becomes the reference and
                oper[1] the two-register set. }
              taicpu(p).opcode := A_STM;
              taicpu(p).loadregset(1, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg), getsupreg(taicpu(hp1).oper[0]^.reg)]);
              taicpu(p).loadref(0, taicpu(hp1).oper[1]^.ref^);
              taicpu(p).oppostfix := Postfix;
              RemoveInstruction(hp1);
              DebugMsg(SPeepholeOptimization + 'Merging stores: STR/STR -> STM' + OpcodeStr, p);
              Result := True;
              Exit;
            end;
      end;
  end;
  1932. function TCpuAsmOptimizer.OptPass2STM(var p: tai): Boolean;
  1933. var
  1934. hp1: tai;
  1935. CorrectOffset:ASizeInt;
  1936. i, LastReg: TSuperRegister;
  1937. Postfix: TOpPostfix;
  1938. OpcodeStr: shortstring;
  1939. begin
  1940. Result := False;
  1941. { See if STM/STR can be merged into a single STM }
  1942. if (taicpu(p).oper[0]^.ref^.addressmode = AM_OFFSET) then
  1943. begin
  1944. CorrectOffset := 0;
  1945. LastReg := RS_NO;
  1946. for i in taicpu(p).oper[1]^.regset^ do
  1947. begin
  1948. LastReg := i;
  1949. Inc(CorrectOffset, 4);
  1950. end;
  1951. { This while loop effectively doea a Selection Sort on any STR
  1952. instructions that follow }
  1953. hp1 := p;
  1954. while (LastReg < maxcpuregister) and
  1955. GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
  1956. (taicpu(hp1).opcode = A_STR) do
  1957. if (taicpu(hp1).condition = taicpu(p).condition) and
  1958. (taicpu(hp1).oppostfix = PF_None) and
  1959. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1960. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1961. (
  1962. (
  1963. (taicpu(p).oper[1]^.ref^.base = NR_NO) and
  1964. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.ref^.index)
  1965. ) or (
  1966. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1967. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.ref^.base)
  1968. )
  1969. ) and
  1970. { Next register must be later in the set }
  1971. (getsupreg(taicpu(hp1).oper[0]^.reg) > LastReg) and
  1972. (
  1973. (
  1974. (taicpu(p).oppostfix = PF_None) and
  1975. (taicpu(hp1).oper[1]^.ref^.offset = CorrectOffset)
  1976. ) or (
  1977. (taicpu(p).oppostfix = PF_DA) and
  1978. (taicpu(hp1).oper[1]^.ref^.offset = -CorrectOffset)
  1979. )
  1980. ) then
  1981. begin
  1982. { Increment the reference values ready for the next STR instruction to find }
  1983. LastReg := getsupreg(taicpu(hp1).oper[0]^.reg);
  1984. Inc(CorrectOffset, 4);
  1985. if (taicpu(p).oppostfix = PF_DA) then
  1986. OpcodeStr := 'DA'
  1987. else
  1988. OpcodeStr := '';
  1989. Include(taicpu(p).oper[1]^.regset^, LastReg);
  1990. DebugMsg(SPeepholeOptimization + 'Merging stores: STM' + OpcodeStr + '/STR -> STM' + OpcodeStr, hp1);
  1991. RemoveInstruction(hp1);
  1992. Result := True;
  1993. { See if we can find another one to merge }
  1994. hp1 := p;
  1995. Continue;
  1996. end;
  1997. end;
  1998. end;
  1999. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  2000. begin
  2001. result := false;
  2002. if p.typ = ait_instruction then
  2003. begin
  2004. case taicpu(p).opcode of
  2005. A_CMP:
  2006. Result := OptPass1CMP(p);
  2007. A_STR:
  2008. Result := OptPass1STR(p);
  2009. A_LDR:
  2010. Result := OptPass1LDR(p);
  2011. A_MOV:
  2012. Result := OptPass1MOV(p);
  2013. A_AND:
  2014. Result := OptPass1And(p);
  2015. A_ADD,
  2016. A_SUB:
  2017. Result := OptPass1ADDSUB(p);
  2018. A_MUL:
  2019. REsult := OptPass1MUL(p);
  2020. A_ADC,
  2021. A_RSB,
  2022. A_RSC,
  2023. A_SBC,
  2024. A_BIC,
  2025. A_EOR,
  2026. A_ORR,
  2027. A_MLA,
  2028. A_MLS,
  2029. A_QADD,A_QADD16,A_QADD8,
  2030. A_QSUB,A_QSUB16,A_QSUB8,
  2031. A_QDADD,A_QDSUB,A_QASX,A_QSAX,
  2032. A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
  2033. A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
  2034. A_PKHTB,A_PKHBT,
  2035. A_SMUAD,A_SMUSD:
  2036. Result := OptPass1DataCheckMov(p);
  2037. {$ifdef dummy}
  2038. A_MVN:
  2039. Result := OPtPass1MVN(p);
  2040. {$endif dummy}
  2041. A_UXTB:
  2042. Result := OptPass1UXTB(p);
  2043. A_UXTH:
  2044. Result := OptPass1UXTH(p);
  2045. A_SXTB:
  2046. Result := OptPass1SXTB(p);
  2047. A_SXTH:
  2048. Result := OptPass1SXTH(p);
  2049. A_STM:
  2050. Result := OptPass1STM(p);
  2051. A_VMOV:
  2052. Result := OptPass1VMov(p);
  2053. A_VLDR,
  2054. A_VADD,
  2055. A_VMUL,
  2056. A_VDIV,
  2057. A_VSUB,
  2058. A_VSQRT,
  2059. A_VNEG,
  2060. A_VCVT,
  2061. A_VABS:
  2062. Result := OptPass1VOp(p);
  2063. else
  2064. ;
  2065. end;
  2066. end;
  2067. end;
  2068. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  2069. begin
  2070. result := False;
  2071. if p.typ = ait_instruction then
  2072. begin
  2073. case taicpu(p).opcode of
  2074. A_B:
  2075. Result := OptPass2Bcc(p);
  2076. A_STM:
  2077. Result := OptPass2STM(p);
  2078. A_STR:
  2079. Result := OptPass2STR(p);
  2080. else
  2081. ;
  2082. end;
  2083. end;
  2084. end;
  2085. { instructions modifying the CPSR can be only the last instruction }
  2086. function MustBeLast(p : tai) : boolean;
  2087. begin
  2088. Result:=(p.typ=ait_instruction) and
  2089. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  2090. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  2091. (taicpu(p).oppostfix=PF_S));
  2092. end;
  2093. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  2094. begin
  2095. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  2096. Result:=true
  2097. else If MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
  2098. (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
  2099. Result:=true
  2100. else
  2101. Result:=inherited RegInInstruction(Reg, p1);
  2102. end;
  2103. const
  2104. { set of opcode which might or do write to memory }
  2105. { TODO : extend armins.dat to contain r/w info }
  2106. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  2107. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  2108. { adjust the register live information when swapping the two instructions p and hp1,
  2109. they must follow one after the other }
  2110. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  2111. procedure CheckLiveEnd(reg : tregister);
  2112. var
  2113. supreg : TSuperRegister;
  2114. regtype : TRegisterType;
  2115. begin
  2116. if reg=NR_NO then
  2117. exit;
  2118. regtype:=getregtype(reg);
  2119. supreg:=getsupreg(reg);
  2120. if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
  2121. RegInInstruction(reg,p) then
  2122. cg.rg[regtype].live_end[supreg]:=p;
  2123. end;
  2124. procedure CheckLiveStart(reg : TRegister);
  2125. var
  2126. supreg : TSuperRegister;
  2127. regtype : TRegisterType;
  2128. begin
  2129. if reg=NR_NO then
  2130. exit;
  2131. regtype:=getregtype(reg);
  2132. supreg:=getsupreg(reg);
  2133. if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
  2134. RegInInstruction(reg,hp1) then
  2135. cg.rg[regtype].live_start[supreg]:=hp1;
  2136. end;
  2137. var
  2138. i : longint;
  2139. r : TSuperRegister;
  2140. begin
  2141. { assumption: p is directly followed by hp1 }
  2142. { if live of any reg used by p starts at p and hp1 uses this register then
  2143. set live start to hp1 }
  2144. for i:=0 to p.ops-1 do
  2145. case p.oper[i]^.typ of
  2146. Top_Reg:
  2147. CheckLiveStart(p.oper[i]^.reg);
  2148. Top_Ref:
  2149. begin
  2150. CheckLiveStart(p.oper[i]^.ref^.base);
  2151. CheckLiveStart(p.oper[i]^.ref^.index);
  2152. end;
  2153. Top_Shifterop:
  2154. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  2155. Top_RegSet:
  2156. for r:=RS_R0 to RS_R15 do
  2157. if r in p.oper[i]^.regset^ then
  2158. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2159. else
  2160. ;
  2161. end;
  2162. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  2163. set live end to p }
  2164. for i:=0 to hp1.ops-1 do
  2165. case hp1.oper[i]^.typ of
  2166. Top_Reg:
  2167. CheckLiveEnd(hp1.oper[i]^.reg);
  2168. Top_Ref:
  2169. begin
  2170. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  2171. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  2172. end;
  2173. Top_Shifterop:
  2174. CheckLiveStart(hp1.oper[i]^.shifterop^.rs);
  2175. Top_RegSet:
  2176. for r:=RS_R0 to RS_R15 do
  2177. if r in hp1.oper[i]^.regset^ then
  2178. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2179. else
  2180. ;
  2181. end;
  2182. end;
  2183. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  2184. { TODO : schedule also forward }
  2185. { TODO : schedule distance > 1 }
  2186. { returns true if p might be a load of a pc relative tls offset }
  2187. function PossibleTLSLoad(const p: tai) : boolean;
  2188. begin
  2189. Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
  2190. (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
  2191. (taicpu(p).oper[1]^.ref^.index=NR_PC)));
  2192. end;
  2193. var
  2194. hp1,hp2,hp3,hp4,hp5,insertpos : tai;
  2195. list : TAsmList;
  2196. begin
  2197. result:=true;
  2198. list:=TAsmList.create;
  2199. p:=BlockStart;
  2200. while p<>BlockEnd Do
  2201. begin
  2202. if (p.typ=ait_instruction) and
  2203. GetNextInstruction(p,hp1) and
  2204. (hp1.typ=ait_instruction) and
  2205. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  2206. (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
  2207. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  2208. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  2209. not(RegModifiedByInstruction(NR_PC,p))
  2210. ) or
  2211. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  2212. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  2213. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  2214. (taicpu(hp1).oper[1]^.ref^.offset=0)
  2215. )
  2216. ) or
  2217. { try to prove that the memory accesses don't overlapp }
  2218. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  2219. (taicpu(p).oper[1]^.typ = top_ref) and
  2220. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  2221. (taicpu(p).oppostfix=PF_None) and
  2222. (taicpu(hp1).oppostfix=PF_None) and
  2223. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  2224. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  2225. { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
  2226. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  2227. )
  2228. )
  2229. ) and
  2230. GetNextInstruction(hp1,hp2) and
  2231. (hp2.typ=ait_instruction) and
  2232. { loaded register used by next instruction?
  2233. if we ever support labels (they could be skipped in theory) here, the gnu2 tls general-dynamic code could get broken (the ldr before
  2234. the bl may not be scheduled away from the bl) and it needs to be taken care of this case
  2235. }
  2236. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  2237. { loaded register not used by previous instruction? }
  2238. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  2239. { same condition? }
  2240. (taicpu(p).condition=taicpu(hp1).condition) and
  2241. { first instruction might not change the register used as base }
  2242. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  2243. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  2244. ) and
  2245. { first instruction might not change the register used as index }
  2246. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  2247. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  2248. ) and
  2249. { if we modify the basereg AND the first instruction used that reg, we can not schedule }
  2250. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
  2251. not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
  2252. not(PossibleTLSLoad(p)) and
  2253. not(PossibleTLSLoad(hp1)) then
  2254. begin
  2255. hp3:=tai(p.Previous);
  2256. hp5:=tai(p.next);
  2257. asml.Remove(p);
  2258. { if there is a reg. alloc/dealloc/sync instructions or address labels (e.g. for GOT-less PIC)
  2259. associated with p, move it together with p }
  2260. { before the instruction? }
  2261. { find reg allocs,deallocs and PIC labels }
  2262. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  2263. begin
  2264. if ( (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_alloc, ra_dealloc]) and
  2265. RegInInstruction(tai_regalloc(hp3).reg,p) )
  2266. or ( (hp3.typ=ait_label) and (tai_label(hp3).labsym.typ=AT_ADDR) )
  2267. then
  2268. begin
  2269. hp4:=hp3;
  2270. hp3:=tai(hp3.Previous);
  2271. asml.Remove(hp4);
  2272. list.Insert(hp4);
  2273. end
  2274. else
  2275. hp3:=tai(hp3.Previous);
  2276. end;
  2277. list.Concat(p);
  2278. SwapRegLive(taicpu(p),taicpu(hp1));
  2279. { after the instruction? }
  2280. { find reg deallocs and reg syncs }
  2281. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  2282. begin
  2283. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc, ra_sync]) and
  2284. RegInInstruction(tai_regalloc(hp5).reg,p) then
  2285. begin
  2286. hp4:=hp5;
  2287. hp5:=tai(hp5.next);
  2288. asml.Remove(hp4);
  2289. list.Concat(hp4);
  2290. end
  2291. else
  2292. hp5:=tai(hp5.Next);
  2293. end;
  2294. asml.Remove(hp1);
  2295. { if there are address labels associated with hp2, those must
  2296. stay with hp2 (e.g. for GOT-less PIC) }
  2297. insertpos:=hp2;
  2298. while assigned(hp2.previous) and
  2299. (tai(hp2.previous).typ<>ait_instruction) do
  2300. begin
  2301. hp2:=tai(hp2.previous);
  2302. if (hp2.typ=ait_label) and
  2303. (tai_label(hp2).labsym.typ=AT_ADDR) then
  2304. insertpos:=hp2;
  2305. end;
  2306. {$ifdef DEBUG_PREREGSCHEDULER}
  2307. asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
  2308. {$endif DEBUG_PREREGSCHEDULER}
  2309. asml.InsertBefore(hp1,insertpos);
  2310. asml.InsertListBefore(insertpos,list);
  2311. p:=tai(p.next);
  2312. end
  2313. else if p.typ=ait_instruction then
  2314. p:=hp1
  2315. else
  2316. p:=tai(p.next);
  2317. end;
  2318. list.Free;
  2319. end;
  { p is being removed from a conditional sequence: search at most 4
    instructions backwards for the IT* instruction whose IT block covers p
    and shorten that block by one slot, i.e. drop the block's last
    condition letter (the l=… guards ensure p really is the last slot).
    A plain A_IT that covered only p is removed from the list entirely.
    NOTE(review): the removed A_IT is only unlinked, not freed here. }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      { l = 1-based slot that p would occupy inside a preceding IT block }
      l := 1;
      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  { an IT covering exactly one instruction (p itself) can be
                    dropped completely }
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    list.Remove(hp)
                  else
                    { otherwise strip the trailing T/E letter, but only when p
                      sits in the last slot of that block }
                    case taicpu(hp).opcode of
                      A_ITE:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTT:
                        begin
                          if l=4 then taicpu(hp).opcode := A_ITTT;
                        end
                      else
                        ;
                    end;
                  { the covering IT (if any) has been handled; stop searching }
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}
              { only real instructions count towards the block length }
              inc(l);
            end;
          hp := tai(hp.Previous);
        end;
    end;
  { Replaces stmfd/stmdb sp!,<regset> by the equivalent PUSH instruction,
    which has a short 16 bit Thumb encoding when the register set avoids
    r8..sp and pc (lr/r14 is allowed for PUSH).
    Returns true and leaves p pointing at the new PUSH when rewritten. }
  function TCpuThumb2AsmOptimizer.OptPass1STM(var p: tai): boolean;
    var
      hp : taicpu;
    begin
      result:=false;
      if MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
         { PUSH cannot encode r8..sp or pc }
         ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          DebugMsg('Peephole Stm2Push done', p);
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { free the replaced instruction; previously it was only unlinked,
            which leaked it (the LDM counterpart OptPass1LDM already frees) }
          p.Free;
          p:=hp;
          result:=true;
        end;
    end;
  { Replaces ldmfd/ldmia sp!,<regset> by the equivalent POP instruction,
    which has a short 16 bit Thumb encoding when the register set avoids
    r8..lr (pc/r15 is allowed for POP).
    Returns true and leaves p pointing at the new POP when rewritten. }
  function TCpuThumb2AsmOptimizer.OptPass1LDM(var p: tai): boolean;
    var
      popins : taicpu;
    begin
      result:=false;
      if not MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) then
        exit;
      { must be a writeback load from the stack pointer, and POP cannot
        encode r8..lr }
      if (taicpu(p).oper[0]^.ref^.addressmode<>AM_PREINDEXED) or
         (taicpu(p).oper[0]^.ref^.index<>NR_STACK_POINTER_REG) or
         ((taicpu(p).oper[1]^.regset^*[8..14])<>[]) then
        exit;
      DebugMsg('Peephole Ldm2Pop done', p);
      popins:=taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
      asml.InsertBefore(popins, p);
      asml.Remove(p);
      p.Free;
      p:=popins;
      result:=true;
    end;
  { Turns AND with the masks $ff/$ffff into the corresponding zero-extend:
      and reg,#mask        -> uxtb/uxth reg,reg
      and reg1,reg2,#mask  -> uxtb/uxth reg1,reg2
    Returns true when a rewrite was performed. }
  function TCpuThumb2AsmOptimizer.OptPass1AndThumb2(var p : tai) : boolean;
    begin
      result:=false;
      if not MatchInstruction(p, [A_AND], [], [PF_None]) then
        exit;
      { two-operand form: the destination doubles as source }
      if (taicpu(p).ops = 2) and
         (taicpu(p).oper[1]^.typ=top_const) and
         ((taicpu(p).oper[1]^.val=255) or (taicpu(p).oper[1]^.val=65535)) then
        begin
          DebugMsg('Peephole AndR2Uxt done', p);
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          { replace the mask with the source register }
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          result := true;
        end
      { three-operand form: simply drop the mask operand }
      else if (taicpu(p).ops = 3) and
         (taicpu(p).oper[2]^.typ=top_const) and
         ((taicpu(p).oper[2]^.val=255) or (taicpu(p).oper[2]^.val=65535)) then
        begin
          DebugMsg('Peephole AndRR2Uxt done', p);
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          taicpu(p).ops:=2;
          result := true;
        end;
    end;
  { Pass-1 dispatcher: run the generic ARM optimizations first, then the
    Thumb-2 specific rewrites for STM/LDM/AND. }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    begin
      result:=inherited PeepHoleOptPass1Cpu(p);
      if result or (p.typ<>ait_instruction) then
        exit;
      case taicpu(p).opcode of
        A_STM:
          result:=OptPass1STM(p);
        A_LDM:
          result:=OptPass1LDM(p);
        A_AND:
          result:=OptPass1AndThumb2(p);
        else
          ;
      end;
    end;
  { Pass 2: collapse a short conditional branch over up to four
    conditionalisable instructions into a Thumb-2 IT block:
      Bcc lbl ; ins1..insN ; lbl:   (N<=4)
    becomes
      IT{T*} inv(cc) ; ins1..insN, each with condition inv(cc) }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                            Bxx   xxx
                            <several instructions>
                         xxx:
                        }
                        { count how many consecutive instructions after the
                          branch could be made conditional; an IT block holds
                          at most 4 }
                        l:=0;
                        GetNextInstruction(p, hp1);
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                //hp1:=nil;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            { the skipped run must end exactly at the branch
                              target }
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    { give the skipped instructions the inverse
                                      of the branch condition }
                                    condition:=inverse_cond(taicpu(p).condition);
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
                                    { if the branch itself sat inside an IT
                                      block, shorten that block by one slot }
                                    DecrementPreceedingIT(asml, hp2);
                                    { emit an IT with one T per covered
                                      instruction in place of the branch }
                                    case l of
                                      1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                                      2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                                      3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                                      4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                                    end;
                                    { the target label loses the removed
                                      branch's reference }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end;
                          end;
                      end;
                  else
                    ;
                end;
              end;
            else
              ;
          end;
          p := tai(p.next)
        end;
    end;
  { Post pass: shrink 32 bit Thumb-2 instructions towards 16 bit encodings,
    mostly by switching to the flag-setting (…S) forms when the flags are
    dead, and by folding 3-operand forms into 2-operand ones. }
  function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    begin
      result:=false;
      if p.typ = ait_instruction then
        begin
          { mov reg,#0..255 -> movs reg,#imm8 (16 bit), flags must be dead }
          if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
            (taicpu(p).oper[1]^.typ=top_const) and
            (taicpu(p).oper[1]^.val >= 0) and
            (taicpu(p).oper[1]^.val < 256) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole Mov2Movs done', p);
              { mark the flags as allocated across this instruction }
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).oppostfix:=PF_S;
              result:=true;
            end
          { mvn reg1,reg2 -> mvns reg1,reg2 }
          else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole Mvn2Mvns done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).oppostfix:=PF_S;
              result:=true;
            end
          { rsb reg1,reg2,#0 -> rsbs reg1,reg2,#0 (16 bit NEG encoding) }
          else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
            (taicpu(p).ops = 3) and
            (taicpu(p).oper[2]^.typ=top_const) and
            (taicpu(p).oper[2]^.val=0) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole Rsb2Rsbs done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).oppostfix:=PF_S;
              result:=true;
            end
          { add/sub reg,reg,#0..255 -> adds/subs reg,#imm8; sp is excluded
            since the 16 bit form cannot target it }
          else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
            (taicpu(p).ops = 3) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
            (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
            (taicpu(p).oper[2]^.typ=top_const) and
            (taicpu(p).oper[2]^.val >= 0) and
            (taicpu(p).oper[2]^.val < 256) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole AddSub2*s done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              { move the constant into slot 1 before dropping slot 2 }
              taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
              taicpu(p).oppostfix:=PF_S;
              taicpu(p).ops := 2;
              result:=true;
            end
          { add/sub reg1,reg2 -> adds/subs reg1,reg2 }
          else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
            (taicpu(p).ops = 2) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
            (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole AddSub2*s done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).oppostfix:=PF_S;
              result:=true;
            end
          { add reg1,reg1,reg2 -> add reg1,reg2 (16 bit, no flags needed) }
          else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
            (taicpu(p).ops = 3) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
            (taicpu(p).oper[2]^.typ=top_reg) then
            begin
              DebugMsg('Peephole AddRRR2AddRR done', p);
              taicpu(p).ops := 2;
              taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
              result:=true;
            end
          { op reg1,reg1,reg2 -> ops reg1,reg2 }
          else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
            (taicpu(p).ops = 3) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
            (taicpu(p).oper[2]^.typ=top_reg) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole opXXY2opsXY done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).ops := 2;
              taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
              taicpu(p).oppostfix:=PF_S;
              result:=true;
            end
          { ops reg1,reg1,op2 -> ops reg1,op2 (already flag-setting) }
          else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
            (taicpu(p).ops = 3) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
            (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
            begin
              DebugMsg('Peephole opXXY2opXY done', p);
              taicpu(p).ops := 2;
              if taicpu(p).oper[2]^.typ=top_reg then
                taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
              else
                taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
              result:=true;
            end
          { op reg1,reg2,reg1 -> ops reg1,reg2; valid because these three
            operations are commutative }
          else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
            (taicpu(p).ops = 3) and
            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole opXYX2opsXY done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).oppostfix:=PF_S;
              taicpu(p).ops := 2;
              result:=true;
            end
          { mov reg1,reg2,<shift> -> lsls/lsrs/asrs/rors reg1,reg2,<amount> }
          else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
            (taicpu(p).ops=3) and
            (taicpu(p).oper[2]^.typ=top_shifterop) and
            (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
            //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
            begin
              DebugMsg('Peephole Mov2Shift done', p);
              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
              taicpu(p).oppostfix:=PF_S;
              case taicpu(p).oper[2]^.shifterop^.shiftmode of
                SM_LSL: taicpu(p).opcode:=A_LSL;
                SM_LSR: taicpu(p).opcode:=A_LSR;
                SM_ASR: taicpu(p).opcode:=A_ASR;
                SM_ROR: taicpu(p).opcode:=A_ROR;
                else
                  internalerror(2019050912);
              end;
              { the shifter operand is read before loadreg/loadconst replaces
                it (Pascal evaluates the argument first) }
              if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
                taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
              else
                taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
              result:=true;
            end
        end;
    end;
{ unit initialization: register this unit's peephole optimizer and
  pre-register-allocation scheduler classes with the compiler }
begin
  casmoptimizer:=TCpuAsmOptimizer;
  cpreregallocscheduler:=TCpuPreRegallocScheduler;
End.