aoptcpu.pas

  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai,
  25. aasmcpu,
  26. aopt, aoptobj, aoptarm;
  27. Type
  28. TCpuAsmOptimizer = class(TARMAsmOptimizer)
  29. { Can't be done in some cases due to the limited range of jumps }
  30. function CanDoJumpOpts: Boolean; override;
  31. { uses the same constructor as TAopObj }
  32. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  33. function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
  34. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  35. function RemoveSuperfluousVMov(const p : tai; movp : tai; const optimizer : string) : boolean;
  36. { gets the next tai object after Current that contains info relevant
  37. to the optimizer and that accesses the given reference or changes
  38. program flow.
  39. If there is none, it returns false and
  40. sets Next to nil }
  41. Function GetNextInstructionUsingRef(Current: tai; Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
  42. { outputs a debug message into the assembler file }
  43. procedure DebugMsg(const s: string; p: tai);
  44. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  45. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  46. { These routines contain optimisation code that is common to all ARM platforms }
  47. function OptPass1And(var p: tai): Boolean; override;
  48. function OptPass1LDR(var p: tai): Boolean; override;
  49. function OptPass1STR(var p: tai): Boolean; override;
  50. protected
  51. function LookForPreindexedPattern(p: taicpu): boolean;
  52. function LookForPostindexedPattern(p: taicpu): boolean;
  53. { Individual optimisation routines }
  54. function OptPass1DataCheckMov(var p: tai): Boolean;
  55. function OptPass1ADDSUB(var p: tai): Boolean;
  56. function OptPass1CMP(var p: tai): Boolean;
  57. function OptPass1STM(var p: tai): Boolean;
  58. function OptPass1MOV(var p: tai): Boolean;
  59. function OptPass1MUL(var p: tai): Boolean;
  60. function OptPass1MVN(var p: tai): Boolean;
  61. function OptPass1VMov(var p: tai): Boolean;
  62. function OptPass1VOp(var p: tai): Boolean;
  63. function OptPass2Bcc(var p: tai): Boolean;
  64. function OptPass2STM(var p: tai): Boolean;
  65. function OptPass2STR(var p: tai): Boolean;
  66. End;
  67. TCpuPreRegallocScheduler = class(TAsmScheduler)
  68. function SchedulerPass1Cpu(var p: tai): boolean;override;
  69. procedure SwapRegLive(p, hp1: taicpu);
  70. end;
  71. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  72. { uses the same constructor as TAopObj }
  73. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  74. procedure PeepHoleOptPass2;override;
  75. function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  76. protected
  77. function OptPass1AndThumb2(var p : tai) : boolean;
  78. function OptPass1LDM(var p : tai) : boolean;
  79. function OptPass1STM(var p : tai) : boolean;
  80. End;
  81. function MustBeLast(p : tai) : boolean;
  82. Implementation
  83. uses
  84. cutils,verbose,globtype,globals,
  85. systems,
  86. cpuinfo,
  87. cgobj,procinfo,
  88. aasmbase,aasmdata,
  89. aoptutils;
  90. { Range check must be disabled explicitly as conversions between signed and unsigned
  91. 32-bit values are done without explicit typecasts }
  92. {$R-}
  93. function CanBeCond(p : tai) : boolean;
  94. begin
  95. result:=
  96. not(GenerateThumbCode) and
  97. (p.typ=ait_instruction) and
  98. (taicpu(p).condition=C_None) and
  99. ((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
  100. (taicpu(p).opcode<>A_CBZ) and
  101. (taicpu(p).opcode<>A_CBNZ) and
  102. (taicpu(p).opcode<>A_PLD) and
  103. (((taicpu(p).opcode<>A_BLX) and
  104. { BL may need to be converted into BLX by the linker -- could possibly
  105. be allowed in case it's to a local symbol of which we know that it
  106. uses the same instruction set as the current one }
  107. (taicpu(p).opcode<>A_BL)) or
  108. (taicpu(p).oper[0]^.typ=top_reg));
  109. end;
  110. function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
  111. begin
  112. Result:=false;
  113. if (taicpu(movp).condition = C_EQ) and
  114. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  115. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  116. begin
  117. asml.insertafter(tai_comment.Create(strpnew('Peephole Optimization: CmpMovMov - Removed redundant moveq')), movp);
  118. asml.remove(movp);
  119. movp.free;
  120. Result:=true;
  121. end;
  122. end;
  123. function AlignedToQWord(const ref : treference) : boolean;
  124. begin
  125. { (safe) heuristics to ensure alignment }
  126. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  127. (((ref.offset>=0) and
  128. ((ref.offset mod 8)=0) and
  129. ((ref.base=NR_R13) or
  130. (ref.index=NR_R13))
  131. ) or
  132. ((ref.offset<=0) and
  133. { when using NR_R11, it always has a value of <qword align>+4 }
  134. ((abs(ref.offset+4) mod 8)=0) and
  135. (current_procinfo.framepointer=NR_R11) and
  136. ((ref.base=NR_R11) or
  137. (ref.index=NR_R11))
  138. )
  139. );
  140. end;
  141. function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
  142. begin
  143. if GenerateThumb2Code then
  144. result := (aoffset<4096) and (aoffset>-256)
  145. else
  146. result := ((pf in [PF_None,PF_B]) and
  147. (abs(aoffset)<4096)) or
  148. (abs(aoffset)<256);
  149. end;
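{ Illustration (assuming the usual ARM immediate-offset encodings): with PF_None,
  "ldr r0,[r1,#4092]" is encodable thanks to the 12-bit offset field, while with
  PF_H "ldrh r0,[r1,#4092]" is not, since halfword loads only carry an 8-bit
  offset (max #255); hence the 4096/256 limits above. }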
  150. function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  151. var
  152. p: taicpu;
  153. i: longint;
  154. begin
  155. instructionLoadsFromReg := false;
  156. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  157. exit;
  158. p:=taicpu(hp);
  159. i:=1;
  160. {For these instructions we have to start on oper[0]}
  161. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  162. A_CMP, A_CMN, A_TST, A_TEQ,
  163. A_B, A_BL, A_BX, A_BLX,
  164. A_SMLAL, A_UMLAL, A_VSTM, A_VLDM]) then i:=0;
  165. while(i<p.ops) do
  166. begin
  167. case p.oper[I]^.typ of
  168. top_reg:
  169. instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
  170. { STRD }
  171. ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
  172. top_regset:
  173. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  174. top_shifterop:
  175. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  176. top_ref:
  177. instructionLoadsFromReg :=
  178. (p.oper[I]^.ref^.base = reg) or
  179. (p.oper[I]^.ref^.index = reg);
  180. else
  181. ;
  182. end;
  183. if (i=0) and (p.opcode in [A_LDM,A_VLDM]) then
  184. exit;
  185. if instructionLoadsFromReg then
  186. exit; {Bailout if we found something}
  187. Inc(I);
  188. end;
  189. end;
  190. function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  191. var
  192. p: taicpu;
  193. begin
  194. Result := false;
  195. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  196. exit;
  197. p := taicpu(hp);
  198. case p.opcode of
  199. { These opcodes do not write to any register at all }
  200. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD,
  201. A_VCMP:
  202. exit;
  203. {Take care of post-/preindexed stores and loads, they change their base register}
  204. A_STR, A_LDR:
  205. begin
  206. Result := false;
  207. { actually, this does not apply here because post-/preindexed does not mean that a register
  208. is loaded with a new value, it is only modified
  209. (taicpu(p).oper[1]^.typ=top_ref) and
  210. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  211. (taicpu(p).oper[1]^.ref^.base = reg);
  212. }
  213. { STR does not load into its first register }
  214. if p.opcode = A_STR then
  215. exit;
  216. end;
  217. A_VSTR:
  218. begin
  219. Result := false;
  220. exit;
  221. end;
  222. { These four write into the first 2 registers; UMLAL and SMLAL will also read from them }
  223. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  224. Result :=
  225. (p.oper[1]^.typ = top_reg) and
  226. (p.oper[1]^.reg = reg);
  227. {Loads to oper2 from coprocessor}
  228. {
  229. MCR/MRC is currently not supported in FPC
  230. A_MRC:
  231. Result :=
  232. (p.oper[2]^.typ = top_reg) and
  233. (p.oper[2]^.reg = reg);
  234. }
  235. {Loads to all registers in the register set}
  236. A_LDM, A_VLDM:
  237. Result := (getsupreg(reg) in p.oper[1]^.regset^);
  238. A_POP:
  239. Result := (getsupreg(reg) in p.oper[0]^.regset^) or
  240. (reg=NR_STACK_POINTER_REG);
  241. else
  242. ;
  243. end;
  244. if Result then
  245. exit;
  246. case p.oper[0]^.typ of
  247. {The general case: the instruction writes to the register in its first operand}
  248. top_reg:
  249. Result := (p.oper[0]^.reg = reg) or
  250. { LDRD }
  251. (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
  252. {LDM/STM might write a new value to their index register}
  253. top_ref:
  254. Result :=
  255. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  256. (taicpu(p).oper[0]^.ref^.base = reg);
  257. else
  258. ;
  259. end;
  260. end;
  261. function TCpuAsmOptimizer.GetNextInstructionUsingRef(Current: tai;
  262. Out Next: tai; const ref: TReference; StopOnStore: Boolean = true): Boolean;
  263. begin
  264. Next:=Current;
  265. repeat
  266. Result:=GetNextInstruction(Next,Next);
  267. if Result and
  268. (Next.typ=ait_instruction) and
  269. (taicpu(Next).opcode in [A_LDR, A_STR]) and
  270. (
  271. ((taicpu(Next).ops = 2) and
  272. (taicpu(Next).oper[1]^.typ = top_ref) and
  273. RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
  274. ((taicpu(Next).ops = 3) and { LDRD/STRD }
  275. (taicpu(Next).oper[2]^.typ = top_ref) and
  276. RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
  277. ) then
  278. {We've found an instruction LDR or STR with the same reference}
  279. exit;
  280. until not(Result) or
  281. (Next.typ<>ait_instruction) or
  282. not(cs_opt_level3 in current_settings.optimizerswitches) or
  283. is_calljmp(taicpu(Next).opcode) or
  284. (StopOnStore and (taicpu(Next).opcode in [A_STR, A_STM])) or
  285. RegModifiedByInstruction(NR_PC,Next);
  286. Result:=false;
  287. end;
  288. {$ifdef DEBUG_AOPTCPU}
  289. const
  290. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  291. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
  292. begin
  293. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  294. end;
  295. {$else DEBUG_AOPTCPU}
  296. const
  297. SPeepholeOptimization = '';
  298. procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  299. begin
  300. end;
  301. {$endif DEBUG_AOPTCPU}
  302. function TCpuAsmOptimizer.CanDoJumpOpts: Boolean;
  303. begin
  304. { These jump optimisations cannot be performed when generating 16-bit Thumb code (Thumb without Thumb-2) }
  305. Result := not (
  306. (current_settings.instructionset = is_thumb) and not (CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype])
  307. );
  308. end;
  309. function TCpuAsmOptimizer.RemoveSuperfluousVMov(const p: tai; movp: tai; const optimizer: string):boolean;
  310. var
  311. alloc,
  312. dealloc : tai_regalloc;
  313. hp1 : tai;
  314. begin
  315. Result:=false;
  316. if ((MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  317. ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) or (taicpu(p).opcode=A_VLDR))
  318. ) or
  319. (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
  320. (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32]))
  321. ) and
  322. (taicpu(movp).ops=2) and
  323. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  324. { the destination register of the mov must not be used between p and movp }
  325. not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
  326. { Take care to only do this for instructions which REALLY load to the first register.
  327. Otherwise
  328. vstr reg0, [reg1]
  329. vmov reg2, reg0
  330. will be optimized to
  331. vstr reg2, [reg1]
  332. }
  333. regLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
  334. begin
  335. dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
  336. if assigned(dealloc) then
  337. begin
  338. DebugMsg(SPeepholeOptimization + optimizer + ' removed superfluous vmov', movp);
  339. result:=true;
  340. { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
  341. and remove it if possible }
  342. asml.Remove(dealloc);
  343. alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
  344. if assigned(alloc) then
  345. begin
  346. asml.Remove(alloc);
  347. alloc.free;
  348. dealloc.free;
  349. end
  350. else
  351. asml.InsertAfter(dealloc,p);
  352. { try to move the allocation of the target register }
  353. GetLastInstruction(movp,hp1);
  354. alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
  355. if assigned(alloc) then
  356. begin
  357. asml.Remove(alloc);
  358. asml.InsertBefore(alloc,p);
  359. { adjust used regs }
  360. IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
  361. end;
  362. { change
  363. vldr reg0,[reg1]
  364. vmov reg2,reg0
  365. into
  366. ldr reg2,[reg1]
  367. if reg2 is an int register
  368. }
  369. if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
  370. taicpu(p).opcode:=A_LDR;
  371. { finally get rid of the mov }
  372. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  373. asml.remove(movp);
  374. movp.free;
  375. end;
  376. end;
  377. end;
  378. {
  379. optimize
  380. add/sub reg1,reg1,regY/const
  381. ...
  382. ldr/str regX,[reg1]
  383. into
  384. ldr/str regX,[reg1, regY/const]!
  385. }
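{ For illustration (arbitrary registers/constants):
    add r1,r1,#8
    ...
    ldr r0,[r1]
  becomes
    ldr r0,[r1,#8]!
  with the add itself removed by the caller. }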
  386. function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
  387. var
  388. hp1: tai;
  389. begin
  390. if GenerateARMCode and
  391. (p.ops=3) and
  392. MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
  393. GetNextInstructionUsingReg(p, hp1, p.oper[0]^.reg) and
  394. (not RegModifiedBetween(p.oper[0]^.reg, p, hp1)) and
  395. MatchInstruction(hp1, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) and
  396. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  397. (taicpu(hp1).oper[1]^.ref^.base=p.oper[0]^.reg) and
  398. (taicpu(hp1).oper[0]^.reg<>p.oper[0]^.reg) and
  399. (taicpu(hp1).oper[1]^.ref^.offset=0) and
  400. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  401. (((p.oper[2]^.typ=top_reg) and
  402. (not RegModifiedBetween(p.oper[2]^.reg, p, hp1))) or
  403. ((p.oper[2]^.typ=top_const) and
  404. ((abs(p.oper[2]^.val) < 256) or
  405. ((abs(p.oper[2]^.val) < 4096) and
  406. (taicpu(hp1).oppostfix in [PF_None,PF_B]))))) then
  407. begin
  408. taicpu(hp1).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
  409. if p.oper[2]^.typ=top_reg then
  410. begin
  411. taicpu(hp1).oper[1]^.ref^.index:=p.oper[2]^.reg;
  412. if p.opcode=A_ADD then
  413. taicpu(hp1).oper[1]^.ref^.signindex:=1
  414. else
  415. taicpu(hp1).oper[1]^.ref^.signindex:=-1;
  416. end
  417. else
  418. begin
  419. if p.opcode=A_ADD then
  420. taicpu(hp1).oper[1]^.ref^.offset:=p.oper[2]^.val
  421. else
  422. taicpu(hp1).oper[1]^.ref^.offset:=-p.oper[2]^.val;
  423. end;
  424. result:=true;
  425. end
  426. else
  427. result:=false;
  428. end;
  429. {
  430. optimize
  431. ldr/str regX,[reg1]
  432. ...
  433. add/sub reg1,reg1,regY/const
  434. into
  435. ldr/str regX,[reg1], regY/const
  436. }
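{ For illustration (arbitrary registers/constants):
    ldr r0,[r1]
    ...
    add r1,r1,#4
  becomes
    ldr r0,[r1],#4 }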
  437. function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
  438. var
  439. hp1 : tai;
  440. begin
  441. Result:=false;
  442. if (p.oper[1]^.typ = top_ref) and
  443. (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
  444. (p.oper[1]^.ref^.index=NR_NO) and
  445. (p.oper[1]^.ref^.offset=0) and
  446. GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
  447. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  448. MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
  449. (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
  450. (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
  451. (
  452. (taicpu(hp1).oper[2]^.typ=top_reg) or
  453. { valid offset? }
  454. ((taicpu(hp1).oper[2]^.typ=top_const) and
  455. ((abs(taicpu(hp1).oper[2]^.val)<256) or
  456. ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
  457. )
  458. )
  459. ) and
  460. { don't apply the optimization if the base register is loaded }
  461. (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
  462. not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
  463. { don't apply the optimization if the (new) index register is loaded }
  464. (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
  465. not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
  466. GenerateARMCode then
  467. begin
  468. DebugMsg(SPeepholeOptimization + 'Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
  469. p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
  470. if taicpu(hp1).oper[2]^.typ=top_const then
  471. begin
  472. if taicpu(hp1).opcode=A_ADD then
  473. p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
  474. else
  475. p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
  476. end
  477. else
  478. begin
  479. p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
  480. if taicpu(hp1).opcode=A_ADD then
  481. p.oper[1]^.ref^.signindex:=1
  482. else
  483. p.oper[1]^.ref^.signindex:=-1;
  484. end;
  485. asml.Remove(hp1);
  486. hp1.Free;
  487. Result:=true;
  488. end;
  489. end;
  490. function TCpuAsmOptimizer.OptPass1ADDSUB(var p: tai): Boolean;
  491. var
  492. hp1,hp2: tai;
  493. sign: Integer;
  494. newvalue: TCGInt;
  495. b: byte;
  496. begin
  497. Result := OptPass1DataCheckMov(p);
  498. {
  499. change
  500. add/sub reg2,reg1,const1
  501. str/ldr reg3,[reg2,const2]
  502. dealloc reg2
  503. to
  504. str/ldr reg3,[reg1,const2+/-const1]
  505. }
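{ For illustration (arbitrary registers/constants):
    add r2,r1,#16
    ldr r3,[r2,#4]
    <dealloc r2>
  becomes
    ldr r3,[r1,#20] }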
  506. if (not GenerateThumbCode) and
  507. (taicpu(p).ops>2) and
  508. (taicpu(p).oper[1]^.typ = top_reg) and
  509. (taicpu(p).oper[2]^.typ = top_const) then
  510. begin
  511. hp1:=p;
  512. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  513. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  514. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  515. (taicpu(hp1).oper[1]^.typ = top_ref) and
  516. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  517. { don't optimize if the register is stored/overwritten }
  518. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  519. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  520. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  521. { the new offset must be valid: either in the 8-bit or 12-bit range, depending on the
  522. ldr postfix }
  523. (((taicpu(p).opcode=A_ADD) and
  524. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  525. ) or
  526. ((taicpu(p).opcode=A_SUB) and
  527. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  528. )
  529. ) do
  530. begin
  531. { neither reg1 nor reg2 may be changed in between }
  532. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  533. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  534. break;
  535. { reg2 must either be overwritten by the ldr or be deallocated afterwards }
  536. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  537. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  538. begin
  539. { remember last instruction }
  540. hp2:=hp1;
  541. DebugMsg(SPeepholeOptimization + 'Add/SubLdr2Ldr done', p);
  542. hp1:=p;
  543. { fix all ldr/str }
  544. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  545. begin
  546. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  547. if taicpu(p).opcode=A_ADD then
  548. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  549. else
  550. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  551. if hp1=hp2 then
  552. break;
  553. end;
  554. RemoveCurrentP(p);
  555. result:=true;
  556. Exit;
  557. end;
  558. end;
  559. end;
  560. {
  561. optimize
  562. add/sub rx,ry,const1
  563. add/sub rx,rx,const2
  564. into
  565. add/sub rx,ry,const1+/-const2
  566. this is only done if the first operation has no postfix and no condition
  567. }
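{ For illustration: "add r0,r1,#16" followed by "add r0,r0,#8" becomes
  "add r0,r1,#24". }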
  568. if MatchInstruction(p,[A_ADD,A_SUB],[C_None],[PF_None]) and
  569. MatchOptype(taicpu(p),top_reg,top_reg,top_const) and
  570. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  571. MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
  572. MatchOptype(taicpu(hp1),top_reg,top_reg,top_const) and
  573. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
  574. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  575. begin
  576. sign:=1;
  577. if (taicpu(p).opcode=A_SUB) xor (taicpu(hp1).opcode=A_SUB) then
  578. sign:=-1;
  579. newvalue:=taicpu(p).oper[2]^.val+sign*taicpu(hp1).oper[2]^.val;
  580. if (not(GenerateThumbCode) and is_shifter_const(newvalue,b)) or
  581. (GenerateThumbCode and is_thumb_imm(newvalue)) then
  582. begin
  583. DebugMsg(SPeepholeOptimization + 'Merge Add/Sub done', p);
  584. taicpu(p).oper[2]^.val:=newvalue;
  585. RemoveInstruction(hp1);
  586. Result:=true;
  587. if newvalue=0 then
  588. begin
  589. RemoveCurrentP(p);
  590. Exit;
  591. end;
  592. end;
  593. end;
  594. if (taicpu(p).condition = C_None) and
  595. (taicpu(p).oppostfix = PF_None) and
  596. LookForPreindexedPattern(taicpu(p)) then
  597. begin
  598. DebugMsg(SPeepholeOptimization + 'Add/Sub to Preindexed done', p);
  599. RemoveCurrentP(p);
  600. Result:=true;
  601. Exit;
  602. end;
  603. end;
  604. function TCpuAsmOptimizer.OptPass1MUL(var p: tai): Boolean;
  605. var
  606. hp1: tai;
  607. oldreg: tregister;
  608. begin
  609. Result := OptPass1DataCheckMov(p);
  610. {
  611. Turn
  612. mul reg0, z,w
  613. sub/add x, y, reg0
  614. dealloc reg0
  615. into
  616. mls/mla x,z,w,y
  617. }
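{ For illustration (arbitrary registers):
    mul r0,r2,r3
    add r4,r5,r0
    <dealloc r0>
  becomes
    mla r4,r2,r3,r5   i.e. r4 := r2*r3 + r5 }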
  618. if (taicpu(p).condition = C_None) and
  619. (taicpu(p).oppostfix = PF_None) and
  620. (taicpu(p).ops=3) and
  621. (taicpu(p).oper[0]^.typ = top_reg) and
  622. (taicpu(p).oper[1]^.typ = top_reg) and
  623. (taicpu(p).oper[2]^.typ = top_reg) and
  624. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  625. MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
  626. (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
  627. (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
  628. (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
  629. ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
  630. // On CPUs before ARMv6, MLA should not use the same register for Rd and Rm.
  631. // TODO: A workaround would be to swap Rm and Rs
  632. (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
  633. (((taicpu(hp1).ops=3) and
  634. (taicpu(hp1).oper[2]^.typ=top_reg) and
  635. ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
  636. (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
  637. ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  638. (taicpu(hp1).opcode=A_ADD) and
  639. (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
  640. ((taicpu(hp1).ops=2) and
  641. (taicpu(hp1).oper[1]^.typ=top_reg) and
  642. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  643. (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
  644. begin
  645. if taicpu(hp1).opcode=A_ADD then
  646. begin
  647. taicpu(hp1).opcode:=A_MLA;
  648. if taicpu(hp1).ops=3 then
  649. begin
  650. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
  651. oldreg:=taicpu(hp1).oper[2]^.reg
  652. else
  653. oldreg:=taicpu(hp1).oper[1]^.reg;
  654. end
  655. else
  656. oldreg:=taicpu(hp1).oper[0]^.reg;
  657. taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
  658. taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
  659. taicpu(hp1).loadreg(3,oldreg);
  660. DebugMsg(SPeepholeOptimization + 'MulAdd2MLA done', p);
  661. end
  662. else
  663. begin
  664. taicpu(hp1).opcode:=A_MLS;
  665. taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
  666. if taicpu(hp1).ops=2 then
  667. taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
  668. else
  669. taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
  670. taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
  671. DebugMsg(SPeepholeOptimization + 'MulSub2MLS done', p);
  672. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
  673. AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
  674. AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
  675. end;
  676. taicpu(hp1).ops:=4;
  677. RemoveCurrentP(p);
  678. Result := True;
  679. Exit;
  680. end
  681. end;
  682. function TCpuAsmOptimizer.OptPass1And(var p: tai): Boolean;
  683. begin
  684. Result := OptPass1DataCheckMov(p);
  685. Result := inherited OptPass1And(p) or Result;
  686. end;
  687. function TCpuAsmOptimizer.OptPass1DataCheckMov(var p: tai): Boolean;
  688. var
  689. hp1: tai;
  690. begin
  691. {
  692. change
  693. op reg1, ...
  694. mov reg2, reg1
  695. to
  696. op reg2, ...
  697. }
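{ For illustration: "add r1,r2,r3" followed by "mov r4,r1" (with r1 released
  afterwards) becomes "add r4,r2,r3". }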
  698. Result := (taicpu(p).ops >= 3) and
  699. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  700. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  701. end;
  702. function TCpuAsmOptimizer.OptPass1CMP(var p: tai): Boolean;
  703. var
  704. hp1, hp2, hp_last: tai;
  705. MovRem1, MovRem2: Boolean;
  706. begin
  707. Result := False;
  708. { These optimizations can be applied only to the currently enabled operations because
  709. the other operations do not update all flags and FPC does not track flag usage }
  710. if (taicpu(p).condition = C_None) and
  711. (taicpu(p).oper[1]^.typ = top_const) and
  712. GetNextInstruction(p, hp1) then
  713. begin
  714. {
  715. change
  716. cmp reg,const1
  717. moveq reg,const1
  718. movne reg,const2
  719. to
  720. cmp reg,const1
  721. movne reg,const2
  722. }
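{ For illustration:
    cmp r0,#1
    moveq r0,#1
    movne r0,#0
  becomes
    cmp r0,#1
    movne r0,#0
  since the moveq would only rewrite the value r0 is already known to hold. }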
  723. if MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  724. (taicpu(hp1).oper[1]^.typ = top_const) and
  725. GetNextInstruction(hp1, hp2) and
  726. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  727. (taicpu(hp2).oper[1]^.typ = top_const) then
  728. begin
  729. MovRem1 := RemoveRedundantMove(p, hp1, asml);
  730. MovRem2 := RemoveRedundantMove(p, hp2, asml);
  731. Result:= MovRem1 or MovRem2;
  732. { Make sure that hp1 is still the next instruction after p }
  733. if MovRem1 then
  734. if MovRem2 then
  735. begin
  736. if not GetNextInstruction(p, hp1) then
  737. Exit;
  738. end
  739. else
  740. hp1 := hp2;
  741. end;
  742. {
  743. change
  744. <op> reg,x,y
  745. cmp reg,#0
  746. into
  747. <op>s reg,x,y
  748. }
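{ For illustration: "add r0,r1,r2" followed by "cmp r0,#0" and "beq .Lx"
  becomes "adds r0,r1,r2" followed by "beq .Lx", since the S-form already
  sets Z and N from the result (.Lx is an arbitrary label). }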
  749. if (taicpu(p).oppostfix = PF_None) and
  750. (taicpu(p).oper[1]^.val = 0) and
  751. { be careful here: the following instructions could use other flags,
  752. but after a jump FPC never depends on the value of the flags }
  753. { All above instructions set Z and N according to the following
  754. Z := result = 0;
  755. N := result[31];
  756. EQ = Z=1; NE = Z=0;
  757. MI = N=1; PL = N=0; }
  758. (MatchInstruction(hp1, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
  759. { mov is also possible, but only if there is no shifter operand, it could be an rxx,
  760. we are too lazy to check if it is rxx or something else }
  761. (MatchInstruction(hp1, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp1).ops=2))) and
  762. GetLastInstruction(p, hp_last) and
  763. MatchInstruction(hp_last, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,
  764. A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  765. (
  766. { mlas is only allowed in arm mode }
  767. (taicpu(hp_last).opcode<>A_MLA) or
  768. (current_settings.instructionset<>is_thumb)
  769. ) and
  770. (taicpu(hp_last).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
  771. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp1.Next))) then
  772. begin
  773. DebugMsg(SPeepholeOptimization + 'OpCmp2OpS done', hp_last);
  774. taicpu(hp_last).oppostfix:=PF_S;
  775. { move flag allocation if possible }
  776. hp1:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp_last.Next));
  777. if assigned(hp1) then
  778. begin
  779. asml.Remove(hp1);
  780. asml.insertbefore(hp1, hp_last);
  781. end;
  782. RemoveCurrentP(p);
  783. Result:=true;
  784. end;
  785. end;
  786. end;
  787. function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
  788. var
  789. hp1: tai;
  790. begin
  791. Result := inherited OptPass1LDR(p);
  792. if Result then
  793. Exit;
  794. { change
  795. ldr reg1,ref
  796. ldr reg2,ref
  797. into ...
  798. }
  799. if (taicpu(p).oper[1]^.typ = top_ref) and
  800. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  801. GetNextInstruction(p,hp1) and
  802. { ldrd is not allowed here }
  803. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  804. begin
  805. {
  806. ...
  807. ldr reg1,ref
  808. mov reg2,reg1
  809. }
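{ For illustration: "ldr r0,[r3,#8]" followed by "ldr r1,[r3,#8]" becomes
  "ldr r0,[r3,#8]" followed by "mov r1,r0" (or the second ldr is simply
  dropped when both load into the same register). }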
  810. if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
  811. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  812. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  813. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  814. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  815. begin
  816. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  817. begin
  818. DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldr done', hp1);
  819. asml.remove(hp1);
  820. hp1.free;
  821. end
  822. else
  823. begin
  824. DebugMsg(SPeepholeOptimization + 'LdrLdr2LdrMov done', hp1);
  825. taicpu(hp1).opcode:=A_MOV;
  826. taicpu(hp1).oppostfix:=PF_None;
  827. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  828. end;
  829. result := true;
  830. end
  831. {
  832. ...
  833. ldrd reg1,reg1+1,ref
  834. }
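{ For illustration (even/odd register pair, 8-byte aligned reference):
    ldr r4,[r13,#8]
    ldr r5,[r13,#12]
  becomes
    ldrd r4,r5,[r13,#8] }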
  835. else if (GenerateARMCode or GenerateThumb2Code) and
  836. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  837. { ldrd does not allow any postfixes ... }
  838. (taicpu(p).oppostfix=PF_None) and
  839. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  840. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  841. { ldr ensures that either base or index contains no register, else ldr wouldn't
  842. use an offset either
  843. }
  844. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  845. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  846. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  847. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  848. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  849. begin
  850. DebugMsg(SPeepholeOptimization + 'LdrLdr2Ldrd done', p);
  851. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  852. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  853. taicpu(p).ops:=3;
  854. taicpu(p).oppostfix:=PF_D;
  855. asml.remove(hp1);
  856. hp1.free;
  857. result:=true;
  858. end;
  859. end;
  860. {
  861. Change
  862. ldrb dst1, [REF]
  863. and dst2, dst1, #255
  864. into
  865. ldrb dst2, [ref]
  866. }
  867. if not(GenerateThumbCode) and
  868. (taicpu(p).oppostfix=PF_B) and
  869. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  870. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
  871. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
  872. (taicpu(hp1).oper[2]^.typ = top_const) and
  873. (taicpu(hp1).oper[2]^.val = $FF) and
  874. not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
  875. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
  876. begin
  877. DebugMsg(SPeepholeOptimization + 'LdrbAnd2Ldrb done', p);
  878. taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
  879. asml.remove(hp1);
  880. hp1.free;
  881. result:=true;
  882. end;
  883. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  884. { Remove superfluous mov after ldr
  885. changes
  886. ldr reg1, ref
  887. mov reg2, reg1
  888. to
  889. ldr reg2, ref
  890. conditions are:
  891. * no ldrd usage
  892. * reg1 must be released after mov
  893. * mov can not contain shifterops
  894. * ldr+mov have the same conditions
  895. * mov does not set flags
  896. }
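{ For illustration: "ldr r0,[r2,#4]" followed by "mov r1,r0" (with r0
  released after the mov) becomes "ldr r1,[r2,#4]". }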
  897. if (taicpu(p).oppostfix<>PF_D) and
  898. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  899. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
  900. Result:=true;
  901. end;
  902. function TCpuAsmOptimizer.OptPass1STM(var p: tai): Boolean;
  903. var
  904. hp1, hp2, hp3, hp4: tai;
  905. begin
  906. Result := False;
  907. {
  908. change
  909. stmfd r13!,[r14]
  910. sub r13,r13,#4
  911. bl abc
  912. add r13,r13,#4
  913. ldmfd r13!,[r15]
  914. into
  915. b abc
  916. }
  917. if not(ts_thumb_interworking in current_settings.targetswitches) and
  918. (taicpu(p).condition = C_None) and
  919. (taicpu(p).oppostfix = PF_FD) and
  920. (taicpu(p).oper[0]^.typ = top_ref) and
  921. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  922. (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  923. (taicpu(p).oper[0]^.ref^.offset=0) and
  924. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  925. (taicpu(p).oper[1]^.typ = top_regset) and
  926. (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
  927. GetNextInstruction(p, hp1) and
  928. MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
  929. (taicpu(hp1).oper[0]^.typ = top_reg) and
  930. (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
  931. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
  932. (taicpu(hp1).oper[2]^.typ = top_const) and
  933. GetNextInstruction(hp1, hp2) and
  934. SkipEntryExitMarker(hp2, hp2) and
  935. MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
  936. (taicpu(hp2).oper[0]^.typ = top_ref) and
  937. GetNextInstruction(hp2, hp3) and
  938. SkipEntryExitMarker(hp3, hp3) and
  939. MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
  940. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
  941. MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
  942. MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
  943. GetNextInstruction(hp3, hp4) and
  944. MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
  945. MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
  946. (taicpu(hp4).oper[1]^.typ = top_regset) and
  947. (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
  948. begin
  949. asml.Remove(hp1);
  950. asml.Remove(hp3);
  951. asml.Remove(hp4);
  952. taicpu(hp2).opcode:=A_B;
  953. hp1.free;
  954. hp3.free;
  955. hp4.free;
  956. RemoveCurrentp(p, hp2);
  957. DebugMsg(SPeepholeOptimization + 'Bl2B done', p);
  958. Result := True;
  959. end;
  960. end;
  961. function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
  962. var
  963. hp1: tai;
  964. begin
  965. Result := inherited OptPass1STR(p);
  966. if Result then
  967. Exit;
  968. { Common conditions }
  969. if (taicpu(p).oper[1]^.typ = top_ref) and
  970. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  971. (taicpu(p).oppostfix=PF_None) then
  972. begin
  973. { change
  974. str reg1,ref
  975. ldr reg2,ref
  976. into
  977. str reg1,ref
  978. mov reg2,reg1
  979. }
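{ For illustration: "str r0,[r4,#8]" later followed by "ldr r1,[r4,#8]"
  (with r0 and the reference unchanged in between) becomes
  "str r0,[r4,#8]" ... "mov r1,r0". }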
  980. if (taicpu(p).condition=C_None) and
  981. GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
  982. MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
  983. (taicpu(hp1).oper[1]^.typ=top_ref) and
  984. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  985. not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
  986. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
  987. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
  988. begin
  989. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  990. begin
  991. DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 1 done', hp1);
  992. asml.remove(hp1);
  993. hp1.free;
  994. end
  995. else
  996. begin
  997. taicpu(hp1).opcode:=A_MOV;
  998. taicpu(hp1).oppostfix:=PF_None;
  999. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1000. DebugMsg(SPeepholeOptimization + 'StrLdr2StrMov 2 done', hp1);
  1001. end;
  1002. result := True;
  1003. end
  1004. { change
  1005. str reg1,ref
  1006. str reg2,ref
  1007. into
  1008. strd reg1,reg2,ref
  1009. }
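{ For illustration (even/odd register pair, 8-byte aligned reference):
    str r2,[r13,#16]
    str r3,[r13,#20]
  becomes
    strd r2,r3,[r13,#16] }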
  1010. else if (GenerateARMCode or GenerateThumb2Code) and
  1011. (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  1012. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  1013. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  1014. AlignedToQWord(taicpu(p).oper[1]^.ref^) and
  1015. GetNextInstruction(p,hp1) and
  1016. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  1017. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  1018. { str ensures that either base or index contains no register, else str wouldn't
  1019. use an offset either
  1020. }
  1021. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1022. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  1023. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) then
  1024. begin
  1025. DebugMsg(SPeepholeOptimization + 'StrStr2Strd done', p);
  1026. taicpu(p).oppostfix:=PF_D;
  1027. taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
  1028. taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
  1029. taicpu(p).ops:=3;
  1030. asml.remove(hp1);
  1031. hp1.free;
  1032. result:=true;
  1033. end;
  1034. end;
  1035. Result:=LookForPostindexedPattern(taicpu(p)) or Result;
  1036. end;
  1037. function TCpuAsmOptimizer.OptPass1MOV(var p: tai): Boolean;
  1038. var
  1039. hp1, hpfar1, hp2: tai;
  1040. i, i2: longint;
  1041. tempop: tasmop;
  1042. dealloc: tai_regalloc;
  1043. begin
  1044. Result := False;
  1045. hp1 := nil;
  1046. { fold
  1047. mov reg1,reg0, shift imm1
  1048. mov reg1,reg1, shift imm2
  1049. }
  1050. if (taicpu(p).ops=3) and
  1051. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1052. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1053. getnextinstruction(p,hp1) and
  1054. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  1055. (taicpu(hp1).ops=3) and
  1056. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  1057. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1058. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  1059. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  1060. begin
  1061. { fold
  1062. mov reg1,reg0, lsl 16
  1063. mov reg1,reg1, lsr 16
  1064. strh reg1, ...
  1065. dealloc reg1
  1066. to
  1067. strh reg1, ...
  1068. dealloc reg1
  1069. }
  1070. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1071. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  1072. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  1073. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  1074. getnextinstruction(hp1,hp2) and
  1075. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  1076. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  1077. begin
  1078. TransferUsedRegs(TmpUsedRegs);
  1079. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1080. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1081. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  1082. begin
  1083. DebugMsg(SPeepholeOptimization + 'Removed superfluous 16 Bit zero extension', hp1);
  1084. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  1085. asml.remove(hp1);
  1086. hp1.free;
  1087. RemoveCurrentP(p, hp2);
  1088. Result:=true;
  1089. Exit;
  1090. end;
  1091. end
  1092. { fold
  1093. mov reg1,reg0, shift imm1
  1094. mov reg1,reg1, shift imm2
  1095. to
  1096. mov reg1,reg0, shift imm1+imm2
  1097. }
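{ For illustration: "mov r0,r1,lsr #4" followed by "mov r0,r0,lsr #8"
  becomes "mov r0,r1,lsr #12"; combined shift amounts above 31 are clamped
  or turned into "mov r0,#0" depending on the shift mode, as handled below. }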
  1098. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  1099. { an asr has no effect after an lsr (the sign bit is already clear), so the asr can be folded into the lsr }
  1100. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  1101. begin
  1102. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1103. { avoid overflows }
  1104. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  1105. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  1106. SM_ROR:
  1107. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  1108. SM_ASR:
  1109. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  1110. SM_LSR,
  1111. SM_LSL:
  1112. begin
  1113. hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  1114. InsertLLItem(p.previous, p.next, hp2);
  1115. p.free;
  1116. p:=hp2;
  1117. end;
  1118. else
  1119. internalerror(2008072803);
  1120. end;
  1121. DebugMsg(SPeepholeOptimization + 'ShiftShift2Shift 1 done', p);
  1122. asml.remove(hp1);
  1123. hp1.free;
  1124. hp1 := nil;
  1125. result := true;
  1126. end
  1127. { fold
  1128. mov reg1,reg0, shift imm1
  1129. mov reg1,reg1, shift imm2
  1130. mov reg1,reg1, shift imm3 ...
  1131. mov reg2,reg1, shift imm3 ...
  1132. }
  1133. else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
  1134. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1135. (taicpu(hp2).ops=3) and
  1136. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  1137. RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
  1138. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  1139. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  1140. begin
  1141. { mov reg1,reg0, lsl imm1
  1142. mov reg1,reg1, lsr/asr imm2
  1143. mov reg2,reg1, lsl imm3 ...
  1144. to
  1145. mov reg1,reg0, lsl imm1
  1146. mov reg2,reg1, lsr/asr imm2-imm3
  1147. if
  1148. imm1>=imm2
  1149. }
  1150. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1151. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1152. (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1153. begin
  1154. if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
  1155. begin
  1156. if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
  1157. not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1158. begin
  1159. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1a done', p);
  1160. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  1161. taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1162. asml.remove(hp1);
  1163. asml.remove(hp2);
  1164. hp1.free;
  1165. hp2.free;
  1166. if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
  1167. begin
  1168. taicpu(p).freeop(1);
  1169. taicpu(p).freeop(2);
  1170. taicpu(p).loadconst(1,0);
  1171. end;
  1172. result := true;
  1173. Exit;
  1174. end;
  1175. end
  1176. else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
  1177. begin
  1178. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 1b done', p);
  1179. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  1180. taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
  1181. asml.remove(hp2);
  1182. hp2.free;
  1183. result := true;
  1184. Exit;
  1185. end;
  1186. end
  1187. { mov reg1,reg0, lsr/asr imm1
  1188. mov reg1,reg1, lsl imm2
  1189. mov reg1,reg1, lsr/asr imm3 ...
  1190. if imm3>=imm1 and imm2>=imm1
  1191. to
  1192. mov reg1,reg0, lsl imm2-imm1
  1193. mov reg1,reg1, lsr/asr imm3 ...
  1194. }
  1195. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  1196. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  1197. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  1198. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1199. begin
  1200. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  1201. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  1202. DebugMsg(SPeepholeOptimization + 'ShiftShiftShift2ShiftShift 2 done', p);
  1203. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  1204. begin
  1205. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  1206. asml.remove(hp1);
  1207. hp1.free;
  1208. end;
  1209. RemoveCurrentp(p);
  1210. result := true;
  1211. Exit;
  1212. end;
  1213. end;
  1214. end;
  1215. { All the optimisations from this point on require GetNextInstructionUsingReg
  1216. to return True }
  1217. while (
  1218. GetNextInstructionUsingReg(p, hpfar1, taicpu(p).oper[0]^.reg) and
  1219. (hpfar1.typ = ait_instruction)
  1220. ) do
  1221. begin
  1222. { Change the common
  1223. mov r0, r0, lsr #xxx
  1224. and r0, r0, #yyy/bic r0, r0, #xxx
  1225. and remove the superfluous and/bic if possible
  1226. This could be extended to handle more cases.
  1227. }
  1228. { Change
  1229. mov rx, ry, lsr/ror #xxx
  1230. uxtb/uxth rz,rx/and rz,rx,0xFF
  1231. dealloc rx
  1232. to
  1233. uxtb/uxth rz,ry,ror #xxx
  1234. }
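{ Illustrative example (registers chosen for illustration only):
mov r1, r2, lsr #8
uxtb r0, r1
can become
uxtb r0, r2, ror #8
because uxtb only reads bits 0..7 of the rotated value, and for the
allowed shift amounts lsr and ror place the same byte there. }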
  1235. if (GenerateThumb2Code) and
  1236. (taicpu(p).ops=3) and
  1237. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1238. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1239. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
  1240. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1241. begin
  1242. if MatchInstruction(hpfar1, A_UXTB, [C_None], [PF_None]) and
  1243. (taicpu(hpfar1).ops = 2) and
  1244. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1245. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1246. begin
  1247. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1248. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1249. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1250. taicpu(hpfar1).ops := 3;
  1251. if not Assigned(hp1) then
  1252. GetNextInstruction(p,hp1);
  1253. RemoveCurrentP(p, hp1);
  1254. result:=true;
  1255. exit;
  1256. end
  1257. else if MatchInstruction(hpfar1, A_UXTH, [C_None], [PF_None]) and
  1258. (taicpu(hpfar1).ops=2) and
  1259. (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
  1260. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1261. begin
  1262. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1263. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1264. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1265. taicpu(hpfar1).ops := 3;
  1266. if not Assigned(hp1) then
  1267. GetNextInstruction(p,hp1);
  1268. RemoveCurrentP(p, hp1);
  1269. result:=true;
  1270. exit;
  1271. end
  1272. else if MatchInstruction(hpfar1, A_AND, [C_None], [PF_None]) and
  1273. (taicpu(hpfar1).ops = 3) and
  1274. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1275. (taicpu(hpfar1).oper[2]^.val = $FF) and
  1276. (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
  1277. MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1278. begin
  1279. taicpu(hpfar1).ops := 3;
  1280. taicpu(hpfar1).opcode := A_UXTB;
  1281. taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
  1282. taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
  1283. taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
  1284. if not Assigned(hp1) then
  1285. GetNextInstruction(p,hp1);
  1286. RemoveCurrentP(p, hp1);
  1287. result:=true;
  1288. exit;
  1289. end;
  1290. end;
  1291. { 2-operand mov optimisations }
  1292. if (taicpu(p).ops = 2) then
  1293. begin
  1294. {
  1295. This removes the mul from
  1296. mov rX,0
  1297. ...
  1298. mul ...,rX,...
  1299. }
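{ The active case below folds an MLA whose accumulator was just set to zero,
e.g. (illustrative registers):
mov r3, #0
...
mla r0, r1, r2, r3
becomes
mul r0, r1, r2
since adding a zero accumulator changes nothing. }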
  1300. if (taicpu(p).oper[1]^.typ = top_const) then
  1301. begin
  1302. (* if false and
  1303. (taicpu(p).oper[1]^.val=0) and
  1304. MatchInstruction(hpfar1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1305. (((taicpu(hpfar1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^)) or
  1306. ((taicpu(hpfar1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[2]^))) then
  1307. begin
  1308. TransferUsedRegs(TmpUsedRegs);
  1309. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1310. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1311. DebugMsg(SPeepholeOptimization + 'MovMUL/MLA2Mov0 done', p);
  1312. if taicpu(hpfar1).opcode=A_MUL then
  1313. taicpu(hpfar1).loadconst(1,0)
  1314. else
  1315. taicpu(hpfar1).loadreg(1,taicpu(hpfar1).oper[3]^.reg);
  1316. taicpu(hpfar1).ops:=2;
  1317. taicpu(hpfar1).opcode:=A_MOV;
  1318. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1319. RemoveCurrentP(p);
  1320. Result:=true;
  1321. exit;
  1322. end
  1323. else*) if (taicpu(p).oper[1]^.val=0) and
  1324. MatchInstruction(hpfar1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1325. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[3]^) then
  1326. begin
  1327. TransferUsedRegs(TmpUsedRegs);
  1328. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1329. UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
  1330. DebugMsg(SPeepholeOptimization + 'MovMLA2MUL 1 done', p);
  1331. taicpu(hpfar1).ops:=3;
  1332. taicpu(hpfar1).opcode:=A_MUL;
  1333. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
  1334. begin
  1335. RemoveCurrentP(p);
  1336. Result:=true;
  1337. end;
  1338. exit;
  1339. end
  1340. {
  1341. This changes the very common
  1342. mov r0, #0
  1343. str r0, [...]
  1344. mov r0, #0
  1345. str r0, [...]
  1346. and removes all superfluous mov instructions
  1347. }
  1348. else if (taicpu(hpfar1).opcode=A_STR) then
  1349. begin
  1350. hp1 := hpfar1;
  1351. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  1352. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1353. GetNextInstruction(hp1, hp2) and
  1354. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  1355. (taicpu(hp2).ops = 2) and
  1356. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  1357. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  1358. begin
  1359. DebugMsg(SPeepholeOptimization + 'MovStrMov done', hp2);
  1360. GetNextInstruction(hp2,hp1);
  1361. asml.remove(hp2);
  1362. hp2.free;
  1363. result:=true;
  1364. if not assigned(hp1) then break;
  1365. end;
  1366. if Result then
  1367. Exit;
  1368. end;
  1369. end;
  1370. {
  1371. This removes the first mov from
  1372. mov rX,...
  1373. mov rX,...
  1374. }
  1375. if taicpu(hpfar1).opcode=A_MOV then
  1376. begin
  1377. hp1 := p;
  1378. while MatchInstruction(hpfar1, A_MOV, [taicpu(hp1).condition], [taicpu(hp1).oppostfix]) and
  1379. (taicpu(hpfar1).ops = 2) and
  1380. MatchOperand(taicpu(hp1).oper[0]^, taicpu(hpfar1).oper[0]^) and
  1381. { don't remove the first mov if the second is a mov rX,rX }
  1382. not(MatchOperand(taicpu(hpfar1).oper[0]^, taicpu(hpfar1).oper[1]^)) do
  1383. begin
  1384. { Defer removing the first p until after the while loop }
  1385. if p <> hp1 then
  1386. begin
  1387. DebugMsg(SPeepholeOptimization + 'MovMov done', hp1);
  1388. asml.remove(hp1);
  1389. hp1.free;
  1390. end;
  1391. hp1:=hpfar1;
  1392. GetNextInstruction(hpfar1,hpfar1);
  1393. result:=true;
  1394. if not assigned(hpfar1) then
  1395. Break;
  1396. end;
  1397. if Result then
  1398. begin
  1399. DebugMsg(SPeepholeOptimization + 'MovMov done', p);
  1400. RemoveCurrentp(p);
  1401. Exit;
  1402. end;
  1403. end;
  1404. if RedundantMovProcess(p,hpfar1) then
  1405. begin
  1406. Result:=true;
  1407. { p might not point at a mov anymore }
  1408. exit;
  1409. end;
  1410. { If hpfar1 is nil after the call to RedundantMovProcess, it is
  1411. because it would have become a dangling pointer, so reinitialise it. }
  1412. if not Assigned(hpfar1) then
  1413. Continue;
  1414. { Fold the very common sequence
  1415. mov regA, regB
  1416. ldr* regA, [regA]
  1417. to
  1418. ldr* regA, [regB]
  1419. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1420. }
  1421. if
  1422. // Make sure that Thumb code doesn't propagate a high register into a reference
  1423. (
  1424. (
  1425. GenerateThumbCode and
  1426. (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)
  1427. ) or (not GenerateThumbCode)
  1428. ) and
  1429. (taicpu(p).oper[1]^.typ = top_reg) and
  1430. (taicpu(p).oppostfix = PF_NONE) and
  1431. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], []) and
  1432. (taicpu(hpfar1).oper[1]^.typ = top_ref) and
  1433. { We can change the base register only when the instruction uses AM_OFFSET }
  1434. ((taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
  1435. ((taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1436. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
  1437. ) and
  1438. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1439. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1440. begin
  1441. DebugMsg(SPeepholeOptimization + 'MovLdr2Ldr done', hpfar1);
  1442. if (taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1443. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1444. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
  1445. if taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
  1446. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1447. dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
  1448. if Assigned(dealloc) then
  1449. begin
  1450. asml.remove(dealloc);
  1451. asml.InsertAfter(dealloc,hpfar1);
  1452. end;
  1453. if (not Assigned(hp1)) or (p=hp1) then
  1454. GetNextInstruction(p, hp1);
  1455. RemoveCurrentP(p, hp1);
  1456. result:=true;
  1457. Exit;
  1458. end
  1459. end
  1460. { 3-operand mov optimisations }
  1461. else if (taicpu(p).ops = 3) then
  1462. begin
  1463. if (taicpu(p).oper[2]^.typ = top_shifterop) and
  1464. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  1465. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  1466. (taicpu(hpfar1).ops>=1) and
  1467. (taicpu(hpfar1).oper[0]^.typ=top_reg) and
  1468. (not RegModifiedBetween(taicpu(hpfar1).oper[0]^.reg, p, hpfar1)) and
  1469. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1470. begin
  1471. if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  1472. MatchInstruction(hpfar1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1473. (taicpu(hpfar1).ops=3) and
  1474. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1475. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1476. { Check if the AND would only mask out bits that are already zero because of the shift
  1477. }
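{ Worked example (illustrative): after
mov r0, r1, lsr #24
only bits 0..7 of r0 can be set, i.e. $ffffffff shr 24 = $ff.
An and r2, r0, #$ff keeps every bit that can still be set, so the and
can be dropped and the lsr can write to r2 directly. }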
  1478. ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hpfar1).oper[2]^.val) =
  1479. ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
  1480. begin
  1481. DebugMsg(SPeepholeOptimization + 'LsrAnd2Lsr done', hpfar1);
  1482. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1483. asml.remove(hpfar1);
  1484. hpfar1.free;
  1485. result:=true;
  1486. Exit;
  1487. end
  1488. else if MatchInstruction(hpfar1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1489. (taicpu(hpfar1).ops=3) and
  1490. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
  1491. (taicpu(hpfar1).oper[2]^.typ = top_const) and
  1492. { Check if the BIC would only mask out bits that are already zero because of the shift }
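{ Worked example (illustrative): after
mov r0, r1, lsr #24
only bits 0..7 can be set. A bic r2, r0, #$ff00 clears bits 8..15,
which are already zero (BsfDWord($ff00) = 8 >= 32-24), so the bic is
redundant and the lsr can write to r2 directly. }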
  1493. (taicpu(hpfar1).oper[2]^.val<>0) and
  1494. (BsfDWord(taicpu(hpfar1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
  1495. begin
  1496. DebugMsg(SPeepholeOptimization + 'LsrBic2Lsr done', hpfar1);
  1497. taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
  1498. asml.remove(hpfar1);
  1499. hpfar1.free;
  1500. result:=true;
  1501. Exit;
  1502. end;
  1503. end;
  1504. { This folds shifterops into following instructions
  1505. mov r0, r1, lsl #8
  1506. add r2, r3, r0
  1507. to
  1508. add r2, r3, r1, lsl #8
  1509. CAUTION! If this one is successful p might not be a mov instruction anymore!
  1510. }
  1511. if (taicpu(p).oper[1]^.typ = top_reg) and
  1512. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1513. (taicpu(p).oppostfix = PF_NONE) and
  1514. MatchInstruction(hpfar1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  1515. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  1516. A_CMP, A_CMN],
  1517. [taicpu(p).condition], [PF_None]) and
  1518. (not ((GenerateThumb2Code) and
  1519. (taicpu(hpfar1).opcode in [A_SBC]) and
  1520. (((taicpu(hpfar1).ops=3) and
  1521. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^.reg)) or
  1522. ((taicpu(hpfar1).ops=2) and
  1523. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^.reg))))) and
  1524. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) and
  1525. (taicpu(hpfar1).ops >= 2) and
  1526. {Currently we can't fold into another shifterop}
  1527. (taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^.typ = top_reg) and
  1528. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  1529. NR_DEFAULTFLAGS for modification}
  1530. (
  1531. {Everything is fine if we don't use RRX}
  1532. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  1533. (
  1534. {If it is RRX, then check if we're just accessing the next instruction}
  1535. Assigned(hp1) and
  1536. (hpfar1 = hp1)
  1537. )
  1538. ) and
  1539. { reg1 must not be modified in between }
  1540. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1541. { The shifterop can contain a register, which must not be modified }
  1542. (
  1543. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  1544. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hpfar1))
  1545. ) and
  1546. (
  1547. {Only ONE of the two src operands is allowed to match}
  1548. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-2]^) xor
  1549. MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^)
  1550. ) then
  1551. begin
  1552. if taicpu(hpfar1).opcode in [A_TST, A_TEQ, A_CMN] then
  1553. I2:=0
  1554. else
  1555. I2:=1;
  1556. for I:=I2 to taicpu(hpfar1).ops-1 do
  1557. if MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[I]^.reg) then
  1558. begin
  1559. { If the parameter matched on the second op from the RIGHT
  1560. we have to switch the parameters; this will not happen for CMP
  1561. where we're only evaluating the rightmost parameter
  1562. }
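{ Illustrative example of the swap (registers chosen for illustration):
mov r0, r1, lsl #2
sub r2, r0, r3
Here the mov destination matches the second operand from the right, so
the fold must use the reversed opcode:
rsb r2, r3, r1, lsl #2
which still computes (r1 shl 2) - r3. }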
  1563. if I <> taicpu(hpfar1).ops-1 then
  1564. begin
  1565. {The SUB operators need to be changed when we swap parameters}
  1566. case taicpu(hpfar1).opcode of
  1567. A_SUB: tempop:=A_RSB;
  1568. A_SBC: tempop:=A_RSC;
  1569. A_RSB: tempop:=A_SUB;
  1570. A_RSC: tempop:=A_SBC;
  1571. else tempop:=taicpu(hpfar1).opcode;
  1572. end;
  1573. if taicpu(hpfar1).ops = 3 then
  1574. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  1575. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[2]^.reg,
  1576. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1577. else
  1578. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  1579. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1580. taicpu(p).oper[2]^.shifterop^);
  1581. end
  1582. else
  1583. if taicpu(hpfar1).ops = 3 then
  1584. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1585. taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[1]^.reg,
  1586. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  1587. else
  1588. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hpfar1).opcode,
  1589. taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  1590. taicpu(p).oper[2]^.shifterop^);
  1591. if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
  1592. AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hpfar1,UsedRegs);
  1593. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hpfar1,UsedRegs);
  1594. asml.insertbefore(hp2, hpfar1);
  1595. asml.remove(hpfar1);
  1596. hpfar1.free;
  1597. DebugMsg(SPeepholeOptimization + 'FoldShiftProcess done', hp2);
  1598. if not Assigned(hp1) then
  1599. GetNextInstruction(p, hp1)
  1600. else if hp1 = hpfar1 then
  1601. { If hp1 = hpfar1, then it's a dangling pointer }
  1602. hp1 := hp2;
  1603. RemoveCurrentP(p, hp1);
  1604. Result:=true;
  1605. Exit;
  1606. end;
  1607. end;
  1608. {
  1609. Fold
  1610. mov r1, r1, lsl #2
  1611. ldr/ldrb r0, [r0, r1]
  1612. to
  1613. ldr/ldrb r0, [r0, r1, lsl #2]
  1614. XXX: This still needs some work, as we quite often encounter something like
  1615. mov r1, r2, lsl #2
  1616. add r2, r3, #imm
  1617. ldr r0, [r2, r1]
  1618. which can't be folded because r2 is overwritten between the shift and the ldr.
  1619. We could try to shuffle the registers around and fold it into.
  1620. add r1, r3, #imm
  1621. ldr r0, [r1, r2, lsl #2]
  1622. }
  1623. if (not(GenerateThumbCode)) and
  1624. { thumb2 allows only lsl #0..#3 }
  1625. (not(GenerateThumb2Code) or
  1626. ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
  1627. (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
  1628. )
  1629. ) and
  1630. (taicpu(p).oper[1]^.typ = top_reg) and
  1631. (taicpu(p).oper[2]^.typ = top_shifterop) and
  1632. { RRX is tough to handle, because it requires tracking the C-Flag,
  1633. it is also extremely unlikely to be emitted this way}
  1634. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
  1635. (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
  1636. (taicpu(p).oppostfix = PF_NONE) and
  1637. {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
  1638. (MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
  1639. (GenerateThumb2Code and
  1640. MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
  1641. ) and
  1642. (
  1643. {If this uses offset addressing, either the base or the index register can be used}
  1644. ((taicpu(hpfar1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1645. (
  1646. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
  1647. (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
  1648. )
  1649. ) or
  1650. {For post and preindexed only the index register can be used}
  1651. ((taicpu(hpfar1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
  1652. (
  1653. (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
  1654. (taicpu(hpfar1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
  1655. ) and
  1656. (not GenerateThumb2Code)
  1657. )
  1658. ) and
  1659. { Only fold if both registers are used. Otherwise we are folding p with itself }
  1660. (taicpu(hpfar1).oper[1]^.ref^.index<>NR_NO) and
  1661. (taicpu(hpfar1).oper[1]^.ref^.base<>NR_NO) and
  1662. { Only fold if there isn't another shifterop already, and offset is zero. }
  1663. (taicpu(hpfar1).oper[1]^.ref^.offset = 0) and
  1664. (taicpu(hpfar1).oper[1]^.ref^.shiftmode = SM_None) and
  1665. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
  1666. RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
  1667. begin
  1668. { If the register we want to do the shift for resides in base, we need to swap that}
  1669. if (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
  1670. taicpu(hpfar1).oper[1]^.ref^.base := taicpu(hpfar1).oper[1]^.ref^.index;
  1671. taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
  1672. taicpu(hpfar1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
  1673. taicpu(hpfar1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
  1674. DebugMsg(SPeepholeOptimization + 'FoldShiftLdrStr done', hpfar1);
  1675. RemoveCurrentP(p);
  1676. Result:=true;
  1677. Exit;
  1678. end;
  1679. end;
  1680. {
  1681. Often we see shifts and then a superfluous mov to another register.
  1682. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking.
  1683. }
  1684. if RemoveSuperfluousMove(p, hpfar1, 'MovMov2Mov') then
  1685. Result:=true;
  1686. Exit;
  1687. end;
  1688. end;
  1689. function TCpuAsmOptimizer.OptPass1MVN(var p: tai): Boolean;
  1690. var
  1691. hp1: tai;
  1692. begin
  1693. {
  1694. change
  1695. mvn reg2,reg1
  1696. and reg3,reg4,reg2
  1697. dealloc reg2
  1698. to
  1699. bic reg3,reg4,reg1
  1700. }
  1701. Result := False;
  1702. if (taicpu(p).oper[1]^.typ = top_reg) and
  1703. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1704. MatchInstruction(hp1,A_AND,[],[]) and
  1705. (((taicpu(hp1).ops=3) and
  1706. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1707. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1708. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1709. ((taicpu(hp1).ops=2) and
  1710. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1711. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1712. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1713. { reg1 must not be modified in between }
  1714. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1715. begin
  1716. DebugMsg(SPeepholeOptimization + 'MvnAnd2Bic done', p);
  1717. taicpu(hp1).opcode:=A_BIC;
  1718. if taicpu(hp1).ops=3 then
  1719. begin
  1720. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1721. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1722. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1723. end
  1724. else
  1725. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1726. RemoveCurrentp(p);
  1727. Result := True;
  1728. Exit;
  1729. end;
  1730. end;
  1731. function TCpuAsmOptimizer.OptPass1VMov(var p: tai): Boolean;
  1732. var
  1733. hp1: tai;
  1734. begin
  1735. {
  1736. change
  1737. vmov reg0,reg1,reg2
  1738. vmov reg1,reg2,reg0
  1739. into
  1740. vmov reg0,reg1,reg2
  1741. can be applied regardless of whether reg0 or reg2 is the VFP register
  1742. }
  1743. Result := False;
  1744. if (taicpu(p).ops = 3) then
  1745. while GetNextInstruction(p, hp1) and
  1746. MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  1747. (taicpu(hp1).ops = 3) and
  1748. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
  1749. MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
  1750. MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) do
  1751. begin
  1752. asml.Remove(hp1);
  1753. hp1.free;
  1754. DebugMsg(SPeepholeOptimization + 'VMovVMov2VMov done', p);
  1755. { Can we do it again? }
  1756. end;
  1757. end;
  1758. function TCpuAsmOptimizer.OptPass1VOp(var p: tai): Boolean;
  1759. var
  1760. hp1: tai;
  1761. begin
  1762. Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  1763. RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp');
  1764. end;
  1765. function TCpuAsmOptimizer.OptPass2Bcc(var p: tai): Boolean;
  1766. var
  1767. hp1,hp2,hp3,after_p: tai;
  1768. l : longint;
  1769. WasLast: boolean;
  1770. Label_X, Label_Y: TASmLabel;
  1771. procedure ConvertInstructins(this_hp: tai; newcond: tasmcond);
  1772. var
  1773. next_hp: tai;
  1774. begin
  1775. repeat
  1776. if this_hp.typ=ait_instruction then
  1777. taicpu(this_hp).condition := newcond;
  1778. GetNextInstruction(this_hp, next_hp);
  1779. if MustBeLast(this_hp) then
  1780. Break;
  1781. this_hp := next_hp
  1782. until not(assigned(this_hp)) or
  1783. not(CanBeCond(this_hp)) or
  1784. ((this_hp.typ = ait_instruction) and (taicpu(this_hp).opcode = A_B)) or
  1785. (this_hp.typ = ait_label);
  1786. end;
  1787. begin
  1788. Result := False;
  1789. if (taicpu(p).condition<>C_None) and
  1790. not(GenerateThumbCode) then
  1791. begin
  1792. { check for
  1793. Bxx xxx
  1794. <several instructions>
  1795. xxx:
  1796. }
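{ If the block is short enough, the branch is removed and the skipped
instructions are executed conditionally instead, e.g. (illustrative):
bne .Lskip
mov r0, #1
.Lskip:
becomes
moveq r0, #1
i.e. each instruction gets the inverse condition of the branch. }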
  1797. Label_X := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
  1798. l:=0;
  1799. WasLast:=False;
  1800. GetNextInstruction(p, hp1);
  1801. after_p := hp1;
  1802. while assigned(hp1) and
  1803. (l<=4) and
  1804. CanBeCond(hp1) and
  1805. { stop on labels }
  1806. not(hp1.typ=ait_label) and
  1807. { stop here, otherwise we could not recognize the BccB2Cond case below }
  1808. not((hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_B)) do
  1809. begin
  1810. inc(l);
  1811. if MustBeLast(hp1) then
  1812. begin
  1813. WasLast:=True;
  1814. GetNextInstruction(hp1,hp1);
  1815. break;
  1816. end
  1817. else
  1818. GetNextInstruction(hp1,hp1);
  1819. end;
  1820. if assigned(hp1) then
  1821. begin
  1822. if FindLabel(Label_X, hp1) then
  1823. begin
  1824. if (l<=4) and (l>0) then
  1825. begin
  1826. ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
  1827. DebugMsg(SPeepholeOptimization + 'Bcc2Cond done', p);
  1828. { wait with removing; otherwise GetNextInstruction could
  1829. ignore the label if the jump we moved away
  1830. held its only reference }
  1831. Label_X.decrefs;
  1832. RemoveCurrentP(p, after_p);
  1833. Result := True;
  1834. Exit;
  1835. end;
  1836. end
  1837. else
  1838. { do not perform further optimizations if there is an instruction
  1839. in block #1 which cannot be optimized.
  1840. }
  1841. if not WasLast then
  1842. begin
  1843. { check further for
  1844. Bcc xxx
  1845. <several instructions 1>
  1846. B yyy
  1847. xxx:
  1848. <several instructions 2>
  1849. yyy:
  1850. }
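{ This handles the classic if/else diamond, e.g. (illustrative):
beq .L1
mov r0, #1
b .L2
.L1:
mov r0, #0
.L2:
becomes
movne r0, #1
moveq r0, #0
i.e. block 1 gets the inverse condition, block 2 the original one,
and both branches are removed. }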
  1851. { hp2 points to jmp yyy }
  1852. hp2:=hp1;
  1853. { skip hp2 to xxx }
  1854. if assigned(hp2) and
  1855. (l<=3) and
  1856. (hp2.typ=ait_instruction) and
  1857. (taicpu(hp2).is_jmp) and
  1858. (taicpu(hp2).condition=C_None) and
  1859. GetNextInstruction(hp2, hp1) and
  1860. { real label and jump, no further references to the
  1861. label are allowed }
  1862. (Label_X.getrefs = 1) and
  1863. FindLabel(Label_X, hp1) then
  1864. begin
  1865. Label_Y := TAsmLabel(taicpu(hp2).oper[0]^.ref^.symbol);
  1866. l:=0;
  1867. { skip hp1 and hp3 to <several instructions 2> }
  1868. GetNextInstruction(hp1, hp1);
  1869. hp3 := hp1;
  1870. while assigned(hp1) and
  1871. CanBeCond(hp1) and
  1872. (l<=3) do
  1873. begin
  1874. inc(l);
  1875. if MustBeLast(hp1) then
  1876. begin
  1877. GetNextInstruction(hp1, hp1);
  1878. break;
  1879. end
  1880. else
  1881. GetNextInstruction(hp1, hp1);
  1882. end;
  1883. { hp1 points to yyy: }
  1884. if assigned(hp1) and
  1885. FindLabel(Label_Y, hp1) then
  1886. begin
  1887. ConvertInstructins(after_p, inverse_cond(taicpu(p).condition));
  1888. ConvertInstructins(hp3, taicpu(p).condition);
  1889. DebugMsg(SPeepholeOptimization + 'BccB2Cond done', after_p);
  1890. { remove B }
  1891. Label_Y.decrefs;
  1892. RemoveInstruction(hp2);
  1893. { remove Bcc }
  1894. Label_X.decrefs;
  1895. RemoveCurrentP(p, after_p);
  1896. Result := True;
  1897. Exit;
  1898. end;
  1899. end;
  1900. end;
  1901. end;
  1902. end;
  1903. end;
  1904. function TCpuAsmOptimizer.OptPass2STR(var p: tai): Boolean;
  1905. var
  1906. hp1: tai;
  1907. Postfix: TOpPostfix;
  1908. OpcodeStr: shortstring;
  1909. begin
  1910. Result := False;
  1911. { Try to merge two STRs into an STM instruction }
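{ Illustrative example (registers chosen for illustration):
str r0, [r3]
str r1, [r3, #4]
can become
stm r3, {r0, r1}
since STM stores the lowest-numbered register at the lowest address. }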
  1912. if not(GenerateThumbCode) and (taicpu(p).oper[1]^.typ = top_ref) and
  1913. (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1914. (
  1915. (taicpu(p).oper[1]^.ref^.base = NR_NO) or
  1916. (taicpu(p).oper[1]^.ref^.index = NR_NO)
  1917. ) and
  1918. (taicpu(p).oppostfix = PF_None) and
  1919. (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) then
  1920. begin
  1921. hp1 := p;
  1922. while GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
  1923. (taicpu(hp1).opcode = A_STR) do
  1924. if (taicpu(hp1).condition = taicpu(p).condition) and
  1925. (taicpu(hp1).oppostfix = PF_None) and
  1926. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1927. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1928. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[1]^.ref^.base) and
  1929. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[1]^.ref^.index) and
  1930. (
  1931. (
  1932. (taicpu(p).oper[1]^.ref^.offset = 0) and
  1933. (getsupreg(taicpu(hp1).oper[0]^.reg) > getsupreg(taicpu(p).oper[0]^.reg)) and
  1934. (abs(taicpu(hp1).oper[1]^.ref^.offset) = 4)
  1935. ) or (
  1936. (taicpu(hp1).oper[1]^.ref^.offset = 0) and
  1937. (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) and
  1938. (abs(taicpu(p).oper[1]^.ref^.offset) = 4)
  1939. )
  1940. ) then
  1941. begin
  1942. if (getsupreg(taicpu(hp1).oper[0]^.reg) < getsupreg(taicpu(p).oper[0]^.reg)) xor
  1943. (taicpu(hp1).oper[1]^.ref^.offset < taicpu(p).oper[1]^.ref^.offset) then
  1944. begin
  1945. Postfix := PF_DA;
  1946. OpcodeStr := 'DA';
  1947. end
  1948. else
  1949. begin
  1950. Postfix := PF_None;
  1951. OpcodeStr := '';
  1952. end;
  1953. taicpu(hp1).oper[1]^.ref^.offset := 0;
  1954. if taicpu(hp1).oper[1]^.ref^.index = NR_NO then
  1955. begin
  1956. taicpu(hp1).oper[1]^.ref^.index := taicpu(hp1).oper[1]^.ref^.base;
  1957. taicpu(hp1).oper[1]^.ref^.base := NR_NO;
  1958. end;
  1959. taicpu(p).opcode := A_STM;
  1960. taicpu(p).loadregset(1, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg), getsupreg(taicpu(hp1).oper[0]^.reg)]);
  1961. taicpu(p).loadref(0, taicpu(hp1).oper[1]^.ref^);
  1962. taicpu(p).oppostfix := Postfix;
  1963. RemoveInstruction(hp1);
  1964. DebugMsg(SPeepholeOptimization + 'Merging stores: STR/STR -> STM' + OpcodeStr, p);
  1965. Result := True;
  1966. Exit;
  1967. end;
  1968. end;
  1969. end;
  1970. function TCpuAsmOptimizer.OptPass2STM(var p: tai): Boolean;
  1971. var
  1972. hp1: tai;
  1973. CorrectOffset:ASizeInt;
  1974. i, LastReg: TSuperRegister;
  1975. Postfix: TOpPostfix;
  1976. OpcodeStr: shortstring;
  1977. begin
  1978. Result := False;
  1979. { See if STM/STR can be merged into a single STM }
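{ Illustrative example (registers chosen for illustration):
stm r3, {r0, r1}
str r2, [r3, #8]
can become
stm r3, {r0, r1, r2}
as long as the STR writes the next consecutive slot with a register
numbered above the ones already in the set. }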
  1980. if (taicpu(p).oper[0]^.ref^.addressmode = AM_OFFSET) then
  1981. begin
  1982. CorrectOffset := 0;
  1983. LastReg := RS_NO;
  1984. for i in taicpu(p).oper[1]^.regset^ do
  1985. begin
  1986. LastReg := i;
  1987. Inc(CorrectOffset, 4);
  1988. end;
  1989. { This while loop effectively does a selection sort on any STR
  1990. instructions that follow }
  1991. hp1 := p;
  1992. while (LastReg < maxcpuregister) and
  1993. GetNextInstruction(hp1, hp1) and (hp1.typ = ait_instruction) and
  1994. (taicpu(hp1).opcode = A_STR) do
  1995. if (taicpu(hp1).condition = taicpu(p).condition) and
  1996. (taicpu(hp1).oppostfix = PF_None) and
  1997. (getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
  1998. (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
  1999. (
  2000. (
  2001. (taicpu(p).oper[0]^.ref^.base = NR_NO) and
  2002. (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.ref^.index)
  2003. ) or (
  2004. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  2005. (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.ref^.base)
  2006. )
  2007. ) and
  2008. { Next register must be later in the set }
  2009. (getsupreg(taicpu(hp1).oper[0]^.reg) > LastReg) and
  2010. (
  2011. (
  2012. (taicpu(p).oppostfix = PF_None) and
  2013. (taicpu(hp1).oper[1]^.ref^.offset = CorrectOffset)
  2014. ) or (
  2015. (taicpu(p).oppostfix = PF_DA) and
  2016. (taicpu(hp1).oper[1]^.ref^.offset = -CorrectOffset)
  2017. )
  2018. ) then
  2019. begin
  2020. { Increment the reference values ready for the next STR instruction to find }
  2021. LastReg := getsupreg(taicpu(hp1).oper[0]^.reg);
  2022. Inc(CorrectOffset, 4);
  2023. if (taicpu(p).oppostfix = PF_DA) then
  2024. OpcodeStr := 'DA'
  2025. else
  2026. OpcodeStr := '';
  2027. Include(taicpu(p).oper[1]^.regset^, LastReg);
  2028. DebugMsg(SPeepholeOptimization + 'Merging stores: STM' + OpcodeStr + '/STR -> STM' + OpcodeStr, hp1);
  2029. RemoveInstruction(hp1);
  2030. Result := True;
  2031. { See if we can find another one to merge }
  2032. hp1 := p;
  2033. Continue;
  2034. end;
  2035. end;
  2036. end;
  2037. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  2038. begin
  2039. result := false;
  2040. if p.typ = ait_instruction then
  2041. begin
  2042. case taicpu(p).opcode of
  2043. A_CMP:
  2044. Result := OptPass1CMP(p);
  2045. A_STR:
  2046. Result := OptPass1STR(p);
  2047. A_LDR:
  2048. Result := OptPass1LDR(p);
  2049. A_MOV:
  2050. Result := OptPass1MOV(p);
  2051. A_AND:
  2052. Result := OptPass1And(p);
  2053. A_ADD,
  2054. A_SUB:
  2055. Result := OptPass1ADDSUB(p);
  2056. A_MUL:
  2057. Result := OptPass1MUL(p);
  2058. A_ADC,
  2059. A_RSB,
  2060. A_RSC,
  2061. A_SBC,
  2062. A_BIC,
  2063. A_EOR,
  2064. A_ORR,
  2065. A_MLA,
  2066. A_MLS,
  2067. A_QADD,A_QADD16,A_QADD8,
  2068. A_QSUB,A_QSUB16,A_QSUB8,
  2069. A_QDADD,A_QDSUB,A_QASX,A_QSAX,
  2070. A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
  2071. A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
  2072. A_PKHTB,A_PKHBT,
  2073. A_SMUAD,A_SMUSD:
  2074. Result := OptPass1DataCheckMov(p);
  2075. {$ifdef dummy}
  2076. A_MVN:
  2077. Result := OptPass1MVN(p);
  2078. {$endif dummy}
  2079. A_UXTB:
  2080. Result := OptPass1UXTB(p);
  2081. A_UXTH:
  2082. Result := OptPass1UXTH(p);
  2083. A_SXTB:
  2084. Result := OptPass1SXTB(p);
  2085. A_SXTH:
  2086. Result := OptPass1SXTH(p);
  2087. A_STM:
  2088. Result := OptPass1STM(p);
  2089. A_VMOV:
  2090. Result := OptPass1VMov(p);
  2091. A_VLDR,
  2092. A_VADD,
  2093. A_VMUL,
  2094. A_VDIV,
  2095. A_VSUB,
  2096. A_VSQRT,
  2097. A_VNEG,
  2098. A_VCVT,
  2099. A_VABS:
  2100. Result := OptPass1VOp(p);
  2101. else
  2102. ;
  2103. end;
  2104. end;
  2105. end;
  2106. function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  2107. begin
  2108. result := False;
  2109. if p.typ = ait_instruction then
  2110. begin
  2111. case taicpu(p).opcode of
  2112. A_B:
  2113. Result := OptPass2Bcc(p);
  2114. A_STM:
  2115. Result := OptPass2STM(p);
  2116. A_STR:
  2117. Result := OptPass2STR(p);
  2118. else
  2119. ;
  2120. end;
  2121. end;
  2122. end;
  2123. { instructions modifying the CPSR can only be the last instruction }
  2124. function MustBeLast(p : tai) : boolean;
  2125. begin
  2126. Result:=(p.typ=ait_instruction) and
  2127. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  2128. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  2129. (taicpu(p).oppostfix=PF_S));
  2130. end;
  2131. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  2132. begin
  2133. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  2134. Result:=true
  2135. else If MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
  2136. (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
  2137. Result:=true
  2138. else
  2139. Result:=inherited RegInInstruction(Reg, p1);
  2140. end;
  2141. const
  2142. { set of opcodes which might or do write to memory }
  2143. { TODO : extend armins.dat to contain r/w info }
  2144. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  2145. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
  2146. { adjust the register live information when swapping the two instructions p and hp1,
  2147. they must follow one after the other }
  2148. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  2149. procedure CheckLiveEnd(reg : tregister);
  2150. var
  2151. supreg : TSuperRegister;
  2152. regtype : TRegisterType;
  2153. begin
  2154. if reg=NR_NO then
  2155. exit;
  2156. regtype:=getregtype(reg);
  2157. supreg:=getsupreg(reg);
  2158. if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_end[supreg]=hp1) and
  2159. RegInInstruction(reg,p) then
  2160. cg.rg[regtype].live_end[supreg]:=p;
  2161. end;
  2162. procedure CheckLiveStart(reg : TRegister);
  2163. var
  2164. supreg : TSuperRegister;
  2165. regtype : TRegisterType;
  2166. begin
  2167. if reg=NR_NO then
  2168. exit;
  2169. regtype:=getregtype(reg);
  2170. supreg:=getsupreg(reg);
  2171. if assigned(cg.rg[regtype]) and (cg.rg[regtype].live_start[supreg]=p) and
  2172. RegInInstruction(reg,hp1) then
  2173. cg.rg[regtype].live_start[supreg]:=hp1;
  2174. end;
  2175. var
  2176. i : longint;
  2177. r : TSuperRegister;
  2178. begin
  2179. { assumption: p is directly followed by hp1 }
  2180. { if live of any reg used by p starts at p and hp1 uses this register then
  2181. set live start to hp1 }
  2182. for i:=0 to p.ops-1 do
  2183. case p.oper[i]^.typ of
  2184. Top_Reg:
  2185. CheckLiveStart(p.oper[i]^.reg);
  2186. Top_Ref:
  2187. begin
  2188. CheckLiveStart(p.oper[i]^.ref^.base);
  2189. CheckLiveStart(p.oper[i]^.ref^.index);
  2190. end;
  2191. Top_Shifterop:
  2192. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  2193. Top_RegSet:
  2194. for r:=RS_R0 to RS_R15 do
  2195. if r in p.oper[i]^.regset^ then
  2196. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2197. else
  2198. ;
  2199. end;
  2200. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  2201. set live end to p }
  2202. for i:=0 to hp1.ops-1 do
  2203. case hp1.oper[i]^.typ of
  2204. Top_Reg:
  2205. CheckLiveEnd(hp1.oper[i]^.reg);
  2206. Top_Ref:
  2207. begin
  2208. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  2209. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  2210. end;
  2211. Top_Shifterop:
  2212. CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
  2213. Top_RegSet:
  2214. for r:=RS_R0 to RS_R15 do
  2215. if r in hp1.oper[i]^.regset^ then
  2216. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  2217. else
  2218. ;
  2219. end;
  2220. end;
  2221. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  2222. { TODO : schedule also forward }
  2223. { TODO : schedule distance > 1 }
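{ The scheduler below tries to move a load one instruction earlier, so that
the loaded register is not used immediately after the load, e.g.
(illustrative):
add r3, r3, #1
ldr r0, [r1]
add r2, r0, r0
becomes
ldr r0, [r1]
add r3, r3, #1
add r2, r0, r0
which hides the load latency on simple in-order pipelines. }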
  2224. { returns true if p might be a load of a pc relative tls offset }
  2225. function PossibleTLSLoad(const p: tai) : boolean;
  2226. begin
  2227. Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
  2228. (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
  2229. (taicpu(p).oper[1]^.ref^.index=NR_PC)));
  2230. end;
  2231. var
  2232. hp1,hp2,hp3,hp4,hp5,insertpos : tai;
  2233. list : TAsmList;
  2234. begin
  2235. result:=true;
  2236. list:=TAsmList.create;
  2237. p:=BlockStart;
  2238. while p<>BlockEnd Do
  2239. begin
  2240. if (p.typ=ait_instruction) and
  2241. GetNextInstruction(p,hp1) and
  2242. (hp1.typ=ait_instruction) and
  2243. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  2244. (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
  2245. { for now we don't reschedule if the previous instruction potentially changes a memory location }
  2246. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  2247. not(RegModifiedByInstruction(NR_PC,p))
  2248. ) or
  2249. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  2250. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  2251. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  2252. (taicpu(hp1).oper[1]^.ref^.offset=0)
  2253. )
  2254. ) or
  2255. { try to prove that the memory accesses don't overlap }
  2256. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  2257. (taicpu(p).oper[1]^.typ = top_ref) and
  2258. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  2259. (taicpu(p).oppostfix=PF_None) and
  2260. (taicpu(hp1).oppostfix=PF_None) and
  2261. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  2262. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  2263. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  2264. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  2265. )
  2266. )
  2267. ) and
  2268. GetNextInstruction(hp1,hp2) and
  2269. (hp2.typ=ait_instruction) and
  2270. { loaded register used by next instruction?
  2271. if we ever support labels (they could be skipped in theory) here, the gnu2 tls general-dynamic code could get broken (the ldr before
  2272. the bl may not be scheduled away from the bl) and this case needs to be taken care of
  2273. }
  2274. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  2275. { loaded register not used by previous instruction? }
  2276. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  2277. { same condition? }
  2278. (taicpu(p).condition=taicpu(hp1).condition) and
  2279. { first instruction might not change the register used as base }
  2280. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  2281. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  2282. ) and
  2283. { first instruction might not change the register used as index }
  2284. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  2285. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  2286. ) and
  2287. { if we modify the basereg AND the first instruction used that reg, we can not schedule }
  2288. ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
  2289. not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
  2290. not(PossibleTLSLoad(p)) and
  2291. not(PossibleTLSLoad(hp1)) then
  2292. begin
  2293. hp3:=tai(p.Previous);
  2294. hp5:=tai(p.next);
  2295. asml.Remove(p);
  2296. { if there are reg. alloc/dealloc/sync instructions or address labels (e.g. for GOT-less PIC)
  2297. associated with p, move them together with p }
  2298. { before the instruction? }
  2299. { find reg allocs,deallocs and PIC labels }
  2300. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  2301. begin
  2302. if ( (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_alloc, ra_dealloc]) and
  2303. RegInInstruction(tai_regalloc(hp3).reg,p) )
  2304. or ( (hp3.typ=ait_label) and (tai_label(hp3).labsym.typ=AT_ADDR) )
  2305. then
  2306. begin
  2307. hp4:=hp3;
  2308. hp3:=tai(hp3.Previous);
  2309. asml.Remove(hp4);
  2310. list.Insert(hp4);
  2311. end
  2312. else
  2313. hp3:=tai(hp3.Previous);
  2314. end;
  2315. list.Concat(p);
  2316. SwapRegLive(taicpu(p),taicpu(hp1));
  2317. { after the instruction? }
  2318. { find reg deallocs and reg syncs }
  2319. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  2320. begin
  2321. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc, ra_sync]) and
  2322. RegInInstruction(tai_regalloc(hp5).reg,p) then
  2323. begin
  2324. hp4:=hp5;
  2325. hp5:=tai(hp5.next);
  2326. asml.Remove(hp4);
  2327. list.Concat(hp4);
  2328. end
  2329. else
  2330. hp5:=tai(hp5.Next);
  2331. end;
  2332. asml.Remove(hp1);
  2333. { if there are address labels associated with hp2, those must
  2334. stay with hp2 (e.g. for GOT-less PIC) }
  2335. insertpos:=hp2;
  2336. while assigned(hp2.previous) and
  2337. (tai(hp2.previous).typ<>ait_instruction) do
  2338. begin
  2339. hp2:=tai(hp2.previous);
  2340. if (hp2.typ=ait_label) and
  2341. (tai_label(hp2).labsym.typ=AT_ADDR) then
  2342. insertpos:=hp2;
  2343. end;
  2344. {$ifdef DEBUG_PREREGSCHEDULER}
  2345. asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),insertpos);
  2346. {$endif DEBUG_PREREGSCHEDULER}
  2347. asml.InsertBefore(hp1,insertpos);
  2348. asml.InsertListBefore(insertpos,list);
  2349. p:=tai(p.next);
  2350. end
  2351. else if p.typ=ait_instruction then
  2352. p:=hp1
  2353. else
  2354. p:=tai(p.next);
  2355. end;
  2356. list.Free;
  2357. end;
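{ When a conditional instruction that was covered by a preceding Thumb-2 IT
block is removed, the IT instruction must be shortened by one slot
(e.g. ITTT -> ITT); an IT that covered only the removed instruction is
dropped entirely. }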
  2358. procedure DecrementPreceedingIT(list: TAsmList; p: tai);
  2359. var
  2360. hp : tai;
  2361. l : longint;
  2362. begin
  2363. hp := tai(p.Previous);
  2364. l := 1;
  2365. while assigned(hp) and
  2366. (l <= 4) do
  2367. begin
  2368. if hp.typ=ait_instruction then
  2369. begin
  2370. if (taicpu(hp).opcode>=A_IT) and
  2371. (taicpu(hp).opcode <= A_ITTTT) then
  2372. begin
  2373. if (taicpu(hp).opcode = A_IT) and
  2374. (l=1) then
  2375. list.Remove(hp)
  2376. else
  2377. case taicpu(hp).opcode of
  2378. A_ITE:
  2379. if l=2 then taicpu(hp).opcode := A_IT;
  2380. A_ITT:
  2381. if l=2 then taicpu(hp).opcode := A_IT;
  2382. A_ITEE:
  2383. if l=3 then taicpu(hp).opcode := A_ITE;
  2384. A_ITTE:
  2385. if l=3 then taicpu(hp).opcode := A_ITT;
  2386. A_ITET:
  2387. if l=3 then taicpu(hp).opcode := A_ITE;
  2388. A_ITTT:
  2389. if l=3 then taicpu(hp).opcode := A_ITT;
  2390. A_ITEEE:
  2391. if l=4 then taicpu(hp).opcode := A_ITEE;
  2392. A_ITTEE:
  2393. if l=4 then taicpu(hp).opcode := A_ITTE;
  2394. A_ITETE:
  2395. if l=4 then taicpu(hp).opcode := A_ITET;
  2396. A_ITTTE:
  2397. if l=4 then taicpu(hp).opcode := A_ITTT;
  2398. A_ITEET:
  2399. if l=4 then taicpu(hp).opcode := A_ITEE;
  2400. A_ITTET:
  2401. if l=4 then taicpu(hp).opcode := A_ITTE;
  2402. A_ITETT:
  2403. if l=4 then taicpu(hp).opcode := A_ITET;
  2404. A_ITTTT:
  2405. begin
  2406. if l=4 then taicpu(hp).opcode := A_ITTT;
  2407. end
  2408. else
  2409. ;
  2410. end;
  2411. break;
  2412. end;
  2413. {else if (taicpu(hp).condition<>taicpu(p).condition) or
  2414. (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
  2415. break;}
  2416. inc(l);
  2417. end;
  2418. hp := tai(hp.Previous);
  2419. end;
  2420. end;
  2421. function TCpuThumb2AsmOptimizer.OptPass1STM(var p: tai): boolean;
  2422. var
  2423. hp : taicpu;
  2424. begin
  2425. result:=false;
  2426. if MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
  2427. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2428. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2429. ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
  2430. begin
  2431. DebugMsg('Peephole Stm2Push done', p);
  2432. hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  2433. AsmL.InsertAfter(hp, p);
  2434. asml.Remove(p);
  2435. p:=hp;
  2436. result:=true;
  2437. end;
  2438. end;
  2439. function TCpuThumb2AsmOptimizer.OptPass1LDM(var p: tai): boolean;
  2440. var
  2441. hp : taicpu;
  2442. begin
  2443. result:=false;
  2444. if MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
  2445. (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
  2446. (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
  2447. ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
  2448. begin
  2449. DebugMsg('Peephole Ldm2Pop done', p);
  2450. hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
  2451. asml.InsertBefore(hp, p);
  2452. asml.Remove(p);
  2453. p.Free;
  2454. p:=hp;
  2455. result:=true;
  2456. end;
  2457. end;
  2458. function TCpuThumb2AsmOptimizer.OptPass1AndThumb2(var p : tai) : boolean;
  2459. begin
  2460. result:=false;
  2461. if MatchInstruction(p, [A_AND], [], [PF_None]) and
  2462. (taicpu(p).ops = 2) and
  2463. (taicpu(p).oper[1]^.typ=top_const) and
  2464. ((taicpu(p).oper[1]^.val=255) or
  2465. (taicpu(p).oper[1]^.val=65535)) then
  2466. begin
  2467. DebugMsg('Peephole AndR2Uxt done', p);
  2468. if taicpu(p).oper[1]^.val=255 then
  2469. taicpu(p).opcode:=A_UXTB
  2470. else
  2471. taicpu(p).opcode:=A_UXTH;
  2472. taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
  2473. result := true;
  2474. end
  2475. else if MatchInstruction(p, [A_AND], [], [PF_None]) and
  2476. (taicpu(p).ops = 3) and
  2477. (taicpu(p).oper[2]^.typ=top_const) and
  2478. ((taicpu(p).oper[2]^.val=255) or
  2479. (taicpu(p).oper[2]^.val=65535)) then
  2480. begin
  2481. DebugMsg('Peephole AndRR2Uxt done', p);
  2482. if taicpu(p).oper[2]^.val=255 then
  2483. taicpu(p).opcode:=A_UXTB
  2484. else
  2485. taicpu(p).opcode:=A_UXTH;
  2486. taicpu(p).ops:=2;
  2487. result := true;
  2488. end;
  2489. end;
  2490. function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  2491. begin
  2492. result:=false;
  2493. if inherited PeepHoleOptPass1Cpu(p) then
  2494. result:=true
  2495. else if p.typ=ait_instruction then
  2496. case taicpu(p).opcode of
  2497. A_STM:
  2498. result:=OptPass1STM(p);
  2499. A_LDM:
  2500. result:=OptPass1LDM(p);
  2501. A_AND:
  2502. result:=OptPass1AndThumb2(p);
  2503. else
  2504. ;
  2505. end;
  2506. end;
  2507. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  2508. var
  2509. p,hp1,hp2: tai;
  2510. l : longint;
  2511. condition : tasmcond;
  2512. { UsedRegs, TmpUsedRegs: TRegSet; }
  2513. begin
  2514. p := BlockStart;
  2515. { UsedRegs := []; }
  2516. while (p <> BlockEnd) Do
  2517. begin
  2518. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  2519. case p.Typ Of
  2520. Ait_Instruction:
  2521. begin
  2522. case taicpu(p).opcode Of
  2523. A_B:
  2524. if taicpu(p).condition<>C_None then
  2525. begin
  2526. { check for
  2527. Bxx xxx
  2528. <several instructions>
  2529. xxx:
  2530. }
  2531. l:=0;
  2532. GetNextInstruction(p, hp1);
  2533. while assigned(hp1) and
  2534. (l<=4) and
  2535. CanBeCond(hp1) and
  2536. { stop on labels }
  2537. not(hp1.typ=ait_label) do
  2538. begin
  2539. inc(l);
  2540. if MustBeLast(hp1) then
  2541. begin
  2542. //hp1:=nil;
  2543. GetNextInstruction(hp1,hp1);
  2544. break;
  2545. end
  2546. else
  2547. GetNextInstruction(hp1,hp1);
  2548. end;
  2549. if assigned(hp1) then
  2550. begin
  2551. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2552. begin
  2553. if (l<=4) and (l>0) then
  2554. begin
  2555. condition:=inverse_cond(taicpu(p).condition);
  2556. hp2:=p;
  2557. GetNextInstruction(p,hp1);
  2558. p:=hp1;
  2559. repeat
  2560. if hp1.typ=ait_instruction then
  2561. taicpu(hp1).condition:=condition;
  2562. if MustBeLast(hp1) then
  2563. begin
  2564. GetNextInstruction(hp1,hp1);
  2565. break;
  2566. end
  2567. else
  2568. GetNextInstruction(hp1,hp1);
  2569. until not(assigned(hp1)) or
  2570. not(CanBeCond(hp1)) or
  2571. (hp1.typ=ait_label);
  2572. { wait with removing; otherwise GetNextInstruction could
  2573. ignore the label if the jump we moved away
  2574. held its only reference }
  2575. asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
  2576. DecrementPreceedingIT(asml, hp2);
  2577. case l of
  2578. 1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
  2579. 2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
  2580. 3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
  2581. 4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
  2582. end;
  2583. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2584. asml.remove(hp2);
  2585. hp2.free;
  2586. continue;
  2587. end;
  2588. end;
  2589. end;
  2590. end;
  2591. else
  2592. ;
  2593. end;
  2594. end;
  2595. else
  2596. ;
  2597. end;
  2598. p := tai(p.next)
  2599. end;
  2600. end;
  2601. function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
  2602. begin
  2603. result:=false;
  2604. if p.typ = ait_instruction then
  2605. begin
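{ These post-peephole rewrites prefer the flag-setting forms (MOVS, ADDS,
LSLS, ...) when the flags are known to be unused, because in Thumb-2 those
forms have narrow 16-bit encodings, e.g. (illustrative)
mov r0, #1 becomes movs r0, #1. }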
  2606. if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
  2607. (taicpu(p).oper[1]^.typ=top_const) and
  2608. (taicpu(p).oper[1]^.val >= 0) and
  2609. (taicpu(p).oper[1]^.val < 256) and
  2610. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2611. begin
  2612. DebugMsg('Peephole Mov2Movs done', p);
  2613. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2614. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2615. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2616. taicpu(p).oppostfix:=PF_S;
  2617. result:=true;
  2618. end
  2619. else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
  2620. (taicpu(p).oper[1]^.typ=top_reg) and
  2621. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2622. begin
  2623. DebugMsg('Peephole Mvn2Mvns done', p);
  2624. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2625. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2626. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2627. taicpu(p).oppostfix:=PF_S;
  2628. result:=true;
  2629. end
  2630. else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
  2631. (taicpu(p).ops = 3) and
  2632. (taicpu(p).oper[2]^.typ=top_const) and
  2633. (taicpu(p).oper[2]^.val=0) and
  2634. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2635. begin
  2636. DebugMsg('Peephole Rsb2Rsbs done', p);
  2637. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2638. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2639. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2640. taicpu(p).oppostfix:=PF_S;
  2641. result:=true;
  2642. end
  2643. else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  2644. (taicpu(p).ops = 3) and
  2645. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  2646. (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
  2647. (taicpu(p).oper[2]^.typ=top_const) and
  2648. (taicpu(p).oper[2]^.val >= 0) and
  2649. (taicpu(p).oper[2]^.val < 256) and
  2650. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2651. begin
  2652. DebugMsg('Peephole AddSub2*s done', p);
  2653. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2654. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2655. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2656. taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
  2657. taicpu(p).oppostfix:=PF_S;
  2658. taicpu(p).ops := 2;
  2659. result:=true;
  2660. end
  2661. else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
  2662. (taicpu(p).ops = 2) and
  2663. (taicpu(p).oper[1]^.typ=top_reg) and
  2664. (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
  2665. (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
  2666. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2667. begin
  2668. DebugMsg('Peephole AddSub2*s done', p);
  2669. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2670. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2671. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2672. taicpu(p).oppostfix:=PF_S;
  2673. result:=true;
  2674. end
  2675. else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
  2676. (taicpu(p).ops = 3) and
  2677. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  2678. (taicpu(p).oper[2]^.typ=top_reg) then
  2679. begin
  2680. DebugMsg('Peephole AddRRR2AddRR done', p);
  2681. taicpu(p).ops := 2;
  2682. taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
  2683. result:=true;
  2684. end
  2685. else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
  2686. (taicpu(p).ops = 3) and
  2687. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  2688. (taicpu(p).oper[2]^.typ=top_reg) and
  2689. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2690. begin
  2691. DebugMsg('Peephole opXXY2opsXY done', p);
  2692. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2693. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2694. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2695. taicpu(p).ops := 2;
  2696. taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
  2697. taicpu(p).oppostfix:=PF_S;
  2698. result:=true;
  2699. end
  2700. else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
  2701. (taicpu(p).ops = 3) and
  2702. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  2703. (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
  2704. begin
  2705. DebugMsg('Peephole opXXY2opXY done', p);
  2706. taicpu(p).ops := 2;
  2707. if taicpu(p).oper[2]^.typ=top_reg then
  2708. taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
  2709. else
  2710. taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
  2711. result:=true;
  2712. end
  2713. else if MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
  2714. (taicpu(p).ops = 3) and
  2715. MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
  2716. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2717. begin
  2718. DebugMsg('Peephole opXYX2opsXY done', p);
  2719. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2720. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2721. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2722. taicpu(p).oppostfix:=PF_S;
  2723. taicpu(p).ops := 2;
  2724. result:=true;
  2725. end
  2726. else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
  2727. (taicpu(p).ops=3) and
  2728. (taicpu(p).oper[2]^.typ=top_shifterop) and
  2729. (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
  2730. //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
  2731. (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2732. begin
  2733. DebugMsg('Peephole Mov2Shift done', p);
  2734. asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
  2735. asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
  2736. IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
  2737. taicpu(p).oppostfix:=PF_S;
  2738. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  2739. SM_LSL: taicpu(p).opcode:=A_LSL;
  2740. SM_LSR: taicpu(p).opcode:=A_LSR;
  2741. SM_ASR: taicpu(p).opcode:=A_ASR;
  2742. SM_ROR: taicpu(p).opcode:=A_ROR;
  2743. else
  2744. internalerror(2019050912);
  2745. end;
  2746. if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
  2747. taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
  2748. else
  2749. taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
  2750. result:=true;
  2751. end
  2752. end;
  2753. end;
  2754. begin
  2755. casmoptimizer:=TCpuAsmOptimizer;
  2756. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  2757. End.