aoptcpu.pas 98 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
  24. Type
  { ARM-specific peephole optimizer built on the generic TAsmOptimizer. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor is declared here; the inherited one is used }

    { pass 1 peephole hook, invoked with the current list item in p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): boolean; override;

    { tries to fold "<op> regA,... ; mov regB,regA" into "<op> regB,...";
      optimizer is the name printed in the debug comment }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);

    { reports whether reg is still needed after instruction p }
    function RegUsedAfterInstruction(reg: TRegister; p: tai;
      var AllUsedRegs: TAllUsedRegs): boolean;

    { Walks forward from Current and stores in Next the first item that is
      not an instruction, uses reg, uses the PC, or is a call/jump.
      Returns false when no further item could be fetched. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): boolean;

    { outputs a debug message into the assembler file }
    procedure DebugMsg(const s: string; p: tai);
  protected
    { folds "ldr/str ...,[base]" plus a later add/sub on base into a
      post-indexed access }
    function LookForPostindexedPattern(p: taicpu): boolean;
  end;
  { Instruction scheduler variant derived from TAsmScheduler.
    NOTE(review): the implementation is not visible in this part of the file;
    judging by the name it runs before register allocation - confirm. }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Optimizer variant for Thumb-2 code; overrides both peephole passes of
    TCpuAsmOptimizer. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor is declared here; the inherited one is used }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
  end;
  53. function MustBeLast(p : tai) : boolean;
  54. Implementation
  55. uses
  56. cutils,verbose,globals,
  57. systems,
  58. cpuinfo,
  59. cgobj,cgutils,procinfo,
  60. aasmbase,aasmdata;
  { Returns true when p is an instruction that currently carries no condition
    and is not one of the opcodes excluded below: the A_IT..A_ITTTT range,
    CBZ/CBNZ, PLD, and BLX whose first operand is not a register. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional }
      if instr.condition<>C_None then
        exit;
      { the IT family }
      if (instr.opcode>=A_IT) and (instr.opcode<=A_ITTTT) then
        exit;
      if (instr.opcode=A_CBZ) or
         (instr.opcode=A_CBNZ) or
         (instr.opcode=A_PLD) then
        exit;
      { BLX qualifies only in its register form }
      if instr.opcode=A_BLX then
        Result:=instr.oper[0]^.typ=top_reg
      else
        Result:=true;
    end;
  { Field-by-field comparison of two ARM memory references; true only when
    every compared field matches. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers and displacement }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) then
        exit;
      { symbolic parts }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { index scaling / shifting and addressing mode }
      Result:=
        (r1.scalefactor=r2.scalefactor) and
        (r1.signindex=r2.signindex) and
        (r1.shiftimm=r2.shiftimm) and
        (r1.shiftmode=r2.shiftmode) and
        (r1.addressmode=r2.addressmode);
    end;
  { Set-based matcher: instr must be an instruction whose opcode, condition
    and postfix are each contained in the corresponding set.  An empty set
    means "do not care" for that property. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      { only opcodes with an ordinal below 256 are tested against the set }
      if (op<>[]) and
         not((ord(ins.opcode)<256) and (ins.opcode in op)) then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      Result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Single-opcode matcher: instr must be an instruction with exactly opcode
    op; cond and postfix are sets where an empty set accepts anything. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if ins.opcode<>op then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      Result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Compares two operands.  They match when they have the same operand type
    and equal payload; only constants, registers, condition codes and
    references are compared, every other operand type yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_const:
          Result:=oper1.val=oper2.val;
        top_ref:
          Result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_conditioncode:
          Result:=oper1.cc=oper2.cc;
      end;
    end;
  { True when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Deletes movp from asml when it is an EQ-conditional instruction whose
    operand 0 register and operand 1 value equal those of the compare cmpp.
    A comment recording the removal is left in the list.
    NOTE(review): operand types are not checked here; presumably the caller
    guarantees a register in operand 0 and a constant in operand 1 of both
    instructions - confirm. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpInstr, movInstr : taicpu;
    begin
      cmpInstr:=taicpu(cmpp);
      movInstr:=taicpu(movp);
      if movInstr.condition<>C_EQ then
        exit;
      if cmpInstr.oper[0]^.reg<>movInstr.oper[0]^.reg then
        exit;
      if cmpInstr.oper[1]^.val<>movInstr.oper[1]^.val then
        exit;
      { the comment is inserted first so movp can still serve as the anchor }
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Reports whether instruction hp writes a new value into reg.
    Returns false when hp is nil or not an instruction. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr : taicpu;

    { true when o is a pre-/post-indexed reference whose base register is reg,
      i.e. the access writes the updated address back into reg }
    function WritesBackToReg(const o: TOper): boolean;
      begin
        Result:=
          (o.typ=top_ref) and
          (o.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
          (o.ref^.base=reg);
      end;

    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);

      case instr.opcode of
        { these do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { pre-/post-indexed loads and stores update their base register }
        A_STR, A_LDR:
          begin
            Result:=WritesBackToReg(instr.oper[1]^);
            { STR does not load into its first register, so the answer is final }
            if instr.opcode=A_STR then
              exit;
          end;
        { these write their first two registers (UMLAL and SMLAL also read
          them); the second one is tested here, the first one further down }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          Result:=
            (instr.oper[1]^.typ=top_reg) and
            (instr.oper[1]^.reg=reg);
        { MCR/MRC is currently not supported in FPC, otherwise A_MRC would
          need to test oper[2] for a register equal to reg }
        { loads every register contained in the register set }
        A_LDM:
          Result:=getsupreg(reg) in instr.oper[1]^.regset^;
      end;
      if Result then
        exit;

      { generic rule: operand 0 is the destination }
      case instr.oper[0]^.typ of
        top_reg:
          if instr.oper[0]^.reg=reg then
            Result:=true
          else
            { LDRD also loads the register following its first operand }
            Result:=
              (instr.opcode=A_LDR) and
              (instr.oppostfix=PF_D) and
              (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg));
        { LDM/STM might write a new value to their index register }
        top_ref:
          Result:=WritesBackToReg(instr.oper[0]^);
      end;
    end;
  { Conservative heuristic deciding whether ref is known to be 8-byte aligned.
    It can only succeed on the EABI-style targets listed below, and only for
    accesses relative to R13 or to R11 when R11 is the frame pointer. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { non-negative multiple of 8 relative to R13 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          Result:=true;
          exit;
        end;

      { when using NR_R11, it always has a value of <qword align>+4, so the
        offset has to compensate for those 4 bytes }
      Result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Reports whether instruction hp reads reg through one of its source
    operands.  Returns false when hp is nil or not an instruction. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr : taicpu;
      opIdx : longint;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);

      { operand 0 is normally skipped; for the opcodes below the scan has to
        start at operand 0 }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        opIdx:=0
      else
        opIdx:=1;

      while opIdx<instr.ops do
        begin
          case instr.oper[opIdx]^.typ of
            top_reg:
              Result:=
                (instr.oper[opIdx]^.reg=reg) or
                { STRD also stores the register following its first operand }
                ((opIdx=0) and
                 (instr.opcode=A_STR) and
                 (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              Result:=getsupreg(reg) in instr.oper[opIdx]^.regset^;
            top_shifterop:
              Result:=instr.oper[opIdx]^.shifterop^.rs=reg;
            top_ref:
              Result:=
                (instr.oper[opIdx]^.ref^.base=reg) or
                (instr.oper[opIdx]^.ref^.index=reg);
          end;
          { stop at the first operand that reads reg }
          if Result then
            exit;
          inc(opIdx);
        end;
    end;
  { Checks whether aoffset lies in the range this function accepts as a
    constant load/store offset for the selected CPU type and postfix pf:
      - CPU types in cpu_thumb2: -256 < aoffset < 4096
      - otherwise: |aoffset| < 4096 for PF_None/PF_B, |aoffset| < 256 for
        every other postfix }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if current_settings.cputype in cpu_thumb2 then
        begin
          Result:=(aoffset>-256) and (aoffset<4096);
          exit;
        end;
      if abs(aoffset)<256 then
        Result:=true
      else
        Result:=(pf in [PF_None,PF_B]) and (abs(aoffset)<4096);
    end;
  { Decides whether reg is still needed after instruction p.
    AllUsedRegs is first updated with the items following p.  reg counts as
    used only if it is marked used there, p itself does not overwrite it, and
    the next instruction either does not exist, reads reg, or does not
    overwrite reg.  The order of the checks is significant because p is
    advanced to the next instruction part-way through. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);

      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      { p replaces the value, so the old contents are dead }
      if regLoadedWithNewValue(reg,p) then
        exit;

      { from here on p denotes the following instruction }
      if not GetNextInstruction(p,p) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,p) or
        not regLoadedWithNewValue(reg,p);
    end;
  { Advances from Current through the list and stops at the first item that
    is not an instruction, uses reg, is a call/jump, or uses the PC.  That
    item is returned in Next.  The result is false when GetNextInstruction
    could not deliver a further item. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          { a change in program flow ends the search as well }
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU is
    defined; otherwise the routine is an empty inline stub. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);{$ifndef DEBUG_AOPTCPU}inline;{$endif DEBUG_AOPTCPU}
    begin
{$ifdef DEBUG_AOPTCPU}
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
{$endif DEBUG_AOPTCPU}
    end;
  { Folds
        <op>  regA, ...
        mov   regB, regA
    into
        <op>  regB, ...
    provided regA is deallocated after the mov.  p is the producing
    instruction, movp the candidate mov, optimizer the name used in the debug
    comment.  Nothing is changed when any precondition fails. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      producer,
      mover      : taicpu;
      regAlloc,
      regDealloc : tai_regalloc;
      prevItem   : tai;
    begin
      producer:=taicpu(p);
      { plain mov under the same condition as p }
      if not MatchInstruction(movp, A_MOV, [producer.condition], [PF_None]) then
        exit;
      mover:=taicpu(movp);
      { a third operand would be a shifter operand, which cannot be folded }
      if mover.ops<>2 then
        exit;
      if not MatchOperand(mover.oper[1]^, producer.oper[0]^.reg) then
        exit;
      { don't mess with moves to pc or lr }
      if (mover.oper[0]^.reg=NR_PC) or
         (mover.oper[0]^.reg=NR_R14) then
        exit;
      { the destination of the mov must not be used between p and movp }
      if RegUsedBetween(mover.oper[0]^.reg,p,movp) then
        exit;
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (producer.opcode=A_CBZ) or
         (producer.opcode=A_CBNZ) then
        exit;
      { MUL and MLA must not get identical oper[0] and oper[1] before ARMv6 }
      if (producer.opcode in [A_MLA, A_MUL]) and
         (producer.oper[1]^.reg=mover.oper[0]^.reg) and
         (current_settings.cputype<cpu_armv6) then
        exit;
      { Only instructions which REALLY load their first register qualify.
        Otherwise
          str reg0, [reg1]
          mov reg2, reg0
        would be turned into
          str reg2, [reg1] }
      if not regLoadedWithNewValue(producer.oper[0]^.reg, p) then
        exit;

      regDealloc:=FindRegDeAlloc(producer.oper[0]^.reg,tai(movp.Next));
      if not assigned(regDealloc) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);

      { the source register is no longer needed: drop its allocation together
        with the deallocation if the allocation can be found, otherwise move
        the deallocation directly behind p }
      { NOTE(review): the result of GetLastInstruction is ignored, so prevItem
        is assumed to be valid here - confirm }
      GetLastInstruction(p,prevItem);
      asml.Remove(regDealloc);
      regAlloc:=FindRegAlloc(producer.oper[0]^.reg,tai(prevItem.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          regAlloc.free;
          regDealloc.free;
        end
      else
        asml.InsertAfter(regDealloc,p);

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,prevItem);
      regAlloc:=FindRegAlloc(mover.oper[0]^.reg,tai(prevItem.Next));
      if assigned(regAlloc) then
        begin
          asml.Remove(regAlloc);
          asml.InsertBefore(regAlloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(mover.oper[0]^.reg,UsedRegs);
        end;

      { finally retarget p and get rid of the mov }
      producer.loadreg(0,mover.oper[0]^.reg);
      asml.remove(movp);
      movp.free;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p is the ldr/str.  Returns true when the add/sub has been folded into p
    (p becomes post-indexed and the add/sub is removed from the list), false
    when nothing was changed.

    Compared with a plain pattern match this routine additionally insists on
      * operand 1 of p really being a reference,
      * the add/sub having exactly three operands - a fourth operand is a
        shifter operand and folding would silently drop the shift,
      * operands 0 and 1 of the add/sub being registers before their reg
        field is read,
      * the register-only checks on operand 2 being applied only when
        operand 2 is a register and not a constant.
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
      addsub : taicpu;
      basereg : TRegister;
    begin
      Result:=false;

      { p must be a plain "[base]" access: no index, no offset, no writeback }
      if (p.oper[1]^.typ<>top_ref) or
         (p.oper[1]^.ref^.addressmode<>AM_OFFSET) or
         (p.oper[1]^.ref^.index<>NR_NO) or
         (p.oper[1]^.ref^.offset<>0) then
        exit;
      basereg:=p.oper[1]^.ref^.base;

      if not GetNextInstructionUsingReg(p, hp1, basereg) then
        exit;
      { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
      if not MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) then
        exit;
      addsub:=taicpu(hp1);

      { exactly "add/sub base,base,<x>"; four operands would mean a shifted
        register which cannot be expressed by the code below }
      if (addsub.ops<>3) or
         not MatchOperand(addsub.oper[0]^, basereg) or
         not MatchOperand(addsub.oper[1]^, basereg) then
        exit;

      { don't apply the optimization if the base register is loaded by p }
      if (p.oper[0]^.typ=top_reg) and
         (p.oper[0]^.reg=basereg) then
        exit;
      if RegModifiedBetween(basereg,p,hp1) then
        exit;

      case addsub.oper[2]^.typ of
        top_const:
          begin
            { valid offset? }
            if not(
                 (abs(addsub.oper[2]^.val)<256) or
                 ((abs(addsub.oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
               ) then
              exit;
          end;
        top_reg:
          begin
            { don't apply the optimization if the (new) index register is
              loaded by p or changed between p and the add/sub }
            if (p.oper[0]^.typ=top_reg) and
               (p.oper[0]^.reg=addsub.oper[2]^.reg) then
              exit;
            if RegModifiedBetween(addsub.oper[2]^.reg,p,hp1) then
              exit;
          end;
        else
          exit;
      end;

      DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
      p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
      if addsub.oper[2]^.typ=top_const then
        begin
          { a subtraction becomes a negative post-increment }
          if addsub.opcode=A_ADD then
            p.oper[1]^.ref^.offset:=addsub.oper[2]^.val
          else
            p.oper[1]^.ref^.offset:=-addsub.oper[2]^.val;
        end
      else
        begin
          p.oper[1]^.ref^.index:=addsub.oper[2]^.reg;
          if addsub.opcode=A_ADD then
            p.oper[1]^.ref^.signindex:=1
          else
            p.oper[1]^.ref^.signindex:=-1;
        end;
      asml.Remove(hp1);
      hp1.Free;
      Result:=true;
    end;
  407. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  408. var
  409. hp1,hp2: tai;
  410. i, i2: longint;
  411. TmpUsedRegs: TAllUsedRegs;
  412. tempop: tasmop;
  { True when at most one bit of value is set.  Note that this includes
    value = 0, for which the result is also true. }
  function IsPowerOf2(const value: DWord): boolean; inline;
    var
      lowestBitCleared : DWord;
    begin
      { value and (value-1) clears the lowest set bit of value }
      lowestBitCleared:=value and (value - 1);
      Result:=lowestBitCleared = 0;
    end;
  417. begin
  418. result := false;
  419. case p.typ of
  420. ait_instruction:
  421. begin
  422. {
  423. change
  424. <op> reg,x,y
  425. cmp reg,#0
  426. into
  427. <op>s reg,x,y
  428. }
  429. { this optimization can applied only to the currently enabled operations because
  430. the other operations do not update all flags and FPC does not track flag usage }
  431. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  432. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  433. GetNextInstruction(p, hp1) and
  434. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  435. (taicpu(hp1).oper[1]^.typ = top_const) and
  436. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  437. (taicpu(hp1).oper[1]^.val = 0) and
  438. GetNextInstruction(hp1, hp2) and
  439. { be careful here, following instructions could use other flags
  440. however after a jump fpc never depends on the value of flags }
  441. { All above instructions set Z and N according to the following
  442. Z := result = 0;
  443. N := result[31];
  444. EQ = Z=1; NE = Z=0;
  445. MI = N=1; PL = N=0; }
  446. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  447. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  448. begin
  449. DebugMsg('Peephole OpCmp2OpS done', p);
  450. taicpu(p).oppostfix:=PF_S;
  451. { move flag allocation if possible }
  452. GetLastInstruction(hp1, hp2);
  453. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  454. if assigned(hp2) then
  455. begin
  456. asml.Remove(hp2);
  457. asml.insertbefore(hp2, p);
  458. end;
  459. asml.remove(hp1);
  460. hp1.free;
  461. end
  462. else
  463. case taicpu(p).opcode of
  464. A_STR:
  465. begin
  466. { change
  467. str reg1,ref
  468. ldr reg2,ref
  469. into
  470. str reg1,ref
  471. mov reg2,reg1
  472. }
  473. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  474. (taicpu(p).oppostfix=PF_None) and
  475. GetNextInstruction(p,hp1) and
  476. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  477. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  478. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  479. begin
  480. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  481. begin
  482. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  483. asml.remove(hp1);
  484. hp1.free;
  485. end
  486. else
  487. begin
  488. taicpu(hp1).opcode:=A_MOV;
  489. taicpu(hp1).oppostfix:=PF_None;
  490. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  491. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  492. end;
  493. result := true;
  494. end
  495. { change
  496. str reg1,ref
  497. str reg2,ref
  498. into
  499. strd reg1,ref
  500. }
  501. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  502. (taicpu(p).oppostfix=PF_None) and
  503. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  504. GetNextInstruction(p,hp1) and
  505. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  506. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  507. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  508. { str ensures that either base or index contain no register, else ldr wouldn't
  509. use an offset either
  510. }
  511. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  512. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  513. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  514. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  515. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  516. begin
  517. DebugMsg('Peephole StrStr2Strd done', p);
  518. taicpu(p).oppostfix:=PF_D;
  519. asml.remove(hp1);
  520. hp1.free;
  521. end;
  522. LookForPostindexedPattern(taicpu(p));
  523. end;
  524. A_LDR:
  525. begin
  526. { change
  527. ldr reg1,ref
  528. ldr reg2,ref
  529. into ...
  530. }
  531. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  532. GetNextInstruction(p,hp1) and
  533. { ldrd is not allowed here }
  534. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  535. begin
  536. {
  537. ...
  538. ldr reg1,ref
  539. mov reg2,reg1
  540. }
  541. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  542. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  543. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  544. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  545. begin
  546. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  547. begin
  548. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  549. asml.remove(hp1);
  550. hp1.free;
  551. end
  552. else
  553. begin
  554. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  555. taicpu(hp1).opcode:=A_MOV;
  556. taicpu(hp1).oppostfix:=PF_None;
  557. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  558. end;
  559. result := true;
  560. end
  561. {
  562. ...
  563. ldrd reg1,ref
  564. }
  565. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  566. { ldrd does not allow any postfixes ... }
  567. (taicpu(p).oppostfix=PF_None) and
  568. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  569. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  570. { ldr ensures that either base or index contain no register, else ldr wouldn't
  571. use an offset either
  572. }
  573. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  574. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  575. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  576. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  577. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  578. begin
  579. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  580. taicpu(p).oppostfix:=PF_D;
  581. asml.remove(hp1);
  582. hp1.free;
  583. end;
  584. end;
  585. LookForPostindexedPattern(taicpu(p));
  586. { Remove superfluous mov after ldr
  587. changes
  588. ldr reg1, ref
  589. mov reg2, reg1
  590. to
  591. ldr reg2, ref
  592. conditions are:
  593. * no ldrd usage
  594. * reg1 must be released after mov
  595. * mov can not contain shifterops
  596. * ldr+mov have the same conditions
  597. * mov does not set flags
  598. }
  599. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  600. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  601. end;
  602. A_MOV:
  603. begin
  604. { fold
  605. mov reg1,reg0, shift imm1
  606. mov reg1,reg1, shift imm2
  607. }
  608. if (taicpu(p).ops=3) and
  609. (taicpu(p).oper[2]^.typ = top_shifterop) and
  610. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  611. getnextinstruction(p,hp1) and
  612. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  613. (taicpu(hp1).ops=3) and
  614. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  615. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  616. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  617. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  618. begin
  619. { fold
  620. mov reg1,reg0, lsl 16
  621. mov reg1,reg1, lsr 16
  622. strh reg1, ...
  623. dealloc reg1
  624. to
  625. strh reg1, ...
  626. dealloc reg1
  627. }
  628. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  629. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  630. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  631. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  632. getnextinstruction(hp1,hp2) and
  633. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  634. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  635. begin
  636. CopyUsedRegs(TmpUsedRegs);
  637. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  638. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  639. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  640. begin
  641. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  642. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  643. asml.remove(p);
  644. asml.remove(hp1);
  645. p.free;
  646. hp1.free;
  647. p:=hp2;
  648. end;
  649. ReleaseUsedRegs(TmpUsedRegs);
  650. end
  651. { fold
  652. mov reg1,reg0, shift imm1
  653. mov reg1,reg1, shift imm2
  654. to
  655. mov reg1,reg0, shift imm1+imm2
  656. }
  657. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  658. { asr makes no use after a lsr, the asr can be foled into the lsr }
  659. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  660. begin
  661. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  662. { avoid overflows }
  663. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  664. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  665. SM_ROR:
  666. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  667. SM_ASR:
  668. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  669. SM_LSR,
  670. SM_LSL:
  671. begin
  672. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  673. InsertLLItem(p.previous, p.next, hp1);
  674. p.free;
  675. p:=hp1;
  676. end;
  677. else
  678. internalerror(2008072803);
  679. end;
  680. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  681. asml.remove(hp1);
  682. hp1.free;
  683. result := true;
  684. end
  685. { fold
  686. mov reg1,reg0, shift imm1
  687. mov reg1,reg1, shift imm2
  688. mov reg1,reg1, shift imm3 ...
  689. }
  690. else if getnextinstruction(hp1,hp2) and
  691. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  692. (taicpu(hp2).ops=3) and
  693. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  694. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  695. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  696. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  697. begin
  698. { mov reg1,reg0, lsl imm1
  699. mov reg1,reg1, lsr/asr imm2
  700. mov reg1,reg1, lsl imm3 ...
  701. if imm3<=imm1 and imm2>=imm3
  702. to
  703. mov reg1,reg0, lsl imm1
  704. mov reg1,reg1, lsr/asr imm2-imm3
  705. }
  706. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  707. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  708. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  709. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  710. begin
  711. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  712. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  713. asml.remove(hp2);
  714. hp2.free;
  715. result := true;
  716. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  717. begin
  718. asml.remove(hp1);
  719. hp1.free;
  720. end;
  721. end
  722. { mov reg1,reg0, lsr/asr imm1
  723. mov reg1,reg1, lsl imm2
  724. mov reg1,reg1, lsr/asr imm3 ...
  725. if imm3>=imm1 and imm2>=imm1
  726. to
  727. mov reg1,reg0, lsl imm2-imm1
  728. mov reg1,reg1, lsr/asr imm3 ...
  729. }
  730. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  731. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  732. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  733. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  734. begin
  735. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  736. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  737. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  738. asml.remove(p);
  739. p.free;
  740. p:=hp2;
  741. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  742. begin
  743. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  744. asml.remove(hp1);
  745. hp1.free;
  746. p:=hp2;
  747. end;
  748. result := true;
  749. end;
  750. end;
  751. end;
  752. { Change the common
  753. mov r0, r0, lsr #24
  754. and r0, r0, #255
  755. and remove the superfluous and
  756. This could be extended to handle more cases.
  757. }
  758. if (taicpu(p).ops=3) and
  759. (taicpu(p).oper[2]^.typ = top_shifterop) and
  760. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  761. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  762. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  763. GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
  764. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  765. regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) and
  766. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  767. (taicpu(hp1).ops=3) and
  768. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  769. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  770. (taicpu(hp1).oper[2]^.typ = top_const) and
  771. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  772. For LSR #25 and an AndConst of 255 that whould go like this:
  773. 255 and ((2 shl (32-25))-1)
  774. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  775. LSR #25 and AndConst of 254:
  776. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  777. }
  778. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  779. begin
  780. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  781. asml.remove(hp1);
  782. hp1.free;
  783. end;
  784. {
  785. optimize
  786. mov rX, yyyy
  787. ....
  788. }
  789. if (taicpu(p).ops = 2) and
  790. GetNextInstruction(p,hp1) and
  791. (tai(hp1).typ = ait_instruction) then
  792. begin
  793. {
  794. This changes the very common
  795. mov r0, #0
  796. str r0, [...]
  797. mov r0, #0
  798. str r0, [...]
  799. and removes all superfluous mov instructions
  800. }
  801. if (taicpu(p).oper[1]^.typ = top_const) and
  802. (taicpu(hp1).opcode=A_STR) then
  803. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  804. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  805. GetNextInstruction(hp1, hp2) and
  806. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  807. (taicpu(hp2).ops = 2) and
  808. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  809. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  810. begin
  811. DebugMsg('Peephole MovStrMov done', hp2);
  812. GetNextInstruction(hp2,hp1);
  813. asml.remove(hp2);
  814. hp2.free;
  815. if not assigned(hp1) then break;
  816. end
  817. {
  818. This removes the first mov from
  819. mov rX,...
  820. mov rX,...
  821. }
  822. else if taicpu(hp1).opcode=A_MOV then
  823. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  824. (taicpu(hp1).ops = 2) and
  825. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  826. { don't remove the first mov if the second is a mov rX,rX }
  827. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  828. begin
  829. DebugMsg('Peephole MovMov done', p);
  830. asml.remove(p);
  831. p.free;
  832. p:=hp1;
  833. GetNextInstruction(hp1,hp1);
  834. if not assigned(hp1) then
  835. break;
  836. end;
  837. end;
  838. {
  839. change
  840. mov r1, r0
  841. add r1, r1, #1
  842. to
  843. add r1, r0, #1
  844. Todo: Make it work for mov+cmp too
  845. CAUTION! If this one is successful p might not be a mov instruction anymore!
  846. }
  847. if (taicpu(p).ops = 2) and
  848. (taicpu(p).oper[1]^.typ = top_reg) and
  849. (taicpu(p).oppostfix = PF_NONE) and
  850. GetNextInstruction(p, hp1) and
  851. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  852. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  853. [taicpu(p).condition], []) and
  854. {MOV and MVN might only have 2 ops}
  855. (taicpu(hp1).ops >= 2) and
  856. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  857. (taicpu(hp1).oper[1]^.typ = top_reg) and
  858. (
  859. (taicpu(hp1).ops = 2) or
  860. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
  861. ) then
  862. begin
  863. { When we get here we still don't know if the registers match}
  864. for I:=1 to 2 do
  865. {
  866. If the first loop was successful p will be replaced with hp1.
  867. The checks will still be ok, because all required information
  868. will also be in hp1 then.
  869. }
  870. if (taicpu(hp1).ops > I) and
  871. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  872. begin
  873. DebugMsg('Peephole RedundantMovProcess done', hp1);
  874. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  875. if p<>hp1 then
  876. begin
  877. asml.remove(p);
  878. p.free;
  879. p:=hp1;
  880. end;
  881. end;
  882. end;
  883. { This folds shifterops into following instructions
  884. mov r0, r1, lsl #8
  885. add r2, r3, r0
  886. to
  887. add r2, r3, r1, lsl #8
  888. CAUTION! If this one is successful p might not be a mov instruction anymore!
  889. }
  890. if (taicpu(p).opcode = A_MOV) and
  891. (taicpu(p).ops = 3) and
  892. (taicpu(p).oper[1]^.typ = top_reg) and
  893. (taicpu(p).oper[2]^.typ = top_shifterop) and
  894. (taicpu(p).oppostfix = PF_NONE) and
  895. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
  896. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  897. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  898. A_CMP, A_CMN],
  899. [taicpu(p).condition], [PF_None]) and
  900. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  901. regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) and
  902. (taicpu(hp1).ops >= 2) and
  903. {Currently we can't fold into another shifterop}
  904. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  905. {Folding rrx is problematic because of the C-Flag, as we currently can't check
  906. NR_DEFAULTFLAGS for modification}
  907. (
  908. {Everything is fine if we don't use RRX}
  909. (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
  910. (
  911. {If it is RRX, then check if we're just accessing the next instruction}
  912. GetNextInstruction(p, hp2) and
  913. (hp1 = hp2)
  914. )
  915. ) and
  916. { reg1 might not be modified inbetween }
  917. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
  918. { The shifterop can contain a register, might not be modified}
  919. (
  920. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
  921. not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
  922. ) and
  923. (
  924. {Only ONE of the two src operands is allowed to match}
  925. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  926. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  927. ) then
  928. begin
  929. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  930. I2:=0
  931. else
  932. I2:=1;
  933. for I:=I2 to taicpu(hp1).ops-1 do
  934. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  935. begin
  936. { If the parameter matched on the second op from the RIGHT
  937. we have to switch the parameters, this will not happen for CMP
  938. were we're only evaluating the most right parameter
  939. }
  940. if I <> taicpu(hp1).ops-1 then
  941. begin
  942. {The SUB operators need to be changed when we swap parameters}
  943. case taicpu(hp1).opcode of
  944. A_SUB: tempop:=A_RSB;
  945. A_SBC: tempop:=A_RSC;
  946. A_RSB: tempop:=A_SUB;
  947. A_RSC: tempop:=A_SBC;
  948. else tempop:=taicpu(hp1).opcode;
  949. end;
  950. if taicpu(hp1).ops = 3 then
  951. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  952. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  953. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  954. else
  955. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  956. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  957. taicpu(p).oper[2]^.shifterop^);
  958. end
  959. else
  960. if taicpu(hp1).ops = 3 then
  961. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  962. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  963. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  964. else
  965. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  966. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  967. taicpu(p).oper[2]^.shifterop^);
  968. asml.insertbefore(hp2, hp1);
  969. asml.remove(p);
  970. asml.remove(hp1);
  971. p.free;
  972. hp1.free;
  973. p:=hp2;
  974. GetNextInstruction(p,hp1);
  975. DebugMsg('Peephole FoldShiftProcess done', p);
  976. break;
  977. end;
  978. end;
  979. {
  980. Often we see shifts and then a superfluous mov to another register
  981. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  982. }
  983. if (taicpu(p).opcode = A_MOV) and
  984. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  985. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  986. end;
  987. A_ADD,
  988. A_ADC,
  989. A_RSB,
  990. A_RSC,
  991. A_SUB,
  992. A_SBC,
  993. A_AND,
  994. A_BIC,
  995. A_EOR,
  996. A_ORR,
  997. A_MLA,
  998. A_MUL:
  999. begin
  1000. {
  1001. optimize
  1002. and reg2,reg1,const1
  1003. ...
  1004. }
  1005. if (taicpu(p).opcode = A_AND) and
  1006. (taicpu(p).ops>2) and
  1007. (taicpu(p).oper[1]^.typ = top_reg) and
  1008. (taicpu(p).oper[2]^.typ = top_const) then
  1009. begin
  1010. {
  1011. change
  1012. and reg2,reg1,const1
  1013. and reg3,reg2,const2
  1014. to
  1015. and reg3,reg1,(const1 and const2)
  1016. }
  1017. if GetNextInstruction(p, hp1) and
  1018. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  1019. { either reg3 and reg2 are equal or reg2 is deallocated after the and }
  1020. (MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) or
  1021. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))) and
  1022. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  1023. (taicpu(hp1).oper[2]^.typ = top_const) then
  1024. begin
  1025. DebugMsg('Peephole AndAnd2And done', p);
  1026. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  1027. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  1028. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  1029. asml.remove(hp1);
  1030. hp1.free;
  1031. end
  1032. {
  1033. change
  1034. and reg2,reg1,255
  1035. strb reg2,[...]
  1036. dealloc reg2
  1037. to
  1038. strb reg1,[...]
  1039. }
  1040. else if (taicpu(p).oper[2]^.val = 255) and
  1041. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1042. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1043. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1044. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1045. { the reference in strb might not use reg2 }
  1046. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1047. { reg1 might not be modified inbetween }
  1048. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1049. begin
  1050. DebugMsg('Peephole AndStrb2Strb done', p);
  1051. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1052. asml.remove(p);
  1053. p.free;
  1054. p:=hp1;
  1055. end;
  1056. end;
  1057. {
  1058. change
  1059. add/sub reg2,reg1,const1
  1060. str/ldr reg3,[reg2,const2]
  1061. dealloc reg2
  1062. to
  1063. str/ldr reg3,[reg1,const2+/-const1]
  1064. }
  1065. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1066. (taicpu(p).ops>2) and
  1067. (taicpu(p).oper[1]^.typ = top_reg) and
  1068. (taicpu(p).oper[2]^.typ = top_const) then
  1069. begin
  1070. hp1:=p;
  1071. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1072. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1073. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1074. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1075. { don't optimize if the register is stored/overwritten }
  1076. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1077. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1078. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1079. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1080. ldr postfix }
  1081. (((taicpu(p).opcode=A_ADD) and
  1082. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1083. ) or
  1084. ((taicpu(p).opcode=A_SUB) and
  1085. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1086. )
  1087. ) do
  1088. begin
  1089. { neither reg1 nor reg2 might be changed inbetween }
  1090. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1091. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1092. break;
  1093. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1094. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1095. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1096. begin
  1097. { remember last instruction }
  1098. hp2:=hp1;
  1099. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1100. hp1:=p;
  1101. { fix all ldr/str }
  1102. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1103. begin
  1104. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1105. if taicpu(p).opcode=A_ADD then
  1106. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1107. else
  1108. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1109. if hp1=hp2 then
  1110. break;
  1111. end;
  1112. GetNextInstruction(p,hp1);
  1113. asml.remove(p);
  1114. p.free;
  1115. p:=hp1;
  1116. break;
  1117. end;
  1118. end;
  1119. end;
  1120. {
  1121. change
  1122. add reg1, ...
  1123. mov reg2, reg1
  1124. to
  1125. add reg2, ...
  1126. }
  1127. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1128. begin
  1129. if (taicpu(p).ops=3) then
  1130. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1131. end;
  1132. end;
  1133. {$ifdef dummy}
  1134. A_MVN:
  1135. begin
  1136. {
  1137. change
  1138. mvn reg2,reg1
  1139. and reg3,reg4,reg2
  1140. dealloc reg2
  1141. to
  1142. bic reg3,reg4,reg1
  1143. }
  1144. if (taicpu(p).oper[1]^.typ = top_reg) and
  1145. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1146. MatchInstruction(hp1,A_AND,[],[]) and
  1147. (((taicpu(hp1).ops=3) and
  1148. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1149. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1150. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1151. ((taicpu(hp1).ops=2) and
  1152. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1153. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1154. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1155. { reg1 might not be modified inbetween }
  1156. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1157. begin
  1158. DebugMsg('Peephole MvnAnd2Bic done', p);
  1159. taicpu(hp1).opcode:=A_BIC;
  1160. if taicpu(hp1).ops=3 then
  1161. begin
  1162. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1163. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1164. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1165. end
  1166. else
  1167. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1168. asml.remove(p);
  1169. p.free;
  1170. p:=hp1;
  1171. end;
  1172. end;
  1173. {$endif dummy}
  1174. A_UXTB:
  1175. begin
  1176. {
  1177. change
  1178. uxtb reg2,reg1
  1179. strb reg2,[...]
  1180. dealloc reg2
  1181. to
  1182. strb reg1,[...]
  1183. }
  1184. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1185. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1186. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1187. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1188. { the reference in strb might not use reg2 }
  1189. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1190. { reg1 might not be modified inbetween }
  1191. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1192. begin
  1193. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1194. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1195. asml.remove(p);
  1196. p.free;
  1197. p:=hp1;
  1198. end
  1199. {
  1200. change
  1201. uxtb reg2,reg1
  1202. uxth reg3,reg2
  1203. dealloc reg2
  1204. to
  1205. uxtb reg3,reg1
  1206. }
  1207. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1208. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1209. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1210. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1211. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1212. { reg1 might not be modified inbetween }
  1213. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1214. begin
  1215. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1216. taicpu(hp1).opcode:=A_UXTB;
  1217. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1218. asml.remove(p);
  1219. p.free;
  1220. p:=hp1;
  1221. end
  1222. {
  1223. change
  1224. uxtb reg2,reg1
  1225. uxtb reg3,reg2
  1226. dealloc reg2
  1227. to
  1228. uxtb reg3,reg1
  1229. }
  1230. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1231. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1232. MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
  1233. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1234. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1235. { reg1 might not be modified inbetween }
  1236. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1237. begin
  1238. DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
  1239. taicpu(hp1).opcode:=A_UXTB;
  1240. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1241. asml.remove(p);
  1242. p.free;
  1243. p:=hp1;
  1244. end
  1245. {
  1246. change
  1247. uxth reg2,reg1
  1248. uxth reg3,reg2
  1249. dealloc reg2
  1250. to
  1251. uxth reg3,reg1
  1252. }
  1253. else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
  1254. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1255. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1256. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1257. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1258. { reg1 might not be modified inbetween }
  1259. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1260. begin
  1261. DebugMsg('Peephole UxthUxth2Uxth done', p);
  1262. taicpu(hp1).opcode:=A_UXTH;
  1263. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1264. asml.remove(p);
  1265. p.free;
  1266. p:=hp1;
  1267. end;
  1268. end;
  1269. A_UXTH:
  1270. begin
  1271. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1272. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1273. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1274. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1275. { the reference in strb might not use reg2 }
  1276. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1277. { reg1 might not be modified inbetween }
  1278. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1279. begin
  1280. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1281. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1282. asml.remove(p);
  1283. p.free;
  1284. p:=hp1;
  1285. end;
  1286. end;
  1287. A_CMP:
  1288. begin
  1289. {
  1290. change
  1291. cmp reg,const1
  1292. moveq reg,const1
  1293. movne reg,const2
  1294. to
  1295. cmp reg,const1
  1296. movne reg,const2
  1297. }
  1298. if (taicpu(p).oper[1]^.typ = top_const) and
  1299. GetNextInstruction(p, hp1) and
  1300. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1301. (taicpu(hp1).oper[1]^.typ = top_const) and
  1302. GetNextInstruction(hp1, hp2) and
  1303. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1304. (taicpu(hp1).oper[1]^.typ = top_const) then
  1305. begin
  1306. RemoveRedundantMove(p, hp1, asml);
  1307. RemoveRedundantMove(p, hp2, asml);
  1308. end;
  1309. end;
  1310. end;
  1311. end;
  1312. end;
  1313. end;
  { Instructions modifying the CPSR can be only the last instruction of a
    conditionally executed sequence.

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is one of BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF, CMFE
        (MSR is deliberately left out for now),
      - its first operand is the register PC,
      - it carries the S postfix.
    Returns false for anything that is not an instruction. }
  function MustBeLast(p : tai) : boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      { labels, directives etc. never qualify }
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);
      if insn.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (insn.ops>=1) and
        (insn.oper[0]^.typ=top_reg) and
        (insn.oper[0]^.reg=NR_PC) then
        { first operand is PC }
        Result:=true
      else
        Result:=insn.oppostfix=PF_S;
    end;
  { Second peephole pass over the current block (BlockStart..BlockEnd).

    Only conditional branches are examined.  Two shapes are rewritten into
    conditionally executed code:

      1)   Bcc xxx
           <1..4 instructions>
         xxx:
       The instructions get the inverse condition and the Bcc is removed.

      2)   Bcc xxx
           <at most 3 instructions (1)>
           B   yyy
         xxx:
           <instructions (2)>
         yyy:
       Block (1) gets the inverse condition, block (2) the original
       condition, and both the Bcc and the B are removed.  This shape is
       only tried when xxx has exactly two references and block (1) holds no
       instruction for which MustBeLast is true. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      cur,scan : tai;
      bcc,jmp : tai;
      count : longint;
      cond : tasmcond;
      sawLast : boolean;
    begin
      cur:=BlockStart;
      while cur<>BlockEnd do
        begin
          { everything except a conditional branch is simply stepped over }
          if (cur.typ<>ait_instruction) or
            (taicpu(cur).opcode<>A_B) or
            (taicpu(cur).condition=C_None) then
            begin
              cur:=tai(cur.next);
              continue;
            end;

          { check for
              Bxx xxx
              <several instructions>
            xxx:
            and count the instructions that could be made conditional }
          count:=0;
          sawLast:=false;
          GetNextInstruction(cur,scan);
          while assigned(scan) and
            (count<=4) and
            CanBeCond(scan) and
            { stop on labels }
            (scan.typ<>ait_label) do
            begin
              inc(count);
              { evaluate before scan is advanced }
              sawLast:=MustBeLast(scan);
              GetNextInstruction(scan,scan);
              if sawLast then
                break;
            end;

          if assigned(scan) then
            begin
              if FindLabel(tasmlabel(taicpu(cur).oper[0]^.ref^.symbol),scan) then
                begin
                  if (count>0) and (count<=4) then
                    begin
                      cond:=inverse_cond(taicpu(cur).condition);
                      bcc:=cur;
                      GetNextInstruction(cur,scan);
                      { processing resumes at the first skipped instruction }
                      cur:=scan;
                      repeat
                        if scan.typ=ait_instruction then
                          taicpu(scan).condition:=cond;
                        sawLast:=MustBeLast(scan);
                        GetNextInstruction(scan,scan);
                        if sawLast then
                          break;
                      until not assigned(scan) or
                        not CanBeCond(scan) or
                        (scan.typ=ait_label);
                      { wait with removing else GetNextInstruction could
                        ignore the label if it was the only usage in the
                        jump moved away }
                      tasmlabel(taicpu(bcc).oper[0]^.ref^.symbol).decrefs;
                      asml.remove(bcc);
                      bcc.free;
                      continue;
                    end;
                end
              else
                { do not perform further optimizations if there is an
                  instruction in block #1 which can not be optimized }
                if not sawLast then
                  begin
                    { check further for
                        Bcc xxx
                        <several instructions 1>
                        B   yyy
                      xxx:
                        <several instructions 2>
                      yyy:
                    }
                    { jmp is the candidate for "B yyy" }
                    jmp:=scan;
                    { move scan on towards xxx }
                    GetNextInstruction(scan,scan);
                    if assigned(jmp) and
                      assigned(scan) and
                      (count<=3) and
                      (jmp.typ=ait_instruction) and
                      taicpu(jmp).is_jmp and
                      (taicpu(jmp).condition=C_None) and
                      { real label and jump, no further references to the
                        label are allowed }
                      (tasmlabel(taicpu(cur).oper[0]^.ref^.symbol).getrefs=2) and
                      FindLabel(tasmlabel(taicpu(cur).oper[0]^.ref^.symbol),scan) then
                      begin
                        count:=0;
                        { move scan into <several instructions 2> }
                        GetNextInstruction(scan,scan);
                        while assigned(scan) and
                          CanBeCond(scan) do
                          begin
                            inc(count);
                            GetNextInstruction(scan,scan);
                          end;
                        { scan is expected at yyy: now }
                        if assigned(scan) and
                          FindLabel(tasmlabel(taicpu(jmp).oper[0]^.ref^.symbol),scan) then
                          begin
                            cond:=inverse_cond(taicpu(cur).condition);
                            bcc:=cur;
                            GetNextInstruction(cur,scan);
                            { processing resumes at the first instruction of block 1 }
                            cur:=scan;
                            { block 1 runs under the inverse condition }
                            repeat
                              if scan.typ=ait_instruction then
                                taicpu(scan).condition:=cond;
                              GetNextInstruction(scan,scan);
                            until not assigned(scan) or
                              not CanBeCond(scan);
                            { block 2 runs under the original condition }
                            cond:=inverse_cond(cond);
                            { jmp is still at "B yyy": step from there to what
                              follows it, then once more into block 2 }
                            GetNextInstruction(jmp,scan);
                            GetNextInstruction(scan,scan);
                            { NOTE(review): unlike the loops above, scan is
                              cast to taicpu here without a typ check }
                            repeat
                              taicpu(scan).condition:=cond;
                              GetNextInstruction(scan,scan);
                            until not assigned(scan) or
                              not CanBeCond(scan) or
                              (scan.typ=ait_label);
                            { remove Bcc }
                            tasmlabel(taicpu(bcc).oper[0]^.ref^.symbol).decrefs;
                            asml.remove(bcc);
                            bcc.free;
                            { remove jmp }
                            tasmlabel(taicpu(jmp).oper[0]^.ref^.symbol).decrefs;
                            asml.remove(jmp);
                            jmp.free;
                            continue;
                          end;
                      end;
                  end;
            end;

          cur:=tai(cur.next);
        end;
    end;
  { Reports whether Reg is referenced by p1.  A BL instruction is always
    reported as referencing the register; everything else is decided by the
    inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ<>ait_instruction) or (taicpu(p1).opcode<>A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        Result:=true;
    end;
  const
    { Opcodes which might write, or do write, to memory. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { B*/BKPT opcodes }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { STR* / STM opcodes }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { STF/SFM/FST* opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD
    ];
  { Adjusts the register live information when swapping the two instructions
    p and hp1; they must follow one after the other (p directly before hp1).

    For every register referenced by p (plain register operands, reference
    base/index, shifter register, register sets) a live range starting at p
    is moved to hp1 if hp1 references the register as well.  For every
    register referenced by hp1 a live range ending at hp1 is moved to p if p
    references the register as well. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { If the live range of reg ends at hp1 and p references reg too, the
      range ends at p once the instructions are swapped.  NR_NO is ignored. }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;


    { If the live range of reg starts at p and hp1 references reg too, the
      range starts at hp1 once the instructions are swapped.  NR_NO is
      ignored. }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { the shifter register of hp1 needs its live END adjusted like
              every other register referenced by hp1; checking the live
              start here left the end of its range pointing at hp1 }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Scheduling pass run before register allocation over BlockStart..BlockEnd.

    It looks for the sequence
        p    any instruction
        hp1  ldr/ldrb/ldrh/ldrsb/ldrsh
        hp2  instruction using the register loaded by hp1
    and, if all checks in the condition below hold, moves p (together with
    the register deallocations belonging to it) between hp1 and hp2.

    p is overwritten and used as the iteration cursor.
    The result is always true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      hp1,hp2,hp3,hp4,hp5 : tai;
      list : TAsmList;
    begin
      result:=true;

      { list temporarily holds p and its deallocations while they are moved;
        try..finally makes sure it is released on every exit path }
      list:=TAsmList.Create;
      try
        p:=BlockStart;
        while p<>BlockEnd Do
          begin
            if (p.typ=ait_instruction) and
              GetNextInstruction(p,hp1) and
              (hp1.typ=ait_instruction) and
              (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
              { for now we don't reschedule if the previous instruction changes potentially a memory location }
              ( (not(taicpu(p).opcode in opcode_could_mem_write) and
                 not(RegModifiedByInstruction(NR_PC,p))
                ) or
                ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
                 ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
                  (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
                   (taicpu(hp1).oper[1]^.ref^.offset=0)
                  )
                 ) or
                 { try to prove that the memory accesses don't overlap }
                 ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                  (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
                  (taicpu(p).oppostfix=PF_None) and
                  (taicpu(hp1).oppostfix=PF_None) and
                  (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                  (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
                  { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                  (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
                 )
                )
              ) and
              GetNextInstruction(hp1,hp2) and
              (hp2.typ=ait_instruction) and
              { loaded register used by next instruction? }
              (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
              { loaded register not used by previous instruction? }
              not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
              { same condition? }
              (taicpu(p).condition=taicpu(hp1).condition) and
              { first instruction might not change the register used as base }
              ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
               not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
              ) and
              { first instruction might not change the register used as index }
              ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
               not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
              ) then
              begin
                { remember the neighbours of p before it is unlinked }
                hp3:=tai(p.Previous);
                hp5:=tai(p.next);
                asml.Remove(p);
                { if there is a reg. dealloc instruction associated with p, move it together with p }

                { before the instruction? }
                while assigned(hp3) and (hp3.typ<>ait_instruction) do
                  begin
                    if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
                      RegInInstruction(tai_regalloc(hp3).reg,p) then
                      begin
                        { step on first, hp4 is unlinked from asml below }
                        hp4:=hp3;
                        hp3:=tai(hp3.Previous);
                        asml.Remove(hp4);
                        list.Concat(hp4);
                      end
                    else
                      hp3:=tai(hp3.Previous);
                  end;

                list.Concat(p);
                SwapRegLive(taicpu(p),taicpu(hp1));

                { after the instruction? }
                while assigned(hp5) and (hp5.typ<>ait_instruction) do
                  begin
                    if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
                      RegInInstruction(tai_regalloc(hp5).reg,p) then
                      begin
                        hp4:=hp5;
                        hp5:=tai(hp5.next);
                        asml.Remove(hp4);
                        list.Concat(hp4);
                      end
                    else
                      hp5:=tai(hp5.Next);
                  end;

                { re-insert in the new order: hp1, then p with its deallocs, then hp2 }
                asml.Remove(hp1);
{$ifdef DEBUG_PREREGSCHEDULER}
                asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
{$endif DEBUG_PREREGSCHEDULER}
                asml.InsertBefore(hp1,hp2);
                asml.InsertListBefore(hp2,list);
                p:=tai(p.next)
              end
            else if p.typ=ait_instruction then
              p:=hp1
            else
              p:=tai(p.next);
          end;
      finally
        list.Free;
      end;
    end;
  { Shorten the IT block that ends at p by one slot.

    list : assembler list containing p
    p    : instruction which is going to leave the IT block

    Walks backwards from p over at most four instructions (entries which are
    not instructions are skipped without being counted).  l is the distance
    in instructions between p and the entry under inspection.  If an
    IT..ITTTT opcode is found and the number of slots it covers equals l,
    i.e. p occupies its last slot, the opcode is replaced by the one without
    the last slot; an IT covering only p is removed from list and freed.
    In any other case the IT instruction is left untouched.  The search stops
    at the first IT opcode found. }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;
      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    begin
                      { nothing is left in the block: drop the IT itself and
                        release it, Remove only unlinks it from the list.
                        hp is not touched anymore, the loop is left below }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      { two slots -> one slot }
                      A_ITE:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      { three slots -> two slots }
                      A_ITEE:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      { four slots -> three slots }
                      A_ITEEE:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                      (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}
              inc(l);
            end;
          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific peephole optimizations (pass 1) for the entry at p.

    Rewrites done here, all restricted to the operand patterns tested below:
      - stm/str which store below the stack pointer       -> push
      - ldm/ldr which load from the stack pointer          -> pop
      - mov/mvn/add/sub/and/orr/eor/bic/lsl/lsr/asr/ror    -> flag setting (S)
        form, mostly with two operands, if the flags are not in use
      - add rd,rd,rm                                       -> add rd,rm
      - mov rd,rd,<shift>                                  -> lsls/lsrs/asrs/rors
      - and with 255 resp. 65535                           -> uxtb resp. uxth
    NOTE(review): the S and two operand forms are presumably chosen because
    they have 16 bit encodings - the code itself does not state the reason.

    p may be replaced by a new instruction; the replaced one is freed.
    Returns true if one of the rewrites was applied, else the result of the
    inherited method. }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      { only referenced by the disabled code at the end of the if chain }
      hp1,hp2 : tai;
    begin
      { stm with writeback below the stack pointer -> push }
      if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced instruction like the str/ldm/ldr cases below
            do, it was leaked here before }
          p.Free;
          p:=hp;
          result:=true;
        end
      { str rX,[sp,#-4]! -> push rX }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { ldm from the stack pointer -> pop }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { ldr rX,[sp],#4 -> pop rX }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { mov rd,#0..255 -> movs, the flags are allocated around p }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val >= 0) and
        (taicpu(p).oper[1]^.val < 256) and
        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { mvn rd,rm -> mvns }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { add/sub rd,rd,#0..255 (rd<>sp) -> adds/subs rd,#imm }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val >= 0) and
        (taicpu(p).oper[2]^.val < 256) and
        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end
      { add rd,rd,rm -> add rd,rm; the flags are not touched here }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ=top_reg) then
        begin
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          result:=true;
        end
      { <op> rd,rd,rm -> <op>s rd,rm }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ=top_reg) and
        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { and/orr/eor rd,rn,rd -> <op>s rd,rn; operand 1 stays in place }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end
      { mov rd,rd,<shift> rs/#imm -> <shift>s rd,rs/#imm }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
        (taicpu(p).ops=3) and
        (taicpu(p).oper[2]^.typ=top_shifterop) and
        (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
        begin
          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          { operand 2 is still read below although ops has been reduced }
          if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
            taicpu(p).loadreg(1, taicpu(p).oper[2]^.shifterop^.rs)
          else
            taicpu(p).loadconst(1, taicpu(p).oper[2]^.shifterop^.shiftimm);
          case taicpu(p).oper[2]^.shifterop^.shiftmode of
            SM_LSL: taicpu(p).opcode:=A_LSL;
            SM_LSR: taicpu(p).opcode:=A_LSR;
            SM_ASR: taicpu(p).opcode:=A_ASR;
            SM_ROR: taicpu(p).opcode:=A_ROR;
          end;
          result:=true;
        end
      { and rd,#255 -> uxtb rd,rd    and rd,#65535 -> uxth rd,rd }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          result := true;
        end
      { and rd,rn,#255 -> uxtb rd,rn    and rd,rn,#65535 -> uxth rd,rn }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          taicpu(p).ops:=2;
          result := true;
        end
      {
        Turn
          mul reg0, z,w
          sub/add x, y, reg0
          dealloc reg0
        into
          mls/mla x,y,z,w
      }
      {
      According to Jeppe Johansen this currently uses operands in the wrong order.
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
        (taicpu(p).ops=3) and
        (taicpu(p).oper[0]^.typ = top_reg) and
        (taicpu(p).oper[1]^.typ = top_reg) and
        (taicpu(p).oper[2]^.typ = top_reg) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
        (((taicpu(hp1).ops=3) and
          (taicpu(hp1).oper[2]^.typ=top_reg) and
          (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
           (MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).opcode=A_ADD)))) or
         ((taicpu(hp1).ops=2) and
          (taicpu(hp1).oper[1]^.typ=top_reg) and
          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then
        begin
          if taicpu(hp1).opcode=A_ADD then
            begin
              taicpu(hp1).opcode:=A_MLA;
              if taicpu(hp1).ops=3 then
                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
                  taicpu(hp1).loadreg(1,taicpu(hp1).oper[2]^.reg);
              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
              DebugMsg('MulAdd2MLA done', p);
              taicpu(hp1).ops:=4;
              asml.remove(p);
              p.free;
              p:=hp1;
            end
          else
            begin
              taicpu(hp1).opcode:=A_MLS;
              if taicpu(hp1).ops=2 then
                taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
              DebugMsg('MulSub2MLS done', p);
              taicpu(hp1).ops:=4;
              asml.remove(p);
              p.free;
              p:=hp1;
            end;
          result:=true;
        end
      }
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
          taicpu(hp2).is_jmp := true;
          asml.InsertAfter(hp2, hp1);
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;
          p := hp2;
          result := true;
        end}
      else
        Result := inherited PeepHoleOptPass1Cpu(p);
    end;
  { Second peephole pass for Thumb-2, run over BlockStart..BlockEnd.

    Replaces a conditional branch which only jumps over one to four
    instructions
        Bxx xxx
        <several instructions>
      xxx:
    by an IT/ITT/ITTT/ITTTT instruction with the inverse condition and puts
    that condition on the instructions which were jumped over. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    const
      { IT opcode for a block of 1..4 instructions executed under one condition }
      it_opcode : array[1..4] of tasmop = (A_IT,A_ITT,A_ITTT,A_ITTTT);
    var
      p,hp1,hp2 : tai;
      l : longint;
      condition : tasmcond;
      islast : boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          if (p.typ=ait_instruction) and
            (taicpu(p).opcode=A_B) and
            (taicpu(p).condition<>C_None) then
            begin
              { count in l the instructions behind the branch which could be
                made conditional; labels stop the scan }
              l:=0;
              GetNextInstruction(p, hp1);
              while assigned(hp1) and
                (l<=4) and
                CanBeCond(hp1) and
                not(hp1.typ=ait_label) do
                begin
                  inc(l);
                  { ask before hp1 is advanced; an instruction which must be
                    last still counts but ends the scan }
                  islast:=MustBeLast(hp1);
                  GetNextInstruction(hp1,hp1);
                  if islast then
                    break;
                end;

              { the scan must have ended at the branch target and have found
                one to four instructions }
              if assigned(hp1) and
                FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) and
                (l<=4) and (l>0) then
                begin
                  condition:=inverse_cond(taicpu(p).condition);
                  hp2:=p;
                  GetNextInstruction(p,hp1);
                  p:=hp1;
                  { put the inverse condition on the skipped instructions }
                  repeat
                    if hp1.typ=ait_instruction then
                      taicpu(hp1).condition:=condition;
                    islast:=MustBeLast(hp1);
                    GetNextInstruction(hp1,hp1);
                    if islast then
                      break;
                  until not(assigned(hp1)) or
                    not(CanBeCond(hp1)) or
                    (hp1.typ=ait_label);
                  { wait with removing else GetNextInstruction could
                    ignore the label if it was the only usage in the
                    jump moved away }
                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
                  { the branch leaves the IT block it may have been part of }
                  DecrementPreceedingIT(asml, hp2);
                  asml.InsertAfter(taicpu.op_cond(it_opcode[l],condition), hp2);
                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(hp2);
                  hp2.free;
                  { p already points to the first instruction of the new block }
                  continue;
                end;
            end;
          p := tai(p.next)
        end;
    end;
  { unit initialization: store the classes implemented in this unit in the
    class reference variables casmoptimizer and cpreregallocscheduler }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
  end.