aoptcpu.pas 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
  24. Type
  { ARM peephole optimizer: adds the ARM specific passes and helper routines
    on top of TAsmOptimizer. No constructor is declared here, so the inherited
    one is used. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { first peephole pass, invoked per list entry p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): boolean; override;
    { folds "<p> regA,...; mov regB,regA" into "<p> regB,..." when regA is
      released after the mov; optimizer is only used for the debug message }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    { tells whether reg still holds a needed value behind instruction p;
      AllUsedRegs is advanced past p as a side effect }
    function RegUsedAfterInstruction(reg: TRegister; p: tai;
      var AllUsedRegs: TAllUsedRegs): boolean;
    { Walks forward from Current and stops at the first entry that
        - is not an instruction, or
        - references reg or the program counter, or
        - is a call/jump.
      That entry is returned in Next and the result is true. When the list
      runs out first, the result is false. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): boolean;
    { emits s as a comment in front of p in the assembler list when
      DEBUG_AOPTCPU is defined, does nothing otherwise }
    procedure DebugMsg(const s: string; p: tai);
  protected
    { tries to merge a following add/sub on the base register into a
      post-indexed ldr/str }
    function LookForPostindexedPattern(p: taicpu): boolean;
  end;
  { ARM instruction scheduler derived from TAsmScheduler; judging by its name
    it runs before register allocation (implementation not shown here) }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Thumb-2 flavour of the ARM peephole optimizer: overrides both peephole
    passes of TCpuAsmOptimizer and keeps the inherited constructor }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
  end;
  53. Implementation
  54. uses
  55. cutils,verbose,globals,
  56. systems,
  57. cpuinfo,
  58. cgobj,cgutils,procinfo,
  59. aasmbase,aasmdata;
  { Returns true when p is an instruction that does not carry a condition yet
    and is not one of the opcodes excluded below (IT..ITTTT, CBZ, CBNZ, PLD,
    and BLX unless its first operand is a register). }
  function CanBeCond(p : tai) : boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      ins:=taicpu(p);
      { already conditional }
      if ins.condition<>C_None then
        exit;
      { the IT family itself }
      if (ins.opcode>=A_IT) and (ins.opcode<=A_ITTTT) then
        exit;
      if (ins.opcode=A_CBZ) or (ins.opcode=A_CBNZ) or (ins.opcode=A_PLD) then
        exit;
      { BLX qualifies only in its register form }
      Result:=(ins.opcode<>A_BLX) or (ins.oper[0]^.typ=top_reg);
    end;
  { Field-wise comparison of two references. Every field listed here has to
    match for the references to be considered equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { registers, scaling and sign of the index }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) or
         (r1.signindex<>r2.signindex) then
        exit;
      { displacement and symbols }
      if (r1.offset<>r2.offset) or
         (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { addressing mode and shift }
      if (r1.addressmode<>r2.addressmode) or
         (r1.shiftmode<>r2.shiftmode) or
         (r1.shiftimm<>r2.shiftimm) then
        exit;
      Result:=true;
    end;
  { Set based matcher: instr has to be an instruction whose opcode, condition
    and postfix are members of the given sets. An empty set acts as a wildcard
    for that property. Opcodes with an ordinal of 256 or more never match a
    non-empty opcode set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (op<>[]) and
         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Single opcode matcher: instr has to be an instruction with exactly opcode
    op, and its condition and postfix have to be members of the given sets
    (an empty set accepts anything). }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands: they match when they are of the same kind and, for
    constants, registers, condition codes and references, carry the same
    payload. Operands of any other kind never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_reg:
            Result:=oper1.reg=oper2.reg;
          top_const:
            Result:=oper1.val=oper2.val;
          top_ref:
            Result:=RefsEqual(oper1.ref^,oper2.ref^);
          top_conditioncode:
            Result:=oper1.cc=oper2.cc;
          else
            Result:=false;
        end;
    end;
  { True when oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Removes movp from asml when it is a "moveq reg,#const" that follows the
    comparison "cmp reg,#const" in cmpp: in the EQ case reg already holds the
    constant, so the move has no effect.

    cmpp : the compare instruction (expected to be a taicpu)
    movp : the candidate move; it is removed and freed when redundant
    asml : list both instructions live in

    The operand kinds are verified before .reg/.val are read, because these
    fields are only meaningful for register and constant operands
    respectively; comparing them on other operand kinds could match by
    accident and delete a move that is needed. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    begin
      if (taicpu(movp).condition = C_EQ) and
         (taicpu(cmpp).oper[0]^.typ = top_reg) and
         (taicpu(movp).oper[0]^.typ = top_reg) and
         (taicpu(cmpp).oper[1]^.typ = top_const) and
         (taicpu(movp).oper[1]^.typ = top_const) and
         (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
         (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
        begin
          { leave a trace in the assembler output before dropping the move }
          asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
          asml.remove(movp);
          movp.free;
        end;
    end;
  { Returns true when instruction hp writes a new value into reg.

    reg : register to look for
    hp  : list entry to inspect; nil or a non-instruction yields false

    Handled explicitly:
      - compares, branches, SWI, MSR and PLD never write a register
      - LDR/STR with pre-/post-indexed addressing update their base register
      - STR never writes its first operand (it is the value being stored)
      - UMLAL/UMULL/SMLAL/SMULL also write their second operand
      - LDM writes every register of its register set
    For everything else the first operand is taken as the destination. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ = top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR only reads its first register; without this exit the generic
              oper[0] test below would report the stored register as written }
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 registers, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor:
          MCR/MRC is currently not supported in FPC
          A_MRC:
            regLoadedWithNewValue :=
              (p.oper[2]^.typ = top_reg) and
              (p.oper[2]^.reg = reg);
        }
        { Loads to all registers in the register set }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      { nothing more to check when already found, or when there is no
        operand 0 to look at (oper[0] is presumably unset then) }
      if regLoadedWithNewValue or (p.ops = 0) then
        exit;

      case p.oper[0]^.typ of
        { the common case: operand 0 is the destination register }
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following oper[0] }
            ((p.opcode = A_LDR) and (p.oppostfix = PF_D) and
             (getsupreg(p.oper[0]^.reg)+1 = getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { Conservative check whether ref is known to be aligned to 8 bytes.
    Only answers true on the EABI style ABIs listed below and only for
      - stack pointer (r13) based references with a non-negative offset that
        is a multiple of 8, or
      - r11 based references with a non-positive offset, provided r11 is the
        frame pointer of the current procedure. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { relative to the stack pointer }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          Result:=true;
          exit;
        end;

      { relative to the frame pointer: when using NR_R11, it has always a
        value of <qword align>+4 }
      Result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true when instruction hp has reg among the operands that are
    scanned as inputs. Operand 0 is skipped except for the opcodes listed
    below. nil or a non-instruction yields false. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      first, idx: longint;
    begin
      Result:=false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);

      { for these instructions the scan has to start at oper[0] }
      if p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                      A_CMP, A_CMN, A_TST, A_TEQ,
                      A_B, A_BL, A_BX, A_BLX,
                      A_SMLAL, A_UMLAL] then
        first:=0
      else
        first:=1;

      for idx:=first to p.ops-1 do
        begin
          case p.oper[idx]^.typ of
            top_ref:
              Result:=
                (p.oper[idx]^.ref^.base = reg) or
                (p.oper[idx]^.ref^.index = reg);
            top_shifterop:
              Result:=p.oper[idx]^.shifterop^.rs = reg;
            top_regset:
              Result:=getsupreg(reg) in p.oper[idx]^.regset^;
            top_reg:
              Result:=
                (p.oper[idx]^.reg = reg) or
                { STRD also stores the register following oper[0] }
                ((idx=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and
                 (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
          end;
          { stop at the first hit }
          if Result then
            exit;
        end;
    end;
  { Checks whether aoffset is accepted as constant offset of a load/store
    with postfix pf:
      - Thumb-2 targets: -256 < aoffset < 4096
      - otherwise: |aoffset| < 4096 for the PF_None/PF_B forms and
        |aoffset| < 256 for all other postfixes }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if current_settings.cputype in cpu_thumb2 then
        Result:=(aoffset>-256) and (aoffset<4096)
      else if pf in [PF_None,PF_B] then
        Result:=abs(aoffset)<4096
      else
        Result:=abs(aoffset)<256;
    end;
  { Tells whether reg is still in use behind instruction p.
    AllUsedRegs is first advanced over the entries following p. The answer is
    false when the register is not marked as used there, or when p itself
    overwrites it. Otherwise it is true unless the next instruction overwrites
    reg without reading it. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      Result:=false;
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      { p replaces the value, so the old one is dead }
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p denotes the following instruction, if there is one }
      if not GetNextInstruction(p,p) then
        Result:=true
      else
        Result:=instructionLoadsFromReg(reg,p) or
                not regLoadedWithNewValue(reg,p);
    end;
  { Steps forward from Current with GetNextInstruction until an entry is
    found that is not an instruction, references reg or the program counter,
    or is a call/jump. That entry is handed back in Next with result true;
    when GetNextInstruction fails first, the result is false. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while GetNextInstruction(Next,Next) do
        if (Next.typ<>ait_instruction) or
           RegInInstruction(reg,Next) or
           is_calljmp(taicpu(Next).opcode) or
           RegInInstruction(NR_PC,Next) then
          begin
            Result:=true;
            exit;
          end;
      Result:=false;
    end;
  { DebugMsg writes s as a comment in front of p into the assembler list.
    Without DEBUG_AOPTCPU it is an empty, inlined stub. }
{$ifndef DEBUG_AOPTCPU}
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$else DEBUG_AOPTCPU}
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$endif DEBUG_AOPTCPU}
  { Folds
      <op>  regA,...      (p)
      mov   regB,regA     (movp)
    into
      <op>  regB,...
    provided regA is deallocated after the mov.

    p         : instruction writing regA in oper[0]
    movp      : candidate mov; removed and freed when the fold is done
    optimizer : name of the calling optimization, used in the debug message

    The result of GetLastInstruction is checked before the returned entry is
    dereferenced; when there is no preceding instruction the register
    allocation is treated as not found instead of accessing hp1.Next. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         (taicpu(movp).ops=2) and { we can't optimize if there is a shifterop }
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         { cb[n]z are thumb instructions which require specific registers, with no wide forms }
         (taicpu(p).opcode<>A_CBZ) and
         (taicpu(p).opcode<>A_CBNZ) and
         { special requirement for MUL and MLA: oper[0] and oper[1] are not allowed to be the same }
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
         ) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);

              { taicpu(p).oper[0]^.reg is not used anymore, try to find its
                allocation and remove it if possible }
              if not GetLastInstruction(p,hp1) then
                hp1:=nil;
              asml.Remove(dealloc);
              if assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { allocation not found: keep the deallocation, directly behind p }
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register in front of p }
              if not GetLastInstruction(movp,hp1) then
                hp1:=nil;
              if assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  {
    optimize
      ldr/str    regX,[reg1]
      ...
      add/sub    reg1,reg1,regY/const
    into
      ldr/str    regX,[reg1], regY/const

    p is the ldr/str. Returns true when the add/sub was merged into p (p then
    uses post-indexed addressing and the add/sub has been removed and freed),
    false when nothing was changed.

    Preconditions checked here:
      - oper[1] of p is a plain [reg1] reference (no index, offset 0)
      - the add/sub is unconditional, does not set flags and has exactly
        three operands; a shifted register operand cannot be folded
      - a constant fits the offset range of the load/store form
      - neither reg1 nor regY is loaded by p or modified in between
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (p.oper[1]^.typ=top_ref) and
         (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
         { only the plain three operand form: an additional shifter operand
           would be lost when folding }
         (taicpu(hp1).ops=3) and
         (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
         (taicpu(hp1).oper[1]^.typ=top_reg) and
         (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
         (
          (taicpu(hp1).oper[2]^.typ=top_reg) or
          { valid offset? }
          ((taicpu(hp1).oper[2]^.typ=top_const) and
           ((abs(taicpu(hp1).oper[2]^.val)<256) or
            ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
           )
          )
         ) and
         { don't apply the optimization if the base register is loaded }
         (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
         { don't apply the optimization if the (new) index register is loaded;
           only relevant, and only valid to read, when oper[2] is a register }
         (
          (taicpu(hp1).oper[2]^.typ<>top_reg) or
          ((p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
           not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)))
         ) then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              { constant: becomes the (signed) post-index offset }
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              { register: becomes the index, add/sub selects its sign }
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  393. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  394. var
  395. hp1,hp2: tai;
  396. i, i2: longint;
  397. TmpUsedRegs: TAllUsedRegs;
  398. tempop: tasmop;
  399. function IsPowerOf2(const value: DWord): boolean; inline;
  400. begin
  401. Result:=(value and (value - 1)) = 0;
  402. end;
  403. begin
  404. result := false;
  405. case p.typ of
  406. ait_instruction:
  407. begin
  408. {
  409. change
  410. <op> reg,x,y
  411. cmp reg,#0
  412. into
  413. <op>s reg,x,y
  414. }
  415. { this optimization can applied only to the currently enabled operations because
  416. the other operations do not update all flags and FPC does not track flag usage }
  417. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  418. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  419. GetNextInstruction(p, hp1) and
  420. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  421. (taicpu(hp1).oper[1]^.typ = top_const) and
  422. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  423. (taicpu(hp1).oper[1]^.val = 0) and
  424. GetNextInstruction(hp1, hp2) and
  425. { be careful here, following instructions could use other flags
  426. however after a jump fpc never depends on the value of flags }
  427. { All above instructions set Z and N according to the following
  428. Z := result = 0;
  429. N := result[31];
  430. EQ = Z=1; NE = Z=0;
  431. MI = N=1; PL = N=0; }
  432. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  433. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  434. begin
  435. DebugMsg('Peephole OpCmp2OpS done', p);
  436. taicpu(p).oppostfix:=PF_S;
  437. { move flag allocation if possible }
  438. GetLastInstruction(hp1, hp2);
  439. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  440. if assigned(hp2) then
  441. begin
  442. asml.Remove(hp2);
  443. asml.insertbefore(hp2, p);
  444. end;
  445. asml.remove(hp1);
  446. hp1.free;
  447. end
  448. else
  449. case taicpu(p).opcode of
  450. A_STR:
  451. begin
  452. { change
  453. str reg1,ref
  454. ldr reg2,ref
  455. into
  456. str reg1,ref
  457. mov reg2,reg1
  458. }
  459. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  460. (taicpu(p).oppostfix=PF_None) and
  461. GetNextInstruction(p,hp1) and
  462. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  463. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  464. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  465. begin
  466. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  467. begin
  468. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  469. asml.remove(hp1);
  470. hp1.free;
  471. end
  472. else
  473. begin
  474. taicpu(hp1).opcode:=A_MOV;
  475. taicpu(hp1).oppostfix:=PF_None;
  476. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  477. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  478. end;
  479. result := true;
  480. end
  481. { change
  482. str reg1,ref
  483. str reg2,ref
  484. into
  485. strd reg1,ref
  486. }
  487. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  488. (taicpu(p).oppostfix=PF_None) and
  489. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  490. GetNextInstruction(p,hp1) and
  491. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  492. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  493. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  494. { str ensures that either base or index contain no register, else ldr wouldn't
  495. use an offset either
  496. }
  497. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  498. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  499. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  500. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  501. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  502. begin
  503. DebugMsg('Peephole StrStr2Strd done', p);
  504. taicpu(p).oppostfix:=PF_D;
  505. asml.remove(hp1);
  506. hp1.free;
  507. end;
  508. LookForPostindexedPattern(taicpu(p));
  509. end;
  510. A_LDR:
  511. begin
  512. { change
  513. ldr reg1,ref
  514. ldr reg2,ref
  515. into ...
  516. }
  517. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  518. GetNextInstruction(p,hp1) and
  519. { ldrd is not allowed here }
  520. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  521. begin
  522. {
  523. ...
  524. ldr reg1,ref
  525. mov reg2,reg1
  526. }
  527. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  528. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  529. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  530. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  531. begin
  532. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  533. begin
  534. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  535. asml.remove(hp1);
  536. hp1.free;
  537. end
  538. else
  539. begin
  540. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  541. taicpu(hp1).opcode:=A_MOV;
  542. taicpu(hp1).oppostfix:=PF_None;
  543. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  544. end;
  545. result := true;
  546. end
  547. {
  548. ...
  549. ldrd reg1,ref
  550. }
  551. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  552. { ldrd does not allow any postfixes ... }
  553. (taicpu(p).oppostfix=PF_None) and
  554. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  555. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  556. { ldr ensures that either base or index contain no register, else ldr wouldn't
  557. use an offset either
  558. }
  559. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  560. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  561. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  562. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  563. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  564. begin
  565. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  566. taicpu(p).oppostfix:=PF_D;
  567. asml.remove(hp1);
  568. hp1.free;
  569. end;
  570. end;
  571. LookForPostindexedPattern(taicpu(p));
  572. { Remove superfluous mov after ldr
  573. changes
  574. ldr reg1, ref
  575. mov reg2, reg1
  576. to
  577. ldr reg2, ref
  578. conditions are:
  579. * no ldrd usage
  580. * reg1 must be released after mov
  581. * mov can not contain shifterops
  582. * ldr+mov have the same conditions
  583. * mov does not set flags
  584. }
  585. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  586. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  587. end;
  588. A_MOV:
  589. begin
  590. { fold
  591. mov reg1,reg0, shift imm1
  592. mov reg1,reg1, shift imm2
  593. }
  594. if (taicpu(p).ops=3) and
  595. (taicpu(p).oper[2]^.typ = top_shifterop) and
  596. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  597. getnextinstruction(p,hp1) and
  598. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  599. (taicpu(hp1).ops=3) and
  600. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  601. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  602. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  603. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  604. begin
  605. { fold
  606. mov reg1,reg0, lsl 16
  607. mov reg1,reg1, lsr 16
  608. strh reg1, ...
  609. dealloc reg1
  610. to
  611. strh reg1, ...
  612. dealloc reg1
  613. }
  614. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  615. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  616. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  617. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  618. getnextinstruction(hp1,hp2) and
  619. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  620. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  621. begin
  622. CopyUsedRegs(TmpUsedRegs);
  623. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  624. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  625. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  626. begin
  627. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  628. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  629. asml.remove(p);
  630. asml.remove(hp1);
  631. p.free;
  632. hp1.free;
  633. p:=hp2;
  634. end;
  635. ReleaseUsedRegs(TmpUsedRegs);
  636. end
  637. { fold
  638. mov reg1,reg0, shift imm1
  639. mov reg1,reg1, shift imm2
  640. to
  641. mov reg1,reg0, shift imm1+imm2
  642. }
  643. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  644. { asr makes no use after a lsr, the asr can be foled into the lsr }
  645. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  646. begin
  647. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  648. { avoid overflows }
  649. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  650. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  651. SM_ROR:
  652. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  653. SM_ASR:
  654. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  655. SM_LSR,
  656. SM_LSL:
  657. begin
  658. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  659. InsertLLItem(p.previous, p.next, hp1);
  660. p.free;
  661. p:=hp1;
  662. end;
  663. else
  664. internalerror(2008072803);
  665. end;
  666. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  667. asml.remove(hp1);
  668. hp1.free;
  669. result := true;
  670. end
  671. { fold
  672. mov reg1,reg0, shift imm1
  673. mov reg1,reg1, shift imm2
  674. mov reg1,reg1, shift imm3 ...
  675. }
  676. else if getnextinstruction(hp1,hp2) and
  677. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  678. (taicpu(hp2).ops=3) and
  679. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  680. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  681. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  682. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  683. begin
  684. { mov reg1,reg0, lsl imm1
  685. mov reg1,reg1, lsr/asr imm2
  686. mov reg1,reg1, lsl imm3 ...
  687. if imm3<=imm1 and imm2>=imm3
  688. to
  689. mov reg1,reg0, lsl imm1
  690. mov reg1,reg1, lsr/asr imm2-imm3
  691. }
  692. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  693. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  694. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  695. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  696. begin
  697. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  698. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  699. asml.remove(hp2);
  700. hp2.free;
  701. result := true;
  702. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  703. begin
  704. asml.remove(hp1);
  705. hp1.free;
  706. end;
  707. end
  708. { mov reg1,reg0, lsr/asr imm1
  709. mov reg1,reg1, lsl imm2
  710. mov reg1,reg1, lsr/asr imm3 ...
  711. if imm3>=imm1 and imm2>=imm1
  712. to
  713. mov reg1,reg0, lsl imm2-imm1
  714. mov reg1,reg1, lsr/asr imm3 ...
  715. }
  716. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  717. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  718. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  719. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  720. begin
  721. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  722. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  723. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  724. asml.remove(p);
  725. p.free;
  726. p:=hp2;
  727. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  728. begin
  729. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  730. asml.remove(hp1);
  731. hp1.free;
  732. p:=hp2;
  733. end;
  734. result := true;
  735. end;
  736. end;
  737. end;
  738. { Change the common
  739. mov r0, r0, lsr #24
  740. and r0, r0, #255
  741. and remove the superfluous and
  742. This could be extended to handle more cases.
  743. }
  744. if (taicpu(p).ops=3) and
  745. (taicpu(p).oper[2]^.typ = top_shifterop) and
  746. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  747. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  748. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  749. getnextinstruction(p,hp1) and
  750. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  751. (taicpu(hp1).ops=3) and
  752. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  753. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  754. (taicpu(hp1).oper[2]^.typ = top_const) and
  755. { Check if the AND would actually only mask out bits that are already zero because of the shift.
  756. For LSR #25 and an AndConst of 255 that would go like this:
  757. 255 and ((2 shl (32-25))-1)
  758. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  759. LSR #25 and AndConst of 254:
  760. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  761. }
  762. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  763. begin
  764. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  765. asml.remove(hp1);
  766. hp1.free;
  767. end;
  768. {
  769. optimize
  770. mov rX, yyyy
  771. ....
  772. }
  773. if (taicpu(p).ops = 2) and
  774. GetNextInstruction(p,hp1) and
  775. (tai(hp1).typ = ait_instruction) then
  776. begin
  777. {
  778. This changes the very common
  779. mov r0, #0
  780. str r0, [...]
  781. mov r0, #0
  782. str r0, [...]
  783. and removes all superfluous mov instructions
  784. }
  785. if (taicpu(p).oper[1]^.typ = top_const) and
  786. (taicpu(hp1).opcode=A_STR) then
  787. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  788. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  789. GetNextInstruction(hp1, hp2) and
  790. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  791. (taicpu(hp2).ops = 2) and
  792. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  793. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  794. begin
  795. DebugMsg('Peephole MovStrMov done', hp2);
  796. GetNextInstruction(hp2,hp1);
  797. asml.remove(hp2);
  798. hp2.free;
  799. if not assigned(hp1) then break;
  800. end
  801. {
  802. This removes the first mov from
  803. mov rX,...
  804. mov rX,...
  805. }
  806. else if taicpu(hp1).opcode=A_MOV then
  807. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  808. (taicpu(hp1).ops = 2) and
  809. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  810. { don't remove the first mov if the second is a mov rX,rX }
  811. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  812. begin
  813. DebugMsg('Peephole MovMov done', p);
  814. asml.remove(p);
  815. p.free;
  816. p:=hp1;
  817. GetNextInstruction(hp1,hp1);
  818. if not assigned(hp1) then
  819. break;
  820. end;
  821. end;
  822. {
  823. change
  824. mov r1, r0
  825. add r1, r1, #1
  826. to
  827. add r1, r0, #1
  828. Todo: Make it work for mov+cmp too
  829. CAUTION! If this one is successful p might not be a mov instruction anymore!
  830. }
  831. if (taicpu(p).ops = 2) and
  832. (taicpu(p).oper[1]^.typ = top_reg) and
  833. (taicpu(p).oppostfix = PF_NONE) and
  834. GetNextInstruction(p, hp1) and
  835. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  836. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  837. [taicpu(p).condition], []) and
  838. {MOV and MVN might only have 2 ops}
  839. (taicpu(hp1).ops = 3) and
  840. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  841. (taicpu(hp1).oper[1]^.typ = top_reg) and
  842. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  843. begin
  844. { When we get here we still don't know if the registers match}
  845. for I:=1 to 2 do
  846. {
  847. If the first loop was successful p will be replaced with hp1.
  848. The checks will still be ok, because all required information
  849. will also be in hp1 then.
  850. }
  851. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  852. begin
  853. DebugMsg('Peephole RedundantMovProcess done', hp1);
  854. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  855. if p<>hp1 then
  856. begin
  857. asml.remove(p);
  858. p.free;
  859. p:=hp1;
  860. end;
  861. end;
  862. end;
  863. { This folds shifterops into following instructions
  864. mov r0, r1, lsl #8
  865. add r2, r3, r0
  866. to
  867. add r2, r3, r1, lsl #8
  868. CAUTION! If this one is successful p might not be a mov instruction anymore!
  869. }
  870. if (taicpu(p).opcode = A_MOV) and
  871. (taicpu(p).ops = 3) and
  872. (taicpu(p).oper[1]^.typ = top_reg) and
  873. (taicpu(p).oper[2]^.typ = top_shifterop) and
  874. (taicpu(p).oppostfix = PF_NONE) and
  875. GetNextInstruction(p, hp1) and
  876. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  877. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  878. A_CMP, A_CMN],
  879. [taicpu(p).condition], [PF_None]) and
  880. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  881. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  882. (
  883. {Only ONE of the two src operands is allowed to match}
  884. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  885. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  886. ) then
  887. begin
  888. CopyUsedRegs(TmpUsedRegs);
  889. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  890. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  891. I2:=0
  892. else
  893. I2:=1;
  894. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  895. for I:=I2 to taicpu(hp1).ops-1 do
  896. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  897. begin
  898. { If the parameter matched on the second op from the RIGHT
  899. we have to switch the parameters, this will not happen for CMP
  900. were we're only evaluating the most right parameter
  901. }
  902. if I <> taicpu(hp1).ops-1 then
  903. begin
  904. {The SUB operators need to be changed when we swap parameters}
  905. case taicpu(hp1).opcode of
  906. A_SUB: tempop:=A_RSB;
  907. A_SBC: tempop:=A_RSC;
  908. A_RSB: tempop:=A_SUB;
  909. A_RSC: tempop:=A_SBC;
  910. else tempop:=taicpu(hp1).opcode;
  911. end;
  912. if taicpu(hp1).ops = 3 then
  913. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  914. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  915. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  916. else
  917. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  918. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  919. taicpu(p).oper[2]^.shifterop^);
  920. end
  921. else
  922. if taicpu(hp1).ops = 3 then
  923. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  924. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  925. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  926. else
  927. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  928. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  929. taicpu(p).oper[2]^.shifterop^);
  930. asml.insertbefore(hp2, p);
  931. asml.remove(p);
  932. asml.remove(hp1);
  933. p.free;
  934. hp1.free;
  935. p:=hp2;
  936. GetNextInstruction(p,hp1);
  937. DebugMsg('Peephole FoldShiftProcess done', p);
  938. break;
  939. end;
  940. ReleaseUsedRegs(TmpUsedRegs);
  941. end;
  942. {
  943. Often we see shifts and then a superfluous mov to another register
  944. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  945. }
  946. if (taicpu(p).opcode = A_MOV) and
  947. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  948. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  949. end;
  950. A_ADD,
  951. A_ADC,
  952. A_RSB,
  953. A_RSC,
  954. A_SUB,
  955. A_SBC,
  956. A_AND,
  957. A_BIC,
  958. A_EOR,
  959. A_ORR,
  960. A_MLA,
  961. A_MUL:
  962. begin
  963. {
  964. optimize
  965. and reg2,reg1,const1
  966. ...
  967. }
  968. if (taicpu(p).opcode = A_AND) and
  969. (taicpu(p).oper[1]^.typ = top_reg) and
  970. (taicpu(p).oper[2]^.typ = top_const) then
  971. begin
  972. {
  973. change
  974. and reg2,reg1,const1
  975. and reg3,reg2,const2
  976. to
  977. and reg3,reg1,(const1 and const2)
  978. }
  979. if GetNextInstruction(p, hp1) and
  980. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  981. { either reg3 and reg2 are equal or reg2 is deallocated after the and }
  982. (MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) or
  983. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))) and
  984. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  985. (taicpu(hp1).oper[2]^.typ = top_const) then
  986. begin
  987. DebugMsg('Peephole AndAnd2And done', p);
  988. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  989. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  990. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  991. asml.remove(hp1);
  992. hp1.free;
  993. end
  994. {
  995. change
  996. and reg2,reg1,255
  997. strb reg2,[...]
  998. dealloc reg2
  999. to
  1000. strb reg1,[...]
  1001. }
  1002. else if (taicpu(p).oper[2]^.val = 255) and
  1003. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1004. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1005. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1006. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1007. { the reference in strb might not use reg2 }
  1008. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1009. { reg1 might not be modified inbetween }
  1010. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1011. begin
  1012. DebugMsg('Peephole AndStrb2Strb done', p);
  1013. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1014. asml.remove(p);
  1015. p.free;
  1016. p:=hp1;
  1017. end;
  1018. end;
  1019. {
  1020. change
  1021. add/sub reg2,reg1,const1
  1022. str/ldr reg3,[reg2,const2]
  1023. dealloc reg2
  1024. to
  1025. str/ldr reg3,[reg1,const2+/-const1]
  1026. }
  1027. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1028. (taicpu(p).oper[1]^.typ = top_reg) and
  1029. (taicpu(p).oper[2]^.typ = top_const) then
  1030. begin
  1031. hp1:=p;
  1032. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1033. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1034. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1035. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1036. { don't optimize if the register is stored/overwritten }
  1037. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1038. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1039. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1040. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1041. ldr postfix }
  1042. (((taicpu(p).opcode=A_ADD) and
  1043. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1044. ) or
  1045. ((taicpu(p).opcode=A_SUB) and
  1046. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1047. )
  1048. ) do
  1049. begin
  1050. { neither reg1 nor reg2 might be changed inbetween }
  1051. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1052. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1053. break;
  1054. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1055. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1056. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1057. begin
  1058. { remember last instruction }
  1059. hp2:=hp1;
  1060. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1061. hp1:=p;
  1062. { fix all ldr/str }
  1063. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1064. begin
  1065. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1066. if taicpu(p).opcode=A_ADD then
  1067. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1068. else
  1069. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1070. if hp1=hp2 then
  1071. break;
  1072. end;
  1073. GetNextInstruction(p,hp1);
  1074. asml.remove(p);
  1075. p.free;
  1076. p:=hp1;
  1077. break;
  1078. end;
  1079. end;
  1080. end;
  1081. {
  1082. change
  1083. add reg1, ...
  1084. mov reg2, reg1
  1085. to
  1086. add reg2, ...
  1087. }
  1088. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1089. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1090. end;
  1091. A_MVN:
  1092. begin
  1093. {
  1094. change
  1095. mvn reg2,reg1
  1096. and reg3,reg4,reg2
  1097. dealloc reg2
  1098. to
  1099. bic reg3,reg4,reg1
  1100. }
  1101. if (taicpu(p).oper[1]^.typ = top_reg) and
  1102. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1103. MatchInstruction(hp1,A_AND,[],[]) and
  1104. (((taicpu(hp1).ops=3) and
  1105. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1106. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1107. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1108. ((taicpu(hp1).ops=2) and
  1109. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1110. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1111. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1112. { reg1 might not be modified inbetween }
  1113. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1114. begin
  1115. DebugMsg('Peephole MvnAnd2Bic done', p);
  1116. taicpu(hp1).opcode:=A_BIC;
  1117. if taicpu(hp1).ops=3 then
  1118. begin
  1119. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1120. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1121. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1122. end
  1123. else
  1124. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1125. asml.remove(p);
  1126. p.free;
  1127. p:=hp1;
  1128. end;
  1129. end;
  1130. A_UXTB,
  1131. A_SXTB:
  1132. begin
  1133. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1134. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1135. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1136. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1137. { the reference in strb might not use reg2 }
  1138. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1139. { reg1 might not be modified inbetween }
  1140. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1141. begin
  1142. DebugMsg('Peephole xXTBStrb2Strb done', p);
  1143. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1144. asml.remove(p);
  1145. p.free;
  1146. p:=hp1;
  1147. end;
  1148. end;
  1149. A_UXTH,
  1150. A_SXTH:
  1151. begin
  1152. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1153. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1154. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1155. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1156. { the reference in strb might not use reg2 }
  1157. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1158. { reg1 might not be modified inbetween }
  1159. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1160. begin
  1161. DebugMsg('Peephole xXTBStrh2Strh done', p);
  1162. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1163. asml.remove(p);
  1164. p.free;
  1165. p:=hp1;
  1166. end;
  1167. end;
  1168. A_CMP:
  1169. begin
  1170. {
  1171. change
  1172. cmp reg,const1
  1173. moveq reg,const1
  1174. movne reg,const2
  1175. to
  1176. cmp reg,const1
  1177. movne reg,const2
  1178. }
  1179. if (taicpu(p).oper[1]^.typ = top_const) and
  1180. GetNextInstruction(p, hp1) and
  1181. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1182. (taicpu(hp1).oper[1]^.typ = top_const) and
  1183. GetNextInstruction(hp1, hp2) and
  1184. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1185. (taicpu(hp1).oper[1]^.typ = top_const) then
  1186. begin
  1187. RemoveRedundantMove(p, hp1, asml);
  1188. RemoveRedundantMove(p, hp2, asml);
  1189. end;
  1190. end;
  1191. end;
  1192. end;
  1193. end;
  1194. end;
  { Instructions modifying the CPSR can only be the last instruction of a
    conditionally executed sequence.

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is one of BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF, CMFE
        (MSR is deliberately left out, see the commented entry);
      - its first operand is the register PC;
      - it carries the S postfix.
    Any non-instruction tai yields false. }
  function MustBeLast(p : tai) : boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      ins:=taicpu(p);
      if ins.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (ins.ops>=1) and
        (ins.oper[0]^.typ=top_reg) and
        (ins.oper[0]^.reg=NR_PC) then
        Result:=true
      else
        Result:=(ins.oppostfix=PF_S);
    end;
  { Second peephole pass: replaces short forward conditional branches by
    conditionally executed instructions.

    Two shapes are recognised for a conditional branch "Bcc xxx":

      1)  Bcc xxx
          <several instructions>
        xxx:
          The skipped instructions get the inverse condition and the branch
          is removed.

      2)  Bcc xxx
          <several instructions 1>
          B yyy
        xxx:
          <several instructions 2>
        yyy:
          Block 1 gets the inverse condition, block 2 the original condition,
          and both branches are removed. This is only tried when block 1
          contains no instruction for which MustBeLast returned true.

    The walk covers BlockStart up to (not including) BlockEnd. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                            Bxx   xxx
                            <several instructions>
                          xxx:
                        }
                        { l counts the instructions that could be made conditional }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                { nothing may follow this one inside the conditional sequence }
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 remembers the Bcc so it can be removed afterwards }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points at the first converted instruction }
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized }
                              if not WasLast then
                                begin
                                  { check further for
                                        Bcc   xxx
                                        <several instructions 1>
                                        B   yyy
                                      xxx:
                                        <several instructions 2>
                                      yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 remembers the Bcc so it can be removed afterwards }
                                          hp3:=p;
                                          p:=hp1;
                                          { block 1 is executed when the branch condition is false }
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            { block 2 may be empty, in which case hp1 is
                                              not an instruction: never write a condition
                                              through the taicpu cast in that case }
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1)) or
                                            (hp1.typ=ait_label);
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Tells whether Reg is used by p1. A BL instruction is always reported as
    using the register; every other tai is handed to the inherited check. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ<>ait_instruction) or (taicpu(p1).opcode<>A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        Result:=true;
    end;
  const
    { opcodes which write, or might write, to memory }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branches, calls and breakpoints }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { integer stores }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { floating point stores }
      A_STF,A_SFM,A_FSTS,A_FSTD];
  { Adjust the register live information when swapping the two instructions
    p and hp1; they must follow one after the other (p first, hp1 second).

    For every register operand of p whose live range starts at p and which is
    also used by hp1, the live start is moved to hp1.
    For every register operand of hp1 whose live range ends at hp1 and which is
    also used by p, the live end is moved to p.
    Registers are collected from plain register operands, the base and index of
    references, the shift register of shifter operands and register sets. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { moves the live end of reg from hp1 to p if p uses reg as well }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { moves the live start of reg from p to hp1 if hp1 uses reg as well }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop fixes live ends, so the shift register has to be
              handled like every other operand of hp1 }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass over BlockStart..BlockEnd.

    Looks for the sequence
        p         (any instruction)
        loadins   (LDR/LDRB/LDRH/LDRSB/LDRSH)
        consumer  (uses the register loaded by loadins)
    and, when it is safe, moves the load in front of p so that it no longer
    directly precedes its consumer. Register deallocations that belong to p
    are moved together with p. Always returns true; p is overwritten while
    walking the block. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      loadins,consumer,scanback,dealloc,scanfwd : tai;
      pending : TAsmList;
    begin
      result:=true;
      { collects p and its deallocations while they are out of the main list }
      pending:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,loadins) and
            (loadins.typ=ait_instruction) and
            (taicpu(loadins).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction potentially
              changes a memory location }
            (
              (not(taicpu(p).opcode in opcode_could_mem_write) and
               not(RegModifiedByInstruction(NR_PC,p))
              ) or
              { a store in front of a pc relative load or a load from a symbol
                without offset }
              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
               ((taicpu(loadins).oper[1]^.ref^.base=NR_PC) or
                (assigned(taicpu(loadins).oper[1]^.ref^.symboldata) and
                 (taicpu(loadins).oper[1]^.ref^.offset=0)
                )
               )
              ) or
              { try to prove that the memory accesses don't overlap }
              ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
               (taicpu(p).oper[1]^.ref^.base=taicpu(loadins).oper[1]^.ref^.base) and
               (taicpu(p).oppostfix=PF_None) and
               (taicpu(loadins).oppostfix=PF_None) and
               (taicpu(p).oper[1]^.ref^.index=NR_NO) and
               (taicpu(loadins).oper[1]^.ref^.index=NR_NO) and
               { get operand sizes and check if the offset distance is large
                 enough to ensure no overlap }
               (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(loadins).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(loadins).oper[0]^.reg)]))
              )
            ) and
            GetNextInstruction(loadins,consumer) and
            (consumer.typ=ait_instruction) and
            { loaded register used by next instruction? }
            (RegInInstruction(taicpu(loadins).oper[0]^.reg,consumer)) and
            { loaded register not used by previous instruction? }
            not(RegInInstruction(taicpu(loadins).oper[0]^.reg,p)) and
            { same condition? }
            (taicpu(p).condition=taicpu(loadins).condition) and
            { first instruction might not change the register used as base }
            ((taicpu(loadins).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadins).oper[1]^.ref^.base,p))
            ) and
            { first instruction might not change the register used as index }
            ((taicpu(loadins).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadins).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember the neighbours of p before it is unlinked }
              scanback:=tai(p.Previous);
              scanfwd:=tai(p.next);
              asml.Remove(p);

              { if there is a reg. dealloc instruction associated with p, move
                it together with p }

              { before the instruction? }
              while assigned(scanback) and (scanback.typ<>ait_instruction) do
                begin
                  dealloc:=scanback;
                  scanback:=tai(scanback.Previous);
                  if (dealloc.typ=ait_regalloc) and
                    (tai_regalloc(dealloc).ratype in [ra_dealloc]) and
                    RegInInstruction(tai_regalloc(dealloc).reg,p) then
                    begin
                      asml.Remove(dealloc);
                      pending.Concat(dealloc);
                    end;
                end;

              pending.Concat(p);
              SwapRegLive(taicpu(p),taicpu(loadins));

              { after the instruction? }
              while assigned(scanfwd) and (scanfwd.typ<>ait_instruction) do
                begin
                  dealloc:=scanfwd;
                  scanfwd:=tai(scanfwd.next);
                  if (dealloc.typ=ait_regalloc) and
                    (tai_regalloc(dealloc).ratype in [ra_dealloc]) and
                    RegInInstruction(tai_regalloc(dealloc).reg,p) then
                    begin
                      asml.Remove(dealloc);
                      pending.Concat(dealloc);
                    end;
                end;

              { re-insert: first the load, then p with its deallocations, both
                in front of the consumer }
              asml.Remove(loadins);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),consumer);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(loadins,consumer);
              asml.InsertListBefore(consumer,pending);
              p:=tai(p.next)
            end
          else if p.typ<>ait_instruction then
            p:=tai(p.next)
          else
            { p is an instruction, so loadins was set by GetNextInstruction above }
            p:=loadins;
        end;
      pending.Free;
    end;
  { Shrinks the IT block in front of p by one slot.

    Walks backwards from p. l starts at 1 and is incremented for every
    instruction passed that is not an IT variant; the walk stops once l
    exceeds 4 or the start of the list is reached. The first IT variant found
    (opcode in A_IT..A_ITTTT) ends the search:
      - a plain IT at distance 1 is removed from list and freed;
      - otherwise, if l equals the number of slots of that IT variant, its
        opcode is replaced by the variant with the last slot dropped;
      - in all other cases the IT instruction is left untouched.

    list : the assembler list that contains p
    p    : the instruction whose IT slot is dropped }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;
      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    begin
                      { unlinking alone would leak the node; free it like the
                        other removal sites in this unit do. hp is not used
                        again because the loop is left right below }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    case taicpu(hp).opcode of
                      { two slots -> one slot }
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      { three slots -> two slots }
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      { four slots -> three slots }
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;

                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                      (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}

              inc(l);
            end;

          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific first-pass peephole rewrites for the list entry p.

    The patterns are tried in order and the first match wins:
      - STM/LDM and STR/LDR forms that address through the stack pointer are
        replaced by PUSH/POP; the replaced instruction is removed from the
        list and freed, and p is set to the new instruction;
      - MOV, MVN and two-address ADD/SUB with a small constant get the S
        postfix;
      - AND with 255 or 65535 becomes UXTB or UXTH.
    If nothing matches, the inherited implementation is called.

    p:       entry to inspect; replaced by the new instruction when the entry
             itself is substituted.
    Returns: true when one of the rewrites above was applied, otherwise the
             result of the inherited PeepHoleOptPass1Cpu. }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      { only referenced by the disabled CBZ/CBNZ rewrite further down }
      hp1,hp2 : tai;
    begin
      { STM (FD/DB) through pre-indexed SP with no register from
        [8..13,15] in the set -> PUSH of the same register set }
      if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          AsmL.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced STM, exactly as the STR/LDM/LDR rewrites
            below do; without this the unlinked instruction is leaked }
          p.Free;
          p:=hp;
          result:=true;
        end
      { STR reg through pre-indexed SP with offset -4, reg in [0..7,14]
        -> PUSH of that single register }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { LDM (FD/IA) through SP with no register from [8..14] in the set
        -> POP of the same register set }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { LDR reg through post-indexed SP with offset 4, reg in [0..7,15]
        -> POP of that single register }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      { MOV with a constant in 0..255 -> MOVS, provided
        FindRegDeAlloc(NR_DEFAULTFLAGS, p) finds nothing.
        NOTE(review): presumably done to allow the short flag-setting
        encoding; the exact meaning of the FindRegDeAlloc test is defined
        outside this block - confirm }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val >= 0) and
        (taicpu(p).oper[1]^.val < 256) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { MVN with a register source -> MVNS, under the same flags test }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, A_MVN, [], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      { three-operand ADD/SUB whose destination equals its first source
        (and is not SP) with a constant in 0..255 -> two-operand ADDS/SUBS:
        the constant is moved into operand 1 and the operand count reduced }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val >= 0) and
        (taicpu(p).oper[2]^.val < 256) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [], [PF_None,PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ=top_reg) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end}
      { two-operand AND with 255 or 65535 -> UXTB or UXTH with the
        destination register also loaded as the source operand }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
          result := true;
        end
      { three-operand AND with 255 or 65535 -> UXTB or UXTH on the first two
        operands; reducing the operand count drops the constant }
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;
          taicpu(p).ops:=2;
          result := true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
          taicpu(hp2).is_jmp := true;
          asml.InsertAfter(hp2, hp1);
          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;
          p := hp2;
          result := true;
        end}
      else
        Result := inherited PeepHoleOptPass1Cpu(p);
    end;
  { Second peephole pass over the entries from BlockStart up to BlockEnd.

    Looks for a conditional branch that merely jumps over a short run of
    instructions:

        Bxx  target
        <one to four instructions that can be made conditional>
      target:

    and replaces it by an IT block: the skipped instructions receive the
    inverse of the branch condition, an IT/ITT/ITTT/ITTTT instruction with
    that condition takes the place of the branch, and the branch itself is
    removed and freed. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    const
      { IT opcode covering a run of 1..4 instructions that all share the
        IT condition }
      ITOpForCount : array[1..4] of TAsmOp = (A_IT,A_ITT,A_ITTT,A_ITTTT);
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      WasLast: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) do
        begin
          if (p.typ=ait_instruction) and
             (taicpu(p).opcode=A_B) and
             (taicpu(p).condition<>C_None) then
            begin
              { count the instructions behind the branch that could be made
                conditional; the scan ends at a label, after more than four
                instructions, or just behind an instruction that must be the
                last one }
              l:=0;
              GetNextInstruction(p, hp1);
              while assigned(hp1) and
                    (l<=4) and
                    CanBeCond(hp1) and
                    (hp1.typ<>ait_label) do
                begin
                  inc(l);
                  { query before advancing: the answer belongs to the
                    instruction just counted }
                  WasLast:=MustBeLast(hp1);
                  GetNextInstruction(hp1,hp1);
                  if WasLast then
                    break;
                end;

              { hp1 now is the entry behind the counted run; collapse only
                when the branch target label is found there and the run
                holds one to four instructions }
              if assigned(hp1) and
                 FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) and
                 (l<=4) and
                 (l>0) then
                begin
                  condition:=inverse_cond(taicpu(p).condition);
                  hp2:=p;
                  GetNextInstruction(p,hp1);
                  { processing resumes at the first skipped instruction }
                  p:=hp1;
                  repeat
                    if hp1.typ=ait_instruction then
                      taicpu(hp1).condition:=condition;
                    WasLast:=MustBeLast(hp1);
                    GetNextInstruction(hp1,hp1);
                  until WasLast or
                        not(assigned(hp1)) or
                        not(CanBeCond(hp1)) or
                        (hp1.typ=ait_label);

                  { the branch is taken out only now: removed earlier,
                    GetNextInstruction could have ignored the label if this
                    jump was its only user }
                  asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);

                  DecrementPreceedingIT(asml, hp2);

                  { l is within 1..4 here, see the test above }
                  asml.InsertAfter(taicpu.op_cond(ITOpForCount[l],condition), hp2);

                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                  asml.remove(hp2);
                  hp2.free;
                  { p already points at the next entry to look at }
                  continue;
                end;
            end;
          p := tai(p.next)
        end;
    end;
  { Unit initialization: stores the optimizer and pre-register-allocation
    scheduler classes implemented in this unit in the class-reference
    variables casmoptimizer and cpreregallocscheduler.
    NOTE(review): presumably the generic optimizer code instantiates them
    through these variables - confirm against their declaring unit. }
  begin
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
    casmoptimizer:=TCpuAsmOptimizer;
  end.