aoptcpu.pas 89 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. {$define DEBUG_AOPTCPU}
  22. Interface
  23. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
  24. Type
  { ARM peephole optimizer object, derived from TAsmOptimizer.
    No constructor is declared here. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { first CPU specific peephole pass, applied to the list item p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { second peephole pass (overrides the inherited one) }
    procedure PeepHoleOptPass2;override;
    Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { if movp is a plain register move of the result (operand 0) of p, lets p
      write directly into the target of the move and removes movp; optimizer is
      the name that is put into the debug message }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    { updates AllUsedRegs with the item following p and reports whether reg is
      still considered used after instruction p }
    function RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    { starting after Current, steps through the following instructions until
      one of these holds for Next:
        * there is no further instruction (the function then returns false)
        * Next is not an instruction
        * Next contains reg
        * Next is a call or jump
        * Next contains the program counter
      The return value is the result of the last GetNextInstruction call }
    Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
    { outputs a debug message into the assembler file (inserted as a comment
      before p); does nothing when DEBUG_AOPTCPU is not defined }
    procedure DebugMsg(const s: string; p: tai);
    protected
    { tries to merge a following add/sub on the base register of the load/store
      p into p as post-indexed addressing; returns true if p was changed }
    function LookForPostindexedPattern(p: taicpu): boolean;
  End;
  { CPU specific scheduler object, derived from TAsmScheduler.
    NOTE(review): the name suggests it runs before register allocation;
    its implementation is not part of this excerpt - confirm there }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific scheduling pass applied to the list item p }
    function SchedulerPass1Cpu(var p: tai): boolean;override;
    { NOTE(review): presumably fixes up register live ranges when p and hp1
      are swapped - verify against the implementation }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Peephole optimizer variant derived from TCpuAsmOptimizer; it overrides
    both peephole passes. No constructor is declared here. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { first CPU specific peephole pass, applied to the list item p }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { second peephole pass }
    procedure PeepHoleOptPass2;override;
  End;
  53. function MustBeLast(p : tai) : boolean;
  54. Implementation
  55. uses
  56. cutils,verbose,globals,
  57. systems,
  58. cpuinfo,
  59. cgobj,cgutils,procinfo,
  60. aasmbase,aasmdata;
  { Returns true if p is an instruction that carries no condition code yet and
    whose opcode is not one of the excluded ones: the IT..ITTTT range, CBZ,
    CBNZ, PLD, and BLX unless the first operand of the BLX is a register. }
  function CanBeCond(p : tai) : boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional }
      if instr.condition<>C_None then
        exit;
      { the IT variants form a contiguous opcode range }
      if (instr.opcode>=A_IT) and (instr.opcode<=A_ITTTT) then
        exit;
      if (instr.opcode=A_CBZ) or
         (instr.opcode=A_CBNZ) or
         (instr.opcode=A_PLD) then
        exit;
      if instr.opcode=A_BLX then
        result:=instr.oper[0]^.typ=top_reg
      else
        result:=true;
    end;
  { Field-wise comparison of two references. Returns true only if offset,
    base, index, scale factor, symbol, relative symbol, address kind, index
    sign, shift amount, shift mode and address mode are all equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { registers and displacement }
      if (r1.base<>r2.base) or (r1.index<>r2.index) then
        exit;
      if (r1.offset<>r2.offset) or (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or (r1.relsymbol<>r2.relsymbol) then
        exit;
      if r1.refaddr<>r2.refaddr then
        exit;
      { ARM specific addressing details }
      if (r1.signindex<>r2.signindex) or (r1.shiftimm<>r2.shiftimm) then
        exit;
      if (r1.shiftmode<>r2.shiftmode) or (r1.addressmode<>r2.addressmode) then
        exit;
      result:=true;
    end;
  { Checks whether instr is an instruction whose opcode is in op, whose
    condition is in cond and whose postfix is in postfix. An empty set acts
    as a wildcard for the respective property. Opcodes with an ordinal value
    of 256 or more never match a non-empty op set. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if (op<>[]) and
         not((ord(ins.opcode)<256) and (ins.opcode in op)) then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Checks whether instr is an instruction with exactly the opcode op, a
    condition contained in cond and a postfix contained in postfix. An empty
    cond or postfix set matches anything. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      ins: taicpu;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      ins:=taicpu(instr);
      if ins.opcode<>op then
        exit;
      if (cond<>[]) and not(ins.condition in cond) then
        exit;
      result:=(postfix=[]) or (ins.oppostfix in postfix);
    end;
  { Compares two operands. They match if they are of the same kind and, for
    constants, registers, condition codes and references, carry the same
    value. Operands of any other kind never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_const:
          result:=oper1.val=oper2.val;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_conditioncode:
          result:=oper1.cc=oper2.cc;
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Removes movp from asml if it is executed under the EQ condition and both
    its first operand (register) and its second operand (value) are equal to
    the corresponding operands of cmpp. A comment documenting the removal is
    put into the list at the position of the removed instruction.
    Neither the kind of the instructions nor the types of their operands are
    checked here. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpins,
      movins: taicpu;
    begin
      cmpins:=taicpu(cmpp);
      movins:=taicpu(movp);
      if movins.condition<>C_EQ then
        exit;
      if cmpins.oper[0]^.reg<>movins.oper[0]^.reg then
        exit;
      if cmpins.oper[1]^.val<>movins.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true if the instruction hp writes a new value into reg.

    reg : register to check
    hp  : list item to inspect; nil or a non-instruction yields false

    Handled cases:
      * compare/test, branch, SWI, MSR and PLD are treated as not writing any
        register
      * LDR/STR with pre- or post-indexed addressing write back their base
        register
      * UMULL/UMLAL/SMULL/SMLAL write their first two register operands
      * LDM writes every register of its register set
      * for everything else operand 0 is taken as destination: either a
        register (plus the following register for LDRD) or, for a
        pre-/post-indexed reference, its base register }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        { These instructions are treated as not writing into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented stores and loads, they change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR only reads its first register (STRD: register pair), so the
              generic "operand 0 is the destination" check below must not be
              applied to it; the base register writeback is the only possible
              write }
            if p.opcode=A_STR then
              exit;
          end;
        { These four write into their first 2 registers, UMLAL and SMLAL also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { MRC (load into oper[2] from a coprocessor) is not handled because
          MCR/MRC is currently not supported in FPC }
        { Loads into all registers of the register set }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;
      if regLoadedWithNewValue then
        exit;
      { an instruction without operands cannot write reg through operand 0 }
      if p.ops=0 then
        exit;
      case p.oper[0]^.typ of
        { the common case: operand 0 is the destination register }
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following operand 0 }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (Safe) heuristic which decides whether ref can be assumed to be aligned
    to 8 bytes. This is only ever assumed for the EABI, ARMEB and EABIHF
    ABIs, and only for two kinds of references:
      * R13 as base or index with a non-negative offset divisible by 8
      * R11 as base or index, while R11 is the frame pointer of the current
        procedure, with a non-positive offset for which offset+4 is
        divisible by 8 }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { R13 relative }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        result:=true
      else
        { R11 relative: when using NR_R11, it has always a value of <qword align>+4 }
        result:=
          (ref.offset<=0) and
          ((abs(ref.offset+4) mod 8)=0) and
          (current_procinfo.framepointer=NR_R11) and
          ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if the instruction hp reads reg through one of its operands.

    reg : register to look for
    hp  : list item to inspect; nil or a non-instruction yields false

    Operand 0 is skipped, except for the opcodes listed below, for which it
    is inspected as well. A register counts as read if it is a register
    operand, a member of a register set, the shift register of a shifter
    operand, or base or index of a reference. For STRD the register following
    operand 0 counts too. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      ins: taicpu;
      first,
      idx: longint;
    begin
      result:=false;
      if not(assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      ins:=taicpu(hp);
      { for these instructions the scan has to start at oper[0] }
      if ins.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                        A_CMP, A_CMN, A_TST, A_TEQ,
                        A_B, A_BL, A_BX, A_BLX,
                        A_SMLAL, A_UMLAL] then
        first:=0
      else
        first:=1;
      for idx:=first to ins.ops-1 do
        begin
          case ins.oper[idx]^.typ of
            top_reg:
              result:=(ins.oper[idx]^.reg=reg) or
                { STRD }
                ((idx=0) and (ins.opcode=A_STR) and (ins.oppostfix=PF_D) and (getsupreg(ins.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              result:=getsupreg(reg) in ins.oper[idx]^.regset^;
            top_shifterop:
              result:=ins.oper[idx]^.shifterop^.rs=reg;
            top_ref:
              result:=
                (ins.oper[idx]^.ref^.base=reg) or
                (ins.oper[idx]^.ref^.index=reg);
          end;
          { stop at the first operand which reads reg }
          if result then
            exit;
        end;
    end;
  { Checks whether aoffset is accepted as constant offset of a load/store
    with postfix pf for the currently selected cpu type.
      * cpu types in cpu_thumb2: -256 < aoffset < 4096
      * otherwise: |aoffset| < 4096 for the postfixes PF_None and PF_B,
        |aoffset| < 256 for every other postfix }
  function isValidConstLoadStoreOffset(const aoffset: longint; const pf: TOpPostfix) : boolean;
    begin
      if current_settings.cputype in cpu_thumb2 then
        begin
          result:=(aoffset>-256) and (aoffset<4096);
          exit;
        end;
      if pf in [PF_None,PF_B] then
        result:=abs(aoffset)<4096
      else
        result:=abs(aoffset)<256;
    end;
  { Updates the used register information in AllUsedRegs with the item
    following p and then decides whether reg is still in use after p.
    reg is reported as used only if
      * the updated register information marks it as used, and
      * p itself does not load it with a new value, and
      * there is no following instruction, or the following instruction reads
        reg, or the following instruction does not load reg with a new value }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      result:=false;
      if not(AllUsedRegs[getregtype(reg)].IsUsed(reg)) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p refers to the following instruction (if there is one) }
      if not(GetNextInstruction(p,p)) then
        begin
          result:=true;
          exit;
        end;
      result:=instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Starting after Current, advances Next with GetNextInstruction until
      * there is no further instruction, or
      * Next is not an instruction, or
      * Next contains reg, or
      * Next is a call or jump, or
      * Next contains the program counter.
    The result is the value returned by the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not(Result) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          { changes of the program flow end the search as well }
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: inserts the text s as a comment into the assembler list,
    directly before p. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug variant: empty stub, no comment is emitted. }
  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Tries to get rid of a register move which only copies the result of p:

        <op>  reg1,...
        mov   reg2,reg1
      becomes
        <op>  reg2,...

    p         : instruction whose operand 0 (reg1) is the source of the move
    movp      : candidate for the superfluous mov
    optimizer : name of the calling optimization, used in the debug message

    Nothing is changed unless all conditions of the if statement hold and
    FindRegDeAlloc, started at the item following movp, returns a
    deallocation of reg1. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      { the mov must have the same condition as p and no postfix (so it does
        not set flags) }
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
        (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
        { don't mess with moves to pc }
        (taicpu(movp).oper[0]^.reg<>NR_PC) and
        { don't mess with moves to lr }
        (taicpu(movp).oper[0]^.reg<>NR_R14) and
        { the destination register of the mov must not be used between p and movp }
        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
        { cb[n]z are thumb instructions which require specific registers, with no wide forms }
        (taicpu(p).opcode<>A_CBZ) and
        (taicpu(p).opcode<>A_CBNZ) and
        {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
        not (
          (taicpu(p).opcode in [A_MLA, A_MUL]) and
          (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
        ) then
        begin
          { reg1 must be released after the mov, else its value is still needed }
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              { NOTE(review): the result of GetLastInstruction is not checked
                before hp1.Next is used - confirm that hp1 cannot be nil here }
              GetLastInstruction(p,hp1);
              asml.Remove(dealloc);
              alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  { allocation found: drop both the allocation and the deallocation }
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { no allocation found: keep the deallocation, but move it
                  directly behind p }
                asml.InsertAfter(dealloc,p);
              { try to move the allocation of the target register }
              GetLastInstruction(movp,hp1);
              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  { the target register is written by p now, so its allocation
                    has to precede p }
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;
              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  {
    optimize
      ldr/str regX,[reg1]
      ...
      add/sub reg1,reg1,regY/const
    into
      ldr/str regX,[reg1], regY/const

    p must be a load/store whose operand 1 is a reference. Returns true if
    the add/sub was merged into p (p is then post-indexed and the add/sub has
    been removed from the list), false if nothing was changed.

    The add/sub is only merged if
      * it is unconditional and has no postfix (so it does not set flags)
      * it has exactly three operands; a fourth operand is a shifter operand
        which the post-indexed reference built here cannot express, so
        merging would silently drop the shift
      * its third operand is either a register or a constant within the
        offset range of the load/store
      * neither the base register nor the new index register is loaded by p
        or modified between p and the add/sub
  }
  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
        (p.oper[1]^.ref^.index=NR_NO) and
        (p.oper[1]^.ref^.offset=0) and
        GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
        MatchInstruction(hp1, [A_ADD, A_SUB], [C_None], [PF_None]) and
        { add/sub reg1,reg1,regY,<shift> cannot be folded }
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
        (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
        (
          { register increment: the register is only looked at if the operand
            really is a register }
          ((taicpu(hp1).oper[2]^.typ=top_reg) and
           { don't apply the optimization if the (new) index register is loaded }
           (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
           not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1))
          ) or
          { constant increment: valid offset? }
          ((taicpu(hp1).oper[2]^.typ=top_const) and
           ((abs(taicpu(hp1).oper[2]^.val)<256) or
            ((abs(taicpu(hp1).oper[2]^.val)<4096) and (p.oppostfix in [PF_None,PF_B]))
           )
          )
        ) and
        { don't apply the optimization if the base register is loaded }
        (p.oper[0]^.reg<>p.oper[1]^.ref^.base) and
        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
          if taicpu(hp1).oper[2]^.typ=top_const then
            begin
              { a sub becomes a negative offset }
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
              else
                p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
            end
          else
            begin
              { a sub becomes a subtracted index register }
              p.oper[1]^.ref^.index:=taicpu(hp1).oper[2]^.reg;
              if taicpu(hp1).opcode=A_ADD then
                p.oper[1]^.ref^.signindex:=1
              else
                p.oper[1]^.ref^.signindex:=-1;
            end;
          asml.Remove(hp1);
          hp1.Free;
          Result:=true;
        end;
    end;
  394. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  395. var
  396. hp1,hp2: tai;
  397. i, i2: longint;
  398. TmpUsedRegs: TAllUsedRegs;
  399. tempop: tasmop;
  400. function IsPowerOf2(const value: DWord): boolean; inline;
  401. begin
  402. Result:=(value and (value - 1)) = 0;
  403. end;
  404. begin
  405. result := false;
  406. case p.typ of
  407. ait_instruction:
  408. begin
  409. {
  410. change
  411. <op> reg,x,y
  412. cmp reg,#0
  413. into
  414. <op>s reg,x,y
  415. }
  416. { this optimization can applied only to the currently enabled operations because
  417. the other operations do not update all flags and FPC does not track flag usage }
  418. if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
  419. A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
  420. GetNextInstruction(p, hp1) and
  421. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  422. (taicpu(hp1).oper[1]^.typ = top_const) and
  423. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  424. (taicpu(hp1).oper[1]^.val = 0) and
  425. GetNextInstruction(hp1, hp2) and
  426. { be careful here, following instructions could use other flags
  427. however after a jump fpc never depends on the value of flags }
  428. { All above instructions set Z and N according to the following
  429. Z := result = 0;
  430. N := result[31];
  431. EQ = Z=1; NE = Z=0;
  432. MI = N=1; PL = N=0; }
  433. MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
  434. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  435. begin
  436. DebugMsg('Peephole OpCmp2OpS done', p);
  437. taicpu(p).oppostfix:=PF_S;
  438. { move flag allocation if possible }
  439. GetLastInstruction(hp1, hp2);
  440. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  441. if assigned(hp2) then
  442. begin
  443. asml.Remove(hp2);
  444. asml.insertbefore(hp2, p);
  445. end;
  446. asml.remove(hp1);
  447. hp1.free;
  448. end
  449. else
  450. case taicpu(p).opcode of
  451. A_STR:
  452. begin
  453. { change
  454. str reg1,ref
  455. ldr reg2,ref
  456. into
  457. str reg1,ref
  458. mov reg2,reg1
  459. }
  460. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  461. (taicpu(p).oppostfix=PF_None) and
  462. GetNextInstruction(p,hp1) and
  463. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  464. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  465. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  466. begin
  467. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  468. begin
  469. DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
  470. asml.remove(hp1);
  471. hp1.free;
  472. end
  473. else
  474. begin
  475. taicpu(hp1).opcode:=A_MOV;
  476. taicpu(hp1).oppostfix:=PF_None;
  477. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  478. DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
  479. end;
  480. result := true;
  481. end
  482. { change
  483. str reg1,ref
  484. str reg2,ref
  485. into
  486. strd reg1,ref
  487. }
  488. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  489. (taicpu(p).oppostfix=PF_None) and
  490. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  491. GetNextInstruction(p,hp1) and
  492. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  493. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  494. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  495. { str ensures that either base or index contain no register, else ldr wouldn't
  496. use an offset either
  497. }
  498. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  499. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  500. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  501. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  502. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  503. begin
  504. DebugMsg('Peephole StrStr2Strd done', p);
  505. taicpu(p).oppostfix:=PF_D;
  506. asml.remove(hp1);
  507. hp1.free;
  508. end;
  509. LookForPostindexedPattern(taicpu(p));
  510. end;
  511. A_LDR:
  512. begin
  513. { change
  514. ldr reg1,ref
  515. ldr reg2,ref
  516. into ...
  517. }
  518. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  519. GetNextInstruction(p,hp1) and
  520. { ldrd is not allowed here }
  521. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  522. begin
  523. {
  524. ...
  525. ldr reg1,ref
  526. mov reg2,reg1
  527. }
  528. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  529. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  530. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  531. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  532. begin
  533. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  534. begin
  535. DebugMsg('Peephole LdrLdr2Ldr done', hp1);
  536. asml.remove(hp1);
  537. hp1.free;
  538. end
  539. else
  540. begin
  541. DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
  542. taicpu(hp1).opcode:=A_MOV;
  543. taicpu(hp1).oppostfix:=PF_None;
  544. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  545. end;
  546. result := true;
  547. end
  548. {
  549. ...
  550. ldrd reg1,ref
  551. }
  552. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  553. { ldrd does not allow any postfixes ... }
  554. (taicpu(p).oppostfix=PF_None) and
  555. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  556. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  557. { ldr ensures that either base or index contain no register, else ldr wouldn't
  558. use an offset either
  559. }
  560. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  561. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  562. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  563. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  564. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  565. begin
  566. DebugMsg('Peephole LdrLdr2Ldrd done', p);
  567. taicpu(p).oppostfix:=PF_D;
  568. asml.remove(hp1);
  569. hp1.free;
  570. end;
  571. end;
  572. LookForPostindexedPattern(taicpu(p));
  573. { Remove superfluous mov after ldr
  574. changes
  575. ldr reg1, ref
  576. mov reg2, reg1
  577. to
  578. ldr reg2, ref
  579. conditions are:
  580. * no ldrd usage
  581. * reg1 must be released after mov
  582. * mov can not contain shifterops
  583. * ldr+mov have the same conditions
  584. * mov does not set flags
  585. }
  586. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  587. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  588. end;
  589. A_MOV:
  590. begin
  591. { fold
  592. mov reg1,reg0, shift imm1
  593. mov reg1,reg1, shift imm2
  594. }
  595. if (taicpu(p).ops=3) and
  596. (taicpu(p).oper[2]^.typ = top_shifterop) and
  597. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  598. getnextinstruction(p,hp1) and
  599. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  600. (taicpu(hp1).ops=3) and
  601. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  602. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  603. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  604. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  605. begin
  606. { fold
  607. mov reg1,reg0, lsl 16
  608. mov reg1,reg1, lsr 16
  609. strh reg1, ...
  610. dealloc reg1
  611. to
  612. strh reg1, ...
  613. dealloc reg1
  614. }
  615. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  616. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  617. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  618. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  619. getnextinstruction(hp1,hp2) and
  620. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  621. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  622. begin
  623. CopyUsedRegs(TmpUsedRegs);
  624. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  625. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  626. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  627. begin
  628. DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
  629. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  630. asml.remove(p);
  631. asml.remove(hp1);
  632. p.free;
  633. hp1.free;
  634. p:=hp2;
  635. end;
  636. ReleaseUsedRegs(TmpUsedRegs);
  637. end
  638. { fold
  639. mov reg1,reg0, shift imm1
  640. mov reg1,reg1, shift imm2
  641. to
  642. mov reg1,reg0, shift imm1+imm2
  643. }
  644. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  645. { asr makes no use after a lsr, the asr can be foled into the lsr }
  646. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  647. begin
  648. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  649. { avoid overflows }
  650. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  651. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  652. SM_ROR:
  653. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  654. SM_ASR:
  655. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  656. SM_LSR,
  657. SM_LSL:
  658. begin
  659. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  660. InsertLLItem(p.previous, p.next, hp1);
  661. p.free;
  662. p:=hp1;
  663. end;
  664. else
  665. internalerror(2008072803);
  666. end;
  667. DebugMsg('Peephole ShiftShift2Shift 1 done', p);
  668. asml.remove(hp1);
  669. hp1.free;
  670. result := true;
  671. end
  672. { fold
  673. mov reg1,reg0, shift imm1
  674. mov reg1,reg1, shift imm2
  675. mov reg1,reg1, shift imm3 ...
  676. }
  677. else if getnextinstruction(hp1,hp2) and
  678. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  679. (taicpu(hp2).ops=3) and
  680. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  681. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  682. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  683. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  684. begin
  685. { mov reg1,reg0, lsl imm1
  686. mov reg1,reg1, lsr/asr imm2
  687. mov reg1,reg1, lsl imm3 ...
  688. if imm3<=imm1 and imm2>=imm3
  689. to
  690. mov reg1,reg0, lsl imm1
  691. mov reg1,reg1, lsr/asr imm2-imm3
  692. }
  693. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  694. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  695. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  696. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  697. begin
  698. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  699. DebugMsg('Peephole ShiftShiftShift2ShiftShift 1 done', p);
  700. asml.remove(hp2);
  701. hp2.free;
  702. result := true;
  703. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  704. begin
  705. asml.remove(hp1);
  706. hp1.free;
  707. end;
  708. end
  709. { mov reg1,reg0, lsr/asr imm1
  710. mov reg1,reg1, lsl imm2
  711. mov reg1,reg1, lsr/asr imm3 ...
  712. if imm3>=imm1 and imm2>=imm1
  713. to
  714. mov reg1,reg0, lsl imm2-imm1
  715. mov reg1,reg1, lsr/asr imm3 ...
  716. }
  717. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  718. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  719. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  720. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  721. begin
  722. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  723. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  724. DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
  725. asml.remove(p);
  726. p.free;
  727. p:=hp2;
  728. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  729. begin
  730. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  731. asml.remove(hp1);
  732. hp1.free;
  733. p:=hp2;
  734. end;
  735. result := true;
  736. end;
  737. end;
  738. end;
  739. { Change the common
  740. mov r0, r0, lsr #24
  741. and r0, r0, #255
  742. and remove the superfluous and
  743. This could be extended to handle more cases.
  744. }
  745. if (taicpu(p).ops=3) and
  746. (taicpu(p).oper[2]^.typ = top_shifterop) and
  747. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  748. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  749. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  750. getnextinstruction(p,hp1) and
  751. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  752. (taicpu(hp1).ops=3) and
  753. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  754. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  755. (taicpu(hp1).oper[2]^.typ = top_const) and
  756. { Check if the AND actually would only mask out bits that are already zero because of the shift
  757. For LSR #25 and an AndConst of 255 that would go like this:
  758. 255 and ((2 shl (32-25))-1)
  759. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  760. LSR #25 and AndConst of 254:
  761. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  762. }
  763. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  764. begin
  765. DebugMsg('Peephole LsrAnd2Lsr done', hp1);
  766. asml.remove(hp1);
  767. hp1.free;
  768. end;
  769. {
  770. optimize
  771. mov rX, yyyy
  772. ....
  773. }
  774. if (taicpu(p).ops = 2) and
  775. GetNextInstruction(p,hp1) and
  776. (tai(hp1).typ = ait_instruction) then
  777. begin
  778. {
  779. This changes the very common
  780. mov r0, #0
  781. str r0, [...]
  782. mov r0, #0
  783. str r0, [...]
  784. and removes all superfluous mov instructions
  785. }
  786. if (taicpu(p).oper[1]^.typ = top_const) and
  787. (taicpu(hp1).opcode=A_STR) then
  788. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  789. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  790. GetNextInstruction(hp1, hp2) and
  791. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  792. (taicpu(hp2).ops = 2) and
  793. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  794. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  795. begin
  796. DebugMsg('Peephole MovStrMov done', hp2);
  797. GetNextInstruction(hp2,hp1);
  798. asml.remove(hp2);
  799. hp2.free;
  800. if not assigned(hp1) then break;
  801. end
  802. {
  803. This removes the first mov from
  804. mov rX,...
  805. mov rX,...
  806. }
  807. else if taicpu(hp1).opcode=A_MOV then
  808. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  809. (taicpu(hp1).ops = 2) and
  810. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  811. { don't remove the first mov if the second is a mov rX,rX }
  812. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  813. begin
  814. DebugMsg('Peephole MovMov done', p);
  815. asml.remove(p);
  816. p.free;
  817. p:=hp1;
  818. GetNextInstruction(hp1,hp1);
  819. if not assigned(hp1) then
  820. break;
  821. end;
  822. end;
  823. {
  824. change
  825. mov r1, r0
  826. add r1, r1, #1
  827. to
  828. add r1, r0, #1
  829. Todo: Make it work for mov+cmp too
  830. CAUTION! If this one is successful p might not be a mov instruction anymore!
  831. }
  832. if (taicpu(p).ops = 2) and
  833. (taicpu(p).oper[1]^.typ = top_reg) and
  834. (taicpu(p).oppostfix = PF_NONE) and
  835. GetNextInstruction(p, hp1) and
  836. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  837. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
  838. [taicpu(p).condition], []) and
  839. {MOV and MVN might only have 2 ops}
  840. (taicpu(hp1).ops = 3) and
  841. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  842. (taicpu(hp1).oper[1]^.typ = top_reg) and
  843. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  844. begin
  845. { When we get here we still don't know if the registers match}
  846. for I:=1 to 2 do
  847. {
  848. If the first loop was successful p will be replaced with hp1.
  849. The checks will still be ok, because all required information
  850. will also be in hp1 then.
  851. }
  852. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  853. begin
  854. DebugMsg('Peephole RedundantMovProcess done', hp1);
  855. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  856. if p<>hp1 then
  857. begin
  858. asml.remove(p);
  859. p.free;
  860. p:=hp1;
  861. end;
  862. end;
  863. end;
  864. { This folds shifterops into following instructions
  865. mov r0, r1, lsl #8
  866. add r2, r3, r0
  867. to
  868. add r2, r3, r1, lsl #8
  869. CAUTION! If this one is successful p might not be a mov instruction anymore!
  870. }
  871. if (taicpu(p).opcode = A_MOV) and
  872. (taicpu(p).ops = 3) and
  873. (taicpu(p).oper[1]^.typ = top_reg) and
  874. (taicpu(p).oper[2]^.typ = top_shifterop) and
  875. (taicpu(p).oppostfix = PF_NONE) and
  876. GetNextInstruction(p, hp1) and
  877. MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  878. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  879. A_CMP, A_CMN],
  880. [taicpu(p).condition], [PF_None]) and
  881. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  882. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  883. (
  884. {Only ONE of the two src operands is allowed to match}
  885. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  886. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  887. ) then
  888. begin
  889. CopyUsedRegs(TmpUsedRegs);
  890. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  891. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  892. I2:=0
  893. else
  894. I2:=1;
  895. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  896. for I:=I2 to taicpu(hp1).ops-1 do
  897. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  898. begin
  899. { If the parameter matched on the second op from the RIGHT
  900. we have to switch the parameters, this will not happen for CMP
  901. were we're only evaluating the most right parameter
  902. }
  903. if I <> taicpu(hp1).ops-1 then
  904. begin
  905. {The SUB operators need to be changed when we swap parameters}
  906. case taicpu(hp1).opcode of
  907. A_SUB: tempop:=A_RSB;
  908. A_SBC: tempop:=A_RSC;
  909. A_RSB: tempop:=A_SUB;
  910. A_RSC: tempop:=A_SBC;
  911. else tempop:=taicpu(hp1).opcode;
  912. end;
  913. if taicpu(hp1).ops = 3 then
  914. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  915. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  916. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  917. else
  918. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  919. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  920. taicpu(p).oper[2]^.shifterop^);
  921. end
  922. else
  923. if taicpu(hp1).ops = 3 then
  924. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  925. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  926. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  927. else
  928. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  929. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  930. taicpu(p).oper[2]^.shifterop^);
  931. asml.insertbefore(hp2, p);
  932. asml.remove(p);
  933. asml.remove(hp1);
  934. p.free;
  935. hp1.free;
  936. p:=hp2;
  937. GetNextInstruction(p,hp1);
  938. DebugMsg('Peephole FoldShiftProcess done', p);
  939. break;
  940. end;
  941. ReleaseUsedRegs(TmpUsedRegs);
  942. end;
  943. {
  944. Often we see shifts and then a superfluous mov to another register
  945. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  946. }
  947. if (taicpu(p).opcode = A_MOV) and
  948. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  949. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  950. end;
  951. A_ADD,
  952. A_ADC,
  953. A_RSB,
  954. A_RSC,
  955. A_SUB,
  956. A_SBC,
  957. A_AND,
  958. A_BIC,
  959. A_EOR,
  960. A_ORR,
  961. A_MLA,
  962. A_MUL:
  963. begin
  964. {
  965. optimize
  966. and reg2,reg1,const1
  967. ...
  968. }
  969. if (taicpu(p).opcode = A_AND) and
  970. (taicpu(p).oper[1]^.typ = top_reg) and
  971. (taicpu(p).oper[2]^.typ = top_const) then
  972. begin
  973. {
  974. change
  975. and reg2,reg1,const1
  976. and reg3,reg2,const2
  977. to
  978. and reg3,reg1,(const1 and const2)
  979. }
  980. if GetNextInstruction(p, hp1) and
  981. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  982. { either reg3 and reg2 are equal or reg2 is deallocated after the and }
  983. (MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) or
  984. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)))) and
  985. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  986. (taicpu(hp1).oper[2]^.typ = top_const) then
  987. begin
  988. DebugMsg('Peephole AndAnd2And done', p);
  989. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  990. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  991. taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
  992. asml.remove(hp1);
  993. hp1.free;
  994. end
  995. {
  996. change
  997. and reg2,reg1,255
  998. strb reg2,[...]
  999. dealloc reg2
  1000. to
  1001. strb reg1,[...]
  1002. }
  1003. else if (taicpu(p).oper[2]^.val = 255) and
  1004. MatchInstruction(p, A_AND, [C_None], [PF_None]) and
  1005. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1006. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1007. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1008. { the reference in strb might not use reg2 }
  1009. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1010. { reg1 might not be modified inbetween }
  1011. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1012. begin
  1013. DebugMsg('Peephole AndStrb2Strb done', p);
  1014. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1015. asml.remove(p);
  1016. p.free;
  1017. p:=hp1;
  1018. end;
  1019. end;
  1020. {
  1021. change
  1022. add/sub reg2,reg1,const1
  1023. str/ldr reg3,[reg2,const2]
  1024. dealloc reg2
  1025. to
  1026. str/ldr reg3,[reg1,const2+/-const1]
  1027. }
  1028. if (taicpu(p).opcode in [A_ADD,A_SUB]) and
  1029. (taicpu(p).oper[1]^.typ = top_reg) and
  1030. (taicpu(p).oper[2]^.typ = top_const) then
  1031. begin
  1032. hp1:=p;
  1033. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
  1034. { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
  1035. MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
  1036. (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
  1037. { don't optimize if the register is stored/overwritten }
  1038. (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
  1039. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1040. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
  1041. { new offset must be valid: either in the range of 8 or 12 bit, depend on the
  1042. ldr postfix }
  1043. (((taicpu(p).opcode=A_ADD) and
  1044. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1045. ) or
  1046. ((taicpu(p).opcode=A_SUB) and
  1047. isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
  1048. )
  1049. ) do
  1050. begin
  1051. { neither reg1 nor reg2 might be changed inbetween }
  1052. if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
  1053. RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
  1054. break;
  1055. { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
  1056. if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
  1057. assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
  1058. begin
  1059. { remember last instruction }
  1060. hp2:=hp1;
  1061. DebugMsg('Peephole Add/SubLdr2Ldr done', p);
  1062. hp1:=p;
  1063. { fix all ldr/str }
  1064. while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
  1065. begin
  1066. taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
  1067. if taicpu(p).opcode=A_ADD then
  1068. inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
  1069. else
  1070. dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
  1071. if hp1=hp2 then
  1072. break;
  1073. end;
  1074. GetNextInstruction(p,hp1);
  1075. asml.remove(p);
  1076. p.free;
  1077. p:=hp1;
  1078. break;
  1079. end;
  1080. end;
  1081. end;
  1082. {
  1083. change
  1084. add reg1, ...
  1085. mov reg2, reg1
  1086. to
  1087. add reg2, ...
  1088. }
  1089. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  1090. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  1091. end;
  1092. A_MVN:
  1093. begin
  1094. {
  1095. change
  1096. mvn reg2,reg1
  1097. and reg3,reg4,reg2
  1098. dealloc reg2
  1099. to
  1100. bic reg3,reg4,reg1
  1101. }
  1102. if (taicpu(p).oper[1]^.typ = top_reg) and
  1103. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1104. MatchInstruction(hp1,A_AND,[],[]) and
  1105. (((taicpu(hp1).ops=3) and
  1106. (taicpu(hp1).oper[2]^.typ=top_reg) and
  1107. (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
  1108. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
  1109. ((taicpu(hp1).ops=2) and
  1110. (taicpu(hp1).oper[1]^.typ=top_reg) and
  1111. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
  1112. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1113. { reg1 might not be modified inbetween }
  1114. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1115. begin
  1116. DebugMsg('Peephole MvnAnd2Bic done', p);
  1117. taicpu(hp1).opcode:=A_BIC;
  1118. if taicpu(hp1).ops=3 then
  1119. begin
  1120. if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
  1121. taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
  1122. taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
  1123. end
  1124. else
  1125. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1126. asml.remove(p);
  1127. p.free;
  1128. p:=hp1;
  1129. end;
  1130. end;
  1131. A_UXTB:
  1132. begin
  1133. {
  1134. change
  1135. uxtb reg2,reg1
  1136. strb reg2,[...]
  1137. dealloc reg2
  1138. to
  1139. strb reg1,[...]
  1140. }
  1141. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1142. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1143. MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
  1144. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1145. { the reference in strb might not use reg2 }
  1146. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1147. { reg1 might not be modified inbetween }
  1148. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1149. begin
  1150. DebugMsg('Peephole UxtbStrb2Strb done', p);
  1151. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1152. asml.remove(p);
  1153. p.free;
  1154. p:=hp1;
  1155. end
  1156. {
  1157. change
  1158. uxtb reg2,reg1
  1159. uxth reg3,reg2
  1160. dealloc reg2
  1161. to
  1162. uxtb reg3,reg1
  1163. }
  1164. else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
  1165. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1166. MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
  1167. (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
  1168. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
  1169. { reg1 might not be modified inbetween }
  1170. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1171. begin
  1172. DebugMsg('Peephole UxtbUxth2Uxtb done', p);
  1173. taicpu(hp1).opcode:=A_UXTB;
  1174. taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
  1175. asml.remove(p);
  1176. p.free;
  1177. p:=hp1;
  1178. end;
  1179. end;
  1180. A_UXTH:
  1181. begin
  1182. if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
  1183. GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
  1184. MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
  1185. assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
  1186. { the reference in strb might not use reg2 }
  1187. not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
  1188. { reg1 might not be modified inbetween }
  1189. not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
  1190. begin
  1191. DebugMsg('Peephole UXTHStrh2Strh done', p);
  1192. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
  1193. asml.remove(p);
  1194. p.free;
  1195. p:=hp1;
  1196. end;
  1197. end;
  1198. A_CMP:
  1199. begin
  1200. {
  1201. change
  1202. cmp reg,const1
  1203. moveq reg,const1
  1204. movne reg,const2
  1205. to
  1206. cmp reg,const1
  1207. movne reg,const2
  1208. }
  1209. if (taicpu(p).oper[1]^.typ = top_const) and
  1210. GetNextInstruction(p, hp1) and
  1211. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1212. (taicpu(hp1).oper[1]^.typ = top_const) and
  1213. GetNextInstruction(hp1, hp2) and
  1214. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  1215. (taicpu(hp1).oper[1]^.typ = top_const) then
  1216. begin
  1217. RemoveRedundantMove(p, hp1, asml);
  1218. RemoveRedundantMove(p, hp2, asml);
  1219. end;
  1220. end;
  1221. end;
  1222. end;
  1223. end;
  1224. end;
  { Returns true when p is an instruction after which no further instruction
    may be made conditional in the same run: calls (BL/BLX), SWI, the
    compare/test opcodes, any instruction whose first operand is the PC
    register, and any instruction carrying the S postfix.
    Non-instruction list entries always yield false. }
  function MustBeLast(p : tai) : boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      { only real instructions can qualify; do not cast anything else }
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);
      if insn.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (insn.ops>=1) and
        (insn.oper[0]^.typ=top_reg) and
        (insn.oper[0]^.reg=NR_PC) then
        { first operand is the program counter }
        Result:=true
      else
        { flag-setting variant of an instruction }
        Result:=(insn.oppostfix=PF_S);
    end;
  { Second peephole pass: replaces short forward conditional branches by
    conditionally executed instructions.

    Walks the list from BlockStart to BlockEnd and, for every conditional
    A_B instruction, tries two rewrites:

      1)  Bcc xxx                      <instr, cond inverted>
          <1..4 instructions>    ->    ...
        xxx:                         xxx:

      2)  Bcc xxx                      <instr 1, cond inverted>
          <up to 3 instructions 1>     ...
          B yyy                  ->  xxx:
        xxx:                           <instr 2, original cond>
          <instructions 2>             ...
        yyy:                         yyy:

    Only instructions accepted by CanBeCond are considered, and an
    instruction for which MustBeLast is true terminates block 1.
    The removed branches decrement the reference count of their label. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                            Bxx   xxx
                            <several instructions>
                          xxx:
                        }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        { count the instructions that could be made conditional }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized.
                              }
                              if not WasLast then
                                begin
                                  { check further for
                                      Bcc   xxx
                                      <several instructions 1>
                                      B   yyy
                                    xxx:
                                      <several instructions 2>
                                    yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          { block 1 gets the inverted condition }
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2>; block 2 gets
                                            the original condition }
                                          repeat
                                            { block 2 may be empty, in which case hp1 is
                                              already the yyy label: never write a condition
                                              into something that is not an instruction }
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1)) or
                                            (hp1.typ=ait_label);
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is used by the list entry p1.
    A BL instruction is always reported as using the register; every other
    entry is delegated to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ <> ait_instruction) or (taicpu(p1).opcode <> A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        { BL: treated as touching every register }
        Result:=true;
    end;
  const
    { Opcodes that write, or might write, to memory. Used by the
      pre-regalloc scheduler to decide whether a load may be moved in front
      of the preceding instruction. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branch/call/breakpoint opcodes }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { integer stores }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { remaining store opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD
    ];
  { Adjusts the register live information kept in cg.rg[...] when the two
    instructions p and hp1 are swapped. hp1 must directly follow p.

    - If the live range of a register used by p starts at p and hp1 also
      uses the register, the live start is moved to hp1 (hp1 will come
      first after the swap).
    - If the live range of a register used by hp1 ends at hp1 and p also
      uses the register, the live end is moved to p (p will come last after
      the swap).

    Registers are collected from plain register operands, from the base and
    index of references, from the shift register of shifter operands and
    from register sets. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { moves the live end of reg from hp1 to p if p uses reg as well }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { moves the live start of reg from p to hp1 if hp1 uses reg as well }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { the shift register of hp1 needs its live END adjusted, like
              every other register operand of hp1 }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass.

    Scans the block from BlockStart to BlockEnd (the incoming value of p is
    overwritten) looking for the sequence

        <instruction p>
        ldr*  rX,[...]        (loadinsn)
        <instruction using rX> (userinsn)

    and, when it is safe, moves p (together with the register deallocation
    markers that belong to it) behind the load, so that the load result is
    not needed by the instruction immediately following the load.
    Always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;

    { TODO : schedule also forward }
    { TODO : schedule distance > 1 }

    { True when moving the load in front of prev is acceptable with respect to
      memory and PC:
        - prev is not a potential memory writer and does not modify PC, or
        - prev is a store and the load is PC relative or reads from a symbol
          with offset 0, or
        - prev is a plain store using the same base register, neither side
          uses an index register or a postfix, and the offsets are far enough
          apart that the accesses cannot overlap. }
    function PrevCannotAffectLoad(prev,load : tai) : boolean;
      begin
        result:=
          (not(taicpu(prev).opcode in opcode_could_mem_write) and
           not(RegModifiedByInstruction(NR_PC,prev))
          ) or
          ((taicpu(prev).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
           ((taicpu(load).oper[1]^.ref^.base=NR_PC) or
            (assigned(taicpu(load).oper[1]^.ref^.symboldata) and
             (taicpu(load).oper[1]^.ref^.offset=0)
            )
           )
          ) or
          { try to prove that the memory accesses don't overlap }
          ((taicpu(prev).opcode in [A_STRB,A_STRH,A_STR]) and
           (taicpu(prev).oper[1]^.ref^.base=taicpu(load).oper[1]^.ref^.base) and
           (taicpu(prev).oppostfix=PF_None) and
           (taicpu(load).oppostfix=PF_None) and
           (taicpu(prev).oper[1]^.ref^.index=NR_NO) and
           (taicpu(load).oper[1]^.ref^.index=NR_NO) and
           { operand sizes versus offset distance }
           (abs(taicpu(prev).oper[1]^.ref^.offset-taicpu(load).oper[1]^.ref^.offset)>=
            max(tcgsize2size[reg_cgsize(taicpu(prev).oper[0]^.reg)],
                tcgsize2size[reg_cgsize(taicpu(load).oper[0]^.reg)]))
          );
      end;

    var
      loadinsn,userinsn,scan,marker,trailing : tai;
      moved : TAsmList;
    begin
      result:=true;

      { temporary list collecting p and its deallocation markers }
      moved:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd Do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,loadinsn) and
            (loadinsn.typ=ait_instruction) and
            (taicpu(loadinsn).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction changes potentially a memory location }
            PrevCannotAffectLoad(p,loadinsn) and
            GetNextInstruction(loadinsn,userinsn) and
            (userinsn.typ=ait_instruction) and
            { the loaded register is consumed by the very next instruction ... }
            (RegInInstruction(taicpu(loadinsn).oper[0]^.reg,userinsn)) and
            { ... but is not touched by the instruction in front of the load }
            not(RegInInstruction(taicpu(loadinsn).oper[0]^.reg,p)) and
            { both instructions carry the same condition }
            (taicpu(p).condition=taicpu(loadinsn).condition) and
            { p must not change the base register of the load }
            ((taicpu(loadinsn).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadinsn).oper[1]^.ref^.base,p))
            ) and
            { p must not change the index register of the load }
            ((taicpu(loadinsn).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadinsn).oper[1]^.ref^.index,p))
            ) then
            begin
              scan:=tai(p.Previous);
              trailing:=tai(p.next);
              asml.Remove(p);

              { register deallocations associated with p travel with it:
                first the ones in front of p, walking backwards up to the
                previous instruction }
              while assigned(scan) and (scan.typ<>ait_instruction) do
                begin
                  marker:=scan;
                  scan:=tai(scan.Previous);
                  if (marker.typ=ait_regalloc) and
                    (tai_regalloc(marker).ratype in [ra_dealloc]) and
                    RegInInstruction(tai_regalloc(marker).reg,p) then
                    begin
                      asml.Remove(marker);
                      moved.Concat(marker);
                    end;
                end;

              moved.Concat(p);
              SwapRegLive(taicpu(p),taicpu(loadinsn));

              { ... then the ones behind p, walking forwards up to the next
                instruction }
              while assigned(trailing) and (trailing.typ<>ait_instruction) do
                begin
                  marker:=trailing;
                  trailing:=tai(trailing.next);
                  if (marker.typ=ait_regalloc) and
                    (tai_regalloc(marker).ratype in [ra_dealloc]) and
                    RegInInstruction(tai_regalloc(marker).reg,p) then
                    begin
                      asml.Remove(marker);
                      moved.Concat(marker);
                    end;
                end;

              { re-insert: load first, then p with its markers, both in
                front of the instruction using the loaded register }
              asml.Remove(loadinsn);
  {$ifdef DEBUG_PREREGSCHEDULER}
              asml.insertbefore(tai_comment.Create(strpnew('Rescheduled')),userinsn);
  {$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(loadinsn,userinsn);
              asml.InsertListBefore(userinsn,moved);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            { continue with the instruction GetNextInstruction delivered }
            p:=loadinsn
          else
            p:=tai(p.next);
        end;
      moved.Free;
    end;
  { Shortens the IT instruction found in front of p by one slot.

    Starting at the entry before p, the list is walked backwards.  Only
    entries of type ait_instruction are counted (l starts at 1 and is
    incremented for every instruction that is not an IT variant); at most
    four such positions are examined.  The first opcode in the range
    A_IT..A_ITTTT ends the search:
      - a plain IT met at l=1 is unlinked from list and freed;
      - an IT variant whose number of slots equals l loses its last slot
        (e.g. ITTE -> ITT when l=3);
      - any other combination is left untouched.

    list : assembler list that owns the entries around p
    p    : entry whose predecessors are scanned; p itself is not modified }
  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
    var
      hp : tai;
      l : longint;
    begin
      hp := tai(p.Previous);
      l := 1;

      while assigned(hp) and
        (l <= 4) do
        begin
          if hp.typ=ait_instruction then
            begin
              if (taicpu(hp).opcode>=A_IT) and
                (taicpu(hp).opcode <= A_ITTTT) then
                begin
                  if (taicpu(hp).opcode = A_IT) and
                    (l=1) then
                    begin
                      { the IT no longer covers anything: unlink it and release
                        it, Remove alone would leak the instruction.  hp is not
                        touched again because the loop is left right below }
                      list.Remove(hp);
                      hp.Free;
                    end
                  else
                    { drop the last slot of the IT block, but only if that
                      last slot is the position l we are looking at }
                    case taicpu(hp).opcode of
                      A_ITE,
                      A_ITT:
                        if l=2 then taicpu(hp).opcode := A_IT;
                      A_ITEE,
                      A_ITET:
                        if l=3 then taicpu(hp).opcode := A_ITE;
                      A_ITTE,
                      A_ITTT:
                        if l=3 then taicpu(hp).opcode := A_ITT;
                      A_ITEEE,
                      A_ITEET:
                        if l=4 then taicpu(hp).opcode := A_ITEE;
                      A_ITTEE,
                      A_ITTET:
                        if l=4 then taicpu(hp).opcode := A_ITTE;
                      A_ITETE,
                      A_ITETT:
                        if l=4 then taicpu(hp).opcode := A_ITET;
                      A_ITTTE,
                      A_ITTTT:
                        if l=4 then taicpu(hp).opcode := A_ITTT;
                    end;

                  { only the nearest IT is of interest }
                  break;
                end;
              {else if (taicpu(hp).condition<>taicpu(p).condition) or
                (taicpu(hp).condition<>inverse_cond(taicpu(p).condition)) then
                break;}

              inc(l);
            end;

          hp := tai(hp.Previous);
        end;
    end;
  { Thumb-2 specific pass 1 peephole rewrites applied to the single entry p.

    Rewrites performed here (first match wins):
      - STM (FD/DB), pre-indexed, index = stack pointer, register set
        without r8..r13 and r15                         -> PUSH <regset>
      - STR reg, pre-indexed on the stack pointer with offset -4,
        reg in r0..r7 or r14                            -> PUSH (reg)
      - LDM (FD/IA), index = stack pointer, register set without r8..r14
                                                        -> POP <regset>
      - LDR reg, post-indexed on the stack pointer with offset 4,
        reg in r0..r7 or r15                            -> POP (reg)
      - MOV with a constant 0..255, MVN with a register operand, and
        ADD/SUB rd,rd,#0..255 (rd <> SP) get the S postfix when
        FindRegDeAlloc(NR_DEFAULTFLAGS,p) returns nil
        (NOTE(review): presumably this means the flags are not in use at
        this point - confirm against FindRegDeAlloc); ADD/SUB is also
        reduced to its two operand form
      - AND with constant 255 / 65535                   -> UXTB / UXTH

    If nothing matches, the inherited implementation is called.

    p      : entry to optimise; replaced by the new instruction when the
             original one is removed
    result : true if p was changed by one of the rewrites above, otherwise
             whatever the inherited method returns }
  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp : taicpu;
      { hp1/hp2 are only referenced by the disabled CMP/B -> CBZ/CBNZ code at
        the end of this function }
      hp1,hp2 : tai;
    begin
      { every rewrite below needs an instruction, so test this only once }
      if p.typ<>ait_instruction then
        begin
          result:=inherited PeepHoleOptPass1Cpu(p);
          exit;
        end;

      if MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          { release the replaced instruction like the other push/pop
            rewrites below do, otherwise it is leaked }
          p.Free;
          p:=hp;
          result:=true;
        end
      else if MatchInstruction(p, A_STR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=-4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
        begin
          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertAfter(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      else if MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      else if MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
        (taicpu(p).oper[1]^.ref^.offset=4) and
        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
        begin
          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
          asml.InsertBefore(hp, p);
          asml.Remove(p);
          p.Free;
          p:=hp;
          result:=true;
        end
      else if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val >= 0) and
        (taicpu(p).oper[1]^.val < 256) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          { mov rd,#imm8 -> movs rd,#imm8 }
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, A_MVN, [], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_reg) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          { mvn rd,rm -> mvns rd,rm }
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
        (taicpu(p).oper[2]^.typ=top_const) and
        (taicpu(p).oper[2]^.val >= 0) and
        (taicpu(p).oper[2]^.val < 256) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          { add/sub rd,rd,#imm8 -> adds/subs rd,#imm8: the constant moves
            into operand 1 and the third operand is dropped }
          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [], [PF_None,PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
        (taicpu(p).oper[2]^.typ=top_reg) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).ops := 2;
          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
          taicpu(p).oppostfix:=PF_S;
          result:=true;
        end
      else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and
        (taicpu(p).ops = 3) and
        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
        begin
          taicpu(p).oppostfix:=PF_S;
          taicpu(p).ops := 2;
          result:=true;
        end}
      else if MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 2) and
        (taicpu(p).oper[1]^.typ=top_const) and
        ((taicpu(p).oper[1]^.val=255) or
         (taicpu(p).oper[1]^.val=65535)) then
        begin
          { and rd,#255/#65535 -> uxtb/uxth rd,rd }
          if taicpu(p).oper[1]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;

          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);

          result := true;
        end
      else if MatchInstruction(p, [A_AND], [], [PF_None]) and
        (taicpu(p).ops = 3) and
        (taicpu(p).oper[2]^.typ=top_const) and
        ((taicpu(p).oper[2]^.val=255) or
         (taicpu(p).oper[2]^.val=65535)) then
        begin
          { and rd,rn,#255/#65535 -> uxtb/uxth rd,rn: the constant operand
            is simply cut off }
          if taicpu(p).oper[2]^.val=255 then
            taicpu(p).opcode:=A_UXTB
          else
            taicpu(p).opcode:=A_UXTH;

          taicpu(p).ops:=2;

          result := true;
        end
      {else if (p.typ=ait_instruction) and
        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
        (taicpu(p).oper[1]^.typ=top_const) and
        (taicpu(p).oper[1]^.val=0) and
        GetNextInstruction(p,hp1) and
        (taicpu(hp1).opcode=A_B) and
        (taicpu(hp1).condition in [C_EQ,C_NE]) then
        begin
          if taicpu(hp1).condition = C_EQ then
            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
          else
            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);

          taicpu(hp2).is_jmp := true;

          asml.InsertAfter(hp2, hp1);

          asml.Remove(hp1);
          hp1.Free;
          asml.Remove(p);
          p.Free;

          p := hp2;

          result := true;
        end}
      else
        Result := inherited PeepHoleOptPass1Cpu(p);
    end;
  { Pass 2: replaces short forward conditional branches by IT blocks.

    Walks the entries from BlockStart up to (not including) BlockEnd.  For
    every conditional A_B instruction the following instructions are counted
    in l as long as CanBeCond accepts them and no label is met; counting
    also stops right after an instruction for which MustBeLast holds.  If
    FindLabel then reports that the branch target label is at the entry
    reached and 1..4 instructions were counted:
      - these instructions get the inverse of the branch condition,
      - DecrementPreceedingIT is called for the branch,
      - an IT/ITT/ITTT/ITTTT with the inverse condition (plus a 'Collapsed'
        comment) is inserted after the branch,
      - the reference count of the target label is decremented and the
        branch itself is removed and freed.
    Scanning then continues at the first of the now conditional
    instructions. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                               Bxx   xxx
                               <several instructions>
                            xxx:
                        }
                        l:=0;
                        GetNextInstruction(p, hp1);
                        { count the instructions which could be made
                          conditional; l may end up as 5 here, which the
                          range check further down rejects }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                { nothing may follow this instruction inside
                                  the block: step past it and stop counting }
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 keeps the branch, p moves on to the
                                      first instruction of the new IT block }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }

                                    asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);

                                    DecrementPreceedingIT(asml, hp2);

                                    { inserted directly after the branch, i.e. in
                                      front of the comment added above }
                                    case l of
                                      1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
                                      2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
                                      3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
                                      4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
                                    end;

                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    { p already points behind the removed branch,
                                      so do not advance it again }
                                    continue;
                                  end;
                              end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Unit initialisation: store this unit's scheduler and optimizer classes
    in the global class reference variables.
    NOTE(review): presumably these variables are read elsewhere in the
    compiler to instantiate the CPU specific classes - confirm. }
  begin
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
    casmoptimizer:=TCpuAsmOptimizer;
  end.