aoptcpu.pas 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  { CPU specific assembler optimizer, derived from TAsmOptimizer. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor of its own: the one inherited from TAopObj is used }

    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;

    { Examines movp, an item following instruction p; when it is a plain
      "mov x,<destination of p>" that passes all safety checks, p is changed
      to write to x directly and movp is removed.  optimizer is only used
      for the comment inserted into the assembler list. }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);

    function RegUsedAfterInstruction(reg: Tregister; p: tai;
                                     var AllUsedRegs: TAllUsedRegs): Boolean;

    { Steps forward from Current with GetNextInstruction and stores the
      item reached in Next.  The walk stops when there is no further item,
      when the item is not an instruction, when the instruction contains
      reg or the program counter, or when it is a call/jump.
      The function result is that of the last GetNextInstruction call. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai;
                                        reg: TRegister): Boolean;
  end;
  { CPU specific scheduler derived from TAsmScheduler.
    NOTE(review): going by the class name it runs before register
    allocation; the implementation is not part of this excerpt. }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p: taicpu; hp1: taicpu);
  end;
  { Variant of TCpuAsmOptimizer that replaces only the second peephole pass. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor of its own: the one inherited from TAopObj is used }
    procedure PeepHoleOptPass2; override;
  end;
  47. Implementation
  48. uses
  49. cutils,verbose,globals,
  50. systems,
  51. cpuinfo,
  52. cgobj,cgutils,procinfo,
  53. aasmbase,aasmdata;
  { Returns true when p is an instruction that carries no condition yet
    (C_None), is not a PLD and, if it is a BLX, has a register as its
    first operand. }
  function CanBeCond(p : tai) : boolean;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).condition<>C_None then
        exit;
      if taicpu(p).opcode=A_PLD then
        exit;
      { BLX qualifies only in its register form }
      if taicpu(p).opcode=A_BLX then
        result:=taicpu(p).oper[0]^.typ=top_reg
      else
        result:=true;
    end;
  { Field-by-field comparison of two references; true only when every
    compared field (registers, offset, scale, symbols, address kind,
    index sign, shift and addressing mode) is identical. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;

      { registers, displacement and scale }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;

      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;

      { ARM specific parts: index sign, shift and addressing mode }
      if (r1.signindex<>r2.signindex) or
         (r1.shiftimm<>r2.shiftimm) or
         (r1.shiftmode<>r2.shiftmode) or
         (r1.addressmode<>r2.addressmode) then
        exit;

      result:=true;
    end;
  { Returns true when instr is an instruction with opcode op whose
    condition is a member of cond and whose postfix is a member of
    postfix.  An empty set for cond or postfix accepts any value. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands.  They match when they have the same operand
    type and, for constants, registers, condition codes and references,
    the same contents.  Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        result:=false
      else
        case oper1.typ of
          top_reg:
            result:=(oper1.reg=oper2.reg);
          top_const:
            result:=(oper1.val=oper2.val);
          top_ref:
            result:=RefsEqual(oper1.ref^,oper2.ref^);
          top_conditioncode:
            result:=(oper1.cc=oper2.cc);
          else
            result:=false;
        end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Removes movp from asml (and frees it) when it is an EQ-conditional
    instruction whose first operand is the register compared by cmpp and
    whose second operand is the constant cmpp compares against.
    A comment is left in the list in that case; otherwise nothing happens.
    The operands of both instructions are accessed without type checks. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpInstr, movInstr: taicpu;
    begin
      cmpInstr:=taicpu(cmpp);
      movInstr:=taicpu(movp);

      if movInstr.condition<>C_EQ then
        exit;
      if cmpInstr.oper[0]^.reg<>movInstr.oper[0]^.reg then
        exit;
      if cmpInstr.oper[1]^.val<>movInstr.oper[1]^.val then
        exit;

      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true when the instruction hp writes a new value into reg.

    reg: the register to look for
    hp:  the item to examine; nil and non-instruction items yield false

    Opcode specific rules are applied first (compare/branch style
    instructions, LDR/STR base register write-back, the long multiplies,
    LDM); after that the first operand is treated as the destination. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { A store only reads its first operand (instructionLoadsFromReg
          treats oper[0] of STR as a source), so the only register a STR can
          change is the base register of a pre/postindexed reference.
          Return here: falling through to the generic oper[0] test below
          would wrongly report the stored register as overwritten. }
        A_STR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            exit;
          end;
        { Pre/postindexed loads change their base register; the loaded
          register itself is handled by the oper[0] test below }
        A_LDR:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ=top_ref) and
            (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[1]^.ref^.base = reg);
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor:
          MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      { an instruction without operands cannot write to reg; do not touch
        oper[0] in that case }
      if p.ops = 0 then
        exit;

      case p.oper[0]^.typ of
        { the common case: the first operand is the destination register }
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following oper[0] }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and
             (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (Safe) heuristic deciding whether ref can be taken to be aligned to
    8 bytes.  Only the EABI style ABIs are considered, and only two shapes
    of reference are accepted:
      - R13 as base or index with a non-negative offset divisible by 8
      - R11 as base or index, R11 being the frame pointer of the current
        procedure, with a non-positive offset for which offset+4 is
        divisible by 8 }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { relative to R13 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;

      { relative to the frame pointer:
        when using NR_R11, it has always a value of <qword align>+4 }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true when the instruction hp uses reg as a source: as a
    register operand, as a member of a register set, as the shift
    register of a shifter operand or as base/index of a reference.
    nil and non-instruction items yield false.

    For most opcodes the scan starts at the second operand; for the
    opcodes listed below the first operand is inspected as well. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      first, idx: longint;
    begin
      instructionLoadsFromReg := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      instr := taicpu(hp);

      { for these instructions the scan has to start at oper[0] }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        first := 0
      else
        first := 1;

      for idx := first to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_ref:
              instructionLoadsFromReg :=
                (instr.oper[idx]^.ref^.base = reg) or
                (instr.oper[idx]^.ref^.index = reg);
            top_shifterop:
              instructionLoadsFromReg := (instr.oper[idx]^.shifterop^.rs = reg);
            top_regset:
              instructionLoadsFromReg := (getsupreg(reg) in instr.oper[idx]^.regset^);
            top_reg:
              instructionLoadsFromReg :=
                (instr.oper[idx]^.reg = reg) or
                { STRD also stores the register following oper[0] }
                ((idx = 0) and (instr.opcode = A_STR) and (instr.oppostfix = PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1 = getsupreg(reg)));
          end;
          { stop at the first operand that reads reg }
          if instructionLoadsFromReg then
            exit;
        end;
    end;
  { Decides whether reg is still needed after instruction p.
    AllUsedRegs is first updated with the item following p.  The result is
    false when reg is not marked as used afterwards or when p itself loads
    reg with a new value.  Otherwise the result is true, unless the next
    instruction overwrites reg without reading it. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    var
      following: tai;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);

      Result := false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;

      { nothing follows that could overwrite reg: treat it as used }
      if not GetNextInstruction(p,following) then
        begin
          Result := true;
          exit;
        end;

      Result :=
        instructionLoadsFromReg(reg,following) or
        not regLoadedWithNewValue(reg,following);
    end;
  { Advances from Current, one GetNextInstruction step at a time, and
    leaves the item reached in Next.  The walk ends as soon as
      - there is no further item (the result is false then),
      - the item is not an instruction,
      - the instruction contains reg,
      - the instruction is a call or a jump, or
      - the instruction contains the program counter.
    The result is that of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next := Current;
      while true do
        begin
          Result := GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ <> ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Folds
      <op> reg1,...
      mov  reg2,reg1
    into
      <op> reg2,...
    p is the first instruction, movp the candidate mov.  optimizer names
    the calling optimization in the comment added to the assembler list.
    Nothing is changed unless every check below passes and reg1 is not
    used any more after movp. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      TmpUsedRegs: TAllUsedRegs;
      movDest: TRegister;
    begin
      { movp has to be a flag-neutral mov with the same condition as p }
      if not MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) then
        exit;
      { we can't optimize if there is a shiftop }
      if taicpu(movp).ops<>2 then
        exit;
      { the mov has to copy the result of p }
      if not MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) then
        exit;

      movDest:=taicpu(movp).oper[0]^.reg;

      { don't mess with moves to pc or lr }
      if (movDest=NR_PC) or (movDest=NR_R14) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(movDest,p,movp) then
        exit;
      { special requirement of MUL and MLA: oper[0] and oper[1] are not
        allowed to be the same register }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg=movDest) then
        exit;

      CopyUsedRegs(TmpUsedRegs);
      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
      if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,movp,TmpUsedRegs)) then
        begin
          asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
          taicpu(p).loadreg(0,movDest);
          asml.remove(movp);
          movp.free;
        end;
      ReleaseUsedRegs(TmpUsedRegs);
    end;
  271. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  272. var
  273. hp1,hp2: tai;
  274. i, i2: longint;
  275. TmpUsedRegs: TAllUsedRegs;
  276. tempop: tasmop;
  277. function IsPowerOf2(const value: DWord): boolean; inline;
  278. begin
  279. Result:=(value and (value - 1)) = 0;
  280. end;
  281. begin
  282. result := false;
  283. case p.typ of
  284. ait_instruction:
  285. begin
  286. (* optimization proved not to be safe, see tw4768.pp
  287. {
  288. change
  289. <op> reg,x,y
  290. cmp reg,#0
  291. into
  292. <op>s reg,x,y
  293. }
  294. { this optimization can applied only to the currently enabled operations because
  295. the other operations do not update all flags and FPC does not track flag usage }
  296. if (taicpu(p).opcode in [A_ADC,A_ADD,A_SUB {A_UDIV,A_SDIV,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND}]) and
  297. (taicpu(p).oppostfix = PF_None) and
  298. (taicpu(p).condition = C_None) and
  299. GetNextInstruction(p, hp1) and
  300. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  301. (taicpu(hp1).oper[1]^.typ = top_const) and
  302. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  303. (taicpu(hp1).oper[1]^.val = 0) { and
  304. GetNextInstruction(hp1, hp2) and
  305. (tai(hp2).typ = ait_instruction) and
  306. // be careful here, following instructions could use other flags
  307. // however after a jump fpc never depends on the value of flags
  308. (taicpu(hp2).opcode = A_B) and
  309. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])} then
  310. begin
  311. taicpu(p).oppostfix:=PF_S;
  312. asml.remove(hp1);
  313. hp1.free;
  314. end
  315. else
  316. *)
  317. case taicpu(p).opcode of
  318. A_STR:
  319. begin
  320. { change
  321. str reg1,ref
  322. ldr reg2,ref
  323. into
  324. str reg1,ref
  325. mov reg2,reg1
  326. }
  327. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  328. (taicpu(p).oppostfix=PF_None) and
  329. GetNextInstruction(p,hp1) and
  330. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  331. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  332. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  333. begin
  334. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  335. begin
  336. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  337. asml.remove(hp1);
  338. hp1.free;
  339. end
  340. else
  341. begin
  342. taicpu(hp1).opcode:=A_MOV;
  343. taicpu(hp1).oppostfix:=PF_None;
  344. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  345. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  346. end;
  347. result := true;
  348. end
  349. { change
  350. str reg1,ref
  351. str reg2,ref
  352. into
  353. strd reg1,ref
  354. }
  355. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  356. (taicpu(p).oppostfix=PF_None) and
  357. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  358. GetNextInstruction(p,hp1) and
  359. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  360. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  361. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  362. { str ensures that either base or index contain no register, else ldr wouldn't
  363. use an offset either
  364. }
  365. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  366. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  367. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  368. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  369. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  370. begin
  371. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  372. taicpu(p).oppostfix:=PF_D;
  373. asml.remove(hp1);
  374. hp1.free;
  375. end;
  376. end;
  377. A_LDR:
  378. begin
  379. { change
  380. ldr reg1,ref
  381. ldr reg2,ref
  382. into ...
  383. }
  384. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  385. GetNextInstruction(p,hp1) and
  386. { ldrd is not allowed here }
  387. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  388. begin
  389. {
  390. ...
  391. ldr reg1,ref
  392. mov reg2,reg1
  393. }
  394. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  395. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  396. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  397. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  398. begin
  399. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  400. begin
  401. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  402. asml.remove(hp1);
  403. hp1.free;
  404. end
  405. else
  406. begin
  407. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  408. taicpu(hp1).opcode:=A_MOV;
  409. taicpu(hp1).oppostfix:=PF_None;
  410. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  411. end;
  412. result := true;
  413. end
  414. {
  415. ...
  416. ldrd reg1,ref
  417. }
  418. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  419. { ldrd does not allow any postfixes ... }
  420. (taicpu(p).oppostfix=PF_None) and
  421. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  422. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  423. { ldr ensures that either base or index contain no register, else ldr wouldn't
  424. use an offset either
  425. }
  426. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  427. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  428. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  429. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  430. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  431. begin
  432. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  433. taicpu(p).oppostfix:=PF_D;
  434. asml.remove(hp1);
  435. hp1.free;
  436. end;
  437. end;
  438. { Remove superfluous mov after ldr
  439. changes
  440. ldr reg1, ref
  441. mov reg2, reg1
  442. to
  443. ldr reg2, ref
  444. conditions are:
  445. * no ldrd usage
  446. * reg1 must be released after mov
  447. * mov can not contain shifterops
  448. * ldr+mov have the same conditions
  449. * mov does not set flags
  450. }
  451. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  452. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  453. end;
  454. A_MOV:
  455. begin
  456. { fold
  457. mov reg1,reg0, shift imm1
  458. mov reg1,reg1, shift imm2
  459. }
  460. if (taicpu(p).ops=3) and
  461. (taicpu(p).oper[2]^.typ = top_shifterop) and
  462. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  463. getnextinstruction(p,hp1) and
  464. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  465. (taicpu(hp1).ops=3) and
  466. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  467. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  468. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  469. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  470. begin
  471. { fold
  472. mov reg1,reg0, lsl 16
  473. mov reg1,reg1, lsr 16
  474. strh reg1, ...
  475. dealloc reg1
  476. to
  477. strh reg1, ...
  478. dealloc reg1
  479. }
  480. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  481. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  482. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  483. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  484. getnextinstruction(hp1,hp2) and
  485. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  486. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  487. begin
  488. CopyUsedRegs(TmpUsedRegs);
  489. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  490. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  491. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  492. begin
  493. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  494. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  495. asml.remove(p);
  496. asml.remove(hp1);
  497. p.free;
  498. hp1.free;
  499. p:=hp2;
  500. end;
  501. ReleaseUsedRegs(TmpUsedRegs);
  502. end
  503. { fold
  504. mov reg1,reg0, shift imm1
  505. mov reg1,reg1, shift imm2
  506. to
  507. mov reg1,reg0, shift imm1+imm2
  508. }
  509. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  510. { asr makes no use after a lsr, the asr can be foled into the lsr }
  511. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  512. begin
  513. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  514. { avoid overflows }
  515. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  516. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  517. SM_ROR:
  518. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  519. SM_ASR:
  520. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  521. SM_LSR,
  522. SM_LSL:
  523. begin
  524. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  525. InsertLLItem(p.previous, p.next, hp1);
  526. p.free;
  527. p:=hp1;
  528. end;
  529. else
  530. internalerror(2008072803);
  531. end;
  532. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  533. asml.remove(hp1);
  534. hp1.free;
  535. result := true;
  536. end
  537. { fold
  538. mov reg1,reg0, shift imm1
  539. mov reg1,reg1, shift imm2
  540. mov reg1,reg1, shift imm3 ...
  541. }
  542. else if getnextinstruction(hp1,hp2) and
  543. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  544. (taicpu(hp2).ops=3) and
  545. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  546. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  547. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  548. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  549. begin
  550. { mov reg1,reg0, lsl imm1
  551. mov reg1,reg1, lsr/asr imm2
  552. mov reg1,reg1, lsl imm3 ...
  553. if imm3<=imm1 and imm2>=imm3
  554. to
  555. mov reg1,reg0, lsl imm1
  556. mov reg1,reg1, lsr/asr imm2-imm3
  557. }
  558. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  559. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  560. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  561. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  562. begin
  563. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  564. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  565. asml.remove(hp2);
  566. hp2.free;
  567. result := true;
  568. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  569. begin
  570. asml.remove(hp1);
  571. hp1.free;
  572. end;
  573. end
  574. { mov reg1,reg0, lsr/asr imm1
  575. mov reg1,reg1, lsl imm2
  576. mov reg1,reg1, lsr/asr imm3 ...
  577. if imm3>=imm1 and imm2>=imm1
  578. to
  579. mov reg1,reg0, lsl imm2-imm1
  580. mov reg1,reg1, lsr/asr imm3 ...
  581. }
  582. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  583. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  584. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  585. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  586. begin
  587. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  588. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  589. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  590. asml.remove(p);
  591. p.free;
  592. p:=hp2;
  593. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  594. begin
  595. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  596. asml.remove(hp1);
  597. hp1.free;
  598. p:=hp2;
  599. end;
  600. result := true;
  601. end;
  602. end;
  603. end;
  604. { Change the common
  605. mov r0, r0, lsr #24
  606. and r0, r0, #255
  607. and remove the superfluous and
  608. This could be extended to handle more cases.
  609. }
  610. if (taicpu(p).ops=3) and
  611. (taicpu(p).oper[2]^.typ = top_shifterop) and
  612. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  613. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  614. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  615. getnextinstruction(p,hp1) and
  616. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  617. (taicpu(hp1).ops=3) and
  618. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  619. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  620. (taicpu(hp1).oper[2]^.typ = top_const) and
  621. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  622. For LSR #25 and an AndConst of 255 that whould go like this:
  623. 255 and ((2 shl (32-25))-1)
  624. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  625. LSR #25 and AndConst of 254:
  626. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  627. }
  628. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  629. begin
  630. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  631. asml.remove(hp1);
  632. hp1.free;
  633. end;
  634. {
  635. optimize
  636. mov rX, yyyy
  637. ....
  638. }
  639. if (taicpu(p).ops = 2) and
  640. GetNextInstruction(p,hp1) and
  641. (tai(hp1).typ = ait_instruction) then
  642. begin
  643. {
  644. This changes the very common
  645. mov r0, #0
  646. str r0, [...]
  647. mov r0, #0
  648. str r0, [...]
  649. and removes all superfluous mov instructions
  650. }
  651. if (taicpu(p).oper[1]^.typ = top_const) and
  652. (taicpu(hp1).opcode=A_STR) then
  653. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  654. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  655. GetNextInstruction(hp1, hp2) and
  656. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  657. (taicpu(hp2).ops = 2) and
  658. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  659. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  660. begin
  661. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  662. GetNextInstruction(hp2,hp1);
  663. asml.remove(hp2);
  664. hp2.free;
  665. if not assigned(hp1) then break;
  666. end
  667. {
  668. This removes the first mov from
  669. mov rX,...
  670. mov rX,...
  671. }
  672. else if taicpu(hp1).opcode=A_MOV then
  673. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  674. (taicpu(hp1).ops = 2) and
  675. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  676. { don't remove the first mov if the second is a mov rX,rX }
  677. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  678. begin
  679. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  680. asml.remove(p);
  681. p.free;
  682. p:=hp1;
  683. GetNextInstruction(hp1,hp1);
  684. if not assigned(hp1) then
  685. break;
  686. end;
  687. end;
  688. {
  689. change
  690. mov r1, r0
  691. add r1, r1, #1
  692. to
  693. add r1, r0, #1
  694. Todo: Make it work for mov+cmp too
  695. CAUTION! If this one is successful p might not be a mov instruction anymore!
  696. }
  697. if (taicpu(p).ops = 2) and
  698. (taicpu(p).oper[1]^.typ = top_reg) and
  699. (taicpu(p).oppostfix = PF_NONE) and
  700. GetNextInstruction(p, hp1) and
  701. (tai(hp1).typ = ait_instruction) and
  702. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  703. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  704. {MOV and MVN might only have 2 ops}
  705. (taicpu(hp1).ops = 3) and
  706. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  707. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  708. (taicpu(hp1).oper[1]^.typ = top_reg) and
  709. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  710. begin
  711. { When we get here we still don't know if the registers match}
  712. for I:=1 to 2 do
  713. {
  714. If the first loop was successful p will be replaced with hp1.
  715. The checks will still be ok, because all required information
  716. will also be in hp1 then.
  717. }
  718. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  719. begin
  720. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  721. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  722. if p<>hp1 then
  723. begin
  724. asml.remove(p);
  725. p.free;
  726. p:=hp1;
  727. end;
  728. end;
  729. end;
  730. { This folds shifterops into following instructions
  731. mov r0, r1, lsl #8
  732. add r2, r3, r0
  733. to
  734. add r2, r3, r1, lsl #8
  735. CAUTION! If this one is successful p might not be a mov instruction anymore!
  736. }
  737. if (taicpu(p).opcode = A_MOV) and
  738. (taicpu(p).ops = 3) and
  739. (taicpu(p).oper[1]^.typ = top_reg) and
  740. (taicpu(p).oper[2]^.typ = top_shifterop) and
  741. (taicpu(p).oppostfix = PF_NONE) and
  742. GetNextInstruction(p, hp1) and
  743. (tai(hp1).typ = ait_instruction) and
  744. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  745. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  746. (taicpu(hp1).oppostfix = PF_NONE) and
  747. (taicpu(hp1).condition = taicpu(p).condition) and
  748. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  749. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  750. A_CMP, A_CMN]) and
  751. (
  752. {Only ONE of the two src operands is allowed to match}
  753. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  754. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  755. ) then
  756. begin
  757. CopyUsedRegs(TmpUsedRegs);
  758. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  759. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  760. I2:=0
  761. else
  762. I2:=1;
  763. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  764. for I:=I2 to taicpu(hp1).ops-1 do
  765. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  766. begin
  767. { If the parameter matched on the second op from the RIGHT
  768. we have to switch the parameters, this will not happen for CMP
  769. were we're only evaluating the most right parameter
  770. }
  771. if I <> taicpu(hp1).ops-1 then
  772. begin
  773. {The SUB operators need to be changed when we swap parameters}
  774. case taicpu(hp1).opcode of
  775. A_SUB: tempop:=A_RSB;
  776. A_SBC: tempop:=A_RSC;
  777. A_RSB: tempop:=A_SUB;
  778. A_RSC: tempop:=A_SBC;
  779. else tempop:=taicpu(hp1).opcode;
  780. end;
  781. if taicpu(hp1).ops = 3 then
  782. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  783. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  784. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  785. else
  786. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  787. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  788. taicpu(p).oper[2]^.shifterop^);
  789. end
  790. else
  791. if taicpu(hp1).ops = 3 then
  792. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  793. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  794. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  795. else
  796. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  797. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  798. taicpu(p).oper[2]^.shifterop^);
  799. asml.insertbefore(hp2, p);
  800. asml.remove(p);
  801. asml.remove(hp1);
  802. p.free;
  803. hp1.free;
  804. p:=hp2;
  805. GetNextInstruction(p,hp1);
  806. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  807. break;
  808. end;
  809. ReleaseUsedRegs(TmpUsedRegs);
  810. end;
  811. {
  812. Often we see shifts and then a superfluous mov to another register
  813. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  814. }
  815. if (taicpu(p).opcode = A_MOV) and
  816. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  817. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  818. end;
  819. A_ADD,
  820. A_ADC,
  821. A_RSB,
  822. A_RSC,
  823. A_SUB,
  824. A_SBC,
  825. A_AND,
  826. A_BIC,
  827. A_EOR,
  828. A_ORR,
  829. A_MLA,
  830. A_MUL:
  831. begin
  832. {
  833. change
  834. and reg2,reg1,const1
  835. and reg2,reg2,const2
  836. to
  837. and reg2,reg1,(const1 and const2)
  838. }
  839. if (taicpu(p).opcode = A_AND) and
  840. (taicpu(p).oper[1]^.typ = top_reg) and
  841. (taicpu(p).oper[2]^.typ = top_const) and
  842. GetNextInstruction(p, hp1) and
  843. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  844. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  845. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  846. (taicpu(hp1).oper[2]^.typ = top_const) then
  847. begin
  848. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  849. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  850. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  851. asml.remove(hp1);
  852. hp1.free;
  853. end;
  854. {
  855. change
  856. add reg1, ...
  857. mov reg2, reg1
  858. to
  859. add reg2, ...
  860. }
  861. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  862. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  863. end;
  864. A_CMP:
  865. begin
  866. {
  867. change
  868. cmp reg,const1
  869. moveq reg,const1
  870. movne reg,const2
  871. to
  872. cmp reg,const1
  873. movne reg,const2
  874. }
  875. if (taicpu(p).oper[1]^.typ = top_const) and
  876. GetNextInstruction(p, hp1) and
  877. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  878. (taicpu(hp1).oper[1]^.typ = top_const) and
  879. GetNextInstruction(hp1, hp2) and
  880. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  881. (taicpu(hp1).oper[1]^.typ = top_const) then
  882. begin
  883. RemoveRedundantMove(p, hp1, asml);
  884. RemoveRedundantMove(p, hp2, asml);
  885. end;
  886. end;
  887. end;
  888. end;
  889. end;
  890. end;
  891. { instructions modifying the CPSR can be only the last instruction }
  892. function MustBeLast(p : tai) : boolean;
  893. begin
  894. Result:=(p.typ=ait_instruction) and
  895. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  896. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  897. (taicpu(p).oppostfix=PF_S));
  898. end;
  899. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  900. var
  901. p,hp1,hp2: tai;
  902. l : longint;
  903. condition : tasmcond;
  904. hp3: tai;
  905. WasLast: boolean;
  906. { UsedRegs, TmpUsedRegs: TRegSet; }
  907. begin
  908. p := BlockStart;
  909. { UsedRegs := []; }
  910. while (p <> BlockEnd) Do
  911. begin
  912. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  913. case p.Typ Of
  914. Ait_Instruction:
  915. begin
  916. case taicpu(p).opcode Of
  917. A_B:
  918. if taicpu(p).condition<>C_None then
  919. begin
  920. { check for
  921. Bxx xxx
  922. <several instructions>
  923. xxx:
  924. }
  925. l:=0;
  926. WasLast:=False;
  927. GetNextInstruction(p, hp1);
  928. while assigned(hp1) and
  929. (l<=4) and
  930. CanBeCond(hp1) and
  931. { stop on labels }
  932. not(hp1.typ=ait_label) do
  933. begin
  934. inc(l);
  935. if MustBeLast(hp1) then
  936. begin
  937. WasLast:=True;
  938. GetNextInstruction(hp1,hp1);
  939. break;
  940. end
  941. else
  942. GetNextInstruction(hp1,hp1);
  943. end;
  944. if assigned(hp1) then
  945. begin
  946. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  947. begin
  948. if (l<=4) and (l>0) then
  949. begin
  950. condition:=inverse_cond(taicpu(p).condition);
  951. hp2:=p;
  952. GetNextInstruction(p,hp1);
  953. p:=hp1;
  954. repeat
  955. if hp1.typ=ait_instruction then
  956. taicpu(hp1).condition:=condition;
  957. if MustBeLast(hp1) then
  958. begin
  959. GetNextInstruction(hp1,hp1);
  960. break;
  961. end
  962. else
  963. GetNextInstruction(hp1,hp1);
  964. until not(assigned(hp1)) or
  965. not(CanBeCond(hp1)) or
  966. (hp1.typ=ait_label);
  967. { wait with removing else GetNextInstruction could
  968. ignore the label if it was the only usage in the
  969. jump moved away }
  970. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  971. asml.remove(hp2);
  972. hp2.free;
  973. continue;
  974. end;
  975. end
  976. else
  977. { do not perform further optimizations if there is inctructon
  978. in block #1 which can not be optimized.
  979. }
  980. if not WasLast then
  981. begin
  982. { check further for
  983. Bcc xxx
  984. <several instructions 1>
  985. B yyy
  986. xxx:
  987. <several instructions 2>
  988. yyy:
  989. }
  990. { hp2 points to jmp yyy }
  991. hp2:=hp1;
  992. { skip hp1 to xxx }
  993. GetNextInstruction(hp1, hp1);
  994. if assigned(hp2) and
  995. assigned(hp1) and
  996. (l<=3) and
  997. (hp2.typ=ait_instruction) and
  998. (taicpu(hp2).is_jmp) and
  999. (taicpu(hp2).condition=C_None) and
  1000. { real label and jump, no further references to the
  1001. label are allowed }
  1002. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1003. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1004. begin
  1005. l:=0;
  1006. { skip hp1 to <several moves 2> }
  1007. GetNextInstruction(hp1, hp1);
  1008. while assigned(hp1) and
  1009. CanBeCond(hp1) do
  1010. begin
  1011. inc(l);
  1012. GetNextInstruction(hp1, hp1);
  1013. end;
  1014. { hp1 points to yyy: }
  1015. if assigned(hp1) and
  1016. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1017. begin
  1018. condition:=inverse_cond(taicpu(p).condition);
  1019. GetNextInstruction(p,hp1);
  1020. hp3:=p;
  1021. p:=hp1;
  1022. repeat
  1023. if hp1.typ=ait_instruction then
  1024. taicpu(hp1).condition:=condition;
  1025. GetNextInstruction(hp1,hp1);
  1026. until not(assigned(hp1)) or
  1027. not(CanBeCond(hp1));
  1028. { hp2 is still at jmp yyy }
  1029. GetNextInstruction(hp2,hp1);
  1030. { hp2 is now at xxx: }
  1031. condition:=inverse_cond(condition);
  1032. GetNextInstruction(hp1,hp1);
  1033. { hp1 is now at <several movs 2> }
  1034. repeat
  1035. taicpu(hp1).condition:=condition;
  1036. GetNextInstruction(hp1,hp1);
  1037. until not(assigned(hp1)) or
  1038. not(CanBeCond(hp1)) or
  1039. (hp1.typ=ait_label);
  1040. {
  1041. asml.remove(hp1.next)
  1042. hp1.next.free;
  1043. asml.remove(hp1);
  1044. hp1.free;
  1045. }
  1046. { remove Bcc }
  1047. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1048. asml.remove(hp3);
  1049. hp3.free;
  1050. { remove jmp }
  1051. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1052. asml.remove(hp2);
  1053. hp2.free;
  1054. continue;
  1055. end;
  1056. end;
  1057. end;
  1058. end;
  1059. end;
  1060. end;
  1061. end;
  1062. end;
  1063. p := tai(p.next)
  1064. end;
  1065. end;
  1066. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  1067. begin
  1068. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  1069. Result:=true
  1070. else
  1071. Result:=inherited RegInInstruction(Reg, p1);
  1072. end;
const
  { Set of opcodes which might write, or definitely do write, to memory.
    It is deliberately conservative: besides the store instructions it also
    contains the branch/call opcodes and BKPT.
    SchedulerPass1Cpu consults it before hoisting a load above the
    instruction in front of it. }
  { TODO : extend armins.dat to contain r/w info }
  opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
    A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  1078. { adjust the register live information when swapping the two instructions p and hp1,
  1079. they must follow one after the other }
  1080. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  1081. procedure CheckLiveEnd(reg : tregister);
  1082. var
  1083. supreg : TSuperRegister;
  1084. regtype : TRegisterType;
  1085. begin
  1086. if reg=NR_NO then
  1087. exit;
  1088. regtype:=getregtype(reg);
  1089. supreg:=getsupreg(reg);
  1090. if (cg.rg[regtype].live_end[supreg]=hp1) and
  1091. RegInInstruction(reg,p) then
  1092. cg.rg[regtype].live_end[supreg]:=p;
  1093. end;
  1094. procedure CheckLiveStart(reg : TRegister);
  1095. var
  1096. supreg : TSuperRegister;
  1097. regtype : TRegisterType;
  1098. begin
  1099. if reg=NR_NO then
  1100. exit;
  1101. regtype:=getregtype(reg);
  1102. supreg:=getsupreg(reg);
  1103. if (cg.rg[regtype].live_start[supreg]=p) and
  1104. RegInInstruction(reg,hp1) then
  1105. cg.rg[regtype].live_start[supreg]:=hp1;
  1106. end;
  1107. var
  1108. i : longint;
  1109. r : TSuperRegister;
  1110. begin
  1111. { assumption: p is directly followed by hp1 }
  1112. { if live of any reg used by p starts at p and hp1 uses this register then
  1113. set live start to hp1 }
  1114. for i:=0 to p.ops-1 do
  1115. case p.oper[i]^.typ of
  1116. Top_Reg:
  1117. CheckLiveStart(p.oper[i]^.reg);
  1118. Top_Ref:
  1119. begin
  1120. CheckLiveStart(p.oper[i]^.ref^.base);
  1121. CheckLiveStart(p.oper[i]^.ref^.index);
  1122. end;
  1123. Top_Shifterop:
  1124. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  1125. Top_RegSet:
  1126. for r:=RS_R0 to RS_R15 do
  1127. if r in p.oper[i]^.regset^ then
  1128. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1129. end;
  1130. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  1131. set live end to p }
  1132. for i:=0 to hp1.ops-1 do
  1133. case hp1.oper[i]^.typ of
  1134. Top_Reg:
  1135. CheckLiveEnd(hp1.oper[i]^.reg);
  1136. Top_Ref:
  1137. begin
  1138. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  1139. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  1140. end;
  1141. Top_Shifterop:
  1142. CheckLiveStart(hp1.oper[i]^.shifterop^.rs);
  1143. Top_RegSet:
  1144. for r:=RS_R0 to RS_R15 do
  1145. if r in hp1.oper[i]^.regset^ then
  1146. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1147. end;
  1148. end;
{ Scheduling pass run before register allocation.  Walks the block from
  BlockStart to BlockEnd looking for the sequence

      p     any instruction
      hp1   LDR/LDRB/LDRH/LDRSB/LDRSH
      hp2   instruction using the register loaded by hp1

  and, when all checks below hold, reorders it to hp1, p, hp2, i.e. the
  load is moved in front of p (presumably to hide the load latency --
  the code itself does not state the motivation).  Register deallocation
  markers next to p that name a register used by p are moved along with p.

  The value passed in p is ignored: p is overwritten with BlockStart and
  then used as the walking cursor.  The function always returns true. }
function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
  var
    hp1,hp2,hp3,hp4,hp5 : tai;
    list : TAsmList;
  begin
    result:=true;
    { temporary list collecting p and the dealloc markers travelling with it }
    list:=TAsmList.Create;
    p:=BlockStart;
    while p<>BlockEnd Do
      begin
        if (p.typ=ait_instruction) and
          GetNextInstruction(p,hp1) and
          (hp1.typ=ait_instruction) and
          (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
          { for now we don't reschedule if the previous instruction changes potentially a memory location }
          ( (not(taicpu(p).opcode in opcode_could_mem_write) and
             not(RegModifiedByInstruction(NR_PC,p))
            ) or
            { p is a store, but the load is PC relative or refers to symbol
              data with offset 0 }
            ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
             ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
              (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
               (taicpu(hp1).oper[1]^.ref^.offset=0)
              )
             ) or
             { try to prove that the memory accesses don't overlap }
             ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
              (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
              (taicpu(p).oppostfix=PF_None) and
              (taicpu(hp1).oppostfix=PF_None) and
              (taicpu(p).oper[1]^.ref^.index=NR_NO) and
              (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
              { get operand sizes and check if the offset distance is large enough to ensure no overlap }
              (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
             )
            )
          ) and
          GetNextInstruction(hp1,hp2) and
          (hp2.typ=ait_instruction) and
          { loaded register used by next instruction? }
          (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
          { loaded register not used by previous instruction? }
          not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
          { same condition? }
          (taicpu(p).condition=taicpu(hp1).condition) and
          { first instruction might not change the register used as base }
          ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
           not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
          ) and
          { first instruction might not change the register used as index }
          ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
           not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
          ) then
          begin
            { remember the neighbours of p before unlinking it }
            hp3:=tai(p.Previous);
            hp5:=tai(p.next);
            asml.Remove(p);
            { if there is a reg. dealloc instruction associated with p, move it together with p }

            { before the instruction? }
            { walks backwards up to the previous instruction, so the markers
              are appended to list in reverse order of appearance }
            while assigned(hp3) and (hp3.typ<>ait_instruction) do
              begin
                if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
                  RegInInstruction(tai_regalloc(hp3).reg,p) then
                  begin
                    hp4:=hp3;
                    hp3:=tai(hp3.Previous);
                    asml.Remove(hp4);
                    list.Concat(hp4);
                  end
                else
                  hp3:=tai(hp3.Previous);
              end;

            list.Concat(p);
            SwapRegLive(taicpu(p),taicpu(hp1));

            { after the instruction? }
            while assigned(hp5) and (hp5.typ<>ait_instruction) do
              begin
                if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
                  RegInInstruction(tai_regalloc(hp5).reg,p) then
                  begin
                    hp4:=hp5;
                    hp5:=tai(hp5.next);
                    asml.Remove(hp4);
                    list.Concat(hp4);
                  end
                else
                  hp5:=tai(hp5.Next);
              end;

            { re-insert the load and then the collected list directly in
              front of hp2, giving the order hp1, <list containing p>, hp2 }
            asml.Remove(hp1);
{$ifdef DEBUG_PREREGSCHEDULER}
            asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
{$endif DEBUG_PREREGSCHEDULER}
            asml.InsertBefore(hp1,hp2);
            asml.InsertListBefore(hp2,list);
            { p is back in asml now; continue with whatever follows it }
            p:=tai(p.next)
          end
        else if p.typ=ait_instruction then
          { NOTE(review): hp1 comes from the GetNextInstruction call in the
            condition above; if that call can fail and leave hp1 nil, p
            becomes nil here -- confirm that this cannot happen before
            BlockEnd is reached }
          p:=hp1
        else
          p:=tai(p.next);
      end;
    list.Free;
  end;
{ Second peephole pass of the Thumb-2 optimizer.  Intentionally empty for
  now: no optimizations are performed here. }
procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  begin
    { TODO: Add optimizer code }
  end;
begin
  { unit initialization: publish the ARM classes through the class
    reference variables casmoptimizer and cpreregallocscheduler
    (presumably read by the target independent optimizer driver --
    the consumers are not visible in this unit section) }
  casmoptimizer:=TCpuAsmOptimizer;
  cpreregallocscheduler:=TCpuPreRegallocScheduler;
End.