aoptcpu.pas 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  { ARM peephole optimizer; derives from the generic TAsmOptimizer. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor is declared here, the inherited one (TAopObj) is used }

    { CPU specific overrides of the generic optimizer hooks }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): boolean; override;

    { Folds "<p> reg1,...; mov reg2,reg1" into "<p> reg2,..." when the
      conditions checked by the implementation hold; optimizer is the name
      that is written into the emitted peephole comment. }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);

    { Updates AllUsedRegs from the entry after p and reports whether reg is
      still considered in use after instruction p. }
    function RegUsedAfterInstruction(reg: TRegister; p: tai; var AllUsedRegs: TAllUsedRegs): boolean;

    { Starting after Current, walks forward until an entry is reached that
      uses reg, changes the program flow (call/jump or a use of PC), or is
      not an instruction at all. The entry is returned in Next.
      Returns false when no further entry exists (Next is then expected to
      be nil, as set by GetNextInstruction). }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): boolean;
  end;
  { Instruction scheduler class built on top of TAsmScheduler. }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific override of the inherited scheduler pass hook }
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Thumb-2 flavour of the optimizer: inherits everything from
    TCpuAsmOptimizer and only replaces the second peephole pass. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor is declared here, the inherited one (TAopObj) is used }
    procedure PeepHoleOptPass2; override;
  end;
  47. Implementation
  48. uses
  49. cutils,verbose,globals,
  50. systems,
  51. cpuinfo,
  52. cgobj,cgutils,procinfo,
  53. aasmbase,aasmdata;
  { Returns true when p is an instruction that carries no condition yet
    (C_None), is not a PLD and, if it is a BLX, has a register as its first
    operand.
    NOTE(review): presumably this answers "may a condition code be attached
    to p?" - confirm against the callers. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional, or a PLD: rejected }
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { BLX is only accepted in its register form }
      Result:=(instr.opcode<>A_BLX) or (instr.oper[0]^.typ=top_reg);
    end;
  { Field-by-field comparison of two references. Returns true only if
    offset, base, index, scalefactor, symbol, relsymbol, refaddr, signindex,
    shiftimm, shiftmode and addressmode are all equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { displacement and registers }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { ARM specific index sign, shift and addressing mode }
      if (r1.signindex<>r2.signindex) or
         (r1.shiftimm<>r2.shiftimm) or
         (r1.shiftmode<>r2.shiftmode) or
         (r1.addressmode<>r2.addressmode) then
        exit;
      Result:=true;
    end;
  { Returns true when instr is an instruction with opcode op whose condition
    is contained in cond and whose postfix is contained in postfix.
    An empty cond or postfix set acts as a wildcard for that property. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      { a non-empty condition set must contain the instruction's condition }
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      { same rule for the postfix set }
      Result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands. They match when they have the same operand type
    and, depending on that type, the same constant, register, condition code
    or (via RefsEqual) reference. Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      Result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          Result:=oper1.reg=oper2.reg;
        top_const:
          Result:=oper1.val=oper2.val;
        top_ref:
          Result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_conditioncode:
          Result:=oper1.cc=oper2.cc;
        { every other operand type keeps the default of false }
      end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=oper.reg=reg
      else
        Result:=false;
    end;
  { Removes movp from asml when it is a "moveq" that only re-establishes
    what the preceding compare already guarantees, i.e.

        cmp   reg, x
        moveq reg, x     <- removed

    cmpp : the compare instruction
    movp : the candidate move; it is removed from asml and freed on success
    asml : the list both instructions live in

    Both instructions must be plain two-operand forms, and the operands are
    compared with MatchOperand so that the operand types have to agree as
    well. The former code compared the raw "val" fields without checking
    that both second operands really are constants, and ignored a possible
    shifter operand. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    begin
      if (taicpu(movp).condition = C_EQ) and
         { no shifter operand on either instruction }
         (taicpu(cmpp).ops = 2) and
         (taicpu(movp).ops = 2) and
         MatchOperand(taicpu(cmpp).oper[0]^, taicpu(movp).oper[0]^) and
         MatchOperand(taicpu(cmpp).oper[1]^, taicpu(movp).oper[1]^) then
        begin
          asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
          asml.remove(movp);
          movp.free;
        end;
    end;
  { Returns true when instruction hp writes a new value into reg.

    reg : register to look for
    hp  : list entry to inspect; nil or a non-instruction yields false

    Handled explicitly:
      * compares, branches, SWI, MSR and PLD never write a register
      * STR/LDR with pre-/post-indexed addressing modify their base register
      * STR does not write its first operand (it is the stored value)
      * UMLAL/UMULL/SMLAL/SMULL also write their second register operand
      * LDM writes every register in its register set
      * LDRD additionally writes the register following oper[0]
      * LDM/STM style instructions with a reference as first operand write
        the base register when pre-/post-indexed }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      { an instruction without operands cannot write a register through an
        operand; leave before oper[0] is dereferenced further down }
      if p.ops = 0 then
        exit;
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { A store only changes a register through base writeback; oper[0]
          is the value being stored and must not be reported as written }
        A_STR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            exit;
          end;
        { Take care of post/preincremented loads, they will change their base register }
        A_LDR:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ=top_ref) and
            (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[1]^.ref^.base = reg);
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        {Loads to oper2 from coprocessor}
        {
        MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        {Loads to all register in the registerset}
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      { generic rule: the first operand is the destination }
      case p.oper[0]^.typ of
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following oper[0] }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        {LDM/STM might write a new value to their index register}
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (Safe) heuristic deciding whether ref can be treated as 8 byte aligned.
    It only answers true for the EABI style targets listed below and when
    either
      * the reference goes through R13 with a non-negative offset that is a
        multiple of 8, or
      * the reference goes through R11, R11 is the current frame pointer and
        the non-positive offset satisfies abs(offset+4) mod 8 = 0
        (R11 always holds <qword aligned>+4). }
  function AlignedToQWord(const ref : treference) : boolean;
    var
      viaR13, viaR11 : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      viaR13:=(ref.base=NR_R13) or (ref.index=NR_R13);
      viaR11:=(ref.base=NR_R11) or (ref.index=NR_R11);

      { stack pointer relative }
      if (ref.offset>=0) and ((ref.offset mod 8)=0) and viaR13 then
        begin
          Result:=true;
          exit;
        end;

      { frame pointer relative }
      Result:=(ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        viaR11;
    end;
  { Returns true when instruction hp reads reg through one of its operands.

    reg : register to look for
    hp  : list entry to inspect; nil or a non-instruction yields false

    Normally oper[0] is skipped; for the opcodes listed in the set below the
    scan starts at oper[0]. A register counts as read when it
    appears as a register operand, in a register set, as the shift register
    of a shifter operand or as base/index of a reference. For STRD the
    register following oper[0] is read as well. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      firstop, opidx: longint;
      found: boolean;
    begin
      Result:=false;
      if not assigned(hp) then
        exit;
      if hp.typ<>ait_instruction then
        exit;
      instr:=taicpu(hp);

      { For these instructions we have to start on oper[0] }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;

      for opidx:=firstop to instr.ops-1 do
        begin
          case instr.oper[opidx]^.typ of
            top_reg:
              found:=(instr.oper[opidx]^.reg=reg) or
                { STRD }
                ((opidx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              found:=getsupreg(reg) in instr.oper[opidx]^.regset^;
            top_shifterop:
              found:=instr.oper[opidx]^.shifterop^.rs=reg;
            top_ref:
              found:=(instr.oper[opidx]^.ref^.base=reg) or
                     (instr.oper[opidx]^.ref^.index=reg);
            else
              found:=false;
          end;
          { stop at the first operand that reads the register }
          if found then
            begin
              Result:=true;
              exit;
            end;
        end;
    end;
  { Reports whether reg is still in use after instruction p.

    AllUsedRegs is first updated from the list entry following p. reg is
    then considered used afterwards when
      * the updated register state marks it as used, and
      * p itself does not load it with a new value, and
      * there is no following instruction, or the following instruction
        reads reg, or the following instruction does not overwrite reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    var
      following: tai;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);

      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;

      { nothing follows: keep the "used" verdict }
      if not GetNextInstruction(p,following) then
        begin
          Result:=true;
          exit;
        end;

      Result:=instructionLoadsFromReg(reg,following) or
        not(regLoadedWithNewValue(reg,following));
    end;
  { Steps forward from Current with GetNextInstruction until one of the
    following stops the walk:
      * there is no further entry (result is false),
      * the entry is not an instruction,
      * the entry uses reg,
      * the entry is a call or jump,
      * the entry uses the program counter.
    Next receives the entry the walk stopped at; the result is the value
    returned by the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Turns

        <p>  reg1, ...
        mov  reg2, reg1
        (reg1 deallocated after the mov)

    into

        <p>  reg2, ...

    p         : instruction producing reg1 in oper[0]
    movp      : candidate mov; removed and freed when the fold is done
    optimizer : name inserted into the emitted peephole comment

    The fold is only done when movp is a plain two-operand mov with the same
    condition as p and without a flag-setting postfix, its source is p's
    destination, its destination is neither PC nor R14 and is not used
    between p and movp, and - for MUL/MLA - the new destination would not
    equal oper[1] of p. In addition a deallocation of reg1 has to be found
    after movp.

    The result of GetLastInstruction is now checked: formerly hp1.Next was
    dereferenced even if no previous instruction existed (hp1 = nil). }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
         ) then
        begin
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              if not GetLastInstruction(p,hp1) then
                hp1:=nil;
              asml.Remove(dealloc);
              if assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))
              else
                { no previous instruction to start the search from }
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { allocation not found: keep the register information
                  consistent by releasing reg1 directly after p }
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register }
              if GetLastInstruction(movp,hp1) and assigned(hp1) then
                alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next))
              else
                alloc:=nil;
              if assigned(alloc) then
                begin
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  295. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  296. var
  297. hp1,hp2: tai;
  298. i, i2: longint;
  299. TmpUsedRegs: TAllUsedRegs;
  300. tempop: tasmop;
  301. function IsPowerOf2(const value: DWord): boolean; inline;
  302. begin
  303. Result:=(value and (value - 1)) = 0;
  304. end;
  305. begin
  306. result := false;
  307. case p.typ of
  308. ait_instruction:
  309. begin
  310. {
  311. change
  312. <op> reg,x,y
  313. cmp reg,#0
  314. into
  315. <op>s reg,x,y
  316. }
  317. { this optimization can applied only to the currently enabled operations because
  318. the other operations do not update all flags and FPC does not track flag usage }
  319. if ((taicpu(p).opcode in [A_ADC,A_ADD,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND]) or
  320. { those two values are > 255 so check separately }
  321. (taicpu(p).opcode=A_UDIV) or
  322. (taicpu(p).opcode=A_SDIV)
  323. ) and
  324. (taicpu(p).oppostfix = PF_None) and
  325. (taicpu(p).condition = C_None) and
  326. GetNextInstruction(p, hp1) and
  327. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  328. (taicpu(hp1).oper[1]^.typ = top_const) and
  329. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  330. (taicpu(hp1).oper[1]^.val = 0) and
  331. GetNextInstruction(hp1, hp2) and
  332. (tai(hp2).typ = ait_instruction) and
  333. { be careful here, following instructions could use other flags
  334. however after a jump fpc never depends on the value of flags }
  335. (taicpu(hp2).opcode = A_B) and
  336. (((taicpu(p).opcode in [A_ADC,A_ADD,A_SUB]) and
  337. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])) or
  338. (taicpu(hp2).condition in [C_EQ,C_NE])) and
  339. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  340. begin
  341. taicpu(p).oppostfix:=PF_S;
  342. asml.insertbefore(tai_comment.Create(strpnew('Peephole OpCmp2OpS done')), p);
  343. asml.remove(hp1);
  344. hp1.free;
  345. end
  346. else
  347. case taicpu(p).opcode of
  348. A_STR:
  349. begin
  350. { change
  351. str reg1,ref
  352. ldr reg2,ref
  353. into
  354. str reg1,ref
  355. mov reg2,reg1
  356. }
  357. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  358. (taicpu(p).oppostfix=PF_None) and
  359. GetNextInstruction(p,hp1) and
  360. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  361. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  362. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  363. begin
  364. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  365. begin
  366. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  367. asml.remove(hp1);
  368. hp1.free;
  369. end
  370. else
  371. begin
  372. taicpu(hp1).opcode:=A_MOV;
  373. taicpu(hp1).oppostfix:=PF_None;
  374. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  375. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  376. end;
  377. result := true;
  378. end
  379. { change
  380. str reg1,ref
  381. str reg2,ref
  382. into
  383. strd reg1,ref
  384. }
  385. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  386. (taicpu(p).oppostfix=PF_None) and
  387. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  388. GetNextInstruction(p,hp1) and
  389. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  390. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  391. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  392. { str ensures that either base or index contain no register, else ldr wouldn't
  393. use an offset either
  394. }
  395. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  396. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  397. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  398. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  399. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  400. begin
  401. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  402. taicpu(p).oppostfix:=PF_D;
  403. asml.remove(hp1);
  404. hp1.free;
  405. end;
  406. end;
  407. A_LDR:
  408. begin
  409. { change
  410. ldr reg1,ref
  411. ldr reg2,ref
  412. into ...
  413. }
  414. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  415. GetNextInstruction(p,hp1) and
  416. { ldrd is not allowed here }
  417. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  418. begin
  419. {
  420. ...
  421. ldr reg1,ref
  422. mov reg2,reg1
  423. }
  424. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  425. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  426. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  427. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  428. begin
  429. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  430. begin
  431. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  432. asml.remove(hp1);
  433. hp1.free;
  434. end
  435. else
  436. begin
  437. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  438. taicpu(hp1).opcode:=A_MOV;
  439. taicpu(hp1).oppostfix:=PF_None;
  440. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  441. end;
  442. result := true;
  443. end
  444. {
  445. ...
  446. ldrd reg1,ref
  447. }
  448. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  449. { ldrd does not allow any postfixes ... }
  450. (taicpu(p).oppostfix=PF_None) and
  451. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  452. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  453. { ldr ensures that either base or index contain no register, else ldr wouldn't
  454. use an offset either
  455. }
  456. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  457. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  458. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  459. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  460. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  461. begin
  462. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  463. taicpu(p).oppostfix:=PF_D;
  464. asml.remove(hp1);
  465. hp1.free;
  466. end;
  467. end;
  468. { Remove superfluous mov after ldr
  469. changes
  470. ldr reg1, ref
  471. mov reg2, reg1
  472. to
  473. ldr reg2, ref
  474. conditions are:
  475. * no ldrd usage
  476. * reg1 must be released after mov
  477. * mov can not contain shifterops
  478. * ldr+mov have the same conditions
  479. * mov does not set flags
  480. }
  481. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  482. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  483. end;
  484. A_MOV:
  485. begin
  486. { fold
  487. mov reg1,reg0, shift imm1
  488. mov reg1,reg1, shift imm2
  489. }
  490. if (taicpu(p).ops=3) and
  491. (taicpu(p).oper[2]^.typ = top_shifterop) and
  492. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  493. getnextinstruction(p,hp1) and
  494. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  495. (taicpu(hp1).ops=3) and
  496. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  497. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  498. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  499. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  500. begin
  501. { fold
  502. mov reg1,reg0, lsl 16
  503. mov reg1,reg1, lsr 16
  504. strh reg1, ...
  505. dealloc reg1
  506. to
  507. strh reg1, ...
  508. dealloc reg1
  509. }
  510. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  511. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  512. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  513. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  514. getnextinstruction(hp1,hp2) and
  515. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  516. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  517. begin
  518. CopyUsedRegs(TmpUsedRegs);
  519. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  520. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  521. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  522. begin
  523. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  524. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  525. asml.remove(p);
  526. asml.remove(hp1);
  527. p.free;
  528. hp1.free;
  529. p:=hp2;
  530. end;
  531. ReleaseUsedRegs(TmpUsedRegs);
  532. end
  533. { fold
  534. mov reg1,reg0, shift imm1
  535. mov reg1,reg1, shift imm2
  536. to
  537. mov reg1,reg0, shift imm1+imm2
  538. }
  539. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  540. { asr makes no use after a lsr, the asr can be foled into the lsr }
  541. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  542. begin
  543. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  544. { avoid overflows }
  545. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  546. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  547. SM_ROR:
  548. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  549. SM_ASR:
  550. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  551. SM_LSR,
  552. SM_LSL:
  553. begin
  554. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  555. InsertLLItem(p.previous, p.next, hp1);
  556. p.free;
  557. p:=hp1;
  558. end;
  559. else
  560. internalerror(2008072803);
  561. end;
  562. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  563. asml.remove(hp1);
  564. hp1.free;
  565. result := true;
  566. end
  567. { fold
  568. mov reg1,reg0, shift imm1
  569. mov reg1,reg1, shift imm2
  570. mov reg1,reg1, shift imm3 ...
  571. }
  572. else if getnextinstruction(hp1,hp2) and
  573. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  574. (taicpu(hp2).ops=3) and
  575. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  576. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  577. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  578. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  579. begin
  580. { mov reg1,reg0, lsl imm1
  581. mov reg1,reg1, lsr/asr imm2
  582. mov reg1,reg1, lsl imm3 ...
  583. if imm3<=imm1 and imm2>=imm3
  584. to
  585. mov reg1,reg0, lsl imm1
  586. mov reg1,reg1, lsr/asr imm2-imm3
  587. }
  588. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  589. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  590. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  591. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  592. begin
  593. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  594. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  595. asml.remove(hp2);
  596. hp2.free;
  597. result := true;
  598. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  599. begin
  600. asml.remove(hp1);
  601. hp1.free;
  602. end;
  603. end
  604. { mov reg1,reg0, lsr/asr imm1
  605. mov reg1,reg1, lsl imm2
  606. mov reg1,reg1, lsr/asr imm3 ...
  607. if imm3>=imm1 and imm2>=imm1
  608. to
  609. mov reg1,reg0, lsl imm2-imm1
  610. mov reg1,reg1, lsr/asr imm3 ...
  611. }
  612. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  613. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  614. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  615. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  616. begin
  617. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  618. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  619. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  620. asml.remove(p);
  621. p.free;
  622. p:=hp2;
  623. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  624. begin
  625. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  626. asml.remove(hp1);
  627. hp1.free;
  628. p:=hp2;
  629. end;
  630. result := true;
  631. end;
  632. end;
  633. end;
  634. { Change the common
  635. mov r0, r0, lsr #24
  636. and r0, r0, #255
  637. and remove the superfluous and
  638. This could be extended to handle more cases.
  639. }
  640. if (taicpu(p).ops=3) and
  641. (taicpu(p).oper[2]^.typ = top_shifterop) and
  642. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  643. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  644. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  645. getnextinstruction(p,hp1) and
  646. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  647. (taicpu(hp1).ops=3) and
  648. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  649. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  650. (taicpu(hp1).oper[2]^.typ = top_const) and
  651. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  652. For LSR #25 and an AndConst of 255 that whould go like this:
  653. 255 and ((2 shl (32-25))-1)
  654. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  655. LSR #25 and AndConst of 254:
  656. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  657. }
  658. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  659. begin
  660. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  661. asml.remove(hp1);
  662. hp1.free;
  663. end;
  664. {
  665. optimize
  666. mov rX, yyyy
  667. ....
  668. }
  669. if (taicpu(p).ops = 2) and
  670. GetNextInstruction(p,hp1) and
  671. (tai(hp1).typ = ait_instruction) then
  672. begin
  673. {
  674. This changes the very common
  675. mov r0, #0
  676. str r0, [...]
  677. mov r0, #0
  678. str r0, [...]
  679. and removes all superfluous mov instructions
  680. }
  681. if (taicpu(p).oper[1]^.typ = top_const) and
  682. (taicpu(hp1).opcode=A_STR) then
  683. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  684. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  685. GetNextInstruction(hp1, hp2) and
  686. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  687. (taicpu(hp2).ops = 2) and
  688. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  689. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  690. begin
  691. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  692. GetNextInstruction(hp2,hp1);
  693. asml.remove(hp2);
  694. hp2.free;
  695. if not assigned(hp1) then break;
  696. end
  697. {
  698. This removes the first mov from
  699. mov rX,...
  700. mov rX,...
  701. }
  702. else if taicpu(hp1).opcode=A_MOV then
  703. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  704. (taicpu(hp1).ops = 2) and
  705. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  706. { don't remove the first mov if the second is a mov rX,rX }
  707. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  708. begin
  709. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  710. asml.remove(p);
  711. p.free;
  712. p:=hp1;
  713. GetNextInstruction(hp1,hp1);
  714. if not assigned(hp1) then
  715. break;
  716. end;
  717. end;
  718. {
  719. change
  720. mov r1, r0
  721. add r1, r1, #1
  722. to
  723. add r1, r0, #1
  724. Todo: Make it work for mov+cmp too
  725. CAUTION! If this one is successful p might not be a mov instruction anymore!
  726. }
  727. if (taicpu(p).ops = 2) and
  728. (taicpu(p).oper[1]^.typ = top_reg) and
  729. (taicpu(p).oppostfix = PF_NONE) and
  730. GetNextInstruction(p, hp1) and
  731. (tai(hp1).typ = ait_instruction) and
  732. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  733. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  734. {MOV and MVN might only have 2 ops}
  735. (taicpu(hp1).ops = 3) and
  736. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  737. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  738. (taicpu(hp1).oper[1]^.typ = top_reg) and
  739. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  740. begin
  741. { When we get here we still don't know if the registers match}
  742. for I:=1 to 2 do
  743. {
  744. If the first loop was successful p will be replaced with hp1.
  745. The checks will still be ok, because all required information
  746. will also be in hp1 then.
  747. }
  748. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  749. begin
  750. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  751. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  752. if p<>hp1 then
  753. begin
  754. asml.remove(p);
  755. p.free;
  756. p:=hp1;
  757. end;
  758. end;
  759. end;
  760. { This folds shifterops into following instructions
  761. mov r0, r1, lsl #8
  762. add r2, r3, r0
  763. to
  764. add r2, r3, r1, lsl #8
  765. CAUTION! If this one is successful p might not be a mov instruction anymore!
  766. }
  767. if (taicpu(p).opcode = A_MOV) and
  768. (taicpu(p).ops = 3) and
  769. (taicpu(p).oper[1]^.typ = top_reg) and
  770. (taicpu(p).oper[2]^.typ = top_shifterop) and
  771. (taicpu(p).oppostfix = PF_NONE) and
  772. GetNextInstruction(p, hp1) and
  773. (tai(hp1).typ = ait_instruction) and
  774. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  775. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  776. (taicpu(hp1).oppostfix = PF_NONE) and
  777. (taicpu(hp1).condition = taicpu(p).condition) and
  778. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  779. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  780. A_CMP, A_CMN]) and
  781. (
  782. {Only ONE of the two src operands is allowed to match}
  783. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  784. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  785. ) then
  786. begin
  787. CopyUsedRegs(TmpUsedRegs);
  788. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  789. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  790. I2:=0
  791. else
  792. I2:=1;
  793. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  794. for I:=I2 to taicpu(hp1).ops-1 do
  795. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  796. begin
  797. { If the parameter matched on the second op from the RIGHT
  798. we have to switch the parameters; this will not happen for CMP,
  799. where we're only evaluating the rightmost parameter
  800. }
  801. if I <> taicpu(hp1).ops-1 then
  802. begin
  803. {The SUB operators need to be changed when we swap parameters}
  804. case taicpu(hp1).opcode of
  805. A_SUB: tempop:=A_RSB;
  806. A_SBC: tempop:=A_RSC;
  807. A_RSB: tempop:=A_SUB;
  808. A_RSC: tempop:=A_SBC;
  809. else tempop:=taicpu(hp1).opcode;
  810. end;
  811. if taicpu(hp1).ops = 3 then
  812. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  813. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  814. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  815. else
  816. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  817. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  818. taicpu(p).oper[2]^.shifterop^);
  819. end
  820. else
  821. if taicpu(hp1).ops = 3 then
  822. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  823. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  824. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  825. else
  826. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  827. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  828. taicpu(p).oper[2]^.shifterop^);
  829. asml.insertbefore(hp2, p);
  830. asml.remove(p);
  831. asml.remove(hp1);
  832. p.free;
  833. hp1.free;
  834. p:=hp2;
  835. GetNextInstruction(p,hp1);
  836. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  837. break;
  838. end;
  839. ReleaseUsedRegs(TmpUsedRegs);
  840. end;
  841. {
  842. Often we see shifts and then a superfluous mov to another register
  843. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  844. }
  845. if (taicpu(p).opcode = A_MOV) and
  846. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  847. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  848. end;
  849. A_ADD,
  850. A_ADC,
  851. A_RSB,
  852. A_RSC,
  853. A_SUB,
  854. A_SBC,
  855. A_AND,
  856. A_BIC,
  857. A_EOR,
  858. A_ORR,
  859. A_MLA,
  860. A_MUL:
  861. begin
  862. {
  863. change
  864. and reg2,reg1,const1
  865. and reg2,reg2,const2
  866. to
  867. and reg2,reg1,(const1 and const2)
  868. }
  869. if (taicpu(p).opcode = A_AND) and
  870. (taicpu(p).oper[1]^.typ = top_reg) and
  871. (taicpu(p).oper[2]^.typ = top_const) and
  872. GetNextInstruction(p, hp1) and
  873. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  874. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  875. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  876. (taicpu(hp1).oper[2]^.typ = top_const) then
  877. begin
  878. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  879. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  880. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  881. asml.remove(hp1);
  882. hp1.free;
  883. end;
  884. {
  885. change
  886. add reg1, ...
  887. mov reg2, reg1
  888. to
  889. add reg2, ...
  890. }
  891. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  892. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  893. end;
  894. A_CMP:
  895. begin
  896. {
  897. change
  898. cmp reg,const1
  899. moveq reg,const1
  900. movne reg,const2
  901. to
  902. cmp reg,const1
  903. movne reg,const2
  904. }
  905. if (taicpu(p).oper[1]^.typ = top_const) and
  906. GetNextInstruction(p, hp1) and
  907. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  908. (taicpu(hp1).oper[1]^.typ = top_const) and
  909. GetNextInstruction(hp1, hp2) and
  910. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  911. (taicpu(hp1).oper[1]^.typ = top_const) then
  912. begin
  913. RemoveRedundantMove(p, hp1, asml);
  914. RemoveRedundantMove(p, hp2, asml);
  915. end;
  916. end;
  917. end;
  918. end;
  919. end;
  920. end;
  { Tells whether p must be the last instruction of a sequence that is being
    made conditional, because it can modify the CPSR (see the opcode list
    below), writes to PC, or carries the S postfix.
    Returns false for anything that is not an instruction. }
  function MustBeLast(p : tai) : boolean;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        { calls, software interrupts and compare/test style opcodes }
        Result:=true
      else if (taicpu(p).ops>=1) and
              (taicpu(p).oper[0]^.typ=top_reg) and
              (taicpu(p).oper[0]^.reg=NR_PC) then
        { first operand is the program counter }
        Result:=true
      else
        { flag-setting variant of an instruction }
        Result:=(taicpu(p).oppostfix=PF_S);
    end;
  { Second peephole pass over the instructions from BlockStart up to BlockEnd.
    Only conditional branches (A_B with a condition other than C_None) are
    handled. Two shapes are rewritten into conditionally executed code:
      1. Bcc xxx; <1..4 instructions>; xxx:
         the skipped instructions receive the inverse condition and the
         branch is removed.
      2. Bcc xxx; <block 1>; B yyy; xxx: <block 2>; yyy:
         block 1 receives the inverse condition, block 2 the original one,
         and both branches are removed.
    NOTE(review): the exact semantics of CanBeCond, FindLabel and
    GetNextInstruction are defined outside this block - confirm there. }
  929. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  930. var
  931. p,hp1,hp2: tai;
  { l counts the instructions scanned after the branch }
  932. l : longint;
  933. condition : tasmcond;
  934. hp3: tai;
  { WasLast is set once the scan hits an instruction for which MustBeLast is true }
  935. WasLast: boolean;
  936. { UsedRegs, TmpUsedRegs: TRegSet; }
  937. begin
  938. p := BlockStart;
  939. { UsedRegs := []; }
  940. while (p <> BlockEnd) Do
  941. begin
  942. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  943. case p.Typ Of
  944. Ait_Instruction:
  945. begin
  946. case taicpu(p).opcode Of
  947. A_B:
  948. if taicpu(p).condition<>C_None then
  949. begin
  950. { check for
  951. Bxx xxx
  952. <several instructions>
  953. xxx:
  954. }
  955. l:=0;
  956. WasLast:=False;
  957. GetNextInstruction(p, hp1);
  { scan forward over instructions that can be made conditional; the scan
    ends after a MustBeLast instruction, at a label, or once l exceeds 4 }
  958. while assigned(hp1) and
  959. (l<=4) and
  960. CanBeCond(hp1) and
  961. { stop on labels }
  962. not(hp1.typ=ait_label) do
  963. begin
  964. inc(l);
  965. if MustBeLast(hp1) then
  966. begin
  967. WasLast:=True;
  968. GetNextInstruction(hp1,hp1);
  969. break;
  970. end
  971. else
  972. GetNextInstruction(hp1,hp1);
  973. end;
  { hp1 is now the first item behind the scanned run }
  974. if assigned(hp1) then
  975. begin
  { presumably true when hp1 is the branch target label - confirm in FindLabel }
  976. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  977. begin
  978. if (l<=4) and (l>0) then
  979. begin
  { the skipped instructions must execute exactly when the branch is NOT taken }
  980. condition:=inverse_cond(taicpu(p).condition);
  { hp2 remembers the branch so that it can be removed after the rewrite }
  981. hp2:=p;
  982. GetNextInstruction(p,hp1);
  { processing resumes at the first instruction behind the removed branch }
  983. p:=hp1;
  984. repeat
  985. if hp1.typ=ait_instruction then
  986. taicpu(hp1).condition:=condition;
  987. if MustBeLast(hp1) then
  988. begin
  989. GetNextInstruction(hp1,hp1);
  990. break;
  991. end
  992. else
  993. GetNextInstruction(hp1,hp1);
  994. until not(assigned(hp1)) or
  995. not(CanBeCond(hp1)) or
  996. (hp1.typ=ait_label);
  997. { wait with removing else GetNextInstruction could
  998. ignore the label if it was the only usage in the
  999. jump moved away }
  1000. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1001. asml.remove(hp2);
  1002. hp2.free;
  { p already points at the next item to examine, so skip the p.next step below }
  1003. continue;
  1004. end;
  1005. end
  1006. else
  1007. { do not perform further optimizations if there is an instruction
  1008. in block #1 which can not be optimized.
  1009. }
  1010. if not WasLast then
  1011. begin
  1012. { check further for
  1013. Bcc xxx
  1014. <several instructions 1>
  1015. B yyy
  1016. xxx:
  1017. <several instructions 2>
  1018. yyy:
  1019. }
  1020. { hp2 points to jmp yyy }
  1021. hp2:=hp1;
  1022. { skip hp1 to xxx }
  1023. GetNextInstruction(hp1, hp1);
  1024. if assigned(hp2) and
  1025. assigned(hp1) and
  1026. (l<=3) and
  1027. (hp2.typ=ait_instruction) and
  1028. (taicpu(hp2).is_jmp) and
  1029. (taicpu(hp2).condition=C_None) and
  1030. { real label and jump, no further references to the
  1031. label are allowed }
  1032. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1033. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1034. begin
  { l now counts the instructions of block 2; the count is not checked afterwards }
  1035. l:=0;
  1036. { skip hp1 to <several moves 2> }
  1037. GetNextInstruction(hp1, hp1);
  1038. while assigned(hp1) and
  1039. CanBeCond(hp1) do
  1040. begin
  1041. inc(l);
  1042. GetNextInstruction(hp1, hp1);
  1043. end;
  1044. { hp1 points to yyy: }
  1045. if assigned(hp1) and
  1046. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1047. begin
  { block 1 executes when the Bcc is not taken, hence the inverse condition }
  1048. condition:=inverse_cond(taicpu(p).condition);
  1049. GetNextInstruction(p,hp1);
  { hp3 remembers the Bcc for removal; p moves on to the first instruction of block 1 }
  1050. hp3:=p;
  1051. p:=hp1;
  1052. repeat
  1053. if hp1.typ=ait_instruction then
  1054. taicpu(hp1).condition:=condition;
  1055. GetNextInstruction(hp1,hp1);
  1056. until not(assigned(hp1)) or
  1057. not(CanBeCond(hp1));
  1058. { hp2 is still at jmp yyy }
  1059. GetNextInstruction(hp2,hp1);
  1060. { hp1 is now at xxx: }
  { inverting again restores the original condition of the Bcc for block 2 }
  1061. condition:=inverse_cond(condition);
  1062. GetNextInstruction(hp1,hp1);
  1063. { hp1 is now at <several movs 2> }
  { NOTE(review): unlike the loop above, hp1.typ is not checked before the taicpu cast }
  1064. repeat
  1065. taicpu(hp1).condition:=condition;
  1066. GetNextInstruction(hp1,hp1);
  1067. until not(assigned(hp1)) or
  1068. not(CanBeCond(hp1)) or
  1069. (hp1.typ=ait_label);
  1070. {
  1071. asml.remove(hp1.next)
  1072. hp1.next.free;
  1073. asml.remove(hp1);
  1074. hp1.free;
  1075. }
  1076. { remove Bcc }
  1077. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1078. asml.remove(hp3);
  1079. hp3.free;
  1080. { remove jmp }
  1081. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1082. asml.remove(hp2);
  1083. hp2.free;
  { p already points at block 1, so skip the p.next step below }
  1084. continue;
  1085. end;
  1086. end;
  1087. end;
  1088. end;
  1089. end;
  1090. end;
  1091. end;
  1092. end;
  { default step: advance to the next list entry }
  1093. p := tai(p.next)
  1094. end;
  1095. end;
  { Reports whether Reg is referenced by p1.
    A BL instruction is answered with true for every register; everything
    else is delegated to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
        begin
          Result:=true;
          exit;
        end;
      Result:=inherited RegInInstruction(Reg, p1);
    end;
  1103. const
  1104. { set of opcodes which might write or do write to memory;
      SchedulerPass1Cpu consults it before moving an instruction behind a load }
  1105. { TODO : extend armins.dat to contain r/w info }
  1106. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  1107. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  { Adjusts the register live-range bookkeeping (cg.rg[...].live_start and
    cg.rg[...].live_end) for swapping the two instructions p and hp1.
    Precondition: hp1 directly follows p in the instruction list. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { If the live range of reg currently ends at hp1 and p references reg as
      well, the range has to end at p instead. NR_NO is ignored. }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
           RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { If the live range of reg currently starts at p and hp1 references reg
      as well, the range has to start at hp1 instead. NR_NO is ignored. }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
           RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { fixed: this loop adjusts live ends, so the shift register of
              hp1 must go through CheckLiveEnd like every other operand kind
              (the original called CheckLiveStart here) }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass over BlockStart..BlockEnd.
    Looks for the sequence
      p    (any instruction)
      hp1  (LDR/LDRB/LDRH/LDRSB/LDRSH)
      hp2  (instruction that uses the register loaded by hp1)
    and, when the checks below allow it, moves p behind hp1, giving
    hp1, p, hp2. Register deallocation entries (ra_dealloc) that belong to
    p are moved together with it.
    The incoming value of p is ignored: it is overwritten with BlockStart.
    The function always returns true. }
  1179. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  1180. { TODO : schedule also forward }
  1181. { TODO : schedule distance > 1 }
  1182. var
  1183. hp1,hp2,hp3,hp4,hp5 : tai;
  { list temporarily holds p and its dealloc entries while they are being moved }
  1184. list : TAsmList;
  1185. begin
  1186. result:=true;
  1187. list:=TAsmList.Create;
  1188. p:=BlockStart;
  1189. while p<>BlockEnd Do
  1190. begin
  1191. if (p.typ=ait_instruction) and
  1192. GetNextInstruction(p,hp1) and
  1193. (hp1.typ=ait_instruction) and
  1194. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1195. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  1196. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  1197. not(RegModifiedByInstruction(NR_PC,p))
  1198. ) or
  { a store in p is accepted when the load is PC-relative or refers to
    symbol data with offset 0 }
  1199. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  1200. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  1201. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  1202. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1203. )
  1204. ) or
  1205. { try to prove that the memory accesses don't overlap }
  1206. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1207. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1208. (taicpu(p).oppostfix=PF_None) and
  1209. (taicpu(hp1).oppostfix=PF_None) and
  1210. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1211. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1212. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  1213. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1214. )
  1215. )
  1216. ) and
  1217. GetNextInstruction(hp1,hp2) and
  1218. (hp2.typ=ait_instruction) and
  1219. { loaded register used by next instruction? }
  1220. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1221. { loaded register not used by previous instruction? }
  1222. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1223. { same condition? }
  1224. (taicpu(p).condition=taicpu(hp1).condition) and
  1225. { first instruction might not change the register used as base }
  1226. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1227. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1228. ) and
  1229. { first instruction might not change the register used as index }
  1230. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1231. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1232. ) then
  1233. begin
  { remember the neighbours of p before unlinking it }
  1234. hp3:=tai(p.Previous);
  1235. hp5:=tai(p.next);
  1236. asml.Remove(p);
  1237. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1238. { before the instruction? }
  { walks backwards up to the previous instruction; matching deallocs are
    appended to list in the order they are encountered }
  1239. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1240. begin
  1241. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1242. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1243. begin
  1244. hp4:=hp3;
  1245. hp3:=tai(hp3.Previous);
  1246. asml.Remove(hp4);
  1247. list.Concat(hp4);
  1248. end
  1249. else
  1250. hp3:=tai(hp3.Previous);
  1251. end;
  1252. list.Concat(p);
  { update the live-range bookkeeping for the swap of p and hp1 }
  1253. SwapRegLive(taicpu(p),taicpu(hp1));
  1254. { after the instruction? }
  1255. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1256. begin
  1257. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1258. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1259. begin
  1260. hp4:=hp5;
  1261. hp5:=tai(hp5.next);
  1262. asml.Remove(hp4);
  1263. list.Concat(hp4);
  1264. end
  1265. else
  1266. hp5:=tai(hp5.Next);
  1267. end;
  { re-insert the load directly before hp2, then p (with its deallocs)
    between the load and hp2 }
  1268. asml.Remove(hp1);
  1269. {$ifdef DEBUG_PREREGSCHEDULER}
  1270. asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1271. {$endif DEBUG_PREREGSCHEDULER}
  1272. asml.InsertBefore(hp1,hp2);
  1273. asml.InsertListBefore(hp2,list);
  1274. p:=tai(p.next)
  1275. end
  1276. else if p.typ=ait_instruction then
  1277. p:=hp1
  1278. else
  1279. p:=tai(p.next);
  1280. end;
  1281. list.Free;
  1282. end;
  { Second peephole pass for the Thumb-2 optimizer class.
    Currently an empty stub: no optimizations are performed here. }
  1283. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  1284. begin
  1285. { TODO: Add optimizer code }
  1286. end;
  { Unit initialization: assigns the optimizer and pre-regalloc scheduler
    classes defined in this unit to casmoptimizer and cpreregallocscheduler
    (presumably class-reference variables read by the generic optimizer
    framework - confirm where they are declared). }
  1287. begin
  1288. casmoptimizer:=TCpuAsmOptimizer;
  1289. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1290. End.