2
0

aoptcpu.pas 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  { Peephole optimizer for ARM code; the constructor is inherited unchanged. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { CPU specific part of peephole pass 1 for the list entry p
      (overrides the inherited method) }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;

    { second peephole pass (overrides the inherited method) }
    procedure PeepHoleOptPass2; override;

    { overrides the inherited test whether Reg occurs in p1 }
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;

    { If movp is an unshifted "mov" that copies the register written by p and
      that register is released afterwards, p is changed to write the target
      of the mov directly and movp is removed. optimizer is the name that is
      put into the comment inserted into the assembler list. }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);

    { Updates AllUsedRegs with the entries following p and reports whether
      the value held in reg is still needed after p }
    function RegUsedAfterInstruction(reg: Tregister; p: tai;
      var AllUsedRegs: TAllUsedRegs): Boolean;

    { Starting behind Current, walks forward until an entry is reached that
      is not an instruction, that uses reg, that uses the program counter or
      that is a call/jump. That entry is handed back in Next. The function
      result is the result of the last GetNextInstruction call, so it is
      false when the end was reached without finding such an entry. }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
  end;
  { Instruction scheduler derived from TAsmScheduler }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific first scheduler pass for the list entry p
      (overrides the inherited method) }
    function SchedulerPass1Cpu(var p: tai): boolean; override;

    { helper working on the two instructions p and hp1 }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Variant of TCpuAsmOptimizer that replaces the second peephole pass;
    the constructor is inherited unchanged. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { second peephole pass (overrides the inherited method) }
    procedure PeepHoleOptPass2; override;
  end;
  47. Implementation
  48. uses
  49. cutils,verbose,globals,
  50. systems,
  51. cpuinfo,
  52. cgobj,cgutils,procinfo,
  53. aasmbase,aasmdata;
  { Returns true if p is an instruction that carries no condition yet
    (C_None), is not a PLD and, in case of a BLX, has a register as its
    first operand. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      CanBeCond:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { BLX is only accepted in its register form }
      CanBeCond:=(instr.opcode<>A_BLX) or (instr.oper[0]^.typ=top_reg);
    end;
  { Compares two references field by field; returns true only if base, index,
    offset, scale factor, symbols, address kind, index sign, shift and
    addressing mode all agree. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      Result:=false;
      { register/offset part }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { index sign, shift and addressing mode part }
      Result:=(r1.signindex=r2.signindex) and
              (r1.shiftimm=r2.shiftimm) and
              (r1.shiftmode=r2.shiftmode) and
              (r1.addressmode=r2.addressmode);
    end;
  { Returns true if instr is an instruction with opcode op whose condition is
    contained in cond and whose postfix is contained in postfix. An empty set
    for cond or postfix means that the respective property is not checked. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    var
      candidate: taicpu;
    begin
      Result:=false;
      if instr.typ<>ait_instruction then
        exit;
      candidate:=taicpu(instr);
      if candidate.opcode<>op then
        exit;
      if (cond<>[]) and not(candidate.condition in cond) then
        exit;
      Result:=(postfix=[]) or (candidate.oppostfix in postfix);
    end;
  { Returns true if both operands are of the same kind and hold the same
    register, constant, condition code or reference. Operands of any other
    kind never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_reg:
            Result:=(oper1.reg=oper2.reg);
          top_const:
            Result:=(oper1.val=oper2.val);
          top_ref:
            Result:=RefsEqual(oper1.ref^,oper2.ref^);
          top_conditioncode:
            Result:=(oper1.cc=oper2.cc);
          else
            Result:=false;
        end;
    end;
  { Returns true if oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        Result:=(oper.reg=reg)
      else
        Result:=false;
    end;
  { Removes movp from asml if it is executed on condition EQ and writes to
    the register compared by cmpp the very value cmpp compares against.
    Both operand 0 (register) and operand 1 (value) of the two instructions
    are compared without checking the operand kinds first, so the caller is
    expected to have done that. A comment is left in the list in place of
    the removed instruction. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpInstr,
      movInstr: taicpu;
    begin
      cmpInstr:=taicpu(cmpp);
      movInstr:=taicpu(movp);
      if movInstr.condition<>C_EQ then
        exit;
      if cmpInstr.oper[0]^.reg<>movInstr.oper[0]^.reg then
        exit;
      if cmpInstr.oper[1]^.val<>movInstr.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true if the instruction hp writes a new value into reg.

    hp may be nil or a non-instruction entry; the result is false then.

    Handled cases:
      * compares, branches, SWI, MSR and PLD never write a register
      * LDR/STR with pre- or postindexed addressing write their base register
      * STR does not write its first operand (it is the value being stored)
      * UMLAL/UMULL/SMLAL/SMULL write their first two registers
      * LDM writes every register of its register set
      * everything else writes its first operand if that is a register;
        LDRD additionally writes the register following the first operand
      * LDM/STM with writeback write the base register of their first operand }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      { cast only after hp is known to be an instruction }
      p := taicpu(hp);
      { an instruction without operands cannot write reg through an operand;
        bailing out here also avoids touching oper[0] when it does not exist }
      if p.ops = 0 then
        exit;
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        {Take care of post/preincremented store and loads, they will change their base register}
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR only reads its first register. Without this exit the generic
              oper[0] check below would wrongly report the stored register
              as being overwritten. }
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        {Loads to oper2 from coprocessor}
        {
        MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        {Loads to all register in the registerset}
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      case p.oper[0]^.typ of
        {This is the case}
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        {LDM/STM might write a new value to their index register}
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (Safe) heuristic telling whether ref is known to be aligned to 8 bytes.
    This is only ever answered with true for the ABIs abi_eabi, abi_armeb and
    abi_eabihf, and only for references relative to R13 or to R11 when R11 is
    the frame pointer of the current procedure. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      Result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { R13 relative: a non-negative offset that is a multiple of 8 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          Result:=true;
          exit;
        end;

      { R11 relative: when using NR_R11, it has always a value of
        <qword align>+4, so offset+4 has to be a multiple of 8 }
      Result:=(ref.offset<=0) and
              ((abs(ref.offset+4) mod 8)=0) and
              (current_procinfo.framepointer=NR_R11) and
              ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if the instruction hp reads reg through one of its operands.

    hp may be nil or a non-instruction entry; the result is false then.
    Operand 0 is normally skipped; it is only taken into account
    for the opcodes listed below. Register operands, register sets, the shift
    register of a shifter operand and base/index of a reference are checked. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      firstOp,
      idx: longint;
    begin
      Result:=false;
      if (hp=nil) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);

      {For these instructions we have to start on oper[0]}
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstOp:=0
      else
        firstOp:=1;

      for idx:=firstOp to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_ref:
              Result:=
                (instr.oper[idx]^.ref^.base=reg) or
                (instr.oper[idx]^.ref^.index=reg);
            top_shifterop:
              Result:=(instr.oper[idx]^.shifterop^.rs=reg);
            top_regset:
              Result:=(getsupreg(reg) in instr.oper[idx]^.regset^);
            top_reg:
              Result:=(instr.oper[idx]^.reg=reg) or
                { STRD also stores the register following its first operand }
                ((idx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
          end;
          {Bailout if we found something}
          if Result then
            exit;
        end;
    end;
  { Updates AllUsedRegs with the entries following p and then reports whether
    reg is still in use after p. The answer is true only if
      * reg is marked as used in the updated register info, and
      * p itself does not load reg with a new value, and
      * there is no next instruction, or the next instruction reads reg,
        or the next instruction does not load reg with a new value. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    var
      follower: tai;
    begin
      Result:=false;
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);

      if not(AllUsedRegs[getregtype(reg)].IsUsed(reg)) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;

      if not(GetNextInstruction(p,follower)) then
        begin
          { nothing follows that could overwrite reg }
          Result:=true;
          exit;
        end;

      Result:=instructionLoadsFromReg(reg,follower) or
              not(regLoadedWithNewValue(reg,follower));
    end;
  { Walks forward from Current and stops at the first entry that
      * is not an instruction, or
      * uses reg, or
      * is a call or jump, or
      * uses the program counter.
    That entry is returned in Next. The function result is the result of the
    last GetNextInstruction call, i.e. false if the walk ran out of entries. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not(Result) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          { anything that changes the program flow ends the search }
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Tries to turn
        <op>  reg1, ...        (p)
        mov   reg2, reg1       (movp)
    into
        <op>  reg2, ...
    Nothing is changed unless
      * movp is a mov with the same condition as p and without postfix,
      * movp has exactly two operands (no shifter operand),
      * movp reads the register written by p,
      * the target of movp is neither PC nor R14,
      * the target of movp is not used between p and movp,
      * p is not a MUL/MLA whose operand 1 is the target of movp, and
      * a deallocation of reg1 is found after movp.
    optimizer is the name put into the comment inserted before movp. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
      srcReg,
      dstReg : TRegister;
    begin
      if not(MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None])) or
         {We can't optimize if there is a shiftop}
         (taicpu(movp).ops<>2) or
         not(MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg)) then
        exit;

      srcReg:=taicpu(p).oper[0]^.reg;
      dstReg:=taicpu(movp).oper[0]^.reg;

      { don't mess with moves to pc or lr }
      if (dstReg=NR_PC) or (dstReg=NR_R14) then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(dstReg,p,movp) then
        exit;
      {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg=dstReg) then
        exit;

      dealloc:=FindRegDeAlloc(srcReg,tai(movp.Next));
      if not(assigned(dealloc)) then
        exit;

      asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);

      { srcReg is not used anymore, try to find its allocation
        and remove it if possible }
      GetLastInstruction(p,hp1);
      asml.Remove(dealloc);
      alloc:=FindRegAlloc(srcReg,tai(hp1.Next));
      if assigned(alloc) then
        begin
          asml.Remove(alloc);
          alloc.free;
          dealloc.free;
        end
      else
        { no allocation found: keep the deallocation, but right behind p }
        asml.InsertAfter(dealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,hp1);
      alloc:=FindRegAlloc(dstReg,tai(hp1.Next));
      if assigned(alloc) then
        begin
          asml.Remove(alloc);
          asml.InsertBefore(alloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstReg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstReg);
      asml.remove(movp);
      movp.free;
    end;
  295. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  296. var
  297. hp1,hp2: tai;
  298. i, i2: longint;
  299. TmpUsedRegs: TAllUsedRegs;
  300. tempop: tasmop;
  { Returns true if at most one bit is set in value, i.e. for powers of two
    and also for 0. }
  function IsPowerOf2(const value: DWord): boolean; inline;
    var
      lowestBitCleared: DWord;
    begin
      { "value and (value-1)" clears the lowest set bit of value }
      lowestBitCleared:=value and (value - 1);
      IsPowerOf2:=(lowestBitCleared = 0);
    end;
  305. begin
  306. result := false;
  307. case p.typ of
  308. ait_instruction:
  309. begin
  310. {
  311. change
  312. <op> reg,x,y
  313. cmp reg,#0
  314. into
  315. <op>s reg,x,y
  316. }
  317. { this optimization can be applied only to the currently enabled operations because
  318. the other operations do not update all flags and FPC does not track flag usage }
  319. if ((taicpu(p).opcode in [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,
  320. A_ORR,A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA])
  321. ) and
  322. (taicpu(p).oppostfix = PF_None) and
  323. (taicpu(p).condition = C_None) and
  324. GetNextInstruction(p, hp1) and
  325. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  326. (taicpu(hp1).oper[1]^.typ = top_const) and
  327. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  328. (taicpu(hp1).oper[1]^.val = 0) and
  329. GetNextInstruction(hp1, hp2) and
  330. (tai(hp2).typ = ait_instruction) and
  331. { be careful here, following instructions could use other flags
  332. however after a jump fpc never depends on the value of flags }
  333. (taicpu(hp2).opcode = A_B) and
  334. { All above instructions set Z and N according to the following
  335. Z := result = 0;
  336. N := result[31];
  337. EQ = Z=1; NE = Z=0;
  338. MI = N=1; PL = N=0; }
  339. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) and
  340. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  341. begin
  342. asml.insertbefore(tai_comment.Create(strpnew('Peephole OpCmp2OpS done')), p);
  343. taicpu(p).oppostfix:=PF_S;
  344. { move flag allocation if possible }
  345. GetLastInstruction(hp1, hp2);
  346. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  347. if assigned(hp2) then
  348. begin
  349. asml.Remove(hp2);
  350. asml.insertbefore(hp2, p);
  351. end;
  352. asml.remove(hp1);
  353. hp1.free;
  354. end
  355. else
  356. case taicpu(p).opcode of
  357. A_STR:
  358. begin
  359. { change
  360. str reg1,ref
  361. ldr reg2,ref
  362. into
  363. str reg1,ref
  364. mov reg2,reg1
  365. }
  366. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  367. (taicpu(p).oppostfix=PF_None) and
  368. GetNextInstruction(p,hp1) and
  369. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  370. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  371. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  372. begin
  373. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  374. begin
  375. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  376. asml.remove(hp1);
  377. hp1.free;
  378. end
  379. else
  380. begin
  381. taicpu(hp1).opcode:=A_MOV;
  382. taicpu(hp1).oppostfix:=PF_None;
  383. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  384. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  385. end;
  386. result := true;
  387. end
  388. { change
  389. str reg1,ref
  390. str reg2,ref
  391. into
  392. strd reg1,ref
  393. }
  394. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  395. (taicpu(p).oppostfix=PF_None) and
  396. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  397. GetNextInstruction(p,hp1) and
  398. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  399. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  400. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  401. { str ensures that either base or index contain no register, else ldr wouldn't
  402. use an offset either
  403. }
  404. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  405. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  406. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  407. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  408. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  409. begin
  410. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  411. taicpu(p).oppostfix:=PF_D;
  412. asml.remove(hp1);
  413. hp1.free;
  414. end;
  415. end;
  416. A_LDR:
  417. begin
  418. { change
  419. ldr reg1,ref
  420. ldr reg2,ref
  421. into ...
  422. }
  423. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  424. GetNextInstruction(p,hp1) and
  425. { ldrd is not allowed here }
  426. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  427. begin
  428. {
  429. ...
  430. ldr reg1,ref
  431. mov reg2,reg1
  432. }
  433. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  434. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  435. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  436. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  437. begin
  438. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  439. begin
  440. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  441. asml.remove(hp1);
  442. hp1.free;
  443. end
  444. else
  445. begin
  446. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  447. taicpu(hp1).opcode:=A_MOV;
  448. taicpu(hp1).oppostfix:=PF_None;
  449. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  450. end;
  451. result := true;
  452. end
  453. {
  454. ...
  455. ldrd reg1,ref
  456. }
  457. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  458. { ldrd does not allow any postfixes ... }
  459. (taicpu(p).oppostfix=PF_None) and
  460. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  461. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  462. { ldr ensures that either base or index contain no register, else ldr wouldn't
  463. use an offset either
  464. }
  465. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  466. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  467. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  468. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  469. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  470. begin
  471. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  472. taicpu(p).oppostfix:=PF_D;
  473. asml.remove(hp1);
  474. hp1.free;
  475. end;
  476. end;
  477. { Remove superfluous mov after ldr
  478. changes
  479. ldr reg1, ref
  480. mov reg2, reg1
  481. to
  482. ldr reg2, ref
  483. conditions are:
  484. * no ldrd usage
  485. * reg1 must be released after mov
  486. * mov can not contain shifterops
  487. * ldr+mov have the same conditions
  488. * mov does not set flags
  489. }
  490. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  491. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  492. end;
  493. A_MOV:
  494. begin
  495. { fold
  496. mov reg1,reg0, shift imm1
  497. mov reg1,reg1, shift imm2
  498. }
  499. if (taicpu(p).ops=3) and
  500. (taicpu(p).oper[2]^.typ = top_shifterop) and
  501. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  502. getnextinstruction(p,hp1) and
  503. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  504. (taicpu(hp1).ops=3) and
  505. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  506. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  507. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  508. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  509. begin
  510. { fold
  511. mov reg1,reg0, lsl 16
  512. mov reg1,reg1, lsr 16
  513. strh reg1, ...
  514. dealloc reg1
  515. to
  516. strh reg1, ...
  517. dealloc reg1
  518. }
  519. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  520. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  521. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  522. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  523. getnextinstruction(hp1,hp2) and
  524. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  525. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  526. begin
  527. CopyUsedRegs(TmpUsedRegs);
  528. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  529. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  530. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  531. begin
  532. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  533. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  534. asml.remove(p);
  535. asml.remove(hp1);
  536. p.free;
  537. hp1.free;
  538. p:=hp2;
  539. end;
  540. ReleaseUsedRegs(TmpUsedRegs);
  541. end
  542. { fold
  543. mov reg1,reg0, shift imm1
  544. mov reg1,reg1, shift imm2
  545. to
  546. mov reg1,reg0, shift imm1+imm2
  547. }
  548. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  549. { an asr is of no use after a lsr, so the asr can be folded into the lsr }
  550. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  551. begin
  552. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  553. { avoid overflows }
  554. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  555. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  556. SM_ROR:
  557. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  558. SM_ASR:
  559. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  560. SM_LSR,
  561. SM_LSL:
  562. begin
  563. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  564. InsertLLItem(p.previous, p.next, hp1);
  565. p.free;
  566. p:=hp1;
  567. end;
  568. else
  569. internalerror(2008072803);
  570. end;
  571. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  572. asml.remove(hp1);
  573. hp1.free;
  574. result := true;
  575. end
  576. { fold
  577. mov reg1,reg0, shift imm1
  578. mov reg1,reg1, shift imm2
  579. mov reg1,reg1, shift imm3 ...
  580. }
  581. else if getnextinstruction(hp1,hp2) and
  582. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  583. (taicpu(hp2).ops=3) and
  584. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  585. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  586. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  587. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  588. begin
  589. { mov reg1,reg0, lsl imm1
  590. mov reg1,reg1, lsr/asr imm2
  591. mov reg1,reg1, lsl imm3 ...
  592. if imm3<=imm1 and imm2>=imm3
  593. to
  594. mov reg1,reg0, lsl imm1
  595. mov reg1,reg1, lsr/asr imm2-imm3
  596. }
  597. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  598. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  599. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  600. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  601. begin
  602. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  603. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  604. asml.remove(hp2);
  605. hp2.free;
  606. result := true;
  607. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  608. begin
  609. asml.remove(hp1);
  610. hp1.free;
  611. end;
  612. end
  613. { mov reg1,reg0, lsr/asr imm1
  614. mov reg1,reg1, lsl imm2
  615. mov reg1,reg1, lsr/asr imm3 ...
  616. if imm3>=imm1 and imm2>=imm1
  617. to
  618. mov reg1,reg0, lsl imm2-imm1
  619. mov reg1,reg1, lsr/asr imm3 ...
  620. }
  621. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  622. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  623. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  624. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  625. begin
  626. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  627. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  628. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  629. asml.remove(p);
  630. p.free;
  631. p:=hp2;
  632. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  633. begin
  634. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  635. asml.remove(hp1);
  636. hp1.free;
  637. p:=hp2;
  638. end;
  639. result := true;
  640. end;
  641. end;
  642. end;
  643. { Change the common
  644. mov r0, r0, lsr #24
  645. and r0, r0, #255
  646. and remove the superfluous and
  647. This could be extended to handle more cases.
  648. }
  649. if (taicpu(p).ops=3) and
  650. (taicpu(p).oper[2]^.typ = top_shifterop) and
  651. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  652. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  653. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  654. getnextinstruction(p,hp1) and
  655. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  656. (taicpu(hp1).ops=3) and
  657. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  658. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  659. (taicpu(hp1).oper[2]^.typ = top_const) and
  660. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  661. For LSR #25 and an AndConst of 255 that whould go like this:
  662. 255 and ((2 shl (32-25))-1)
  663. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  664. LSR #25 and AndConst of 254:
  665. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  666. }
  667. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  668. begin
  669. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  670. asml.remove(hp1);
  671. hp1.free;
  672. end;
  673. {
  674. optimize
  675. mov rX, yyyy
  676. ....
  677. }
  678. if (taicpu(p).ops = 2) and
  679. GetNextInstruction(p,hp1) and
  680. (tai(hp1).typ = ait_instruction) then
  681. begin
  682. {
  683. This changes the very common
  684. mov r0, #0
  685. str r0, [...]
  686. mov r0, #0
  687. str r0, [...]
  688. and removes all superfluous mov instructions
  689. }
  690. if (taicpu(p).oper[1]^.typ = top_const) and
  691. (taicpu(hp1).opcode=A_STR) then
  692. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  693. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  694. GetNextInstruction(hp1, hp2) and
  695. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  696. (taicpu(hp2).ops = 2) and
  697. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  698. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  699. begin
  700. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  701. GetNextInstruction(hp2,hp1);
  702. asml.remove(hp2);
  703. hp2.free;
  704. if not assigned(hp1) then break;
  705. end
  706. {
  707. This removes the first mov from
  708. mov rX,...
  709. mov rX,...
  710. }
  711. else if taicpu(hp1).opcode=A_MOV then
  712. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  713. (taicpu(hp1).ops = 2) and
  714. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  715. { don't remove the first mov if the second is a mov rX,rX }
  716. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  717. begin
  718. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  719. asml.remove(p);
  720. p.free;
  721. p:=hp1;
  722. GetNextInstruction(hp1,hp1);
  723. if not assigned(hp1) then
  724. break;
  725. end;
  726. end;
  727. {
  728. change
  729. mov r1, r0
  730. add r1, r1, #1
  731. to
  732. add r1, r0, #1
  733. Todo: Make it work for mov+cmp too
  734. CAUTION! If this one is successful p might not be a mov instruction anymore!
  735. }
  736. if (taicpu(p).ops = 2) and
  737. (taicpu(p).oper[1]^.typ = top_reg) and
  738. (taicpu(p).oppostfix = PF_NONE) and
  739. GetNextInstruction(p, hp1) and
  740. (tai(hp1).typ = ait_instruction) and
  741. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  742. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  743. {MOV and MVN might only have 2 ops}
  744. (taicpu(hp1).ops = 3) and
  745. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  746. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  747. (taicpu(hp1).oper[1]^.typ = top_reg) and
  748. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  749. begin
  750. { When we get here we still don't know if the registers match}
  751. for I:=1 to 2 do
  752. {
  753. If the first loop was successful p will be replaced with hp1.
  754. The checks will still be ok, because all required information
  755. will also be in hp1 then.
  756. }
  757. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  758. begin
  759. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  760. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  761. if p<>hp1 then
  762. begin
  763. asml.remove(p);
  764. p.free;
  765. p:=hp1;
  766. end;
  767. end;
  768. end;
  769. { This folds shifterops into following instructions
  770. mov r0, r1, lsl #8
  771. add r2, r3, r0
  772. to
  773. add r2, r3, r1, lsl #8
  774. CAUTION! If this one is successful p might not be a mov instruction anymore!
  775. }
  776. if (taicpu(p).opcode = A_MOV) and
  777. (taicpu(p).ops = 3) and
  778. (taicpu(p).oper[1]^.typ = top_reg) and
  779. (taicpu(p).oper[2]^.typ = top_shifterop) and
  780. (taicpu(p).oppostfix = PF_NONE) and
  781. GetNextInstruction(p, hp1) and
  782. (tai(hp1).typ = ait_instruction) and
  783. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  784. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  785. (taicpu(hp1).oppostfix = PF_NONE) and
  786. (taicpu(hp1).condition = taicpu(p).condition) and
  787. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  788. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  789. A_CMP, A_CMN]) and
  790. (
  791. {Only ONE of the two src operands is allowed to match}
  792. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  793. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  794. ) then
  795. begin
  796. CopyUsedRegs(TmpUsedRegs);
  797. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  798. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  799. I2:=0
  800. else
  801. I2:=1;
  802. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  803. for I:=I2 to taicpu(hp1).ops-1 do
  804. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  805. begin
  806. { If the parameter matched on the second op from the RIGHT
  807. we have to switch the parameters, this will not happen for CMP
  808. were we're only evaluating the most right parameter
  809. }
  810. if I <> taicpu(hp1).ops-1 then
  811. begin
  812. {The SUB operators need to be changed when we swap parameters}
  813. case taicpu(hp1).opcode of
  814. A_SUB: tempop:=A_RSB;
  815. A_SBC: tempop:=A_RSC;
  816. A_RSB: tempop:=A_SUB;
  817. A_RSC: tempop:=A_SBC;
  818. else tempop:=taicpu(hp1).opcode;
  819. end;
  820. if taicpu(hp1).ops = 3 then
  821. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  822. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  823. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  824. else
  825. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  826. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  827. taicpu(p).oper[2]^.shifterop^);
  828. end
  829. else
  830. if taicpu(hp1).ops = 3 then
  831. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  832. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  833. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  834. else
  835. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  836. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  837. taicpu(p).oper[2]^.shifterop^);
  838. asml.insertbefore(hp2, p);
  839. asml.remove(p);
  840. asml.remove(hp1);
  841. p.free;
  842. hp1.free;
  843. p:=hp2;
  844. GetNextInstruction(p,hp1);
  845. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  846. break;
  847. end;
  848. ReleaseUsedRegs(TmpUsedRegs);
  849. end;
  850. {
  851. Often we see shifts and then a superfluous mov to another register
  852. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  853. }
  854. if (taicpu(p).opcode = A_MOV) and
  855. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  856. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  857. end;
  858. A_ADD,
  859. A_ADC,
  860. A_RSB,
  861. A_RSC,
  862. A_SUB,
  863. A_SBC,
  864. A_AND,
  865. A_BIC,
  866. A_EOR,
  867. A_ORR,
  868. A_MLA,
  869. A_MUL:
  870. begin
  871. {
  872. change
  873. and reg2,reg1,const1
  874. and reg2,reg2,const2
  875. to
  876. and reg2,reg1,(const1 and const2)
  877. }
  878. if (taicpu(p).opcode = A_AND) and
  879. (taicpu(p).oper[1]^.typ = top_reg) and
  880. (taicpu(p).oper[2]^.typ = top_const) and
  881. GetNextInstruction(p, hp1) and
  882. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  883. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  884. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  885. (taicpu(hp1).oper[2]^.typ = top_const) then
  886. begin
  887. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  888. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  889. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  890. asml.remove(hp1);
  891. hp1.free;
  892. end;
  893. {
  894. change
  895. add reg1, ...
  896. mov reg2, reg1
  897. to
  898. add reg2, ...
  899. }
  900. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  901. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  902. end;
  903. A_CMP:
  904. begin
  905. {
  906. change
  907. cmp reg,const1
  908. moveq reg,const1
  909. movne reg,const2
  910. to
  911. cmp reg,const1
  912. movne reg,const2
  913. }
  914. if (taicpu(p).oper[1]^.typ = top_const) and
  915. GetNextInstruction(p, hp1) and
  916. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  917. (taicpu(hp1).oper[1]^.typ = top_const) and
  918. GetNextInstruction(hp1, hp2) and
  919. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  920. (taicpu(hp1).oper[1]^.typ = top_const) then
  921. begin
  922. RemoveRedundantMove(p, hp1, asml);
  923. RemoveRedundantMove(p, hp2, asml);
  924. end;
  925. end;
  926. end;
  927. end;
  928. end;
  929. end;
  { Instructions modifying the CPSR can only be the last instruction of a
    sequence that is turned into conditional code.

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is one of BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF, CMFE,
      - its first operand is the register PC,
      - it carries the S postfix.
    Anything that is not an instruction yields false. }
  function MustBeLast(p : tai) : boolean;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (taicpu(p).ops>=1) and
        (taicpu(p).oper[0]^.typ=top_reg) and
        (taicpu(p).oper[0]^.reg=NR_PC) then
        Result:=true
      else
        Result:=(taicpu(p).oppostfix=PF_S);
    end;
  { Second peephole pass: replaces short conditional branches by
    conditionally executed instructions.

    Walks the items from BlockStart up to (not including) BlockEnd. For every
    conditional A_B two shapes are tried:

      1)     Bcc   xxx
             <up to 4 instructions accepted by CanBeCond>
         xxx:
         The skipped instructions get the inverse condition and the branch is
         removed.

      2)     Bcc   xxx
             <block 1>
             B     yyy
         xxx:
             <block 2>
         yyy:
         Block 1 gets the inverse condition, block 2 the original condition,
         and both branches are removed. Only tried when block 1 contains no
         instruction for which MustBeLast is true and the label xxx has
         exactly two references.

    Label reference counts are decremented for every branch that is removed. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                               Bxx   xxx
                            <several instructions>
                            xxx:
                        }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        { count the instructions that could be made conditional;
                          an instruction that must be last ends the run }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 keeps the Bcc so it can be removed afterwards }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    { processing continues at the first converted instruction }
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized.
                              }
                              if not WasLast then
                                begin
                                  { check further for
                                         Bcc   xxx
                                      <several instructions 1>
                                         B   yyy
                                      xxx:
                                      <several instructions 2>
                                      yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 keeps the Bcc so it can be removed afterwards }
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2>, or already at
                                            yyy: when block 2 is empty. The loop is
                                            therefore tested before its first iteration and
                                            only real instructions are touched, so a label
                                            is never written through a taicpu cast and
                                            nothing behind yyy: is made conditional. }
                                          while assigned(hp1) and
                                            CanBeCond(hp1) and
                                            (hp1.typ<>ait_label) do
                                            begin
                                              if hp1.typ=ait_instruction then
                                                taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            end;
                                          {
                                            asml.remove(hp1.next)
                                            hp1.next.free;
                                            asml.remove(hp1);
                                            hp1.free;
                                          }
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is considered to be used by p1.
    A BL instruction always answers true, whatever Reg is; every other item
    is passed on to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ <> ait_instruction) or (taicpu(p1).opcode <> A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        Result:=true;
    end;
  const
    { Opcodes which write to memory or might do so; besides the store
      opcodes the set also lists the branch opcodes A_B, A_BL, A_BLX, A_BX
      and A_BKPT.
      TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      A_B, A_BL, A_BLX, A_BKPT, A_BX,
      A_STR, A_STRB, A_STRBT, A_STRH, A_STRT,
      A_STF, A_SFM, A_STM,
      A_FSTS, A_FSTD
    ];
  { Adjusts the register live information when the two instructions p and
    hp1 are swapped; hp1 must directly follow p.

    For every register referenced by an operand of p (plain register, base
    and index of a reference, shifter register, members of a register set):
    if its live range starts at p and hp1 uses it too, the start is moved to
    hp1. For every register referenced by an operand of hp1: if its live
    range ends at hp1 and p uses it too, the end is moved to p. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { moves the live end of reg from hp1 to p if p uses reg as well }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { moves the live start of reg from p to hp1 if hp1 uses reg as well }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { the shifter register of hp1 is handled like every other operand
              of hp1: its live END is what may have to move to p }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass over BlockStart..BlockEnd.

    Looks for the sequence
        p         (any instruction)
        load      (LDR/LDRB/LDRH/LDRSB/LDRSH)
        consumer  (instruction using the loaded register)
    and, when the checks below allow it, moves p behind the load so that the
    sequence becomes  load, p, consumer.  Register deallocations that sit
    next to p and name a register used by p are moved along with it.

    The value passed in p is not read; p is overwritten with BlockStart.
    The result is always true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      loadIns,consumer,backScan,fwdScan,item : tai;
      moved : TAsmList;
    begin
      result:=true;
      moved:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if p.typ<>ait_instruction then
            p:=tai(p.next)
          else if GetNextInstruction(p,loadIns) and
            (loadIns.typ=ait_instruction) and
            (taicpu(loadIns).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction changes potentially a memory location }
            ( (not(taicpu(p).opcode in opcode_could_mem_write) and
               not(RegModifiedByInstruction(NR_PC,p))
              ) or
              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
               ((taicpu(loadIns).oper[1]^.ref^.base=NR_PC) or
                (assigned(taicpu(loadIns).oper[1]^.ref^.symboldata) and
                 (taicpu(loadIns).oper[1]^.ref^.offset=0)
                )
               ) or
               { try to prove that the memory accesses don't overlap }
               ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                (taicpu(p).oper[1]^.ref^.base=taicpu(loadIns).oper[1]^.ref^.base) and
                (taicpu(p).oppostfix=PF_None) and
                (taicpu(loadIns).oppostfix=PF_None) and
                (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                (taicpu(loadIns).oper[1]^.ref^.index=NR_NO) and
                { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(loadIns).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(loadIns).oper[0]^.reg)]))
               )
              )
            ) and
            GetNextInstruction(loadIns,consumer) and
            (consumer.typ=ait_instruction) and
            { loaded register used by next instruction? }
            (RegInInstruction(taicpu(loadIns).oper[0]^.reg,consumer)) and
            { loaded register not used by previous instruction? }
            not(RegInInstruction(taicpu(loadIns).oper[0]^.reg,p)) and
            { same condition? }
            (taicpu(p).condition=taicpu(loadIns).condition) and
            { first instruction might not change the register used as base }
            ((taicpu(loadIns).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadIns).oper[1]^.ref^.base,p))
            ) and
            { first instruction might not change the register used as index }
            ((taicpu(loadIns).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(loadIns).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember both neighbours of p before it is unlinked }
              backScan:=tai(p.Previous);
              fwdScan:=tai(p.next);
              asml.Remove(p);
              { if there is a reg. dealloc instruction associated with p, move it together with p }

              { before the instruction? }
              while assigned(backScan) and (backScan.typ<>ait_instruction) do
                begin
                  item:=backScan;
                  backScan:=tai(backScan.Previous);
                  if (item.typ=ait_regalloc) and
                    (tai_regalloc(item).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(item).reg,p) then
                    begin
                      asml.Remove(item);
                      moved.Concat(item);
                    end;
                end;

              moved.Concat(p);
              SwapRegLive(taicpu(p),taicpu(loadIns));

              { after the instruction? }
              while assigned(fwdScan) and (fwdScan.typ<>ait_instruction) do
                begin
                  item:=fwdScan;
                  fwdScan:=tai(fwdScan.next);
                  if (item.typ=ait_regalloc) and
                    (tai_regalloc(item).ratype=ra_dealloc) and
                    RegInInstruction(tai_regalloc(item).reg,p) then
                    begin
                      asml.Remove(item);
                      moved.Concat(item);
                    end;
                end;

              { re-insert the load and then everything collected in moved
                directly in front of the consumer }
              asml.Remove(loadIns);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),consumer);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(loadIns,consumer);
              asml.InsertListBefore(consumer,moved);
              p:=tai(p.next)
            end
          else
            { p is an instruction but nothing was rescheduled: go on with
              whatever GetNextInstruction delivered }
            p:=loadIns;
        end;
      moved.Free;
    end;
  { Second peephole pass of the Thumb-2 optimizer.
    Currently an empty placeholder: it performs no work. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  1296. begin
  1297. casmoptimizer:=TCpuAsmOptimizer;
  1298. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1299. End.