aoptcpu.pas 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  { ARM peephole optimizer object, derived from TAsmOptimizer. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor is declared here; the inherited one is used }

    { CPU specific hooks overridden from the ancestor }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    procedure PeepHoleOptPass2; override;
    function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;

    { If movp is a plain "mov" copying the destination register of p, tries
      to let p write the destination of the mov directly and drops the mov.
      optimizer is the name put into the emitted peephole comment. }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);

    { Tells whether reg is still needed after instruction p; AllUsedRegs is
      updated as a side effect. }
    function RegUsedAfterInstruction(reg: Tregister; p: tai;
      var AllUsedRegs: TAllUsedRegs): Boolean;

    { Starting after Current, advances Next with GetNextInstruction until it
      reaches an item that is not an instruction, an instruction that uses
      reg or the PC, or a call/jump (i.e. a change in program flow).
      The result is the result of the last GetNextInstruction call, so it is
      false when the search ran out of instructions (Next is presumably nil
      in that case - confirm against GetNextInstruction). }
    function GetNextInstructionUsingReg(Current: tai; var Next: tai; reg: TRegister): Boolean;
  end;
  { Scheduler object derived from TAsmScheduler; judging by its name it is
    meant to run before register allocation (not verifiable from this part
    of the unit). }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { CPU specific scheduling hook overridden from the ancestor }
    function SchedulerPass1Cpu(var p: tai): boolean; override;
    { helper working on a pair of instructions; see its implementation for
      the exact semantics }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Variant of TCpuAsmOptimizer that only replaces the second peephole pass
    (presumably for Thumb-2 code generation, going by the class name). }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor is declared here; the inherited one is used }
    procedure PeepHoleOptPass2; override;
  end;
  47. Implementation
  48. uses
  49. cutils,verbose,globals,
  50. systems,
  51. cpuinfo,
  52. cgobj,cgutils,procinfo,
  53. aasmbase,aasmdata;
  { Returns true if p is an instruction that
      - carries no condition yet (C_None),
      - is not a PLD, and
      - is not a BLX, unless the first operand of the BLX is a register.
    Judging by the name, such an instruction may be given a condition. }
  function CanBeCond(p : tai) : boolean;
    var
      instr : taicpu;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional, or PLD: rejected }
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { BLX is accepted only in its register form }
      if instr.opcode=A_BLX then
        result:=instr.oper[0]^.typ=top_reg
      else
        result:=true;
    end;
  { Field-wise comparison of two references: returns true only if offset,
    base, index, scalefactor, symbol, relsymbol, refaddr, signindex,
    shiftimm, addressmode and shiftmode are all equal. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual:=false;
      { registers and displacement }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
      { ARM specific addressing details }
      if (r1.signindex<>r2.signindex) or
         (r1.shiftimm<>r2.shiftimm) or
         (r1.shiftmode<>r2.shiftmode) or
         (r1.addressmode<>r2.addressmode) then
        exit;
      RefsEqual:=true;
    end;
  { Returns true if instr is an instruction with opcode op whose condition
    is a member of cond and whose postfix is a member of postfix.
    An empty cond or postfix set acts as a wildcard for that property. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      { an empty set means "any condition" }
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      { an empty set means "any postfix" }
      if (postfix<>[]) and not(taicpu(instr).oppostfix in postfix) then
        exit;
      result:=true;
    end;
  { Compares two operands. They match if they are of the same kind and
      - constants:       the values are equal,
      - registers:       the registers are equal,
      - condition codes: the codes are equal,
      - references:      RefsEqual holds.
    Operands of any other kind never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ<>oper2.typ then
        exit;
      case oper1.typ of
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_const:
          result:=oper1.val=oper2.val;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        top_conditioncode:
          result:=oper1.cc=oper2.cc;
        { all remaining operand kinds keep the result at false }
      end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Removes movp from asml if it has condition EQ and both its first operand
    (register) and its second operand (value) are the same as those of cmpp.
    A peephole comment is left behind in place of the removed instruction.

    Both cmpp and movp are accessed as taicpu without any type check, and the
    operand kinds are not verified either; the caller has to guarantee that
    (presumably cmpp is "cmp reg,#const" and movp a "mov reg,#const"). }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpinstr,
      movinstr : taicpu;
    begin
      cmpinstr:=taicpu(cmpp);
      movinstr:=taicpu(movp);
      if movinstr.condition<>C_EQ then
        exit;
      if cmpinstr.oper[0]^.reg<>movinstr.oper[0]^.reg then
        exit;
      if cmpinstr.oper[1]^.val<>movinstr.oper[1]^.val then
        exit;
      { the comment is linked in after movp before movp itself is unlinked }
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true if the instruction hp writes a new value into reg.

    hp may be nil or a non-instruction; the result is false then.
    The check is done in two steps:
      1. opcode specific rules (instructions that never write a register,
         base register write-back of LDR/STR, second result register of the
         long multiplies, register list of LDM),
      2. the generic rule that operand 0 is the destination register, or -
         if it is a reference - that its base register is written back.

    Changes compared to the previous version:
      - STR/STRD only read operand 0, so the generic rule of step 2 must not
        be applied to them; only base register write-back counts.
      - instructions without operands are rejected up front instead of
        dereferencing a non-existing operand 0. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      p := taicpu(hp);
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      { without operands nothing can be written; this also protects the
        oper[0]^ access further down }
      if p.ops = 0 then
        exit;

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR does not load into its first register, it only reads it,
              so the generic operand 0 check below must be skipped }
            if p.opcode = A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor }
        {
          MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      case p.oper[0]^.typ of
        { This is the usual case: operand 0 is the destination register }
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD also writes the register following operand 0 }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { Conservative heuristic telling whether ref is known to be aligned to
    8 bytes. This is only ever answered with true for the EABI style ABIs
    (abi_eabi, abi_armeb, abi_eabihf) and if one of the following holds:
      - R13 is base or index and the offset is non-negative and a multiple
        of 8,
      - R11 is the frame pointer of the current procedure, R11 is base or
        index, the offset is not positive and offset+4 is a multiple of 8
        (R11 always has a value of <qword aligned>+4 when used this way). }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { stack pointer relative access }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;

      { frame pointer relative access }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true if the instruction hp reads reg through one of its operands.

    hp may be nil or a non-instruction; the result is false then.
    Operand 0 is normally skipped; it is only inspected for the opcodes
    listed below. An operand counts as a read of reg if it is
      - reg itself (for STRD also if operand 0 is the register preceding reg),
      - a register set containing reg,
      - a shifter operand shifting by reg,
      - a reference using reg as base or index. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i,
      firstop: longint;
    begin
      instructionLoadsFromReg := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);

      { for these instructions operand 0 has to be looked at as well }
      if p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                      A_CMP, A_CMN, A_TST, A_TEQ,
                      A_B, A_BL, A_BX, A_BLX,
                      A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;

      for i:=firstop to p.ops-1 do
        begin
          case p.oper[i]^.typ of
            top_ref:
              instructionLoadsFromReg :=
                (p.oper[i]^.ref^.base = reg) or
                (p.oper[i]^.ref^.index = reg);
            top_shifterop:
              instructionLoadsFromReg := p.oper[i]^.shifterop^.rs = reg;
            top_regset:
              instructionLoadsFromReg := (getsupreg(reg) in p.oper[i]^.regset^);
            top_reg:
              instructionLoadsFromReg := (p.oper[i]^.reg = reg) or
                { STRD }
                ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
          end;
          { stop at the first operand that reads reg }
          if instructionLoadsFromReg then
            exit;
        end;
    end;
  { Returns true if reg is still needed after instruction p.

    AllUsedRegs is first updated with the items following p. reg counts as
    used afterwards if all of the following hold:
      - it is marked as used in AllUsedRegs,
      - p itself does not load it with a new value,
      - there is no next instruction, or the next instruction reads reg,
        or the next instruction does not load reg with a new value. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      Result:=false;
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      if not(AllUsedRegs[getregtype(reg)].IsUsed(reg)) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p denotes the instruction following the original p }
      if not(GetNextInstruction(p,p)) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,p) or
        not(regLoadedWithNewValue(reg,p));
    end;
  { Walks forward from Current using GetNextInstruction and stops at the
    first item for which one of the following is true:
      - GetNextInstruction failed,
      - the item is not an instruction,
      - the instruction uses reg,
      - the instruction is a call or jump,
      - the instruction uses the PC.
    Next receives the item the walk stopped at; the function result is the
    result of the last GetNextInstruction call. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not(Result) then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          { changes of the program flow end the search as well }
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Turns
        <op>  reg1, ...        (p)
        mov   reg2, reg1       (movp)
    into
        <op>  reg2, ...
    if this is possible. optimizer is the name of the calling optimization
    and only used for the comment inserted into the assembler list.

    Nothing is changed unless all of these hold:
      - movp is a mov with the same condition as p and without postfix,
      - it has exactly two operands (so no shifter operand),
      - its source is the first operand register of p,
      - its destination is neither PC nor R14 (LR),
      - its destination is not used between p and movp,
      - p is not a MUL/MLA whose second operand is the destination of the
        mov (operand 0 and 1 of these must differ),
      - a deallocation of reg1 is found after movp.

    p is accessed as taicpu without a type check; the caller has to pass an
    instruction. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      srcreg,
      dstreg     : TRegister;
      regalloc,
      regdealloc : tai_regalloc;
      prev       : tai;
    begin
      if not(MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None])) then
        exit;
      { We can't optimize if there is a shiftop }
      if taicpu(movp).ops<>2 then
        exit;
      srcreg:=taicpu(p).oper[0]^.reg;
      if not(MatchOperand(taicpu(movp).oper[1]^, srcreg)) then
        exit;
      dstreg:=taicpu(movp).oper[0]^.reg;
      { don't mess with moves to pc or lr }
      if (dstreg=NR_PC) or (dstreg=NR_R14) then
        exit;
      { the destination register of the mov might not be used between p and movp }
      if RegUsedBetween(dstreg,p,movp) then
        exit;
      { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = dstreg) then
        exit;

      regdealloc:=FindRegDeAlloc(srcreg,tai(movp.Next));
      if not(assigned(regdealloc)) then
        exit;

      asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);

      { srcreg is not used anymore, try to find its allocation
        and remove it if possible }
      GetLastInstruction(p,prev);
      asml.Remove(regdealloc);
      regalloc:=FindRegAlloc(srcreg,tai(prev.Next));
      if assigned(regalloc) then
        begin
          { allocation and deallocation cancel each other out }
          asml.Remove(regalloc);
          regalloc.free;
          regdealloc.free;
        end
      else
        { no allocation found: release the register right after p instead }
        asml.InsertAfter(regdealloc,p);

      { try to move the allocation of the target register }
      GetLastInstruction(movp,prev);
      regalloc:=FindRegAlloc(dstreg,tai(prev.Next));
      if assigned(regalloc) then
        begin
          asml.Remove(regalloc);
          asml.InsertBefore(regalloc,p);
          { adjust used regs }
          IncludeRegInUsedRegs(dstreg,UsedRegs);
        end;

      { finally get rid of the mov }
      taicpu(p).loadreg(0,dstreg);
      asml.remove(movp);
      movp.free;
    end;
  295. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  296. var
  297. hp1,hp2: tai;
  298. i, i2: longint;
  299. TmpUsedRegs: TAllUsedRegs;
  300. tempop: tasmop;
  301. function IsPowerOf2(const value: DWord): boolean; inline;
  302. begin
  303. Result:=(value and (value - 1)) = 0;
  304. end;
  305. begin
  306. result := false;
  307. case p.typ of
  308. ait_instruction:
  309. begin
  310. {
  311. change
  312. <op> reg,x,y
  313. cmp reg,#0
  314. into
  315. <op>s reg,x,y
  316. }
  317. { this optimization can applied only to the currently enabled operations because
  318. the other operations do not update all flags and FPC does not track flag usage }
  319. if ((taicpu(p).opcode in [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,
  320. A_ORR,A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_UMULL,A_UMULAL])
  321. ) and
  322. (taicpu(p).oppostfix = PF_None) and
  323. (taicpu(p).condition = C_None) and
  324. GetNextInstruction(p, hp1) and
  325. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  326. (taicpu(hp1).oper[1]^.typ = top_const) and
  327. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  328. (taicpu(hp1).oper[1]^.val = 0) and
  329. GetNextInstruction(hp1, hp2) and
  330. (tai(hp2).typ = ait_instruction) and
  331. { be careful here, following instructions could use other flags
  332. however after a jump fpc never depends on the value of flags }
  333. (taicpu(hp2).opcode = A_B) and
  334. (((taicpu(p).opcode in [A_ADC,A_ADD,A_SBC,A_SUB]) and
  335. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])) or
  336. (taicpu(hp2).condition in [C_EQ,C_NE])) and
  337. assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
  338. begin
  339. asml.insertbefore(tai_comment.Create(strpnew('Peephole OpCmp2OpS done')), p);
  340. taicpu(p).oppostfix:=PF_S;
  341. { move flag allocation if possible }
  342. GetLastInstruction(hp1, hp2);
  343. hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
  344. if assigned(hp2) then
  345. begin
  346. asml.Remove(hp2);
  347. asml.insertbefore(hp2, p);
  348. end;
  349. asml.remove(hp1);
  350. hp1.free;
  351. end
  352. else
  353. case taicpu(p).opcode of
  354. A_STR:
  355. begin
  356. { change
  357. str reg1,ref
  358. ldr reg2,ref
  359. into
  360. str reg1,ref
  361. mov reg2,reg1
  362. }
  363. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  364. (taicpu(p).oppostfix=PF_None) and
  365. GetNextInstruction(p,hp1) and
  366. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  367. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  368. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  369. begin
  370. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  371. begin
  372. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  373. asml.remove(hp1);
  374. hp1.free;
  375. end
  376. else
  377. begin
  378. taicpu(hp1).opcode:=A_MOV;
  379. taicpu(hp1).oppostfix:=PF_None;
  380. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  381. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  382. end;
  383. result := true;
  384. end
  385. { change
  386. str reg1,ref
  387. str reg2,ref
  388. into
  389. strd reg1,ref
  390. }
  391. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  392. (taicpu(p).oppostfix=PF_None) and
  393. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  394. GetNextInstruction(p,hp1) and
  395. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  396. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  397. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  398. { str ensures that either base or index contain no register, else ldr wouldn't
  399. use an offset either
  400. }
  401. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  402. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  403. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  404. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  405. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  406. begin
  407. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  408. taicpu(p).oppostfix:=PF_D;
  409. asml.remove(hp1);
  410. hp1.free;
  411. end;
  412. end;
  413. A_LDR:
  414. begin
  415. { change
  416. ldr reg1,ref
  417. ldr reg2,ref
  418. into ...
  419. }
  420. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  421. GetNextInstruction(p,hp1) and
  422. { ldrd is not allowed here }
  423. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  424. begin
  425. {
  426. ...
  427. ldr reg1,ref
  428. mov reg2,reg1
  429. }
  430. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  431. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  432. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  433. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  434. begin
  435. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  436. begin
  437. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  438. asml.remove(hp1);
  439. hp1.free;
  440. end
  441. else
  442. begin
  443. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  444. taicpu(hp1).opcode:=A_MOV;
  445. taicpu(hp1).oppostfix:=PF_None;
  446. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  447. end;
  448. result := true;
  449. end
  450. {
  451. ...
  452. ldrd reg1,ref
  453. }
  454. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  455. { ldrd does not allow any postfixes ... }
  456. (taicpu(p).oppostfix=PF_None) and
  457. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  458. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  459. { ldr ensures that either base or index contain no register, else ldr wouldn't
  460. use an offset either
  461. }
  462. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  463. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  464. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  465. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  466. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  467. begin
  468. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  469. taicpu(p).oppostfix:=PF_D;
  470. asml.remove(hp1);
  471. hp1.free;
  472. end;
  473. end;
  474. { Remove superfluous mov after ldr
  475. changes
  476. ldr reg1, ref
  477. mov reg2, reg1
  478. to
  479. ldr reg2, ref
  480. conditions are:
  481. * no ldrd usage
  482. * reg1 must be released after mov
  483. * mov can not contain shifterops
  484. * ldr+mov have the same conditions
  485. * mov does not set flags
  486. }
  487. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  488. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  489. end;
  490. A_MOV:
  491. begin
  492. { fold
  493. mov reg1,reg0, shift imm1
  494. mov reg1,reg1, shift imm2
  495. }
  496. if (taicpu(p).ops=3) and
  497. (taicpu(p).oper[2]^.typ = top_shifterop) and
  498. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  499. getnextinstruction(p,hp1) and
  500. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  501. (taicpu(hp1).ops=3) and
  502. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  503. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  504. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  505. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  506. begin
  507. { fold
  508. mov reg1,reg0, lsl 16
  509. mov reg1,reg1, lsr 16
  510. strh reg1, ...
  511. dealloc reg1
  512. to
  513. strh reg1, ...
  514. dealloc reg1
  515. }
  516. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  517. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  518. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  519. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  520. getnextinstruction(hp1,hp2) and
  521. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  522. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  523. begin
  524. CopyUsedRegs(TmpUsedRegs);
  525. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  526. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  527. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  528. begin
  529. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  530. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  531. asml.remove(p);
  532. asml.remove(hp1);
  533. p.free;
  534. hp1.free;
  535. p:=hp2;
  536. end;
  537. ReleaseUsedRegs(TmpUsedRegs);
  538. end
  539. { fold
  540. mov reg1,reg0, shift imm1
  541. mov reg1,reg1, shift imm2
  542. to
  543. mov reg1,reg0, shift imm1+imm2
  544. }
  545. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  546. { asr makes no use after a lsr, the asr can be foled into the lsr }
  547. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  548. begin
  549. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  550. { avoid overflows }
  551. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  552. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  553. SM_ROR:
  554. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  555. SM_ASR:
  556. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  557. SM_LSR,
  558. SM_LSL:
  559. begin
  560. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  561. InsertLLItem(p.previous, p.next, hp1);
  562. p.free;
  563. p:=hp1;
  564. end;
  565. else
  566. internalerror(2008072803);
  567. end;
  568. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  569. asml.remove(hp1);
  570. hp1.free;
  571. result := true;
  572. end
  573. { fold
  574. mov reg1,reg0, shift imm1
  575. mov reg1,reg1, shift imm2
  576. mov reg1,reg1, shift imm3 ...
  577. }
  578. else if getnextinstruction(hp1,hp2) and
  579. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  580. (taicpu(hp2).ops=3) and
  581. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  582. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  583. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  584. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  585. begin
  586. { mov reg1,reg0, lsl imm1
  587. mov reg1,reg1, lsr/asr imm2
  588. mov reg1,reg1, lsl imm3 ...
  589. if imm3<=imm1 and imm2>=imm3
  590. to
  591. mov reg1,reg0, lsl imm1
  592. mov reg1,reg1, lsr/asr imm2-imm3
  593. }
  594. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  595. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  596. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  597. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  598. begin
  599. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  600. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  601. asml.remove(hp2);
  602. hp2.free;
  603. result := true;
  604. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  605. begin
  606. asml.remove(hp1);
  607. hp1.free;
  608. end;
  609. end
  610. { mov reg1,reg0, lsr/asr imm1
  611. mov reg1,reg1, lsl imm2
  612. mov reg1,reg1, lsr/asr imm3 ...
  613. if imm3>=imm1 and imm2>=imm1
  614. to
  615. mov reg1,reg0, lsl imm2-imm1
  616. mov reg1,reg1, lsr/asr imm3 ...
  617. }
  618. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  619. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  620. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  621. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  622. begin
  623. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  624. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  625. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  626. asml.remove(p);
  627. p.free;
  628. p:=hp2;
  629. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  630. begin
  631. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  632. asml.remove(hp1);
  633. hp1.free;
  634. p:=hp2;
  635. end;
  636. result := true;
  637. end;
  638. end;
  639. end;
  640. { Change the common
  641. mov r0, r0, lsr #24
  642. and r0, r0, #255
  643. and remove the superfluous and
  644. This could be extended to handle more cases.
  645. }
  646. if (taicpu(p).ops=3) and
  647. (taicpu(p).oper[2]^.typ = top_shifterop) and
  648. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  649. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  650. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  651. getnextinstruction(p,hp1) and
  652. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  653. (taicpu(hp1).ops=3) and
  654. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  655. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  656. (taicpu(hp1).oper[2]^.typ = top_const) and
  657. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  658. For LSR #25 and an AndConst of 255 that whould go like this:
  659. 255 and ((2 shl (32-25))-1)
  660. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  661. LSR #25 and AndConst of 254:
  662. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  663. }
  664. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  665. begin
  666. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  667. asml.remove(hp1);
  668. hp1.free;
  669. end;
  670. {
  671. optimize
  672. mov rX, yyyy
  673. ....
  674. }
  675. if (taicpu(p).ops = 2) and
  676. GetNextInstruction(p,hp1) and
  677. (tai(hp1).typ = ait_instruction) then
  678. begin
  679. {
  680. This changes the very common
  681. mov r0, #0
  682. str r0, [...]
  683. mov r0, #0
  684. str r0, [...]
  685. and removes all superfluous mov instructions
  686. }
  687. if (taicpu(p).oper[1]^.typ = top_const) and
  688. (taicpu(hp1).opcode=A_STR) then
  689. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  690. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  691. GetNextInstruction(hp1, hp2) and
  692. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  693. (taicpu(hp2).ops = 2) and
  694. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  695. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  696. begin
  697. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  698. GetNextInstruction(hp2,hp1);
  699. asml.remove(hp2);
  700. hp2.free;
  701. if not assigned(hp1) then break;
  702. end
  703. {
  704. This removes the first mov from
  705. mov rX,...
  706. mov rX,...
  707. }
  708. else if taicpu(hp1).opcode=A_MOV then
  709. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  710. (taicpu(hp1).ops = 2) and
  711. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  712. { don't remove the first mov if the second is a mov rX,rX }
  713. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  714. begin
  715. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  716. asml.remove(p);
  717. p.free;
  718. p:=hp1;
  719. GetNextInstruction(hp1,hp1);
  720. if not assigned(hp1) then
  721. break;
  722. end;
  723. end;
  724. {
  725. change
  726. mov r1, r0
  727. add r1, r1, #1
  728. to
  729. add r1, r0, #1
  730. Todo: Make it work for mov+cmp too
  731. CAUTION! If this one is successful p might not be a mov instruction anymore!
  732. }
  733. if (taicpu(p).ops = 2) and
  734. (taicpu(p).oper[1]^.typ = top_reg) and
  735. (taicpu(p).oppostfix = PF_NONE) and
  736. GetNextInstruction(p, hp1) and
  737. (tai(hp1).typ = ait_instruction) and
  738. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  739. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  740. {MOV and MVN might only have 2 ops}
  741. (taicpu(hp1).ops = 3) and
  742. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  743. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  744. (taicpu(hp1).oper[1]^.typ = top_reg) and
  745. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  746. begin
  747. { When we get here we still don't know if the registers match}
  748. for I:=1 to 2 do
  749. {
  750. If the first loop was successful p will be replaced with hp1.
  751. The checks will still be ok, because all required information
  752. will also be in hp1 then.
  753. }
  754. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  755. begin
  756. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  757. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  758. if p<>hp1 then
  759. begin
  760. asml.remove(p);
  761. p.free;
  762. p:=hp1;
  763. end;
  764. end;
  765. end;
  766. { This folds shifterops into following instructions
  767. mov r0, r1, lsl #8
  768. add r2, r3, r0
  769. to
  770. add r2, r3, r1, lsl #8
  771. CAUTION! If this one is successful p might not be a mov instruction anymore!
  772. }
  773. if (taicpu(p).opcode = A_MOV) and
  774. (taicpu(p).ops = 3) and
  775. (taicpu(p).oper[1]^.typ = top_reg) and
  776. (taicpu(p).oper[2]^.typ = top_shifterop) and
  777. (taicpu(p).oppostfix = PF_NONE) and
  778. GetNextInstruction(p, hp1) and
  779. (tai(hp1).typ = ait_instruction) and
  780. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  781. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  782. (taicpu(hp1).oppostfix = PF_NONE) and
  783. (taicpu(hp1).condition = taicpu(p).condition) and
  784. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  785. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  786. A_CMP, A_CMN]) and
  787. (
  788. {Only ONE of the two src operands is allowed to match}
  789. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  790. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  791. ) then
  792. begin
  793. CopyUsedRegs(TmpUsedRegs);
  794. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  795. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  796. I2:=0
  797. else
  798. I2:=1;
  799. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  800. for I:=I2 to taicpu(hp1).ops-1 do
  801. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  802. begin
  803. { If the parameter matched on the second op from the RIGHT
  804. we have to switch the parameters, this will not happen for CMP
  805. were we're only evaluating the most right parameter
  806. }
  807. if I <> taicpu(hp1).ops-1 then
  808. begin
  809. {The SUB operators need to be changed when we swap parameters}
  810. case taicpu(hp1).opcode of
  811. A_SUB: tempop:=A_RSB;
  812. A_SBC: tempop:=A_RSC;
  813. A_RSB: tempop:=A_SUB;
  814. A_RSC: tempop:=A_SBC;
  815. else tempop:=taicpu(hp1).opcode;
  816. end;
  817. if taicpu(hp1).ops = 3 then
  818. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  819. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  820. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  821. else
  822. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  823. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  824. taicpu(p).oper[2]^.shifterop^);
  825. end
  826. else
  827. if taicpu(hp1).ops = 3 then
  828. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  829. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  830. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  831. else
  832. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  833. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  834. taicpu(p).oper[2]^.shifterop^);
  835. asml.insertbefore(hp2, p);
  836. asml.remove(p);
  837. asml.remove(hp1);
  838. p.free;
  839. hp1.free;
  840. p:=hp2;
  841. GetNextInstruction(p,hp1);
  842. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  843. break;
  844. end;
  845. ReleaseUsedRegs(TmpUsedRegs);
  846. end;
  847. {
  848. Often we see shifts and then a superfluous mov to another register
  849. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  850. }
  851. if (taicpu(p).opcode = A_MOV) and
  852. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  853. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  854. end;
  855. A_ADD,
  856. A_ADC,
  857. A_RSB,
  858. A_RSC,
  859. A_SUB,
  860. A_SBC,
  861. A_AND,
  862. A_BIC,
  863. A_EOR,
  864. A_ORR,
  865. A_MLA,
  866. A_MUL:
  867. begin
  868. {
  869. change
  870. and reg2,reg1,const1
  871. and reg2,reg2,const2
  872. to
  873. and reg2,reg1,(const1 and const2)
  874. }
  875. if (taicpu(p).opcode = A_AND) and
  876. (taicpu(p).oper[1]^.typ = top_reg) and
  877. (taicpu(p).oper[2]^.typ = top_const) and
  878. GetNextInstruction(p, hp1) and
  879. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  880. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  881. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  882. (taicpu(hp1).oper[2]^.typ = top_const) then
  883. begin
  884. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  885. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  886. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  887. asml.remove(hp1);
  888. hp1.free;
  889. end;
  890. {
  891. change
  892. add reg1, ...
  893. mov reg2, reg1
  894. to
  895. add reg2, ...
  896. }
  897. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  898. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  899. end;
  900. A_CMP:
  901. begin
  902. {
  903. change
  904. cmp reg,const1
  905. moveq reg,const1
  906. movne reg,const2
  907. to
  908. cmp reg,const1
  909. movne reg,const2
  910. }
  911. if (taicpu(p).oper[1]^.typ = top_const) and
  912. GetNextInstruction(p, hp1) and
  913. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  914. (taicpu(hp1).oper[1]^.typ = top_const) and
  915. GetNextInstruction(hp1, hp2) and
  916. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  917. (taicpu(hp1).oper[1]^.typ = top_const) then
  918. begin
  919. RemoveRedundantMove(p, hp1, asml);
  920. RemoveRedundantMove(p, hp2, asml);
  921. end;
  922. end;
  923. end;
  924. end;
  925. end;
  926. end;
  { Instructions modifying the CPSR may only be the last instruction of a
    sequence that is turned into conditional code.

    Returns true if p is an instruction and at least one of the following holds:
      - its opcode is BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF or CMFE,
      - its first operand is the register PC,
      - it carries the S postfix.
    Returns false for anything that is not an instruction. }
  function MustBeLast(p : tai) : boolean;
    var
      insn : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      insn:=taicpu(p);
      if insn.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}] then
        Result:=true
      else if (insn.ops>=1) and
              (insn.oper[0]^.typ=top_reg) and
              (insn.oper[0]^.reg=NR_PC) then
        { first operand is the program counter }
        Result:=true
      else
        Result:=insn.oppostfix=PF_S;
    end;
  { Second peephole pass: walks the block from BlockStart to BlockEnd and
    replaces short forward conditional branches by conditionally executed
    instructions.

    Pattern 1
        Bcc xxx
        <1..4 instructions>
      xxx:
    The skipped instructions get the inverse condition of the branch and the
    branch itself is removed.

    Pattern 2 (only tried when pattern 1 did not stop at a MustBeLast
    instruction)
        Bcc xxx
        <instructions 1>
        B yyy
      xxx:
        <instructions 2>
      yyy:
    <instructions 1> get the inverse condition, <instructions 2> get the
    original condition, and both branches are removed.

    After a successful replacement scanning continues with the first
    instruction that followed the removed Bcc. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                         { check for
                                Bxx   xxx
                             <several instructions>
                             xxx:
                         }
                         { l counts the instructions that could be made
                           conditional; WasLast records that the scan was cut
                           short by an instruction that must stay last }
                         l:=0;
                         WasLast:=False;
                         GetNextInstruction(p, hp1);
                         while assigned(hp1) and
                           (l<=4) and
                           CanBeCond(hp1) and
                           { stop on labels }
                           not(hp1.typ=ait_label) do
                           begin
                              inc(l);
                              if MustBeLast(hp1) then
                                begin
                                  WasLast:=True;
                                  GetNextInstruction(hp1,hp1);
                                  break;
                                end
                              else
                                GetNextInstruction(hp1,hp1);
                           end;
                         if assigned(hp1) then
                           begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      { hp2 remembers the Bcc, p moves on to
                                        the first skipped instruction }
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        if hp1.typ=ait_instruction then
                                          taicpu(hp1).condition:=condition;
                                        if MustBeLast(hp1) then
                                          begin
                                            GetNextInstruction(hp1,hp1);
                                            break;
                                          end
                                        else
                                          GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCond(hp1)) or
                                        (hp1.typ=ait_label);
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                { do not perform further optimizations if there is an
                                  instruction in block #1 which can not be optimized.
                                }
                                if not WasLast then
                                begin
                                   { check further for
                                          Bcc   xxx
                                          <several instructions 1>
                                          B   yyy
                                   xxx:
                                          <several instructions 2>
                                   yyy:
                                   }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                     begin
                                       l:=0;
                                       { skip hp1 to <several moves 2> }
                                       GetNextInstruction(hp1, hp1);
                                       while assigned(hp1) and
                                         CanBeCond(hp1) do
                                         begin
                                           inc(l);
                                           GetNextInstruction(hp1, hp1);
                                         end;
                                       { hp1 points to yyy: }
                                       if assigned(hp1) and
                                         FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                         begin
                                            condition:=inverse_cond(taicpu(p).condition);
                                            GetNextInstruction(p,hp1);
                                            { hp3 remembers the Bcc, p moves on to
                                              the first instruction of block 1 }
                                            hp3:=p;
                                            p:=hp1;
                                            repeat
                                              if hp1.typ=ait_instruction then
                                                taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCond(hp1));
                                            { hp2 is still at jmp yyy }
                                            GetNextInstruction(hp2,hp1);
                                            { hp1 is now at xxx: }
                                            condition:=inverse_cond(condition);
                                            GetNextInstruction(hp1,hp1);
                                            { hp1 is now at <several movs 2> }
                                            repeat
                                              { The loop body runs at least once, so make sure
                                                hp1 really is an instruction before treating it
                                                as a taicpu (same guard as in the loops above);
                                                otherwise e.g. a label would be overwritten when
                                                block 2 is empty }
                                              if hp1.typ=ait_instruction then
                                                taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCond(hp1)) or
                                              (hp1.typ=ait_label);
                                            {
                                            asml.remove(hp1.next)
                                            hp1.next.free;
                                            asml.remove(hp1);
                                            hp1.free;
                                            }
                                            { remove Bcc }
                                            tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp3);
                                            hp3.free;
                                            { remove jmp }
                                            tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp2);
                                            hp2.free;
                                            continue;
                                         end;
                                     end;
                                end;
                           end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Tells whether Reg is referenced by p1.

    A BL instruction is reported as referencing every register, regardless of
    Reg; all other cases are answered by the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    var
      IsBranchAndLink : Boolean;
    begin
      IsBranchAndLink:=(p1.typ = ait_instruction) and (taicpu(p1).opcode = A_BL);
      if not IsBranchAndLink then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        Result:=true;
    end;
  const
    { Set of opcodes which write to memory or might do so. It is used by the
      scheduler below to decide whether a load may be moved in front of the
      preceding instruction. }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branches and breakpoint }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { STR variants and STM }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { remaining store opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD];
  { Adjusts the register live information when swapping the two instructions
    p and hp1; they must follow one after the other (p directly before hp1).

    - If the live range of a register referenced by p starts at p and hp1
      references that register as well, the live start is moved to hp1.
    - If the live range of a register referenced by hp1 ends at hp1 and p
      references that register as well, the live end is moved to p.

    Registers are collected from register operands, from base and index of
    reference operands, from the rs register of shifter operands and from
    register sets (R0..R15). }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { moves the live end of reg from hp1 to p if p references reg, too;
      NR_NO is ignored }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { moves the live start of reg from p to hp1 if hp1 references reg, too;
      NR_NO is ignored }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop handles live ends, so the shifter register has to be
              checked with CheckLiveEnd like all other operands of hp1 }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre register allocation scheduling pass over BlockStart..BlockEnd.

    Looks for three consecutive instructions
        p
        hp1   (LDR, LDRB, LDRH, LDRSB or LDRSH)
        hp2   (uses the register loaded by hp1)
    and moves the load in front of p, giving hp1, p, hp2. The swap is only
    done if
      - p cannot disturb the load: it is neither in opcode_could_mem_write nor
        modifies PC, or it is a store and the load is PC relative / a plain
        symbol access, or both accesses use the same base register without
        index and with offsets far enough apart,
      - p does not reference the loaded register,
      - p and hp1 have the same condition,
      - p modifies neither base nor index register of the load.
    Register deallocations belonging to p which sit directly before or after
    it are moved together with p, and the live range information is updated
    through SwapRegLive.

    p is used as the running cursor (it is reset to BlockStart on entry).
    The function always returns true. }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;

  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }
    var
      hp1,hp2,hp3,hp4,hp5 : tai;
      list : TAsmList;
    begin
      result:=true;

      { list temporarily holds p and its deallocations while they are moved }
      list:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd Do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,hp1) and
            (hp1.typ=ait_instruction) and
            (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            { for now we don't reschedule if the previous instruction changes potentially a memory location }
            ( (not(taicpu(p).opcode in opcode_could_mem_write) and
               not(RegModifiedByInstruction(NR_PC,p))
              ) or
              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
               ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
                (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
                 (taicpu(hp1).oper[1]^.ref^.offset=0)
                )
               ) or
               { try to prove that the memory accesses don't overlap }
               ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
                (taicpu(p).oppostfix=PF_None) and
                (taicpu(hp1).oppostfix=PF_None) and
                (taicpu(p).oper[1]^.ref^.index=NR_NO) and
                (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
                { get operand sizes and check if the offset distance is large enough to ensure no overlap }
                (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
               )
              )
            ) and
            GetNextInstruction(hp1,hp2) and
            (hp2.typ=ait_instruction) and
            { loaded register used by next instruction? }
            (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
            { loaded register not used by previous instruction? }
            not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
            { same condition? }
            (taicpu(p).condition=taicpu(hp1).condition) and
            { first instruction might not change the register used as base }
            ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
             not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
            ) and
            { first instruction might not change the register used as index }
            ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
             not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember the neighbours of p before it is unlinked }
              hp3:=tai(p.Previous);
              hp5:=tai(p.next);
              asml.Remove(p);
              { if there is a reg. dealloc instruction associated with p, move it together with p }

              { before the instruction? }
              while assigned(hp3) and (hp3.typ<>ait_instruction) do
                begin
                  if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
                    RegInInstruction(tai_regalloc(hp3).reg,p) then
                    begin
                      hp4:=hp3;
                      hp3:=tai(hp3.Previous);
                      asml.Remove(hp4);
                      list.Concat(hp4);
                    end
                  else
                    hp3:=tai(hp3.Previous);
                end;

              list.Concat(p);
              SwapRegLive(taicpu(p),taicpu(hp1));

              { after the instruction? }
              while assigned(hp5) and (hp5.typ<>ait_instruction) do
                begin
                  if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
                    RegInInstruction(tai_regalloc(hp5).reg,p) then
                    begin
                      hp4:=hp5;
                      hp5:=tai(hp5.next);
                      asml.Remove(hp4);
                      list.Concat(hp4);
                    end
                  else
                    hp5:=tai(hp5.Next);
                end;

              { re-insert in the new order: hp1, <list with p>, hp2 }
              asml.Remove(hp1);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(hp1,hp2);
              asml.InsertListBefore(hp2,list);
              p:=tai(p.next)
            end
          else if (p.typ=ait_instruction) and assigned(hp1) then
            { nothing to swap: continue with the following instruction }
            p:=hp1
          else
            { not an instruction, or no following instruction was found (hp1
              is nil then): advance node by node so that p never becomes nil
              before BlockEnd is reached }
            p:=tai(p.next);
        end;
      list.Free;
    end;
  { Second peephole pass of TCpuThumb2AsmOptimizer.
    Placeholder: it does not perform any optimization yet. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  { Unit initialization: assigns the optimizer class and the pre register
    allocation scheduler class implemented in this unit to casmoptimizer and
    cpreregallocscheduler. }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
  end.