aoptcpu.pas 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  { ARM peephole optimizer class, derived from TAsmOptimizer.
    It overrides both peephole passes and RegInInstruction and adds a few
    register-liveness helpers used by the ARM peephole rules. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { no constructor is declared here; the inherited one is used }
    { first peephole pass for a single list entry p; returns a boolean
      (presumably "something was changed" - confirm against TAsmOptimizer) }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { second peephole pass }
    procedure PeepHoleOptPass2;override;
    Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
    { tries to fold a "mov movp.dest,p.dest" that follows p into p itself;
      optimizer is the rule name used in the emitted assembler comment }
    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    { reports whether reg still holds a needed value after instruction p }
    function RegUsedAfterInstruction(reg: Tregister; p: tai;
                                     var AllUsedRegs: TAllUsedRegs): Boolean;
    { Starting after Current, advances Next until it reaches an entry that
      is not an instruction, an instruction that involves reg (or the PC),
      or a call/jump.
      The result is false when the end of the instruction stream is reached
      first; what Next holds in that case depends on GetNextInstruction
      (presumably nil - confirm against aoptobj). }
    Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
  End;
  { ARM instruction scheduler class, derived from TAsmScheduler.
    NOTE(review): the name suggests it runs before register allocation -
    confirm against the caller. }
  TCpuPreRegallocScheduler = class(TAsmScheduler)
    { scheduler pass for a single list entry p }
    function SchedulerPass1Cpu(var p: tai): boolean;override;
    { helper operating on the two instructions p and hp1; presumably fixes up
      register live ranges when they are swapped - implementation not shown here }
    procedure SwapRegLive(p, hp1: taicpu);
  end;
  { Variant of TCpuAsmOptimizer that only replaces the second peephole pass.
    NOTE(review): judging by the name this is the Thumb-2 flavour - confirm. }
  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
    { no constructor is declared here; the inherited one is used }
    procedure PeepHoleOptPass2;override;
  End;
  47. Implementation
  48. uses
  49. cutils,verbose,globals,
  50. systems,
  51. cpuinfo,
  52. cgobj,cgutils,procinfo,
  53. aasmbase,aasmdata;
  { Reports whether p is an instruction that could still be given a
    condition code: it has to be an instruction that carries no condition
    yet, it must not be PLD, and a BLX only qualifies when its first operand
    is a register. }
  function CanBeCond(p : tai) : boolean;
    begin
      result:=false;
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).condition<>C_None then
        exit;
      case taicpu(p).opcode of
        A_PLD:
          { never conditional, keep result=false }
          ;
        A_BLX:
          result:=taicpu(p).oper[0]^.typ=top_reg;
        else
          result:=true;
      end;
    end;
  { Field-by-field comparison of two memory references.
    Returns true only if every compared field of r1 equals the matching
    field of r2. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { displacement and address registers }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic parts }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      { ARM specific addressing details }
      if (r1.signindex<>r2.signindex) or
         (r1.shiftimm<>r2.shiftimm) or
         (r1.addressmode<>r2.addressmode) or
         (r1.shiftmode<>r2.shiftmode) then
        exit;
      result:=true;
    end;
  { Checks whether instr is an instruction with opcode op.
    cond and postfix act as filters: an empty set accepts anything, a
    non-empty set requires the instruction's condition respectively
    postfix to be a member of it. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      { the type test has to come first, the casts below rely on it }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands for equality.
    Operands of different kinds never match. Constants, registers,
    condition codes and references are compared by value; every other
    operand kind is reported as not matching. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result:=false;
      if oper1.typ=oper2.typ then
        case oper1.typ of
          top_reg:
            result:=oper1.reg=oper2.reg;
          top_const:
            result:=oper1.val=oper2.val;
          top_ref:
            result:=RefsEqual(oper1.ref^,oper2.ref^);
          top_conditioncode:
            result:=oper1.cc=oper2.cc;
        end;
    end;
  { Checks whether oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Drops movp from asml when it is a move executed on "equal" that writes
    the very constant the compare cmpp just tested the same register
    against. An assembler comment documenting the removal is left behind.
    The caller has to pass a compare with a register as operand 0 and a
    constant as operand 1, and a move of a constant into a register. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpinstr,
      movinstr : taicpu;
    begin
      cmpinstr:=taicpu(cmpp);
      movinstr:=taicpu(movp);
      if movinstr.condition<>C_EQ then
        exit;
      if cmpinstr.oper[0]^.reg<>movinstr.oper[0]^.reg then
        exit;
      if cmpinstr.oper[1]^.val<>movinstr.oper[1]^.val then
        exit;
      { the comment is linked in behind movp before movp itself is unlinked }
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Reports whether instruction hp writes a new value into reg.

    reg: register to look for
    hp : list entry to inspect; nil or a non-instruction yields false

    Besides the regular destination operand (oper[0]) this covers
    base-register write-back of pre-/post-indexed addressing, the second
    destination of the long multiplies, the second register of LDRD and the
    register set of LDM.

    STR is handled separately: its oper[0] is the register being stored,
    i.e. it is only read. The only register a store can modify is the base
    register through write-back. Treating oper[0] of a STR as a destination
    would make callers such as RegUsedAfterInstruction believe the stored
    register is dead after the store. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);

      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { A store never writes oper[0]; only a pre-/post-indexed base
          register gets a new value, so decide here and do not fall through
          to the generic oper[0] check below }
        A_STR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            exit;
          end;
        { Pre-/post-indexed loads change their base register as well }
        A_LDR:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ=top_ref) and
            (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[1]^.ref^.base = reg);
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        {Loads to oper2 from coprocessor}
        {
        MCR/MRC is currently not supported in FPC
        A_MRC:
          regLoadedWithNewValue :=
            (p.oper[2]^.typ = top_reg) and
            (p.oper[2]^.reg = reg);
        }
        {Loads to all register in the registerset}
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      case p.oper[0]^.typ of
        { the regular case: oper[0] is the destination register }
        top_reg:
          regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following oper[0] }
            (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
        {LDM/STM might write a new value to their index register}
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (Safe) heuristic telling whether ref is known to be aligned to 8 bytes.
    Only answers true for the abi_eabi, abi_armeb and abi_eabihf ABIs, and
    only for two reference shapes:
      * R13 used as base or index with a non-negative offset that is a
        multiple of 8
      * R11 used as base or index while R11 is the frame pointer of the
        current procedure, with a non-positive offset such that offset+4
        is a multiple of 8 }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;
      { relative to R13 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;
      { relative to R11: when using NR_R11, it has always a value of <qword align>+4 }
      result:=(ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Reports whether instruction hp reads reg through one of its operands.
    nil or a non-instruction yields false.

    Normally oper[0] is skipped; for stores, LDM/STM, PLD, compares/tests,
    branches and the accumulating long multiplies the scan starts at
    oper[0]. A register operand, a member of a register set, the shift
    register of a shifter operand and the base/index of a reference all
    count as a read. For STRD the register following oper[0] counts too. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      firstop,
      opidx: longint;
    begin
      result:=false;
      if not assigned(hp) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);

      {For these instructions we have to start on oper[0]}
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;

      for opidx:=firstop to instr.ops-1 do
        begin
          case instr.oper[opidx]^.typ of
            top_reg:
              result:=(instr.oper[opidx]^.reg=reg) or
                { STRD }
                ((opidx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              result:=getsupreg(reg) in instr.oper[opidx]^.regset^;
            top_shifterop:
              result:=instr.oper[opidx]^.shifterop^.rs=reg;
            top_ref:
              result:=(instr.oper[opidx]^.ref^.base=reg) or
                (instr.oper[opidx]^.ref^.index=reg);
          end;
          { stop at the first operand that reads reg }
          if result then
            exit;
        end;
    end;
  { Reports whether reg is still needed after instruction p.
    The used-register state for reg's register type in AllUsedRegs is first
    advanced to the entry following p. reg counts as used when that state
    marks it used, p itself does not overwrite it, and one of these holds:
    no further instruction follows, the following instruction reads reg, or
    the following instruction does not overwrite reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
                                                    var AllUsedRegs: TAllUsedRegs): Boolean;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      if regLoadedWithNewValue(reg,p) then
        exit;
      { from here on p denotes the instruction following the original p }
      if not GetNextInstruction(p,p) then
        Result:=true
      else
        Result:=instructionLoadsFromReg(reg,p) or
          not regLoadedWithNewValue(reg,p);
    end;
  { Walks forward from Current via GetNextInstruction and stops at the first
    entry that
      * is not an instruction, or
      * involves reg, or
      * is a call or jump, or
      * involves the program counter.
    Next receives that entry. The result is the result of the last
    GetNextInstruction call, i.e. false when the walk ran out of entries. }
  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    var Next: tai; reg: TRegister): Boolean;
    begin
      Next:=Current;
      while true do
        begin
          Result:=GetNextInstruction(Next,Next);
          if not Result then
            break;
          if Next.typ<>ait_instruction then
            break;
          if RegInInstruction(reg,Next) then
            break;
          if is_calljmp(taicpu(Next).opcode) then
            break;
          if RegInInstruction(NR_PC,Next) then
            break;
        end;
    end;
  { Folds
        <op> reg1, ...      (p)
        mov  reg2, reg1     (movp)
    into
        <op> reg2, ...
    by redirecting the destination of p to reg2 and deleting movp.

    p         : instruction whose oper[0] is the register read by the mov
    movp      : candidate mov; nothing happens unless it is a plain two-operand
                "mov reg2,reg1" with the same condition as p
    optimizer : rule name, used only in the emitted assembler comment

    The fold is only done when a deallocation of reg1 is found after movp,
    i.e. reg1 is released after the mov. The register (de)allocation
    markers are adjusted along the way. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      alloc,
      dealloc : tai_regalloc;
      hp1 : tai;
    begin
      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
         (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
         { don't mess with moves to pc }
         (taicpu(movp).oper[0]^.reg<>NR_PC) and
         { don't mess with moves to lr }
         (taicpu(movp).oper[0]^.reg<>NR_R14) and
         { the destination register of the mov must not be used between p and movp }
         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
         {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
         not (
           (taicpu(p).opcode in [A_MLA, A_MUL]) and
           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
         ) then
        begin
          { reg1 has to be released after the mov, otherwise its value is still needed }
          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
          if assigned(dealloc) then
            begin
              asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);

              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                and remove it if possible }
              { NOTE(review): hp1 is dereferenced below without checking the
                result of GetLastInstruction - confirm it cannot be nil when p
                is the first instruction of the list }
              GetLastInstruction(p,hp1);
              asml.Remove(dealloc);
              alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  { allocation and deallocation of reg1 both vanish }
                  asml.Remove(alloc);
                  alloc.free;
                  dealloc.free;
                end
              else
                { no allocation found: keep reg1 released, but right after p }
                asml.InsertAfter(dealloc,p);

              { try to move the allocation of the target register }
              GetLastInstruction(movp,hp1);
              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
              if assigned(alloc) then
                begin
                  { reg2 is now written by p already, so allocate it before p }
                  asml.Remove(alloc);
                  asml.InsertBefore(alloc,p);
                  { adjust used regs }
                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
                end;

              { finally get rid of the mov }
              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
              asml.remove(movp);
              movp.free;
            end;
        end;
    end;
  295. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  296. var
  297. hp1,hp2: tai;
  298. i, i2: longint;
  299. TmpUsedRegs: TAllUsedRegs;
  300. tempop: tasmop;
  301. function IsPowerOf2(const value: DWord): boolean; inline;
  302. begin
  303. Result:=(value and (value - 1)) = 0;
  304. end;
  305. begin
  306. result := false;
  307. case p.typ of
  308. ait_instruction:
  309. begin
  310. (* optimization proved not to be safe, see tw4768.pp
  311. {
  312. change
  313. <op> reg,x,y
  314. cmp reg,#0
  315. into
  316. <op>s reg,x,y
  317. }
  318. { this optimization can applied only to the currently enabled operations because
  319. the other operations do not update all flags and FPC does not track flag usage }
  320. if (taicpu(p).opcode in [A_ADC,A_ADD,A_SUB {A_UDIV,A_SDIV,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND}]) and
  321. (taicpu(p).oppostfix = PF_None) and
  322. (taicpu(p).condition = C_None) and
  323. GetNextInstruction(p, hp1) and
  324. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  325. (taicpu(hp1).oper[1]^.typ = top_const) and
  326. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  327. (taicpu(hp1).oper[1]^.val = 0) { and
  328. GetNextInstruction(hp1, hp2) and
  329. (tai(hp2).typ = ait_instruction) and
  330. // be careful here, following instructions could use other flags
  331. // however after a jump fpc never depends on the value of flags
  332. (taicpu(hp2).opcode = A_B) and
  333. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])} then
  334. begin
  335. taicpu(p).oppostfix:=PF_S;
  336. asml.remove(hp1);
  337. hp1.free;
  338. end
  339. else
  340. *)
  341. case taicpu(p).opcode of
  342. A_STR:
  343. begin
  344. { change
  345. str reg1,ref
  346. ldr reg2,ref
  347. into
  348. str reg1,ref
  349. mov reg2,reg1
  350. }
  351. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  352. (taicpu(p).oppostfix=PF_None) and
  353. GetNextInstruction(p,hp1) and
  354. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  355. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  356. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  357. begin
  358. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  359. begin
  360. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  361. asml.remove(hp1);
  362. hp1.free;
  363. end
  364. else
  365. begin
  366. taicpu(hp1).opcode:=A_MOV;
  367. taicpu(hp1).oppostfix:=PF_None;
  368. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  369. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  370. end;
  371. result := true;
  372. end
  373. { change
  374. str reg1,ref
  375. str reg2,ref
  376. into
  377. strd reg1,ref
  378. }
  379. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  380. (taicpu(p).oppostfix=PF_None) and
  381. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  382. GetNextInstruction(p,hp1) and
  383. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  384. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  385. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  386. { str ensures that either base or index contain no register, else ldr wouldn't
  387. use an offset either
  388. }
  389. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  390. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  391. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  392. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  393. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  394. begin
  395. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  396. taicpu(p).oppostfix:=PF_D;
  397. asml.remove(hp1);
  398. hp1.free;
  399. end;
  400. end;
  401. A_LDR:
  402. begin
  403. { change
  404. ldr reg1,ref
  405. ldr reg2,ref
  406. into ...
  407. }
  408. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  409. GetNextInstruction(p,hp1) and
  410. { ldrd is not allowed here }
  411. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  412. begin
  413. {
  414. ...
  415. ldr reg1,ref
  416. mov reg2,reg1
  417. }
  418. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  419. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  420. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  421. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  422. begin
  423. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  424. begin
  425. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  426. asml.remove(hp1);
  427. hp1.free;
  428. end
  429. else
  430. begin
  431. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  432. taicpu(hp1).opcode:=A_MOV;
  433. taicpu(hp1).oppostfix:=PF_None;
  434. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  435. end;
  436. result := true;
  437. end
  438. {
  439. ...
  440. ldrd reg1,ref
  441. }
  442. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  443. { ldrd does not allow any postfixes ... }
  444. (taicpu(p).oppostfix=PF_None) and
  445. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  446. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  447. { ldr ensures that either base or index contain no register, else ldr wouldn't
  448. use an offset either
  449. }
  450. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  451. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  452. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  453. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  454. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  455. begin
  456. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  457. taicpu(p).oppostfix:=PF_D;
  458. asml.remove(hp1);
  459. hp1.free;
  460. end;
  461. end;
  462. { Remove superfluous mov after ldr
  463. changes
  464. ldr reg1, ref
  465. mov reg2, reg1
  466. to
  467. ldr reg2, ref
  468. conditions are:
  469. * no ldrd usage
  470. * reg1 must be released after mov
  471. * mov can not contain shifterops
  472. * ldr+mov have the same conditions
  473. * mov does not set flags
  474. }
  475. if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  476. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  477. end;
  478. A_MOV:
  479. begin
  480. { fold
  481. mov reg1,reg0, shift imm1
  482. mov reg1,reg1, shift imm2
  483. }
  484. if (taicpu(p).ops=3) and
  485. (taicpu(p).oper[2]^.typ = top_shifterop) and
  486. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  487. getnextinstruction(p,hp1) and
  488. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  489. (taicpu(hp1).ops=3) and
  490. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  491. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  492. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  493. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  494. begin
  495. { fold
  496. mov reg1,reg0, lsl 16
  497. mov reg1,reg1, lsr 16
  498. strh reg1, ...
  499. dealloc reg1
  500. to
  501. strh reg1, ...
  502. dealloc reg1
  503. }
  504. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  505. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  506. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  507. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  508. getnextinstruction(hp1,hp2) and
  509. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  510. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  511. begin
  512. CopyUsedRegs(TmpUsedRegs);
  513. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  514. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  515. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  516. begin
  517. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  518. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  519. asml.remove(p);
  520. asml.remove(hp1);
  521. p.free;
  522. hp1.free;
  523. p:=hp2;
  524. end;
  525. ReleaseUsedRegs(TmpUsedRegs);
  526. end
  527. { fold
  528. mov reg1,reg0, shift imm1
  529. mov reg1,reg1, shift imm2
  530. to
  531. mov reg1,reg0, shift imm1+imm2
  532. }
  533. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  534. { asr makes no use after a lsr, the asr can be foled into the lsr }
  535. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  536. begin
  537. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  538. { avoid overflows }
  539. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  540. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  541. SM_ROR:
  542. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  543. SM_ASR:
  544. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  545. SM_LSR,
  546. SM_LSL:
  547. begin
  548. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  549. InsertLLItem(p.previous, p.next, hp1);
  550. p.free;
  551. p:=hp1;
  552. end;
  553. else
  554. internalerror(2008072803);
  555. end;
  556. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  557. asml.remove(hp1);
  558. hp1.free;
  559. result := true;
  560. end
  561. { fold
  562. mov reg1,reg0, shift imm1
  563. mov reg1,reg1, shift imm2
  564. mov reg1,reg1, shift imm3 ...
  565. }
  566. else if getnextinstruction(hp1,hp2) and
  567. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  568. (taicpu(hp2).ops=3) and
  569. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  570. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  571. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  572. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  573. begin
  574. { mov reg1,reg0, lsl imm1
  575. mov reg1,reg1, lsr/asr imm2
  576. mov reg1,reg1, lsl imm3 ...
  577. if imm3<=imm1 and imm2>=imm3
  578. to
  579. mov reg1,reg0, lsl imm1
  580. mov reg1,reg1, lsr/asr imm2-imm3
  581. }
  582. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  583. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  584. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  585. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  586. begin
  587. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  588. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  589. asml.remove(hp2);
  590. hp2.free;
  591. result := true;
  592. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  593. begin
  594. asml.remove(hp1);
  595. hp1.free;
  596. end;
  597. end
  598. { mov reg1,reg0, lsr/asr imm1
  599. mov reg1,reg1, lsl imm2
  600. mov reg1,reg1, lsr/asr imm3 ...
  601. if imm3>=imm1 and imm2>=imm1
  602. to
  603. mov reg1,reg0, lsl imm2-imm1
  604. mov reg1,reg1, lsr/asr imm3 ...
  605. }
  606. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  607. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  608. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  609. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  610. begin
  611. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  612. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  613. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  614. asml.remove(p);
  615. p.free;
  616. p:=hp2;
  617. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  618. begin
  619. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  620. asml.remove(hp1);
  621. hp1.free;
  622. p:=hp2;
  623. end;
  624. result := true;
  625. end;
  626. end;
  627. end;
  628. { Change the common
  629. mov r0, r0, lsr #24
  630. and r0, r0, #255
  631. and remove the superfluous and
  632. This could be extended to handle more cases.
  633. }
  634. if (taicpu(p).ops=3) and
  635. (taicpu(p).oper[2]^.typ = top_shifterop) and
  636. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  637. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  638. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  639. getnextinstruction(p,hp1) and
  640. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  641. (taicpu(hp1).ops=3) and
  642. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  643. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  644. (taicpu(hp1).oper[2]^.typ = top_const) and
  645. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  646. For LSR #25 and an AndConst of 255 that whould go like this:
  647. 255 and ((2 shl (32-25))-1)
  648. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  649. LSR #25 and AndConst of 254:
  650. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  651. }
  652. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  653. begin
  654. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  655. asml.remove(hp1);
  656. hp1.free;
  657. end;
  658. {
  659. optimize
  660. mov rX, yyyy
  661. ....
  662. }
  663. if (taicpu(p).ops = 2) and
  664. GetNextInstruction(p,hp1) and
  665. (tai(hp1).typ = ait_instruction) then
  666. begin
  667. {
  668. This changes the very common
  669. mov r0, #0
  670. str r0, [...]
  671. mov r0, #0
  672. str r0, [...]
  673. and removes all superfluous mov instructions
  674. }
  675. if (taicpu(p).oper[1]^.typ = top_const) and
  676. (taicpu(hp1).opcode=A_STR) then
  677. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  678. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  679. GetNextInstruction(hp1, hp2) and
  680. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  681. (taicpu(hp2).ops = 2) and
  682. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  683. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  684. begin
  685. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  686. GetNextInstruction(hp2,hp1);
  687. asml.remove(hp2);
  688. hp2.free;
  689. if not assigned(hp1) then break;
  690. end
  691. {
  692. This removes the first mov from
  693. mov rX,...
  694. mov rX,...
  695. }
  696. else if taicpu(hp1).opcode=A_MOV then
  697. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  698. (taicpu(hp1).ops = 2) and
  699. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  700. { don't remove the first mov if the second is a mov rX,rX }
  701. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  702. begin
  703. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  704. asml.remove(p);
  705. p.free;
  706. p:=hp1;
  707. GetNextInstruction(hp1,hp1);
  708. if not assigned(hp1) then
  709. break;
  710. end;
  711. end;
  712. {
  713. change
  714. mov r1, r0
  715. add r1, r1, #1
  716. to
  717. add r1, r0, #1
  718. Todo: Make it work for mov+cmp too
  719. CAUTION! If this one is successful p might not be a mov instruction anymore!
  720. }
  721. if (taicpu(p).ops = 2) and
  722. (taicpu(p).oper[1]^.typ = top_reg) and
  723. (taicpu(p).oppostfix = PF_NONE) and
  724. GetNextInstruction(p, hp1) and
  725. (tai(hp1).typ = ait_instruction) and
  726. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  727. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  728. {MOV and MVN might only have 2 ops}
  729. (taicpu(hp1).ops = 3) and
  730. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  731. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  732. (taicpu(hp1).oper[1]^.typ = top_reg) and
  733. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  734. begin
  735. { When we get here we still don't know if the registers match}
  736. for I:=1 to 2 do
  737. {
  738. If the first loop was successful p will be replaced with hp1.
  739. The checks will still be ok, because all required information
  740. will also be in hp1 then.
  741. }
  742. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  743. begin
  744. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  745. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  746. if p<>hp1 then
  747. begin
  748. asml.remove(p);
  749. p.free;
  750. p:=hp1;
  751. end;
  752. end;
  753. end;
  754. { This folds shifterops into following instructions
  755. mov r0, r1, lsl #8
  756. add r2, r3, r0
  757. to
  758. add r2, r3, r1, lsl #8
  759. CAUTION! If this one is successful p might not be a mov instruction anymore!
  760. }
  761. if (taicpu(p).opcode = A_MOV) and
  762. (taicpu(p).ops = 3) and
  763. (taicpu(p).oper[1]^.typ = top_reg) and
  764. (taicpu(p).oper[2]^.typ = top_shifterop) and
  765. (taicpu(p).oppostfix = PF_NONE) and
  766. GetNextInstruction(p, hp1) and
  767. (tai(hp1).typ = ait_instruction) and
  768. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  769. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  770. (taicpu(hp1).oppostfix = PF_NONE) and
  771. (taicpu(hp1).condition = taicpu(p).condition) and
  772. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  773. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  774. A_CMP, A_CMN]) and
  775. (
  776. {Only ONE of the two src operands is allowed to match}
  777. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  778. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  779. ) then
  780. begin
  781. CopyUsedRegs(TmpUsedRegs);
  782. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  783. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  784. I2:=0
  785. else
  786. I2:=1;
  787. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  788. for I:=I2 to taicpu(hp1).ops-1 do
  789. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  790. begin
  791. { If the parameter matched on the second op from the RIGHT
  792. we have to switch the parameters, this will not happen for CMP
  793. were we're only evaluating the most right parameter
  794. }
  795. if I <> taicpu(hp1).ops-1 then
  796. begin
  797. {The SUB operators need to be changed when we swap parameters}
  798. case taicpu(hp1).opcode of
  799. A_SUB: tempop:=A_RSB;
  800. A_SBC: tempop:=A_RSC;
  801. A_RSB: tempop:=A_SUB;
  802. A_RSC: tempop:=A_SBC;
  803. else tempop:=taicpu(hp1).opcode;
  804. end;
  805. if taicpu(hp1).ops = 3 then
  806. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  807. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  808. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  809. else
  810. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  811. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  812. taicpu(p).oper[2]^.shifterop^);
  813. end
  814. else
  815. if taicpu(hp1).ops = 3 then
  816. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  817. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  818. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  819. else
  820. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  821. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  822. taicpu(p).oper[2]^.shifterop^);
  823. asml.insertbefore(hp2, p);
  824. asml.remove(p);
  825. asml.remove(hp1);
  826. p.free;
  827. hp1.free;
  828. p:=hp2;
  829. GetNextInstruction(p,hp1);
  830. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  831. break;
  832. end;
  833. ReleaseUsedRegs(TmpUsedRegs);
  834. end;
  835. {
  836. Often we see shifts and then a superfluous mov to another register
  837. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  838. }
  839. if (taicpu(p).opcode = A_MOV) and
  840. GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  841. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  842. end;
  843. A_ADD,
  844. A_ADC,
  845. A_RSB,
  846. A_RSC,
  847. A_SUB,
  848. A_SBC,
  849. A_AND,
  850. A_BIC,
  851. A_EOR,
  852. A_ORR,
  853. A_MLA,
  854. A_MUL:
  855. begin
  856. {
  857. change
  858. and reg2,reg1,const1
  859. and reg2,reg2,const2
  860. to
  861. and reg2,reg1,(const1 and const2)
  862. }
  863. if (taicpu(p).opcode = A_AND) and
  864. (taicpu(p).oper[1]^.typ = top_reg) and
  865. (taicpu(p).oper[2]^.typ = top_const) and
  866. GetNextInstruction(p, hp1) and
  867. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  868. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  869. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  870. (taicpu(hp1).oper[2]^.typ = top_const) then
  871. begin
  872. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  873. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  874. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  875. asml.remove(hp1);
  876. hp1.free;
  877. end;
  878. {
  879. change
  880. add reg1, ...
  881. mov reg2, reg1
  882. to
  883. add reg2, ...
  884. }
  885. if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
  886. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  887. end;
  888. A_CMP:
  889. begin
  890. {
  891. change
  892. cmp reg,const1
  893. moveq reg,const1
  894. movne reg,const2
  895. to
  896. cmp reg,const1
  897. movne reg,const2
  898. }
  899. if (taicpu(p).oper[1]^.typ = top_const) and
  900. GetNextInstruction(p, hp1) and
  901. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  902. (taicpu(hp1).oper[1]^.typ = top_const) and
  903. GetNextInstruction(hp1, hp2) and
  904. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  905. (taicpu(hp1).oper[1]^.typ = top_const) then
  906. begin
  907. RemoveRedundantMove(p, hp1, asml);
  908. RemoveRedundantMove(p, hp2, asml);
  909. end;
  910. end;
  911. end;
  912. end;
  913. end;
  914. end;
  { Tells whether p has to be the final instruction of a run that is being
    made conditional.  Original note: instructions modifying the CPSR can be
    only the last instruction.

    Returns true when p is an instruction and at least one of these holds:
      - its opcode is one of BL, BLX, CMP, CMN, SWI, TEQ, TST, CMF, CMFE
      - it carries the S postfix
      - its first operand is the register PC
    Anything that is not an instruction yields false. }
  function MustBeLast(p : tai) : boolean;
    var
      ins : taicpu;
    begin
      Result:=false;
      if p.typ<>ait_instruction then
        exit;
      ins:=taicpu(p);
      if (ins.opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
         (ins.oppostfix=PF_S) then
        Result:=true
      else
        { operand 0 is PC }
        Result:=(ins.ops>=1) and
                (ins.oper[0]^.typ=top_reg) and
                (ins.oper[0]^.reg=NR_PC);
    end;
  { Second peephole pass: walks the items from BlockStart up to BlockEnd and,
    for every conditional branch (A_B with a condition), tries to replace the
    branch by conditionally executed instructions.

    Two shapes are recognised:

      1)     Bcc   xxx
             <1..4 instructions>
          xxx:
         The skipped instructions get the inverse condition and the branch
         is removed.

      2)     Bcc   xxx
             <instructions 1>
             B     yyy
          xxx:
             <instructions 2>
          yyy:
         Block 1 gets the inverse condition, block 2 the original condition,
         and both branches are removed.

    Whenever a branch is removed the reference count of its target label is
    decremented, and scanning resumes at the first instruction that was made
    conditional. }
  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      WasLast: boolean;
      { UsedRegs, TmpUsedRegs: TRegSet; }
    begin
      p := BlockStart;
      { UsedRegs := []; }
      while (p <> BlockEnd) Do
        begin
          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_B:
                    if taicpu(p).condition<>C_None then
                      begin
                        { check for
                               Bxx   xxx
                            <several instructions>
                            xxx:
                        }
                        l:=0;
                        WasLast:=False;
                        GetNextInstruction(p, hp1);
                        { count the instructions that could be made conditional;
                          l may reach 5, which is rejected below }
                        while assigned(hp1) and
                          (l<=4) and
                          CanBeCond(hp1) and
                          { stop on labels }
                          not(hp1.typ=ait_label) do
                          begin
                            inc(l);
                            if MustBeLast(hp1) then
                              begin
                                WasLast:=True;
                                GetNextInstruction(hp1,hp1);
                                break;
                              end
                            else
                              GetNextInstruction(hp1,hp1);
                          end;
                        if assigned(hp1) then
                          begin
                            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                              begin
                                if (l<=4) and (l>0) then
                                  begin
                                    condition:=inverse_cond(taicpu(p).condition);
                                    { hp2 keeps the branch so it can be removed afterwards }
                                    hp2:=p;
                                    GetNextInstruction(p,hp1);
                                    p:=hp1;
                                    repeat
                                      if hp1.typ=ait_instruction then
                                        taicpu(hp1).condition:=condition;
                                      if MustBeLast(hp1) then
                                        begin
                                          GetNextInstruction(hp1,hp1);
                                          break;
                                        end
                                      else
                                        GetNextInstruction(hp1,hp1);
                                    until not(assigned(hp1)) or
                                      not(CanBeCond(hp1)) or
                                      (hp1.typ=ait_label);
                                    { wait with removing else GetNextInstruction could
                                      ignore the label if it was the only usage in the
                                      jump moved away }
                                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                    asml.remove(hp2);
                                    hp2.free;
                                    continue;
                                  end;
                              end
                            else
                              { do not perform further optimizations if there is an
                                instruction in block #1 which can not be optimized.
                              }
                              if not WasLast then
                                begin
                                  { check further for
                                         Bcc   xxx
                                      <several instructions 1>
                                         B   yyy
                                      xxx:
                                      <several instructions 2>
                                      yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several instructions 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCond(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy:
                                        l>0: with an empty block 2 there is nothing to
                                        make conditional, and the loop further down would
                                        start on the yyy label itself and could run on
                                        past it }
                                      if assigned(hp1) and
                                        (l>0) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          { hp3 keeps the Bcc so it can be removed afterwards }
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            if hp1.typ=ait_instruction then
                                              taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCond(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several instructions 2> }
                                          if assigned(hp1) then
                                            repeat
                                              { same guard as the two loops above: never
                                                write through a taicpu cast on an item
                                                that is not an instruction }
                                              if hp1.typ=ait_instruction then
                                                taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCond(hp1)) or
                                              (hp1.typ=ait_label);
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove Bcc }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                          end;
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Reports whether Reg is considered to appear in p1.
    A BL instruction answers true for every register; everything else is
    delegated to the inherited implementation.
    NOTE(review): presumably BL is treated this way because the called
    routine may touch any register - confirm. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    begin
      if (p1.typ <> ait_instruction) or
         (taicpu(p1).opcode <> A_BL) then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        Result:=true;
    end;
  const
    { opcodes that write to memory, or might do so }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branches, calls and breakpoint }
      A_B,A_BL,A_BLX,A_BKPT,A_BX,
      { integer stores }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { remaining store opcodes }
      A_STF,A_SFM,A_FSTS,A_FSTD
    ];
  { Adjusts the register live information (cg.rg[...].live_start/live_end)
    for swapping the two instructions p and hp1.

    p   : the instruction that currently comes first
    hp1 : the instruction that currently comes second; it must directly
          follow p

    Every register found in a register, reference (base/index), shifter
    (rs) or register-set operand is examined. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { if the live range of reg ends at hp1 and p references reg as well,
      the range ends at p }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { if the live range of reg starts at p and hp1 references reg as well,
      the range starts at hp1 }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { this loop fixes up live ENDs; the shifter register has to be
              handled like every other operand kind here (this used to
              call CheckLiveStart) }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Pre-register-allocation scheduling pass.

    Scans from BlockStart to BlockEnd for three consecutive instructions
        p ; <load> ; <user>
    where <load> is LDR/LDRB/LDRH/LDRSB/LDRSH and <user> references the
    loaded register.  When all checks below hold, p is moved behind the
    load, so that the sequence becomes
        <load> ; p ; <user>
    Register deallocation markers that sit next to p and name a register
    occurring in p are moved along with it.

    p      : var parameter; it is reset to BlockStart on entry and used as
             the scan cursor
    result : always true }
  function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;

  { TODO : schedule also forward }
  { TODO : schedule distance > 1 }

    { Memory side of the legality check: may the load ld be placed in front
      of first?
      for now we don't reschedule if the previous instruction changes
      potentially a memory location }
    function LoadMayPass(first,ld : taicpu) : boolean;
      begin
        result:=
          ( not(first.opcode in opcode_could_mem_write) and
            not(RegModifiedByInstruction(NR_PC,first))
          ) or
          ( (first.opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
            ( (ld.oper[1]^.ref^.base=NR_PC) or
              ( assigned(ld.oper[1]^.ref^.symboldata) and
                (ld.oper[1]^.ref^.offset=0)
              )
            )
          ) or
          { try to prove that the memory accesses don't overlap }
          ( (first.opcode in [A_STRB,A_STRH,A_STR]) and
            (first.oper[1]^.ref^.base=ld.oper[1]^.ref^.base) and
            (first.oppostfix=PF_None) and
            (ld.oppostfix=PF_None) and
            (first.oper[1]^.ref^.index=NR_NO) and
            (ld.oper[1]^.ref^.index=NR_NO) and
            { get operand sizes and check if the offset distance is large
              enough to ensure no overlap }
            (abs(first.oper[1]^.ref^.offset-ld.oper[1]^.ref^.offset)>=
               max(tcgsize2size[reg_cgsize(first.oper[0]^.reg)],
                   tcgsize2size[reg_cgsize(ld.oper[0]^.reg)]))
          );
      end;

    var
      nxt,user,back,moved,fwd : tai;
      carried : TAsmList;
    begin
      result:=true;

      { items that travel together with p while it is being moved }
      carried:=TAsmList.Create;
      p:=BlockStart;
      while p<>BlockEnd Do
        begin
          if (p.typ=ait_instruction) and
            GetNextInstruction(p,nxt) and
            (nxt.typ=ait_instruction) and
            (taicpu(nxt).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
            LoadMayPass(taicpu(p),taicpu(nxt)) and
            GetNextInstruction(nxt,user) and
            (user.typ=ait_instruction) and
            { loaded register used by next instruction? }
            (RegInInstruction(taicpu(nxt).oper[0]^.reg,user)) and
            { loaded register not used by previous instruction? }
            not(RegInInstruction(taicpu(nxt).oper[0]^.reg,p)) and
            { same condition? }
            (taicpu(p).condition=taicpu(nxt).condition) and
            { first instruction might not change the register used as base }
            ( (taicpu(nxt).oper[1]^.ref^.base=NR_NO) or
              not(RegModifiedByInstruction(taicpu(nxt).oper[1]^.ref^.base,p))
            ) and
            { first instruction might not change the register used as index }
            ( (taicpu(nxt).oper[1]^.ref^.index=NR_NO) or
              not(RegModifiedByInstruction(taicpu(nxt).oper[1]^.ref^.index,p))
            ) then
            begin
              { remember both neighbours before p is unlinked }
              back:=tai(p.Previous);
              fwd:=tai(p.next);
              asml.Remove(p);

              { if there is a reg. dealloc instruction associated with p,
                move it together with p }

              { before the instruction? }
              while assigned(back) and (back.typ<>ait_instruction) do
                if (back.typ=ait_regalloc) and
                  (tai_regalloc(back).ratype in [ra_dealloc]) and
                  RegInInstruction(tai_regalloc(back).reg,p) then
                  begin
                    moved:=back;
                    back:=tai(back.Previous);
                    asml.Remove(moved);
                    carried.Concat(moved);
                  end
                else
                  back:=tai(back.Previous);

              carried.Concat(p);
              SwapRegLive(taicpu(p),taicpu(nxt));

              { after the instruction? }
              while assigned(fwd) and (fwd.typ<>ait_instruction) do
                if (fwd.typ=ait_regalloc) and
                  (tai_regalloc(fwd).ratype in [ra_dealloc]) and
                  RegInInstruction(tai_regalloc(fwd).reg,p) then
                  begin
                    moved:=fwd;
                    fwd:=tai(fwd.next);
                    asml.Remove(moved);
                    carried.Concat(moved);
                  end
                else
                  fwd:=tai(fwd.Next);

              { re-link: the load first, then p with its markers, both in
                front of the instruction using the loaded register }
              asml.Remove(nxt);
{$ifdef DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),user);
{$endif DEBUG_PREREGSCHEDULER}
              asml.InsertBefore(nxt,user);
              asml.InsertListBefore(user,carried);
              p:=tai(p.next)
            end
          else if p.typ=ait_instruction then
            { no match: go on with whatever GetNextInstruction(p,nxt) left in nxt }
            p:=nxt
          else
            p:=tai(p.next);
        end;
      carried.Free;
    end;
  { Second peephole pass of the Thumb-2 optimizer class.
    Currently an empty placeholder: it performs no work. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  { unit initialization: assign this unit's optimizer and pre-regalloc
    scheduler classes to the class variables below }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
  end.