aoptcpu.pas 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  24. { TCpuAsmOptimizer }
  { CPU specific assembler optimizer derived from TAsmOptimizer (the unit header
    describes it as the ARM optimizer object). }
  25. TCpuAsmOptimizer = class(TAsmOptimizer)
  26. { uses the same constructor as TAopObj }
  { Pass 1 peephole hook: examines the list entry p and, for instructions, tries
    opcode specific rewrites; p may be replaced by another entry and the result
    is set to true by several of the rewrites. }
  27. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  { Pass 2 peephole override.
    NOTE(review): implementation is not in the reviewed part of the file. }
  28. procedure PeepHoleOptPass2;override;
  { Override taking a register and a list entry.
    NOTE(review): presumably reports whether Reg occurs in p1 - confirm against
    the implementation, which is not in the reviewed part of the file. }
  29. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  { If movp is a two operand MOV (no postfix, same condition as p) that copies
    the register written by p, and that register is not used after movp, p is
    changed to write the MOV destination directly and movp is removed.
    optimizer is the name used in the inserted 'Peephole ...' comment. }
  30. procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  { Updates AllUsedRegs with the entry following p and returns whether reg is
    still considered in use after p. }
  31. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  32. var AllUsedRegs: TAllUsedRegs): Boolean;
  33. End;
  34. { TCpuPreRegallocScheduler }
  { Second TAsmOptimizer descendant declared by this unit.
    NOTE(review): judging by its name it schedules instructions before register
    allocation - confirm against the implementation, which is not in the
    reviewed part of the file. }
  35. TCpuPreRegallocScheduler = class(TAsmOptimizer)
  { Pass 1 hook override; p is passed by reference so it may be changed. }
  36. function PeepHoleOptPass1Cpu(var p: tai): boolean;override;
  { NOTE(review): presumably exchanges register liveness information between
    the two instructions p and hp1 - confirm against the implementation. }
  37. procedure SwapRegLive(p, hp1: taicpu);
  38. end;
  { Descendant of TCpuAsmOptimizer that only replaces the pass 2 peephole step.
    NOTE(review): the name suggests it is used for Thumb-2 code - confirm. }
  39. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  40. { uses the same constructor as TAopObj }
  41. procedure PeepHoleOptPass2;override;
  42. End;
  43. Implementation
  44. uses
  45. cutils,verbose,globals,
  46. systems,
  47. cpuinfo,
  48. cgobj,cgutils,procinfo,
  49. aasmbase,aasmdata;
  { Returns true when p is an instruction that currently carries no condition
    and may receive one: PLD is never accepted, BLX only when its first operand
    is a register. }
  function CanBeCond(p : tai) : boolean;
    var
      instr: taicpu;
    begin
      result:=false;
      { only real instructions qualify }
      if p.typ<>ait_instruction then
        exit;
      instr:=taicpu(p);
      { already conditional, or PLD }
      if (instr.condition<>C_None) or (instr.opcode=A_PLD) then
        exit;
      { BLX is accepted in its register form only }
      result:=(instr.opcode<>A_BLX) or (instr.oper[0]^.typ=top_reg);
    end;
  { Compares two references field by field; they are equal only if offset,
    base, index, scale factor, symbols, refaddr, index sign, shift and
    addressing mode all agree. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { register/offset part }
      if (r1.offset<>r2.offset) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic part }
      if (r1.symbol<>r2.symbol) or
         (r1.refaddr<>r2.refaddr) or
         (r1.relsymbol<>r2.relsymbol) then
        exit;
      { index sign, shift and addressing mode }
      result:=
        (r1.signindex=r2.signindex) and
        (r1.shiftimm=r2.shiftimm) and
        (r1.addressmode=r2.addressmode) and
        (r1.shiftmode=r2.shiftmode);
    end;
  { Returns true when instr is an instruction with opcode op whose condition is
    in cond and whose postfix is in postfix. An empty cond or postfix set means
    "do not check this property". }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      { a non-empty condition set must contain the instruction's condition }
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      { same rule for the postfix set }
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Compares two operands: they match when they have the same type and, for
    constants, registers, condition codes and references, the same payload.
    Operands of any other type never match. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        Result:=false
      else
        case oper1.typ of
          top_const:
            Result:=(oper1.val=oper2.val);
          top_reg:
            Result:=(oper1.reg=oper2.reg);
          top_conditioncode:
            Result:=(oper1.cc=oper2.cc);
          top_ref:
            Result:=RefsEqual(oper1.ref^,oper2.ref^);
          else
            { remaining operand types are not compared }
            Result:=false;
        end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=(oper.reg=reg)
      else
        result:=false;
    end;
  { Removes movp from asml when it is an EQ-conditional instruction whose first
    operand register and second operand value equal those of cmpp. A comment
    entry documenting the removal is inserted in its place.
    Both entries are accessed as taicpu without further type checks. }
  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
    var
      cmpinstr, movinstr: taicpu;
    begin
      cmpinstr:=taicpu(cmpp);
      movinstr:=taicpu(movp);
      if movinstr.condition<>C_EQ then
        exit;
      { same register ... }
      if cmpinstr.oper[0]^.reg<>movinstr.oper[0]^.reg then
        exit;
      { ... and same value }
      if cmpinstr.oper[1]^.val<>movinstr.oper[1]^.val then
        exit;
      asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
      asml.remove(movp);
      movp.free;
    end;
  { Returns true when the instruction hp writes reg, false otherwise (also when
    hp is nil or not an instruction).

    Handled cases:
      * compare/test/branch style opcodes never write a register
      * LDR/STR with pre-/postindexed addressing write their base register
      * UMLAL/UMULL/SMLAL/SMULL also write their second operand
      * LDM writes every register in its register set
      * otherwise the first operand is the destination (for LDRD also the
        following register); a pre-/postindexed reference as first operand
        writes its base register

    STR only reads its first operand, so for STR nothing but the base register
    writeback is reported. }
  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      regLoadedWithNewValue := false;
      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        { These operands do not write into a register at all }
        A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
          exit;
        { Take care of post/preincremented store and loads, they will change their base register }
        A_STR, A_LDR:
          begin
            regLoadedWithNewValue :=
              (p.oper[1]^.typ=top_ref) and
              (p.oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
              (p.oper[1]^.ref^.base = reg);
            { STR (and STRD) store their first operand to memory, they never
              load it; do not fall through to the generic destination check
              below }
            if p.opcode=A_STR then
              exit;
          end;
        { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
        A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
          regLoadedWithNewValue :=
            (p.oper[1]^.typ = top_reg) and
            (p.oper[1]^.reg = reg);
        { Loads to oper2 from coprocessor }
        {
          MCR/MRC is currently not supported in FPC
          A_MRC:
            regLoadedWithNewValue :=
              (p.oper[2]^.typ = top_reg) and
              (p.oper[2]^.reg = reg);
        }
        { Loads to all register in the registerset }
        A_LDM:
          regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
      end;

      if regLoadedWithNewValue then
        exit;

      case p.oper[0]^.typ of
        { This is the usual case: the first operand is the destination }
        top_reg:
          regLoadedWithNewValue :=
            (p.oper[0]^.reg = reg) or
            { LDRD also loads the register following the first operand }
            ((p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
        { LDM/STM might write a new value to their index register }
        top_ref:
          regLoadedWithNewValue :=
            (p.oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
            (p.oper[0]^.ref^.base = reg);
      end;
    end;
  { (safe) heuristics to ensure alignment: returns true only for the ABIs
    abi_eabi, abi_armeb and abi_eabihf and only for references relative to R13
    or to R11 (when R11 is the frame pointer) whose offset fits the pattern
    checked below. }
  function AlignedToQWord(const ref : treference) : boolean;
    begin
      result:=false;
      if not(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) then
        exit;

      { R13 relative: non-negative offset that is a multiple of 8 }
      if (ref.offset>=0) and
         ((ref.offset mod 8)=0) and
         ((ref.base=NR_R13) or (ref.index=NR_R13)) then
        begin
          result:=true;
          exit;
        end;

      { R11 relative: when using NR_R11, it has always a value of <qword align>+4 }
      result:=
        (ref.offset<=0) and
        ((abs(ref.offset+4) mod 8)=0) and
        (current_procinfo.framepointer=NR_R11) and
        ((ref.base=NR_R11) or (ref.index=NR_R11));
    end;
  { Returns true when the instruction hp reads reg through one of its scanned
    operands (register, register set, shifter operand register, or base/index
    of a reference). Returns false when hp is nil or not an instruction.

    Scanning normally starts at the second operand; for the opcodes listed
    below the first operand is scanned as well. }
  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      instr: taicpu;
      firstop, idx: longint;
    begin
      result:=false;
      if (not assigned(hp)) or (hp.typ<>ait_instruction) then
        exit;
      instr:=taicpu(hp);

      { For these instructions we have to start on oper[0] }
      if instr.opcode in [A_STR, A_LDM, A_STM, A_PLD,
                          A_CMP, A_CMN, A_TST, A_TEQ,
                          A_B, A_BL, A_BX, A_BLX,
                          A_SMLAL, A_UMLAL] then
        firstop:=0
      else
        firstop:=1;

      for idx:=firstop to instr.ops-1 do
        begin
          case instr.oper[idx]^.typ of
            top_reg:
              result:=
                (instr.oper[idx]^.reg=reg) or
                { STRD also stores the register following its first operand }
                ((idx=0) and (instr.opcode=A_STR) and (instr.oppostfix=PF_D) and
                 (getsupreg(instr.oper[0]^.reg)+1=getsupreg(reg)));
            top_regset:
              result:=(getsupreg(reg) in instr.oper[idx]^.regset^);
            top_shifterop:
              result:=(instr.oper[idx]^.shifterop^.rs=reg);
            top_ref:
              result:=
                (instr.oper[idx]^.ref^.base=reg) or
                (instr.oper[idx]^.ref^.index=reg);
          end;
          { Bailout if we found something }
          if result then
            exit;
        end;
    end;
  { Updates AllUsedRegs with the entry following p and reports whether reg is
    still in use after p. The answer is true only when
      * the updated register state marks reg as used, and
      * p itself does not overwrite reg, and
      * there is no following instruction, or that instruction reads reg, or
        it does not overwrite reg. }
  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
    var AllUsedRegs: TAllUsedRegs): Boolean;
    var
      nextinstr: tai;
    begin
      AllUsedRegs[getregtype(reg)].Update(tai(p.Next));

      Result:=false;
      if not AllUsedRegs[getregtype(reg)].IsUsed(reg) then
        exit;
      { p replaces the value of reg, the old value is dead }
      if regLoadedWithNewValue(reg,p) then
        exit;
      { nothing follows: treat reg as used }
      if not GetNextInstruction(p,nextinstr) then
        begin
          Result:=true;
          exit;
        end;
      Result:=
        instructionLoadsFromReg(reg,nextinstr) or
        not(regLoadedWithNewValue(reg,nextinstr));
    end;
  { Folds a register copy into the instruction that produced the value:

        <op>  reg1, ...
        mov   reg2, reg1
    becomes
        <op>  reg2, ...

    This is done only when movp is a MOV without postfix, with the same
    condition as p and exactly two operands, its source is the first operand
    register of p, and that register is not used after movp. For MUL and MLA
    the rewrite is skipped if it would make oper[0] equal to oper[1].
    optimizer is the name used in the inserted comment. }
  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
    var
      LiveRegs: TAllUsedRegs;
      producer, movinstr: taicpu;
    begin
      producer:=taicpu(p);
      if not MatchInstruction(movp, A_MOV, [producer.condition], [PF_None]) then
        exit;
      movinstr:=taicpu(movp);
      { We can't optimize if there is a shiftop }
      if movinstr.ops<>2 then
        exit;
      if not MatchOperand(movinstr.oper[1]^, producer.oper[0]^.reg) then
        exit;
      { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
      if (producer.opcode in [A_MLA, A_MUL]) and
         (producer.oper[1]^.reg = movinstr.oper[0]^.reg) then
        exit;

      CopyUsedRegs(LiveRegs);
      UpdateUsedRegs(LiveRegs, tai(p.next));
      if not(RegUsedAfterInstruction(producer.oper[0]^.reg,movp,LiveRegs)) then
        begin
          asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
          producer.loadreg(0,movinstr.oper[0]^.reg);
          asml.remove(movp);
          movp.free;
        end;
      ReleaseUsedRegs(LiveRegs);
    end;
  252. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  253. var
  254. hp1,hp2: tai;
  255. i, i2: longint;
  256. TmpUsedRegs: TAllUsedRegs;
  257. tempop: tasmop;
  258. function IsPowerOf2(const value: DWord): boolean; inline;
  259. begin
  260. Result:=(value and (value - 1)) = 0;
  261. end;
  262. begin
  263. result := false;
  264. case p.typ of
  265. ait_instruction:
  266. begin
  267. (* optimization proved not to be safe, see tw4768.pp
  268. {
  269. change
  270. <op> reg,x,y
  271. cmp reg,#0
  272. into
  273. <op>s reg,x,y
  274. }
  275. { this optimization can applied only to the currently enabled operations because
  276. the other operations do not update all flags and FPC does not track flag usage }
  277. if (taicpu(p).opcode in [A_ADC,A_ADD,A_SUB {A_UDIV,A_SDIV,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND}]) and
  278. (taicpu(p).oppostfix = PF_None) and
  279. (taicpu(p).condition = C_None) and
  280. GetNextInstruction(p, hp1) and
  281. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  282. (taicpu(hp1).oper[1]^.typ = top_const) and
  283. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  284. (taicpu(hp1).oper[1]^.val = 0) { and
  285. GetNextInstruction(hp1, hp2) and
  286. (tai(hp2).typ = ait_instruction) and
  287. // be careful here, following instructions could use other flags
  288. // however after a jump fpc never depends on the value of flags
  289. (taicpu(hp2).opcode = A_B) and
  290. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])} then
  291. begin
  292. taicpu(p).oppostfix:=PF_S;
  293. asml.remove(hp1);
  294. hp1.free;
  295. end
  296. else
  297. *)
  298. case taicpu(p).opcode of
  299. A_STR:
  300. begin
  301. { change
  302. str reg1,ref
  303. ldr reg2,ref
  304. into
  305. str reg1,ref
  306. mov reg2,reg1
  307. }
  308. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  309. (taicpu(p).oppostfix=PF_None) and
  310. GetNextInstruction(p,hp1) and
  311. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  312. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  313. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  314. begin
  315. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  316. begin
  317. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  318. asml.remove(hp1);
  319. hp1.free;
  320. end
  321. else
  322. begin
  323. taicpu(hp1).opcode:=A_MOV;
  324. taicpu(hp1).oppostfix:=PF_None;
  325. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  326. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  327. end;
  328. result := true;
  329. end
  330. { change
  331. str reg1,ref
  332. str reg2,ref
  333. into
  334. strd reg1,ref
  335. }
  336. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  337. (taicpu(p).oppostfix=PF_None) and
  338. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  339. GetNextInstruction(p,hp1) and
  340. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  341. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  342. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  343. { str ensures that either base or index contain no register, else ldr wouldn't
  344. use an offset either
  345. }
  346. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  347. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  348. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  349. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  350. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  351. begin
  352. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  353. taicpu(p).oppostfix:=PF_D;
  354. asml.remove(hp1);
  355. hp1.free;
  356. end;
  357. end;
  358. A_LDR:
  359. begin
  360. { change
  361. ldr reg1,ref
  362. ldr reg2,ref
  363. into ...
  364. }
  365. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  366. GetNextInstruction(p,hp1) and
  367. { ldrd is not allowed here }
  368. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  369. begin
  370. {
  371. ...
  372. ldr reg1,ref
  373. mov reg2,reg1
  374. }
  375. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  376. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  377. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  378. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  379. begin
  380. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  381. begin
  382. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  383. asml.remove(hp1);
  384. hp1.free;
  385. end
  386. else
  387. begin
  388. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  389. taicpu(hp1).opcode:=A_MOV;
  390. taicpu(hp1).oppostfix:=PF_None;
  391. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  392. end;
  393. result := true;
  394. end
  395. {
  396. ...
  397. ldrd reg1,ref
  398. }
  399. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  400. { ldrd does not allow any postfixes ... }
  401. (taicpu(p).oppostfix=PF_None) and
  402. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  403. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  404. { ldr ensures that either base or index contain no register, else ldr wouldn't
  405. use an offset either
  406. }
  407. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  408. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  409. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  410. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  411. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  412. begin
  413. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  414. taicpu(p).oppostfix:=PF_D;
  415. asml.remove(hp1);
  416. hp1.free;
  417. end;
  418. end;
  419. { Remove superfluous mov after ldr
  420. changes
  421. ldr reg1, ref
  422. mov reg2, reg1
  423. to
  424. ldr reg2, ref
  425. conditions are:
  426. * no ldrd usage
  427. * reg1 must be released after mov
  428. * mov can not contain shifterops
  429. * ldr+mov have the same conditions
  430. * mov does not set flags
  431. }
  432. if (taicpu(p).oppostfix<>PF_D) and GetNextInstruction(p, hp1) then
  433. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  434. end;
  435. A_MOV:
  436. begin
  437. { fold
  438. mov reg1,reg0, shift imm1
  439. mov reg1,reg1, shift imm2
  440. }
  441. if (taicpu(p).ops=3) and
  442. (taicpu(p).oper[2]^.typ = top_shifterop) and
  443. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  444. getnextinstruction(p,hp1) and
  445. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  446. (taicpu(hp1).ops=3) and
  447. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  448. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  449. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  450. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  451. begin
  452. { fold
  453. mov reg1,reg0, lsl 16
  454. mov reg1,reg1, lsr 16
  455. strh reg1, ...
  456. dealloc reg1
  457. to
  458. strh reg1, ...
  459. dealloc reg1
  460. }
  461. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  462. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  463. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  464. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  465. getnextinstruction(hp1,hp2) and
  466. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  467. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  468. begin
  469. CopyUsedRegs(TmpUsedRegs);
  470. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  471. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  472. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  473. begin
  474. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  475. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  476. asml.remove(p);
  477. asml.remove(hp1);
  478. p.free;
  479. hp1.free;
  480. p:=hp2;
  481. end;
  482. ReleaseUsedRegs(TmpUsedRegs);
  483. end
  484. { fold
  485. mov reg1,reg0, shift imm1
  486. mov reg1,reg1, shift imm2
  487. to
  488. mov reg1,reg0, shift imm1+imm2
  489. }
  490. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  491. { asr makes no use after a lsr, the asr can be foled into the lsr }
  492. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  493. begin
  494. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  495. { avoid overflows }
  496. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  497. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  498. SM_ROR:
  499. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  500. SM_ASR:
  501. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  502. SM_LSR,
  503. SM_LSL:
  504. begin
  505. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  506. InsertLLItem(p.previous, p.next, hp1);
  507. p.free;
  508. p:=hp1;
  509. end;
  510. else
  511. internalerror(2008072803);
  512. end;
  513. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  514. asml.remove(hp1);
  515. hp1.free;
  516. result := true;
  517. end
  518. { fold
  519. mov reg1,reg0, shift imm1
  520. mov reg1,reg1, shift imm2
  521. mov reg1,reg1, shift imm3 ...
  522. }
  523. else if getnextinstruction(hp1,hp2) and
  524. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  525. (taicpu(hp2).ops=3) and
  526. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  527. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  528. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  529. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  530. begin
  531. { mov reg1,reg0, lsl imm1
  532. mov reg1,reg1, lsr/asr imm2
  533. mov reg1,reg1, lsl imm3 ...
  534. if imm3<=imm1 and imm2>=imm3
  535. to
  536. mov reg1,reg0, lsl imm1
  537. mov reg1,reg1, lsr/asr imm2-imm3
  538. }
  539. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  540. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  541. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  542. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  543. begin
  544. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  545. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  546. asml.remove(hp2);
  547. hp2.free;
  548. result := true;
  549. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  550. begin
  551. asml.remove(hp1);
  552. hp1.free;
  553. end;
  554. end
  555. { mov reg1,reg0, lsr/asr imm1
  556. mov reg1,reg1, lsl imm2
  557. mov reg1,reg1, lsr/asr imm3 ...
  558. if imm3>=imm1 and imm2>=imm1
  559. to
  560. mov reg1,reg0, lsl imm2-imm1
  561. mov reg1,reg1, lsr/asr imm3 ...
  562. }
  563. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  564. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  565. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  566. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  567. begin
  568. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  569. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  570. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  571. asml.remove(p);
  572. p.free;
  573. p:=hp2;
  574. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  575. begin
  576. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  577. asml.remove(hp1);
  578. hp1.free;
  579. p:=hp2;
  580. end;
  581. result := true;
  582. end;
  583. end;
  584. end;
  585. { Change the common
  586. mov r0, r0, lsr #24
  587. and r0, r0, #255
  588. and remove the superfluous and
  589. This could be extended to handle more cases.
  590. }
  591. if (taicpu(p).ops=3) and
  592. (taicpu(p).oper[2]^.typ = top_shifterop) and
  593. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  594. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  595. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  596. getnextinstruction(p,hp1) and
  597. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  598. (taicpu(hp1).ops=3) and
  599. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  600. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  601. (taicpu(hp1).oper[2]^.typ = top_const) and
  602. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  603. For LSR #25 and an AndConst of 255 that whould go like this:
  604. 255 and ((2 shl (32-25))-1)
  605. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  606. LSR #25 and AndConst of 254:
  607. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  608. }
  609. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  610. begin
  611. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  612. asml.remove(hp1);
  613. hp1.free;
  614. end;
  615. {
  616. optimize
  617. mov rX, yyyy
  618. ....
  619. }
  620. if (taicpu(p).ops = 2) and
  621. GetNextInstruction(p,hp1) and
  622. (tai(hp1).typ = ait_instruction) then
  623. begin
  624. {
  625. This changes the very common
  626. mov r0, #0
  627. str r0, [...]
  628. mov r0, #0
  629. str r0, [...]
  630. and removes all superfluous mov instructions
  631. }
  632. if (taicpu(p).oper[1]^.typ = top_const) and
  633. (taicpu(hp1).opcode=A_STR) then
  634. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  635. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  636. GetNextInstruction(hp1, hp2) and
  637. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  638. (taicpu(hp2).ops = 2) and
  639. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  640. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  641. begin
  642. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  643. GetNextInstruction(hp2,hp1);
  644. asml.remove(hp2);
  645. hp2.free;
  646. if not assigned(hp1) then break;
  647. end
  648. {
  649. This removes the first mov from
  650. mov rX,...
  651. mov rX,...
  652. }
  653. else if taicpu(hp1).opcode=A_MOV then
  654. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  655. (taicpu(hp1).ops = 2) and
  656. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  657. { don't remove the first mov if the second is a mov rX,rX }
  658. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  659. begin
  660. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  661. asml.remove(p);
  662. p.free;
  663. p:=hp1;
  664. GetNextInstruction(hp1,hp1);
  665. if not assigned(hp1) then
  666. break;
  667. end;
  668. end;
  669. {
  670. change
  671. mov r1, r0
  672. add r1, r1, #1
  673. to
  674. add r1, r0, #1
  675. Todo: Make it work for mov+cmp too
  676. CAUTION! If this one is successful p might not be a mov instruction anymore!
  677. }
  678. if (taicpu(p).ops = 2) and
  679. (taicpu(p).oper[1]^.typ = top_reg) and
  680. (taicpu(p).oppostfix = PF_NONE) and
  681. GetNextInstruction(p, hp1) and
  682. (tai(hp1).typ = ait_instruction) and
  683. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  684. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  685. {MOV and MVN might only have 2 ops}
  686. (taicpu(hp1).ops = 3) and
  687. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  688. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  689. (taicpu(hp1).oper[1]^.typ = top_reg) and
  690. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  691. begin
  692. { When we get here we still don't know if the registers match}
  693. for I:=1 to 2 do
  694. {
  695. If the first loop was successful p will be replaced with hp1.
  696. The checks will still be ok, because all required information
  697. will also be in hp1 then.
  698. }
  699. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  700. begin
  701. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  702. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  703. if p<>hp1 then
  704. begin
  705. asml.remove(p);
  706. p.free;
  707. p:=hp1;
  708. end;
  709. end;
  710. end;
  711. { This folds shifterops into following instructions
  712. mov r0, r1, lsl #8
  713. add r2, r3, r0
  714. to
  715. add r2, r3, r1, lsl #8
  716. CAUTION! If this one is successful p might not be a mov instruction anymore!
  717. }
  718. if (taicpu(p).opcode = A_MOV) and
  719. (taicpu(p).ops = 3) and
  720. (taicpu(p).oper[1]^.typ = top_reg) and
  721. (taicpu(p).oper[2]^.typ = top_shifterop) and
  722. (taicpu(p).oppostfix = PF_NONE) and
  723. GetNextInstruction(p, hp1) and
  724. (tai(hp1).typ = ait_instruction) and
  725. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  726. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  727. (taicpu(hp1).oppostfix = PF_NONE) and
  728. (taicpu(hp1).condition = taicpu(p).condition) and
  729. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  730. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  731. A_CMP, A_CMN]) and
  732. (
  733. {Only ONE of the two src operands is allowed to match}
  734. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  735. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  736. ) then
  737. begin
  738. CopyUsedRegs(TmpUsedRegs);
  739. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  740. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  741. I2:=0
  742. else
  743. I2:=1;
  744. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  745. for I:=I2 to taicpu(hp1).ops-1 do
  746. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  747. begin
  748. { If the parameter matched on the second op from the RIGHT
  749. we have to switch the parameters, this will not happen for CMP
  750. were we're only evaluating the most right parameter
  751. }
  752. if I <> taicpu(hp1).ops-1 then
  753. begin
  754. {The SUB operators need to be changed when we swap parameters}
  755. case taicpu(hp1).opcode of
  756. A_SUB: tempop:=A_RSB;
  757. A_SBC: tempop:=A_RSC;
  758. A_RSB: tempop:=A_SUB;
  759. A_RSC: tempop:=A_SBC;
  760. else tempop:=taicpu(hp1).opcode;
  761. end;
  762. if taicpu(hp1).ops = 3 then
  763. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  764. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  765. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  766. else
  767. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  768. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  769. taicpu(p).oper[2]^.shifterop^);
  770. end
  771. else
  772. if taicpu(hp1).ops = 3 then
  773. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  774. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  775. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  776. else
  777. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  778. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  779. taicpu(p).oper[2]^.shifterop^);
  780. asml.insertbefore(hp2, p);
  781. asml.remove(p);
  782. asml.remove(hp1);
  783. p.free;
  784. hp1.free;
  785. p:=hp2;
  786. GetNextInstruction(p,hp1);
  787. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  788. break;
  789. end;
  790. ReleaseUsedRegs(TmpUsedRegs);
  791. end;
  792. {
  793. Often we see shifts and then a superfluous mov to another register
  794. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  795. }
  796. if (taicpu(p).opcode = A_MOV) and
  797. GetNextInstruction(p, hp1) then
  798. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  799. end;
  800. A_ADD,
  801. A_ADC,
  802. A_RSB,
  803. A_RSC,
  804. A_SUB,
  805. A_SBC,
  806. A_AND,
  807. A_BIC,
  808. A_EOR,
  809. A_ORR,
  810. A_MLA,
  811. A_MUL:
  812. begin
  813. {
  814. change
  815. and reg2,reg1,const1
  816. and reg2,reg2,const2
  817. to
  818. and reg2,reg1,(const1 and const2)
  819. }
  820. if (taicpu(p).opcode = A_AND) and
  821. (taicpu(p).oper[1]^.typ = top_reg) and
  822. (taicpu(p).oper[2]^.typ = top_const) and
  823. GetNextInstruction(p, hp1) and
  824. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  825. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  826. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  827. (taicpu(hp1).oper[2]^.typ = top_const) then
  828. begin
  829. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  830. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  831. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  832. asml.remove(hp1);
  833. hp1.free;
  834. end;
  835. {
  836. change
  837. add reg1, ...
  838. mov reg2, reg1
  839. to
  840. add reg2, ...
  841. }
  842. if GetNextInstruction(p, hp1) then
  843. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  844. end;
  845. A_CMP:
  846. begin
  847. {
  848. change
  849. cmp reg,const1
  850. moveq reg,const1
  851. movne reg,const2
  852. to
  853. cmp reg,const1
  854. movne reg,const2
  855. }
  856. if (taicpu(p).oper[1]^.typ = top_const) and
  857. GetNextInstruction(p, hp1) and
  858. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  859. (taicpu(hp1).oper[1]^.typ = top_const) and
  860. GetNextInstruction(hp1, hp2) and
  861. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  862. (taicpu(hp1).oper[1]^.typ = top_const) then
  863. begin
  864. RemoveRedundantMove(p, hp1, asml);
  865. RemoveRedundantMove(p, hp2, asml);
  866. end;
  867. end;
  868. end;
  869. end;
  870. end;
  871. end;
  { Instructions modifying the CPSR can only be the last instruction.
    Returns true when p is an instruction that
      - has one of the call/SWI/compare/test opcodes listed below, or
      - carries the S postfix, or
      - has the PC register as its first operand.
    Anything that is not an instruction yields false. }
  function MustBeLast(p : tai) : boolean;
    const
      LastOnlyOpcodes = [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}];
    begin
      Result:=false;
      { only real instructions can qualify }
      if p.typ<>ait_instruction then
        exit;
      if taicpu(p).opcode in LastOnlyOpcodes then
        Result:=true
      else if taicpu(p).oppostfix=PF_S then
        Result:=true
      else
        { first operand is the PC register }
        Result:=(taicpu(p).ops>=1) and
                (taicpu(p).oper[0]^.typ=top_reg) and
                (taicpu(p).oper[0]^.reg=NR_PC);
    end;
  { Second peephole pass. Walks the list from BlockStart to BlockEnd and, for
    every branch A_B whose condition is not C_None, tries to get rid of the
    branch by giving the instructions behind it a condition instead.
    Two shapes are handled:
      - a run of one to four instructions followed by the branch target label
      - an if/else shape: a run of at most three instructions, an
        unconditional jump, the branch target label, a second run and the
        target label of the unconditional jump
    Only instructions accepted by CanBeCond take part in a run. }
  880. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  881. var
  882. p,hp1,hp2: tai;
  883. l : longint;
  884. condition : tasmcond;
  885. hp3: tai;
  886. WasLast: boolean;
  887. { UsedRegs, TmpUsedRegs: TRegSet; }
  888. begin
  889. p := BlockStart;
  890. { UsedRegs := []; }
  891. while (p <> BlockEnd) Do
  892. begin
  893. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  894. case p.Typ Of
  895. Ait_Instruction:
  896. begin
  897. case taicpu(p).opcode Of
  898. A_B:
  899. if taicpu(p).condition<>C_None then
  900. begin
  901. { check for
  902. Bxx xxx
  903. <several instructions>
  904. xxx:
  905. }
  { l counts the instructions scanned behind the branch, WasLast records
    that the scan was stopped by an instruction for which MustBeLast holds }
  906. l:=0;
  907. WasLast:=False;
  908. GetNextInstruction(p, hp1);
  909. while assigned(hp1) and
  910. (l<=4) and
  911. CanBeCond(hp1) and
  912. { stop on labels }
  913. not(hp1.typ=ait_label) do
  914. begin
  915. inc(l);
  916. if MustBeLast(hp1) then
  917. begin
  918. WasLast:=True;
  919. GetNextInstruction(hp1,hp1);
  920. break;
  921. end
  922. else
  923. GetNextInstruction(hp1,hp1);
  924. end;
  { hp1 is now the first item behind the scanned run }
  925. if assigned(hp1) then
  926. begin
  927. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  928. begin
  { the run ends at the branch target: convert if it has 1..4 instructions }
  929. if (l<=4) and (l>0) then
  930. begin
  { the skipped instructions get the inverse of the branch condition;
    hp2 keeps the branch, p moves on to the first skipped instruction }
  931. condition:=inverse_cond(taicpu(p).condition);
  932. hp2:=p;
  933. GetNextInstruction(p,hp1);
  934. p:=hp1;
  935. repeat
  936. if hp1.typ=ait_instruction then
  937. taicpu(hp1).condition:=condition;
  938. if MustBeLast(hp1) then
  939. begin
  940. GetNextInstruction(hp1,hp1);
  941. break;
  942. end
  943. else
  944. GetNextInstruction(hp1,hp1);
  945. until not(assigned(hp1)) or
  946. not(CanBeCond(hp1)) or
  947. (hp1.typ=ait_label);
  948. { wait with removing else GetNextInstruction could
  949. ignore the label if it was the only usage in the
  950. jump moved away }
  951. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  952. asml.remove(hp2);
  953. hp2.free;
  { p was already advanced above, so the p.next step at the loop end is skipped }
  954. continue;
  955. end;
  956. end
  957. else
  958. { do not perform further optimizations if there is an instruction
  959. in block #1 which can not be optimized.
  960. }
  961. if not WasLast then
  962. begin
  963. { check further for
  964. Bcc xxx
  965. <several instructions 1>
  966. B yyy
  967. xxx:
  968. <several instructions 2>
  969. yyy:
  970. }
  971. { hp2 points to jmp yyy }
  972. hp2:=hp1;
  973. { skip hp1 to xxx }
  974. GetNextInstruction(hp1, hp1);
  975. if assigned(hp2) and
  976. assigned(hp1) and
  977. (l<=3) and
  978. (hp2.typ=ait_instruction) and
  979. (taicpu(hp2).is_jmp) and
  980. (taicpu(hp2).condition=C_None) and
  981. { real label and jump, no further references to the
  982. label are allowed }
  983. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  984. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  985. begin
  { l is counted again for the second run but not checked afterwards }
  986. l:=0;
  987. { skip hp1 to <several moves 2> }
  988. GetNextInstruction(hp1, hp1);
  989. while assigned(hp1) and
  990. CanBeCond(hp1) do
  991. begin
  992. inc(l);
  993. GetNextInstruction(hp1, hp1);
  994. end;
  995. { hp1 points to yyy: }
  996. if assigned(hp1) and
  997. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  998. begin
  { first run: executed when the branch is not taken, so it gets the
    inverse condition; hp3 keeps the Bcc, p moves on to the first run }
  999. condition:=inverse_cond(taicpu(p).condition);
  1000. GetNextInstruction(p,hp1);
  1001. hp3:=p;
  1002. p:=hp1;
  1003. repeat
  1004. if hp1.typ=ait_instruction then
  1005. taicpu(hp1).condition:=condition;
  1006. GetNextInstruction(hp1,hp1);
  1007. until not(assigned(hp1)) or
  1008. not(CanBeCond(hp1));
  1009. { hp2 is still at jmp yyy }
  1010. GetNextInstruction(hp2,hp1);
  1011. { hp1 is now at xxx: }
  { second run: inverting again gives back the condition of the Bcc }
  1012. condition:=inverse_cond(condition);
  1013. GetNextInstruction(hp1,hp1);
  1014. { hp1 is now at <several movs 2> }
  1015. repeat
  1016. taicpu(hp1).condition:=condition;
  1017. GetNextInstruction(hp1,hp1);
  1018. until not(assigned(hp1)) or
  1019. not(CanBeCond(hp1)) or
  1020. (hp1.typ=ait_label);
  1021. {
  1022. asml.remove(hp1.next)
  1023. hp1.next.free;
  1024. asml.remove(hp1);
  1025. hp1.free;
  1026. }
  1027. { remove Bcc }
  1028. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1029. asml.remove(hp3);
  1030. hp3.free;
  1031. { remove jmp }
  1032. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1033. asml.remove(hp2);
  1034. hp2.free;
  { p was already advanced above, so the p.next step at the loop end is skipped }
  1035. continue;
  1036. end;
  1037. end;
  1038. end;
  1039. end;
  1040. end;
  1041. end;
  1042. end;
  1043. end;
  { nothing was converted at p: step to the next list item }
  1044. p := tai(p.next)
  1045. end;
  1046. end;
  { Tells whether Reg counts as being used by p1. A BL instruction is always
    reported as using the register, no matter which one is asked for; every
    other item is handed to the inherited implementation. }
  function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
    var
      IsBranchLink : Boolean;
    begin
      IsBranchLink:=(p1.typ=ait_instruction) and (taicpu(p1).opcode=A_BL);
      if not IsBranchLink then
        Result:=inherited RegInInstruction(Reg, p1)
      else
        Result:=true;
    end;
  const
    { opcodes which write to memory or which might do so }
    { TODO : extend armins.dat to contain r/w info }
    opcode_could_mem_write = [
      { branches, calls and breakpoint }
      A_B,A_BL,A_BLX,A_BX,A_BKPT,
      { integer stores }
      A_STR,A_STRB,A_STRBT,A_STRH,A_STRT,A_STM,
      { floating point stores }
      A_STF,A_SFM,A_FSTS,A_FSTD
    ];
  { Adjusts the register live information when the two instructions p and hp1
    are swapped; they must follow one after the other.

    p   : the instruction that comes first before the swap
    hp1 : the instruction directly following p before the swap

    A live range that starts at p is moved to start at hp1 and a live range
    that ends at hp1 is moved to end at p, each time only if the other
    instruction uses the register as well. }
  procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);

    { if the live range of reg ends at hp1 and p uses reg too, let it end at p }
    procedure CheckLiveEnd(reg : tregister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_end[supreg]=hp1) and
          RegInInstruction(reg,p) then
          cg.rg[regtype].live_end[supreg]:=p;
      end;

    { if the live range of reg starts at p and hp1 uses reg too, let it start at hp1 }
    procedure CheckLiveStart(reg : TRegister);
      var
        supreg : TSuperRegister;
        regtype : TRegisterType;
      begin
        if reg=NR_NO then
          exit;
        regtype:=getregtype(reg);
        supreg:=getsupreg(reg);
        if (cg.rg[regtype].live_start[supreg]=p) and
          RegInInstruction(reg,hp1) then
          cg.rg[regtype].live_start[supreg]:=hp1;
      end;

    var
      i : longint;
      r : TSuperRegister;
    begin
      { assumption: p is directly followed by hp1 }

      { if live of any reg used by p starts at p and hp1 uses this register then
        set live start to hp1 }
      for i:=0 to p.ops-1 do
        case p.oper[i]^.typ of
          Top_Reg:
            CheckLiveStart(p.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveStart(p.oper[i]^.ref^.base);
              CheckLiveStart(p.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            CheckLiveStart(p.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in p.oper[i]^.regset^ then
                CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;

      { if live of any reg used by hp1 ends at hp1 and p uses this register then
        set live end to p }
      for i:=0 to hp1.ops-1 do
        case hp1.oper[i]^.typ of
          Top_Reg:
            CheckLiveEnd(hp1.oper[i]^.reg);
          Top_Ref:
            begin
              CheckLiveEnd(hp1.oper[i]^.ref^.base);
              CheckLiveEnd(hp1.oper[i]^.ref^.index);
            end;
          Top_Shifterop:
            { the shift register is an operand of hp1 like the others, so it
              is the live end which has to be checked here; checking the live
              start left a range ending at hp1 untouched }
            CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
          Top_RegSet:
            for r:=RS_R0 to RS_R15 do
              if r in hp1.oper[i]^.regset^ then
                CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
        end;
    end;
  { Scheduler pass run over the block from BlockStart to BlockEnd.
    Looks for an instruction p directly followed by a load hp1
    (LDR/LDRB/LDRH/LDRSB/LDRSH) whose loaded register is used by the next
    instruction hp2. If the checks in the condition below hold, the load is
    moved in front of p, so that p ends up between the load and hp2.
    The value passed in p is not used: p is reset to BlockStart.
    The result is always true. }
  1130. function TCpuPreRegallocScheduler.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1131. { TODO : schedule also forward }
  1132. { TODO : schedule distance > 1 }
  1133. var
  1134. hp1,hp2,hp3,hp4,hp5 : tai;
  1135. list : TAsmList;
  1136. begin
  1137. result:=true;
  { list temporarily holds p and the register deallocations moved with it }
  1138. list:=TAsmList.Create;
  1139. p := BlockStart;
  1140. while (p <> BlockEnd) Do
  1141. begin
  1142. if (p.typ=ait_instruction) and
  1143. GetNextInstruction(p,hp1) and
  1144. (hp1.typ=ait_instruction) and
  1145. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1146. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  1147. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  1148. not(RegModifiedByInstruction(NR_PC,p))
  1149. ) or
  { a store in front of the load is accepted if the load is PC relative or
    refers to a symbol with offset 0 }
  1150. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  1151. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  1152. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  1153. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1154. )
  1155. ) or
  1156. { try to prove that the memory accesses don't overlap }
  1157. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1158. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1159. (taicpu(p).oppostfix=PF_None) and
  1160. (taicpu(hp1).oppostfix=PF_None) and
  1161. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1162. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1163. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  1164. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1165. )
  1166. )
  1167. ) and
  1168. GetNextInstruction(hp1,hp2) and
  1169. (hp2.typ=ait_instruction) and
  1170. { loaded register used by next instruction? }
  1171. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1172. { loaded register not used by previous instruction? }
  1173. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1174. { same condition? }
  1175. (taicpu(p).condition=taicpu(hp1).condition) and
  1176. { first instruction might not change the register used as base }
  1177. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1178. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1179. ) and
  1180. { first instruction might not change the register used as index }
  1181. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1182. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1183. ) then
  1184. begin
  { remember the neighbours of p before it is taken out of the list }
  1185. hp3:=tai(p.Previous);
  1186. hp5:=tai(p.next);
  1187. asml.Remove(p);
  1188. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1189. { before the instruction? }
  1190. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1191. begin
  1192. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1193. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1194. begin
  1195. hp4:=hp3;
  1196. hp3:=tai(hp3.Previous);
  1197. asml.Remove(hp4);
  1198. list.Concat(hp4);
  1199. end
  1200. else
  1201. hp3:=tai(hp3.Previous);
  1202. end;
  1203. list.Concat(p);
  1204. SwapRegLive(taicpu(p),taicpu(hp1));
  1205. { after the instruction? }
  1206. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1207. begin
  1208. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1209. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1210. begin
  1211. hp4:=hp5;
  1212. hp5:=tai(hp5.next);
  1213. asml.Remove(hp4);
  1214. list.Concat(hp4);
  1215. end
  1216. else
  1217. hp5:=tai(hp5.Next);
  1218. end;
  { put the load directly in front of hp2, then p and its deallocations
    between the load and hp2 }
  1219. asml.Remove(hp1);
  1220. {$ifdef DEBUG_PREREGSCHEDULER}
  1221. asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1222. {$endif DEBUG_PREREGSCHEDULER}
  1223. asml.InsertBefore(hp1,hp2);
  1224. asml.InsertListBefore(hp2,list);
  1225. p := tai(p.next)
  1226. end
  { no swap: continue at the instruction found behind p, or at the next list item }
  1227. else if p.typ=ait_instruction then
  1228. p:=hp1
  1229. else
  1230. p := tai(p.next);
  1231. end;
  1232. list.Free;
  1233. end;
  { Second peephole pass of TCpuThumb2AsmOptimizer; it does nothing so far. }
  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
    begin
      { TODO: Add optimizer code }
    end;
  begin
    { unit initialization: publish the optimizer and the scheduler class
      implemented by this unit through the global class variables }
    cpreregallocscheduler:=TCpuPreRegallocScheduler;
    casmoptimizer:=TCpuAsmOptimizer;
  end.