aoptcpu.pas

  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub, aoptobj;
  23. Type
  24. { TCpuAsmOptimizer }
  25. TCpuAsmOptimizer = class(TAsmOptimizer)
  26. { uses the same constructor as TAopObj }
  27. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  28. procedure PeepHoleOptPass2;override;
  29. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  30. procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  31. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  32. var AllUsedRegs: TAllUsedRegs): Boolean;
  33. End;
  34. { TCpuPreRegallocScheduler }
  35. TCpuPreRegallocScheduler = class(TAsmOptimizer)
  36. function PeepHoleOptPass1Cpu(var p: tai): boolean;override;
  37. procedure SwapRegLive(p, hp1: taicpu);
  38. end;
  39. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  40. { uses the same constructor as TAopObj }
  41. procedure PeepHoleOptPass2;override;
  42. End;
  43. Implementation
  44. uses
  45. cutils,verbose,globals,
  46. systems,
  47. cpuinfo,
  48. cgobj,cgutils,procinfo,
  49. aasmbase,aasmdata;
  50. function CanBeCond(p : tai) : boolean;
  51. begin
  52. result:=
  53. (p.typ=ait_instruction) and
  54. (taicpu(p).condition=C_None) and
  55. (taicpu(p).opcode<>A_PLD) and
  56. ((taicpu(p).opcode<>A_BLX) or
  57. (taicpu(p).oper[0]^.typ=top_reg));
  58. end;
  59. function RefsEqual(const r1, r2: treference): boolean;
  60. begin
  61. RefsEqual :=
  62. (r1.offset = r2.offset) and
  63. (r1.base = r2.base) and
  64. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  65. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  66. (r1.relsymbol = r2.relsymbol) and
  67. (r1.signindex = r2.signindex) and
  68. (r1.shiftimm = r2.shiftimm) and
  69. (r1.addressmode = r2.addressmode) and
  70. (r1.shiftmode = r2.shiftmode);
  71. end;
  72. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  73. begin
  74. result :=
  75. (instr.typ = ait_instruction) and
  76. (taicpu(instr).opcode = op) and
  77. ((cond = []) or (taicpu(instr).condition in cond)) and
  78. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  79. end;
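{ For illustration: a call such as
    MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None])
  accepts hp1 only if it is an LDR whose condition equals that of p (or is unset)
  and which carries no postfix; passing [] for either set disables that check. }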
  80. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  81. begin
  82. result := oper1.typ = oper2.typ;
  83. if result then
  84. case oper1.typ of
  85. top_const:
  86. Result:=oper1.val = oper2.val;
  87. top_reg:
  88. Result:=oper1.reg = oper2.reg;
  89. top_conditioncode:
  90. Result:=oper1.cc = oper2.cc;
  91. top_ref:
  92. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  93. else Result:=false;
  94. end
  95. end;
  96. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  97. begin
  98. result := (oper.typ = top_reg) and (oper.reg = reg);
  99. end;
  100. procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
  101. begin
  102. if (taicpu(movp).condition = C_EQ) and
  103. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  104. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  105. begin
  106. asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
  107. asml.remove(movp);
  108. movp.free;
  109. end;
  110. end;
  111. function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  112. var
  113. p: taicpu;
  114. begin
  115. p := taicpu(hp);
  116. regLoadedWithNewValue := false;
  117. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  118. exit;
  119. case p.opcode of
  120. { These opcodes do not write to a register at all }
  121. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
  122. exit;
  123. {Take care of pre/post-indexed stores and loads, they will change their base register}
  124. A_STR, A_LDR:
  125. regLoadedWithNewValue :=
  126. (taicpu(p).oper[1]^.typ=top_ref) and
  127. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  128. (taicpu(p).oper[1]^.ref^.base = reg);
  129. { These four write into the first two registers; UMLAL and SMLAL will also read from them }
  130. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  131. regLoadedWithNewValue :=
  132. (p.oper[1]^.typ = top_reg) and
  133. (p.oper[1]^.reg = reg);
  134. {Loads to oper2 from coprocessor}
  135. {
  136. MCR/MRC is currently not supported in FPC
  137. A_MRC:
  138. regLoadedWithNewValue :=
  139. (p.oper[2]^.typ = top_reg) and
  140. (p.oper[2]^.reg = reg);
  141. }
  142. {Loads into all registers in the register set}
  143. A_LDM:
  144. regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
  145. end;
  146. if regLoadedWithNewValue then
  147. exit;
  148. case p.oper[0]^.typ of
  149. {The common case: oper[0] is the destination register}
  150. top_reg:
  151. regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
  152. { LDRD }
  153. (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
  154. {LDM/STM with writeback might write a new value to their base register}
  155. top_ref:
  156. regLoadedWithNewValue :=
  157. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  158. (taicpu(p).oper[0]^.ref^.base = reg);
  159. end;
  160. end;
  161. function AlignedToQWord(const ref : treference) : boolean;
  162. begin
  163. { (safe) heuristics to ensure alignment }
  164. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  165. (((ref.offset>=0) and
  166. ((ref.offset mod 8)=0) and
  167. ((ref.base=NR_R13) or
  168. (ref.index=NR_R13))
  169. ) or
  170. ((ref.offset<=0) and
  171. { when using NR_R11, it always has a value of <qword align>+4 }
  172. ((abs(ref.offset+4) mod 8)=0) and
  173. (current_procinfo.framepointer=NR_R11) and
  174. ((ref.base=NR_R11) or
  175. (ref.index=NR_R11))
  176. )
  177. );
  178. end;
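{ Illustrative examples (offsets chosen arbitrarily): a reference like [r13,#8] passes,
  since for the ABIs checked above the stack pointer is assumed to stay 8-byte aligned;
  [r11,#-4] passes when r11 is the frame pointer, because it is assumed to point to
  <qword align>+4 as noted above. }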
  179. function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  180. var
  181. p: taicpu;
  182. i: longint;
  183. begin
  184. instructionLoadsFromReg := false;
  185. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  186. exit;
  187. p:=taicpu(hp);
  188. i:=1;
  189. {For these instructions we have to start on oper[0]}
  190. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  191. A_CMP, A_CMN, A_TST, A_TEQ,
  192. A_B, A_BL, A_BX, A_BLX,
  193. A_SMLAL, A_UMLAL]) then i:=0;
  194. while(i<p.ops) do
  195. begin
  196. case p.oper[I]^.typ of
  197. top_reg:
  198. instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
  199. { STRD }
  200. ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
  201. top_regset:
  202. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  203. top_shifterop:
  204. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  205. top_ref:
  206. instructionLoadsFromReg :=
  207. (p.oper[I]^.ref^.base = reg) or
  208. (p.oper[I]^.ref^.index = reg);
  209. end;
  210. if instructionLoadsFromReg then exit; {Bail out if we found something}
  211. Inc(I);
  212. end;
  213. end;
  214. function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
  215. var AllUsedRegs: TAllUsedRegs): Boolean;
  216. begin
  217. AllUsedRegs[getregtype(reg)].Update(tai(p.Next));
  218. RegUsedAfterInstruction :=
  219. AllUsedRegs[getregtype(reg)].IsUsed(reg) and
  220. not(regLoadedWithNewValue(reg,p)) and
  221. (
  222. not(GetNextInstruction(p,p)) or
  223. instructionLoadsFromReg(reg,p) or
  224. not(regLoadedWithNewValue(reg,p))
  225. );
  226. end;
  227. procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  228. var
  229. TmpUsedRegs: TAllUsedRegs;
  230. begin
  231. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  232. (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
  233. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  234. {MUL and MLA have a special requirement: oper[0] and oper[1] are not allowed to be the same}
  235. not (
  236. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  237. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
  238. ) then
  239. begin
  240. CopyUsedRegs(TmpUsedRegs);
  241. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  242. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,movp,TmpUsedRegs)) then
  243. begin
  244. asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
  245. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  246. asml.remove(movp);
  247. movp.free;
  248. end;
  249. ReleaseUsedRegs(TmpUsedRegs);
  250. end;
  251. end;
  252. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  253. var
  254. hp1,hp2: tai;
  255. i, i2: longint;
  256. TmpUsedRegs: TAllUsedRegs;
  257. tempop: tasmop;
  258. function IsPowerOf2(const value: DWord): boolean; inline;
  259. begin
  260. Result:=(value and (value - 1)) = 0;
  261. end;
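{ Note: this test also accepts 0; that is harmless here because the only caller
  below passes (value and mask)+1, which is always at least 1. }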
  262. begin
  263. result := false;
  264. case p.typ of
  265. ait_instruction:
  266. begin
  267. (* optimization proved not to be safe, see tw4768.pp
  268. {
  269. change
  270. <op> reg,x,y
  271. cmp reg,#0
  272. into
  273. <op>s reg,x,y
  274. }
  275. { this optimization can be applied only to the currently enabled operations because
  276. the other operations do not update all flags and FPC does not track flag usage }
  277. if (taicpu(p).opcode in [A_ADC,A_ADD,A_SUB {A_UDIV,A_SDIV,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND}]) and
  278. (taicpu(p).oppostfix = PF_None) and
  279. (taicpu(p).condition = C_None) and
  280. GetNextInstruction(p, hp1) and
  281. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  282. (taicpu(hp1).oper[1]^.typ = top_const) and
  283. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  284. (taicpu(hp1).oper[1]^.val = 0) { and
  285. GetNextInstruction(hp1, hp2) and
  286. (tai(hp2).typ = ait_instruction) and
  287. // be careful here, following instructions could use other flags
  288. // however after a jump fpc never depends on the value of flags
  289. (taicpu(hp2).opcode = A_B) and
  290. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])} then
  291. begin
  292. taicpu(p).oppostfix:=PF_S;
  293. asml.remove(hp1);
  294. hp1.free;
  295. end
  296. else
  297. *)
  298. case taicpu(p).opcode of
  299. A_STR:
  300. begin
  301. { change
  302. str reg1,ref
  303. ldr reg2,ref
  304. into
  305. str reg1,ref
  306. mov reg2,reg1
  307. }
  308. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  309. (taicpu(p).oppostfix=PF_None) and
  310. GetNextInstruction(p,hp1) and
  311. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  312. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  313. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  314. begin
  315. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  316. begin
  317. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  318. asml.remove(hp1);
  319. hp1.free;
  320. end
  321. else
  322. begin
  323. taicpu(hp1).opcode:=A_MOV;
  324. taicpu(hp1).oppostfix:=PF_None;
  325. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  326. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  327. end;
  328. result := true;
  329. end
  330. { change
  331. str reg1,ref
  332. str reg2,ref
  333. into
  334. strd reg1,ref
  335. }
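{ For illustration (registers and offsets chosen arbitrarily):
    str r4,[r13,#8]
    str r5,[r13,#12]
  can become a single doubleword store (str with the D postfix) of r4, provided
  r4/r5 form an even/odd pair, both references use the same base and index, the
  offsets differ by exactly 4 and the address is known to be 8-byte aligned. }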
  336. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  337. (taicpu(p).oppostfix=PF_None) and
  338. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  339. GetNextInstruction(p,hp1) and
  340. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  341. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  342. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  343. { str ensures that either base or index contains no register, else str
  344. would not use an immediate offset
  345. }
  346. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  347. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  348. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  349. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  350. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  351. begin
  352. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  353. taicpu(p).oppostfix:=PF_D;
  354. asml.remove(hp1);
  355. hp1.free;
  356. end;
  357. end;
  358. A_LDR:
  359. begin
  360. { change
  361. ldr reg1,ref
  362. ldr reg2,ref
  363. into ...
  364. }
  365. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  366. GetNextInstruction(p,hp1) and
  367. { ldrd is not allowed here }
  368. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  369. begin
  370. {
  371. ...
  372. ldr reg1,ref
  373. mov reg2,reg1
  374. }
  375. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  376. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  377. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  378. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  379. begin
  380. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  381. begin
  382. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  383. asml.remove(hp1);
  384. hp1.free;
  385. end
  386. else
  387. begin
  388. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  389. taicpu(hp1).opcode:=A_MOV;
  390. taicpu(hp1).oppostfix:=PF_None;
  391. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  392. end;
  393. result := true;
  394. end
  395. {
  396. ...
  397. ldrd reg1,ref
  398. }
  399. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  400. { ldrd does not allow any postfixes ... }
  401. (taicpu(p).oppostfix=PF_None) and
  402. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  403. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
  404. { ldr ensures that either base or index contains no register, else ldr
  405. would not use an immediate offset
  406. }
  407. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  408. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  409. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  410. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  411. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  412. begin
  413. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  414. taicpu(p).oppostfix:=PF_D;
  415. asml.remove(hp1);
  416. hp1.free;
  417. end;
  418. end;
  419. { Remove superfluous mov after ldr
  420. changes
  421. ldr reg1, ref
  422. mov reg2, reg1
  423. to
  424. ldr reg2, ref
  425. conditions are:
  426. * no ldrd usage
  427. * reg1 must be released after mov
  428. * mov cannot contain shifterops
  429. * ldr+mov have the same conditions
  430. * mov does not set flags
  431. }
  432. if (taicpu(p).oppostfix<>PF_D) and GetNextInstruction(p, hp1) then
  433. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  434. end;
  435. A_MOV:
  436. begin
  437. { fold
  438. mov reg1,reg0, shift imm1
  439. mov reg1,reg1, shift imm2
  440. }
  441. if (taicpu(p).ops=3) and
  442. (taicpu(p).oper[2]^.typ = top_shifterop) and
  443. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  444. getnextinstruction(p,hp1) and
  445. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  446. (taicpu(hp1).ops=3) and
  447. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  448. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  449. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  450. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  451. begin
  452. { fold
  453. mov reg1,reg0, lsl 16
  454. mov reg1,reg1, lsr 16
  455. strh reg1, ...
  456. dealloc reg1
  457. to
  458. strh reg1, ...
  459. dealloc reg1
  460. }
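{ The lsl #16 / lsr #16 (or asr #16) pair merely zero- or sign-extends the low
  16 bits, which is irrelevant here because strh stores only the low halfword;
  the store can therefore read the original source register directly. }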
  461. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  462. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  463. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  464. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  465. getnextinstruction(hp1,hp2) and
  466. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  467. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  468. begin
  469. CopyUsedRegs(TmpUsedRegs);
  470. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  471. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  472. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  473. begin
  474. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  475. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  476. asml.remove(p);
  477. asml.remove(hp1);
  478. p.free;
  479. hp1.free;
  480. p:=hp2;
  481. end;
  482. ReleaseUsedRegs(TmpUsedRegs);
  483. end
  484. { fold
  485. mov reg1,reg0, shift imm1
  486. mov reg1,reg1, shift imm2
  487. to
  488. mov reg1,reg0, shift imm1+imm2
  489. }
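{ For example, mov r0,r1,lsl #2 followed by mov r0,r0,lsl #3 becomes
  mov r0,r1,lsl #5 (registers chosen arbitrarily). If the combined amount
  exceeds 31, the code below wraps it for ror, clamps it to 31 for asr and
  replaces the pair by mov r0,#0 for lsl/lsr. }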
  490. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  491. { an asr after an lsr is pointless (the sign bit is already zero), so the asr can be folded into the lsr }
  492. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  493. begin
  494. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  495. { avoid overflows }
  496. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  497. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  498. SM_ROR:
  499. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  500. SM_ASR:
  501. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  502. SM_LSR,
  503. SM_LSL:
  504. begin
  505. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  506. InsertLLItem(p.previous, p.next, hp1);
  507. p.free;
  508. p:=hp1;
  509. end;
  510. else
  511. internalerror(2008072803);
  512. end;
  513. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  514. asml.remove(hp1);
  515. hp1.free;
  516. result := true;
  517. end
  518. { fold
  519. mov reg1,reg0, shift imm1
  520. mov reg1,reg1, shift imm2
  521. mov reg1,reg1, shift imm3 ...
  522. }
  523. else if getnextinstruction(hp1,hp2) and
  524. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  525. (taicpu(hp2).ops=3) and
  526. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  527. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  528. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  529. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  530. begin
  531. { mov reg1,reg0, lsl imm1
  532. mov reg1,reg1, lsr/asr imm2
  533. mov reg1,reg1, lsl imm3 ...
  534. if imm3<=imm1 and imm2>=imm3
  535. to
  536. mov reg1,reg0, lsl imm1
  537. mov reg1,reg1, lsr/asr imm2-imm3
  538. }
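{ For example (registers chosen arbitrarily):
    mov r0,r1,lsl #24
    mov r0,r0,lsr #16
    mov r0,r0,lsl #8
  becomes
    mov r0,r1,lsl #24
    mov r0,r0,lsr #8
  since the final lsl #8 merely undoes part of the lsr #16. }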
  539. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  540. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  541. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  542. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  543. begin
  544. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  545. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  546. asml.remove(hp2);
  547. hp2.free;
  548. result := true;
  549. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  550. begin
  551. asml.remove(hp1);
  552. hp1.free;
  553. end;
  554. end
  555. { mov reg1,reg0, lsr/asr imm1
  556. mov reg1,reg1, lsl imm2
  557. mov reg1,reg1, lsr/asr imm3 ...
  558. if imm3>=imm1 and imm2>=imm1
  559. to
  560. mov reg1,reg0, lsl imm2-imm1
  561. mov reg1,reg1, lsr/asr imm3 ...
  562. }
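{ For example (registers chosen arbitrarily):
    mov r0,r1,lsr #8
    mov r0,r0,lsl #16
    mov r0,r0,lsr #24
  becomes
    mov r0,r1,lsl #8
    mov r0,r0,lsr #24
  because the initial lsr #8 can be absorbed into the following lsl. }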
  563. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  564. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  565. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  566. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  567. begin
  568. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  569. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  570. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  571. asml.remove(p);
  572. p.free;
  573. p:=hp2;
  574. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  575. begin
  576. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  577. asml.remove(hp1);
  578. hp1.free;
  579. p:=hp2;
  580. end;
  581. result := true;
  582. end;
  583. end;
  584. end;
  585. { Change the common
  586. mov r0, r0, lsr #24
  587. and r0, r0, #255
  588. and remove the superfluous and
  589. This could be extended to handle more cases.
  590. }
  591. if (taicpu(p).ops=3) and
  592. (taicpu(p).oper[2]^.typ = top_shifterop) and
  593. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  594. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  595. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  596. getnextinstruction(p,hp1) and
  597. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  598. (taicpu(hp1).ops=3) and
  599. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  600. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  601. (taicpu(hp1).oper[2]^.typ = top_const) and
  602. { Check if the AND actually would only mask out bits being already zero because of the shift
  603. For LSR #25 and an AndConst of 255 that would go like this:
  604. 255 and ((2 shl (32-25))-1)
  605. which results in 255, one less than a power of two, meaning all lower bits are set.
  606. LSR #25 and AndConst of 254:
  607. 254 and ((2 shl (32-25))-1) = 254 -> lowest bit is clear, so we can't remove the and.
  608. }
  609. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  610. begin
  611. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  612. asml.remove(hp1);
  613. hp1.free;
  614. end;
  615. {
  616. optimize
  617. mov rX, yyyy
  618. ....
  619. }
  620. if (taicpu(p).ops = 2) and
  621. GetNextInstruction(p,hp1) and
  622. (tai(hp1).typ = ait_instruction) then
  623. begin
  624. {
  625. This changes the very common
  626. mov r0, #0
  627. str r0, [...]
  628. mov r0, #0
  629. str r0, [...]
  630. and removes all superfluous mov instructions
  631. }
  632. if (taicpu(p).oper[1]^.typ = top_const) and
  633. (taicpu(hp1).opcode=A_STR) then
  634. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  635. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  636. GetNextInstruction(hp1, hp2) and
  637. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  638. (taicpu(hp2).ops = 2) and
  639. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  640. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  641. begin
  642. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  643. GetNextInstruction(hp2,hp1);
  644. asml.remove(hp2);
  645. hp2.free;
  646. if not assigned(hp1) then break;
  647. end
  648. {
  649. This removes the first mov from
  650. mov rX,...
  651. mov rX,...
  652. }
  653. else if taicpu(hp1).opcode=A_MOV then
  654. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  655. (taicpu(hp1).ops = 2) and
  656. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) do
  657. begin
  658. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  659. asml.remove(p);
  660. p.free;
  661. p:=hp1;
  662. GetNextInstruction(hp1,hp1);
  663. if not assigned(hp1) then
  664. break;
  665. end;
  666. end;
  667. {
  668. change
  669. mov r1, r0
  670. add r1, r1, #1
  671. to
  672. add r1, r0, #1
  673. Todo: Make it work for mov+cmp too
  674. CAUTION! If this one is successful p might not be a mov instruction anymore!
  675. }
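{ Besides the example above, the two-pass loop below also handles the copied
  register appearing as the second source operand, e.g. (registers arbitrary)
  mov r1,r0 / add r1,r2,r1 becomes add r1,r2,r0. }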
  676. if (taicpu(p).ops = 2) and
  677. (taicpu(p).oper[1]^.typ = top_reg) and
  678. (taicpu(p).oppostfix = PF_NONE) and
  679. GetNextInstruction(p, hp1) and
  680. (tai(hp1).typ = ait_instruction) and
  681. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  682. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  683. {MOV and MVN may have only 2 ops}
  684. (taicpu(hp1).ops = 3) and
  685. (taicpu(hp1).condition in [C_NONE, taicpu(p).condition]) and
  686. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  687. (taicpu(hp1).oper[1]^.typ = top_reg) and
  688. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  689. begin
  690. { When we get here we still don't know if the registers match}
  691. for I:=1 to 2 do
  692. {
  693. If the first iteration was successful, p will have been replaced by hp1.
  694. The checks still hold, because all required information
  695. is then in hp1 as well.
  696. }
  697. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  698. begin
  699. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  700. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  701. if p<>hp1 then
  702. begin
  703. asml.remove(p);
  704. p.free;
  705. p:=hp1;
  706. end;
  707. end;
  708. end;
  709. { This folds shifterops into following instructions
  710. mov r0, r1, lsl #8
  711. add r2, r3, r0
  712. to
  713. add r2, r3, r1, lsl #8
  714. CAUTION! If this one is successful p might not be a mov instruction anymore!
  715. }
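{ If the shifted register matches the left-hand source operand instead of the
  right-hand one, the two source operands are swapped below so that the shifter
  operand ends up in the flexible second position; sub/rsb and sbc/rsc are
  exchanged at the same time to preserve the semantics. }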
  716. if (taicpu(p).opcode = A_MOV) and
  717. (taicpu(p).ops = 3) and
  718. (taicpu(p).oper[1]^.typ = top_reg) and
  719. (taicpu(p).oper[2]^.typ = top_shifterop) and
  720. (taicpu(p).oppostfix = PF_NONE) and
  721. GetNextInstruction(p, hp1) and
  722. (tai(hp1).typ = ait_instruction) and
  723. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  724. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  725. (taicpu(hp1).oppostfix = PF_NONE) and
  726. (taicpu(hp1).condition = taicpu(p).condition) and
  727. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  728. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  729. A_CMP, A_CMN]) and
  730. (
  731. {Only ONE of the two src operands is allowed to match}
  732. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  733. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  734. ) then
  735. begin
  736. CopyUsedRegs(TmpUsedRegs);
  737. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  738. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  739. I2:=0
  740. else
  741. I2:=1;
  742. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  743. for I:=I2 to taicpu(hp1).ops-1 do
  744. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  745. begin
  746. { If the parameter matched on the second operand from the RIGHT
  747. we have to swap the parameters; this will not happen for CMP,
  748. where we only evaluate the rightmost parameter
  749. }
  750. if I <> taicpu(hp1).ops-1 then
  751. begin
  752. {The SUB operators need to be changed when we swap parameters}
  753. case taicpu(hp1).opcode of
  754. A_SUB: tempop:=A_RSB;
  755. A_SBC: tempop:=A_RSC;
  756. A_RSB: tempop:=A_SUB;
  757. A_RSC: tempop:=A_SBC;
  758. else tempop:=taicpu(hp1).opcode;
  759. end;
  760. if taicpu(hp1).ops = 3 then
  761. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  762. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  763. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  764. else
  765. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  766. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  767. taicpu(p).oper[2]^.shifterop^);
  768. end
  769. else
  770. if taicpu(hp1).ops = 3 then
  771. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  772. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  773. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  774. else
  775. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  776. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  777. taicpu(p).oper[2]^.shifterop^);
  778. asml.insertbefore(hp2, p);
  779. asml.remove(p);
  780. asml.remove(hp1);
  781. p.free;
  782. hp1.free;
  783. p:=hp2;
  784. GetNextInstruction(p,hp1);
  785. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  786. break;
  787. end;
  788. ReleaseUsedRegs(TmpUsedRegs);
  789. end;
  790. {
  791. Often we see shifts followed by a superfluous mov to another register.
  792. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  793. }
  794. if (taicpu(p).opcode = A_MOV) and
  795. GetNextInstruction(p, hp1) then
  796. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  797. end;
  798. A_ADD,
  799. A_ADC,
  800. A_RSB,
  801. A_RSC,
  802. A_SUB,
  803. A_SBC,
  804. A_AND,
  805. A_BIC,
  806. A_EOR,
  807. A_ORR,
  808. A_MLA,
  809. A_MUL:
  810. begin
  811. {
  812. change
  813. and reg2,reg1,const1
  814. and reg2,reg2,const2
  815. to
  816. and reg2,reg1,(const1 and const2)
  817. }
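{ For example (registers chosen arbitrarily), and r2,r1,#255 followed by
  and r2,r2,#15 becomes and r2,r1,#15, i.e. the two masks are simply anded. }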
  818. if (taicpu(p).opcode = A_AND) and
  819. (taicpu(p).oper[1]^.typ = top_reg) and
  820. (taicpu(p).oper[2]^.typ = top_const) and
  821. GetNextInstruction(p, hp1) and
  822. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  823. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  824. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  825. (taicpu(hp1).oper[2]^.typ = top_const) then
  826. begin
  827. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  828. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  829. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  830. asml.remove(hp1);
  831. hp1.free;
  832. end;
  833. {
  834. change
  835. add reg1, ...
  836. mov reg2, reg1
  837. to
  838. add reg2, ...
  839. }
  840. if GetNextInstruction(p, hp1) then
  841. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  842. end;
  843. A_CMP:
  844. begin
  845. {
  846. change
  847. cmp reg,const1
  848. moveq reg,const1
  849. movne reg,const2
  850. to
  851. cmp reg,const1
  852. movne reg,const2
  853. }
  854. if (taicpu(p).oper[1]^.typ = top_const) and
  855. GetNextInstruction(p, hp1) and
  856. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  857. (taicpu(hp1).oper[1]^.typ = top_const) and
  858. GetNextInstruction(hp1, hp2) and
  859. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  860. (taicpu(hp1).oper[1]^.typ = top_const) then
  861. begin
  862. RemoveRedundantMove(p, hp1, asml);
  863. RemoveRedundantMove(p, hp2, asml);
  864. end;
  865. end;
  866. end;
  867. end;
  868. end;
  869. end;
  870. { an instruction that modifies the CPSR may only be the last one in a conditionalised block }
  871. function MustBeLast(p : tai) : boolean;
  872. begin
  873. Result:=(p.typ=ait_instruction) and
  874. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  875. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  876. (taicpu(p).oppostfix=PF_S));
  877. end;
  878. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  879. var
  880. p,hp1,hp2: tai;
  881. l : longint;
  882. condition : tasmcond;
  883. hp3: tai;
  884. WasLast: boolean;
  885. { UsedRegs, TmpUsedRegs: TRegSet; }
  886. begin
  887. p := BlockStart;
  888. { UsedRegs := []; }
  889. while (p <> BlockEnd) Do
  890. begin
  891. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  892. case p.Typ Of
  893. Ait_Instruction:
  894. begin
  895. case taicpu(p).opcode Of
  896. A_B:
  897. if taicpu(p).condition<>C_None then
  898. begin
  899. { check for
  900. Bxx xxx
  901. <several instructions>
  902. xxx:
  903. }
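{ For illustration, with the inverse condition applied to at most 4 instructions:
    beq xxx
    add r0,r0,#1
    xxx:
  becomes
    addne r0,r0,#1
  and the branch is removed (register chosen arbitrarily). }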
  904. l:=0;
  905. WasLast:=False;
  906. GetNextInstruction(p, hp1);
  907. while assigned(hp1) and
  908. (l<=4) and
  909. CanBeCond(hp1) and
  910. { stop on labels }
  911. not(hp1.typ=ait_label) do
  912. begin
  913. inc(l);
  914. if MustBeLast(hp1) then
  915. begin
  916. WasLast:=True;
  917. GetNextInstruction(hp1,hp1);
  918. break;
  919. end
  920. else
  921. GetNextInstruction(hp1,hp1);
  922. end;
  923. if assigned(hp1) then
  924. begin
  925. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  926. begin
  927. if (l<=4) and (l>0) then
  928. begin
  929. condition:=inverse_cond(taicpu(p).condition);
  930. hp2:=p;
  931. GetNextInstruction(p,hp1);
  932. p:=hp1;
  933. repeat
  934. if hp1.typ=ait_instruction then
  935. taicpu(hp1).condition:=condition;
  936. if MustBeLast(hp1) then
  937. begin
  938. GetNextInstruction(hp1,hp1);
  939. break;
  940. end
  941. else
  942. GetNextInstruction(hp1,hp1);
  943. until not(assigned(hp1)) or
  944. not(CanBeCond(hp1)) or
  945. (hp1.typ=ait_label);
  946. { postpone the removal, else GetNextInstruction could
  947. skip the label if the removed jump was
  948. its only reference }
  949. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  950. asml.remove(hp2);
  951. hp2.free;
  952. continue;
  953. end;
  954. end
  955. else
  956. { do not perform further optimizations if there is an instruction
  957. in block #1 which cannot be optimized.
  958. }
  959. if not WasLast then
  960. begin
  961. { check further for
  962. Bcc xxx
  963. <several instructions 1>
  964. B yyy
  965. xxx:
  966. <several instructions 2>
  967. yyy:
  968. }
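{ For illustration (registers chosen arbitrarily):
    beq xxx
    mov r0,#1
    b yyy
    xxx:
    mov r0,#0
    yyy:
  becomes
    movne r0,#1
    moveq r0,#0
  with both branches removed. }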
  969. { hp2 points to jmp yyy }
  970. hp2:=hp1;
  971. { skip hp1 to xxx }
  972. GetNextInstruction(hp1, hp1);
  973. if assigned(hp2) and
  974. assigned(hp1) and
  975. (l<=3) and
  976. (hp2.typ=ait_instruction) and
  977. (taicpu(hp2).is_jmp) and
  978. (taicpu(hp2).condition=C_None) and
  979. { real label and jump, no further references to the
  980. label are allowed }
  981. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  982. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  983. begin
  984. l:=0;
  985. { skip hp1 to <several moves 2> }
  986. GetNextInstruction(hp1, hp1);
  987. while assigned(hp1) and
  988. CanBeCond(hp1) do
  989. begin
  990. inc(l);
  991. GetNextInstruction(hp1, hp1);
  992. end;
  993. { hp1 points to yyy: }
  994. if assigned(hp1) and
  995. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  996. begin
  997. condition:=inverse_cond(taicpu(p).condition);
  998. GetNextInstruction(p,hp1);
  999. hp3:=p;
  1000. p:=hp1;
  1001. repeat
  1002. if hp1.typ=ait_instruction then
  1003. taicpu(hp1).condition:=condition;
  1004. GetNextInstruction(hp1,hp1);
  1005. until not(assigned(hp1)) or
  1006. not(CanBeCond(hp1));
  1007. { hp2 is still at jmp yyy }
  1008. GetNextInstruction(hp2,hp1);
  1009. { hp1 is now at xxx: }
  1010. condition:=inverse_cond(condition);
  1011. GetNextInstruction(hp1,hp1);
  1012. { hp1 is now at <several movs 2> }
  1013. repeat
  1014. taicpu(hp1).condition:=condition;
  1015. GetNextInstruction(hp1,hp1);
  1016. until not(assigned(hp1)) or
  1017. not(CanBeCond(hp1)) or
  1018. (hp1.typ=ait_label);
  1019. {
  1020. asml.remove(hp1.next)
  1021. hp1.next.free;
  1022. asml.remove(hp1);
  1023. hp1.free;
  1024. }
  1025. { remove Bcc }
  1026. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1027. asml.remove(hp3);
  1028. hp3.free;
  1029. { remove jmp }
  1030. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1031. asml.remove(hp2);
  1032. hp2.free;
  1033. continue;
  1034. end;
  1035. end;
  1036. end;
  1037. end;
  1038. end;
  1039. end;
  1040. end;
  1041. end;
  1042. p := tai(p.next)
  1043. end;
  1044. end;
  1045. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  1046. begin
  1047. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  1048. Result:=true
  1049. else
  1050. Result:=inherited RegInInstruction(Reg, p1);
  1051. end;
  1052. const
  1053. { set of opcodes which might or do write to memory }
  1054. { TODO : extend armins.dat to contain r/w info }
  1055. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  1056. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  1057. { adjust the register live information when swapping the two instructions p and hp1;
  1058. they must immediately follow each other }
  1059. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  1060. procedure CheckLiveEnd(reg : tregister);
  1061. var
  1062. supreg : TSuperRegister;
  1063. regtype : TRegisterType;
  1064. begin
  1065. if reg=NR_NO then
  1066. exit;
  1067. regtype:=getregtype(reg);
  1068. supreg:=getsupreg(reg);
  1069. if (cg.rg[regtype].live_end[supreg]=hp1) and
  1070. RegInInstruction(reg,p) then
  1071. cg.rg[regtype].live_end[supreg]:=p;
  1072. end;
  1073. procedure CheckLiveStart(reg : TRegister);
  1074. var
  1075. supreg : TSuperRegister;
  1076. regtype : TRegisterType;
  1077. begin
  1078. if reg=NR_NO then
  1079. exit;
  1080. regtype:=getregtype(reg);
  1081. supreg:=getsupreg(reg);
  1082. if (cg.rg[regtype].live_start[supreg]=p) and
  1083. RegInInstruction(reg,hp1) then
  1084. cg.rg[regtype].live_start[supreg]:=hp1;
  1085. end;
  1086. var
  1087. i : longint;
  1088. r : TSuperRegister;
  1089. begin
  1090. { assumption: p is directly followed by hp1 }
  1091. { if the live range of any reg used by p starts at p and hp1 uses this register,
  1092. then move the live start to hp1 }
  1093. for i:=0 to p.ops-1 do
  1094. case p.oper[i]^.typ of
  1095. Top_Reg:
  1096. CheckLiveStart(p.oper[i]^.reg);
  1097. Top_Ref:
  1098. begin
  1099. CheckLiveStart(p.oper[i]^.ref^.base);
  1100. CheckLiveStart(p.oper[i]^.ref^.index);
  1101. end;
  1102. Top_Shifterop:
  1103. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  1104. Top_RegSet:
  1105. for r:=RS_R0 to RS_R15 do
  1106. if r in p.oper[i]^.regset^ then
  1107. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1108. end;
  1109. { if the live range of any reg used by hp1 ends at hp1 and p uses this register,
  1110. then move the live end to p }
  1111. for i:=0 to hp1.ops-1 do
  1112. case hp1.oper[i]^.typ of
  1113. Top_Reg:
  1114. CheckLiveEnd(hp1.oper[i]^.reg);
  1115. Top_Ref:
  1116. begin
  1117. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  1118. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  1119. end;
  1120. Top_Shifterop:
  1121. CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
  1122. Top_RegSet:
  1123. for r:=RS_R0 to RS_R15 do
  1124. if r in hp1.oper[i]^.regset^ then
  1125. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1126. end;
  1127. end;
  1128. function TCpuPreRegallocScheduler.PeepHoleOptPass1Cpu(var p: tai): boolean;
  1129. { TODO : schedule also forward }
  1130. { TODO : schedule distance > 1 }
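{ Roughly, the scheduler hoists a load over the immediately preceding instruction
  when the loaded value is needed right away, so that the load latency can be
  hidden, e.g. (registers chosen arbitrarily):
    add r1,r2,r3
    ldr r0,[r4]
    add r5,r0,#1
  becomes
    ldr r0,[r4]
    add r1,r2,r3
    add r5,r0,#1 }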
  1131. var
  1132. hp1,hp2,hp3,hp4,hp5 : tai;
  1133. list : TAsmList;
  1134. begin
  1135. result:=true;
  1136. list:=TAsmList.Create;
  1137. p := BlockStart;
  1138. { UsedRegs := []; }
  1139. while (p <> BlockEnd) Do
  1140. begin
  1141. if (p.typ=ait_instruction) and
  1142. GetNextInstruction(p,hp1) and
  1143. (hp1.typ=ait_instruction) and
  1144. { for now we don't reschedule if the previous instruction potentially changes a memory location }
  1145. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  1146. not(RegModifiedByInstruction(NR_PC,p)) and
  1147. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH])
  1148. ) or
  1149. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  1150. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1151. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  1152. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  1153. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1154. )
  1155. ) or
  1156. { try to prove that the memory accesses don't overlap }
  1157. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1158. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1159. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1160. (taicpu(p).oppostfix=PF_None) and
  1161. (taicpu(hp1).oppostfix=PF_None) and
  1162. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1163. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1164. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  1165. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1166. )
  1167. )
  1168. ) and
  1169. GetNextInstruction(hp1,hp2) and
  1170. (hp2.typ=ait_instruction) and
  1171. { loaded register used by next instruction? }
  1172. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1173. { loaded register not used by previous instruction? }
  1174. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1175. { same condition? }
  1176. (taicpu(p).condition=taicpu(hp1).condition) and
  1177. { the first instruction must not change the register used as base }
  1178. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1179. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1180. ) and
  1181. { the first instruction must not change the register used as index }
  1182. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1183. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1184. ) then
  1185. begin
  1186. hp3:=tai(p.Previous);
  1187. hp5:=tai(p.next);
  1188. asml.Remove(p);
  1189. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1190. { before the instruction? }
  1191. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1192. begin
  1193. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1194. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1195. begin
  1196. hp4:=hp3;
  1197. hp3:=tai(hp3.Previous);
  1198. asml.Remove(hp4);
  1199. list.Concat(hp4);
  1200. end
  1201. else
  1202. hp3:=tai(hp3.Previous);
  1203. end;
  1204. list.Concat(p);
  1205. SwapRegLive(taicpu(p),taicpu(hp1));
  1206. { after the instruction? }
  1207. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1208. begin
  1209. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1210. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1211. begin
  1212. hp4:=hp5;
  1213. hp5:=tai(hp5.next);
  1214. asml.Remove(hp4);
  1215. list.Concat(hp4);
  1216. end
  1217. else
  1218. hp5:=tai(hp5.Next);
  1219. end;
  1220. asml.Remove(hp1);
  1221. {$ifdef DEBUG_PREREGSCHEDULER}
  1222. asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1223. {$endif DEBUG_PREREGSCHEDULER}
  1224. asml.InsertBefore(hp1,hp2);
  1225. asml.InsertListBefore(hp2,list);
  1226. end;
  1227. p := tai(p.next)
  1228. end;
  1229. list.Free;
  1230. end;
  1231. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  1232. begin
  1233. { TODO: Add optimizer code }
  1234. end;
  1235. begin
  1236. casmoptimizer:=TCpuAsmOptimizer;
  1237. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1238. End.