  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. {$define DEBUG_PREREGSCHEDULER}
  21. Interface
  22. uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
  23. Type
  24. TCpuAsmOptimizer = class(TAsmOptimizer)
  25. { uses the same constructor as TAopObj }
  26. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  27. procedure PeepHoleOptPass2;override;
  28. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  29. procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  30. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  31. var AllUsedRegs: TAllUsedRegs): Boolean;
  32. End;
  33. TCpuPreRegallocScheduler = class(TAsmScheduler)
  34. function SchedulerPass1Cpu(var p: tai): boolean;override;
  35. procedure SwapRegLive(p, hp1: taicpu);
  36. end;
  37. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  38. { uses the same constructor as TAopObj }
  39. procedure PeepHoleOptPass2;override;
  40. End;
  41. Implementation
  42. uses
  43. cutils,verbose,globals,
  44. systems,
  45. cpuinfo,
  46. cgobj,cgutils,procinfo,
  47. aasmbase,aasmdata;
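{ Returns true if p is an instruction that can still be made conditional:
  it must not already carry a condition, and PLD as well as immediate BLX
  are excluded because they cannot be executed conditionally. }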
  48. function CanBeCond(p : tai) : boolean;
  49. begin
  50. result:=
  51. (p.typ=ait_instruction) and
  52. (taicpu(p).condition=C_None) and
  53. (taicpu(p).opcode<>A_PLD) and
  54. ((taicpu(p).opcode<>A_BLX) or
  55. (taicpu(p).oper[0]^.typ=top_reg));
  56. end;
  57. function RefsEqual(const r1, r2: treference): boolean;
  58. begin
  59. refsequal :=
  60. (r1.offset = r2.offset) and
  61. (r1.base = r2.base) and
  62. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  63. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  64. (r1.relsymbol = r2.relsymbol) and
  65. (r1.signindex = r2.signindex) and
  66. (r1.shiftimm = r2.shiftimm) and
  67. (r1.addressmode = r2.addressmode) and
  68. (r1.shiftmode = r2.shiftmode);
  69. end;
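{ Returns true if instr is an instruction with opcode op whose condition is
  in cond and whose postfix is in postfix; an empty set matches any
  condition resp. any postfix. }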
  70. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  71. begin
  72. result :=
  73. (instr.typ = ait_instruction) and
  74. (taicpu(instr).opcode = op) and
  75. ((cond = []) or (taicpu(instr).condition in cond)) and
  76. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  77. end;
  78. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  79. begin
  80. result := oper1.typ = oper2.typ;
  81. if result then
  82. case oper1.typ of
  83. top_const:
  84. Result:=oper1.val = oper2.val;
  85. top_reg:
  86. Result:=oper1.reg = oper2.reg;
  87. top_conditioncode:
  88. Result:=oper1.cc = oper2.cc;
  89. top_ref:
  90. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  91. else Result:=false;
  92. end
  93. end;
  94. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  95. begin
  96. result := (oper.typ = top_reg) and (oper.reg = reg);
  97. end;
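{ Removes a moveq that only re-materialises the value the preceding cmp
  already proved to be in the register, e.g. (illustrative):
    cmp   r0,#42
    moveq r0,#42    <- redundant, r0 is known to hold 42 when EQ
  becomes
    cmp   r0,#42
}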
  98. procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
  99. begin
  100. if (taicpu(movp).condition = C_EQ) and
  101. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  102. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  103. begin
  104. asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
  105. asml.remove(movp);
  106. movp.free;
  107. end;
  108. end;
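{ Returns true if the instruction hp overwrites reg with a new value, i.e.
  reg is a destination of hp (including the second destination of LDRD,
  writeback of the base register and registers loaded by LDM). }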
  109. function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  110. var
  111. p: taicpu;
  112. begin
  113. p := taicpu(hp);
  114. regLoadedWithNewValue := false;
  115. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  116. exit;
  117. case p.opcode of
118. { These opcodes do not write into a register at all }
  119. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
  120. exit;
121. {Take care of pre-/post-indexed stores and loads, they will change their base register}
  122. A_STR, A_LDR:
  123. regLoadedWithNewValue :=
  124. (taicpu(p).oper[1]^.typ=top_ref) and
  125. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  126. (taicpu(p).oper[1]^.ref^.base = reg);
127. { These four write into the first two registers; UMLAL and SMLAL will also read from them }
  128. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  129. regLoadedWithNewValue :=
  130. (p.oper[1]^.typ = top_reg) and
  131. (p.oper[1]^.reg = reg);
  132. {Loads to oper2 from coprocessor}
  133. {
  134. MCR/MRC is currently not supported in FPC
  135. A_MRC:
  136. regLoadedWithNewValue :=
  137. (p.oper[2]^.typ = top_reg) and
  138. (p.oper[2]^.reg = reg);
  139. }
140. {Loads to all registers in the register set}
  141. A_LDM:
  142. regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
  143. end;
  144. if regLoadedWithNewValue then
  145. exit;
  146. case p.oper[0]^.typ of
147. {The common case: oper[0] is the destination register}
  148. top_reg:
  149. regLoadedWithNewValue := (p.oper[0]^.reg = reg) or
  150. { LDRD }
  151. (p.opcode=A_LDR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg));
152. {LDM/STM might write a new value to their base register}
  153. top_ref:
  154. regLoadedWithNewValue :=
  155. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  156. (taicpu(p).oper[0]^.ref^.base = reg);
  157. end;
  158. end;
  159. function AlignedToQWord(const ref : treference) : boolean;
  160. begin
  161. { (safe) heuristics to ensure alignment }
  162. result:=(target_info.abi in [abi_eabi,abi_armeb,abi_eabihf]) and
  163. (((ref.offset>=0) and
  164. ((ref.offset mod 8)=0) and
  165. ((ref.base=NR_R13) or
  166. (ref.index=NR_R13))
  167. ) or
  168. ((ref.offset<=0) and
169. { when using NR_R11, it always has a value of <qword align>+4 }
  170. ((abs(ref.offset+4) mod 8)=0) and
  171. (current_procinfo.framepointer=NR_R11) and
  172. ((ref.base=NR_R11) or
  173. (ref.index=NR_R11))
  174. )
  175. );
  176. end;
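{ Returns true if the instruction hp reads reg: as a source operand, as the
  second source register of STRD, as a shifter register, as base/index of a
  reference, or as a member of a register set. }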
  177. function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  178. var
  179. p: taicpu;
  180. i: longint;
  181. begin
  182. instructionLoadsFromReg := false;
  183. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  184. exit;
  185. p:=taicpu(hp);
  186. i:=1;
  187. {For these instructions we have to start on oper[0]}
  188. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  189. A_CMP, A_CMN, A_TST, A_TEQ,
  190. A_B, A_BL, A_BX, A_BLX,
  191. A_SMLAL, A_UMLAL]) then i:=0;
  192. while(i<p.ops) do
  193. begin
  194. case p.oper[I]^.typ of
  195. top_reg:
  196. instructionLoadsFromReg := (p.oper[I]^.reg = reg) or
  197. { STRD }
  198. ((i=0) and (p.opcode=A_STR) and (p.oppostfix=PF_D) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
  199. top_regset:
  200. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  201. top_shifterop:
  202. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  203. top_ref:
  204. instructionLoadsFromReg :=
  205. (p.oper[I]^.ref^.base = reg) or
  206. (p.oper[I]^.ref^.index = reg);
  207. end;
208. if instructionLoadsFromReg then exit; {Bail out if we found something}
  209. Inc(I);
  210. end;
  211. end;
  212. function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
  213. var AllUsedRegs: TAllUsedRegs): Boolean;
  214. begin
  215. AllUsedRegs[getregtype(reg)].Update(tai(p.Next));
  216. RegUsedAfterInstruction :=
  217. AllUsedRegs[getregtype(reg)].IsUsed(reg) and
  218. not(regLoadedWithNewValue(reg,p)) and
  219. (
  220. not(GetNextInstruction(p,p)) or
  221. instructionLoadsFromReg(reg,p) or
  222. not(regLoadedWithNewValue(reg,p))
  223. );
  224. end;
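{ If the instruction p is followed by a plain mov that only copies p's
  result into another register and the original result dies there, the mov
  is removed and p writes to the mov's destination directly,
  e.g. (illustrative):
    add r1,r2,r3
    mov r0,r1       <- r1 not used afterwards
  becomes
    add r0,r2,r3
}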
  225. procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  226. var
  227. TmpUsedRegs: TAllUsedRegs;
  228. begin
  229. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  230. (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
  231. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  232. {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
  233. not (
  234. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  235. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
  236. ) then
  237. begin
  238. CopyUsedRegs(TmpUsedRegs);
  239. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  240. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,movp,TmpUsedRegs)) then
  241. begin
  242. asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
  243. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  244. asml.remove(movp);
  245. movp.free;
  246. end;
  247. ReleaseUsedRegs(TmpUsedRegs);
  248. end;
  249. end;
  250. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  251. var
  252. hp1,hp2: tai;
  253. i, i2: longint;
  254. TmpUsedRegs: TAllUsedRegs;
  255. tempop: tasmop;
  256. function IsPowerOf2(const value: DWord): boolean; inline;
  257. begin
  258. Result:=(value and (value - 1)) = 0;
  259. end;
  260. begin
  261. result := false;
  262. case p.typ of
  263. ait_instruction:
  264. begin
  265. (* optimization proved not to be safe, see tw4768.pp
  266. {
  267. change
  268. <op> reg,x,y
  269. cmp reg,#0
  270. into
  271. <op>s reg,x,y
  272. }
273. { this optimization can be applied only to the currently enabled operations because
  274. the other operations do not update all flags and FPC does not track flag usage }
  275. if (taicpu(p).opcode in [A_ADC,A_ADD,A_SUB {A_UDIV,A_SDIV,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND}]) and
  276. (taicpu(p).oppostfix = PF_None) and
  277. (taicpu(p).condition = C_None) and
  278. GetNextInstruction(p, hp1) and
  279. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  280. (taicpu(hp1).oper[1]^.typ = top_const) and
  281. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  282. (taicpu(hp1).oper[1]^.val = 0) { and
  283. GetNextInstruction(hp1, hp2) and
  284. (tai(hp2).typ = ait_instruction) and
  285. // be careful here, following instructions could use other flags
  286. // however after a jump fpc never depends on the value of flags
  287. (taicpu(hp2).opcode = A_B) and
  288. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])} then
  289. begin
  290. taicpu(p).oppostfix:=PF_S;
  291. asml.remove(hp1);
  292. hp1.free;
  293. end
  294. else
  295. *)
  296. case taicpu(p).opcode of
  297. A_STR:
  298. begin
  299. { change
  300. str reg1,ref
  301. ldr reg2,ref
  302. into
  303. str reg1,ref
  304. mov reg2,reg1
  305. }
  306. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  307. (taicpu(p).oppostfix=PF_None) and
  308. GetNextInstruction(p,hp1) and
  309. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  310. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  311. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  312. begin
  313. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  314. begin
  315. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  316. asml.remove(hp1);
  317. hp1.free;
  318. end
  319. else
  320. begin
  321. taicpu(hp1).opcode:=A_MOV;
  322. taicpu(hp1).oppostfix:=PF_None;
  323. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  324. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  325. end;
  326. result := true;
  327. end
328. { change
329. str reg1,ref
330. str reg2,ref+4
331. into
332. strd reg1,ref
333. }
  334. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  335. (taicpu(p).oppostfix=PF_None) and
  336. (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  337. GetNextInstruction(p,hp1) and
  338. MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
  339. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  340. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
341. { str ensures that either base or index contains no register, else str wouldn't
342. use an offset either
  343. }
  344. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  345. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  346. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  347. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  348. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  349. begin
  350. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrStr2Strd done')), p);
  351. taicpu(p).oppostfix:=PF_D;
  352. asml.remove(hp1);
  353. hp1.free;
  354. end;
  355. end;
  356. A_LDR:
  357. begin
  358. { change
  359. ldr reg1,ref
  360. ldr reg2,ref
  361. into ...
  362. }
  363. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  364. GetNextInstruction(p,hp1) and
  365. { ldrd is not allowed here }
  366. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
  367. begin
  368. {
  369. ...
  370. ldr reg1,ref
  371. mov reg2,reg1
  372. }
  373. if RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  374. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  375. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  376. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  377. begin
  378. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  379. begin
  380. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  381. asml.remove(hp1);
  382. hp1.free;
  383. end
  384. else
  385. begin
  386. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  387. taicpu(hp1).opcode:=A_MOV;
  388. taicpu(hp1).oppostfix:=PF_None;
  389. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  390. end;
  391. result := true;
  392. end
  393. {
  394. ...
  395. ldrd reg1,ref
  396. }
  397. else if (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
  398. { ldrd does not allow any postfixes ... }
  399. (taicpu(p).oppostfix=PF_None) and
  400. not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
  401. (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
402. { ldr ensures that either base or index contains no register, else ldr wouldn't
  403. use an offset either
  404. }
  405. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  406. (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
  407. (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
  408. (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
  409. AlignedToQWord(taicpu(p).oper[1]^.ref^) then
  410. begin
  411. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldrd done')), p);
  412. taicpu(p).oppostfix:=PF_D;
  413. asml.remove(hp1);
  414. hp1.free;
  415. end;
  416. end;
  417. { Remove superfluous mov after ldr
  418. changes
  419. ldr reg1, ref
  420. mov reg2, reg1
  421. to
  422. ldr reg2, ref
  423. conditions are:
  424. * no ldrd usage
  425. * reg1 must be released after mov
  426. * mov can not contain shifterops
  427. * ldr+mov have the same conditions
  428. * mov does not set flags
  429. }
  430. if (taicpu(p).oppostfix<>PF_D) and GetNextInstruction(p, hp1) then
  431. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  432. end;
  433. A_MOV:
  434. begin
  435. { fold
  436. mov reg1,reg0, shift imm1
  437. mov reg1,reg1, shift imm2
  438. }
  439. if (taicpu(p).ops=3) and
  440. (taicpu(p).oper[2]^.typ = top_shifterop) and
  441. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  442. getnextinstruction(p,hp1) and
  443. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  444. (taicpu(hp1).ops=3) and
  445. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  446. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  447. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  448. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  449. begin
  450. { fold
  451. mov reg1,reg0, lsl 16
  452. mov reg1,reg1, lsr 16
  453. strh reg1, ...
  454. dealloc reg1
  455. to
  456. strh reg1, ...
  457. dealloc reg1
  458. }
  459. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  460. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  461. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  462. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  463. getnextinstruction(hp1,hp2) and
  464. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  465. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  466. begin
  467. CopyUsedRegs(TmpUsedRegs);
  468. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  469. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  470. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  471. begin
  472. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  473. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  474. asml.remove(p);
  475. asml.remove(hp1);
  476. p.free;
  477. hp1.free;
  478. p:=hp2;
  479. end;
  480. ReleaseUsedRegs(TmpUsedRegs);
  481. end
  482. { fold
  483. mov reg1,reg0, shift imm1
  484. mov reg1,reg1, shift imm2
  485. to
  486. mov reg1,reg0, shift imm1+imm2
  487. }
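{ e.g. (illustrative): mov r0,r1,lsl #3 followed by mov r0,r0,lsl #5
  becomes mov r0,r1,lsl #8 }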
  488. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
489. { an asr directly after an lsr behaves like an lsr, so the asr can be folded into the lsr }
  490. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  491. begin
  492. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  493. { avoid overflows }
  494. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  495. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  496. SM_ROR:
  497. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  498. SM_ASR:
  499. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  500. SM_LSR,
  501. SM_LSL:
  502. begin
  503. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  504. InsertLLItem(p.previous, p.next, hp1);
  505. p.free;
  506. p:=hp1;
  507. end;
  508. else
  509. internalerror(2008072803);
  510. end;
  511. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  512. asml.remove(hp1);
  513. hp1.free;
  514. result := true;
  515. end
  516. { fold
  517. mov reg1,reg0, shift imm1
  518. mov reg1,reg1, shift imm2
  519. mov reg1,reg1, shift imm3 ...
  520. }
  521. else if getnextinstruction(hp1,hp2) and
  522. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  523. (taicpu(hp2).ops=3) and
  524. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  525. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  526. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  527. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  528. begin
  529. { mov reg1,reg0, lsl imm1
  530. mov reg1,reg1, lsr/asr imm2
  531. mov reg1,reg1, lsl imm3 ...
  532. if imm3<=imm1 and imm2>=imm3
  533. to
  534. mov reg1,reg0, lsl imm1
  535. mov reg1,reg1, lsr/asr imm2-imm3
  536. }
  537. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  538. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  539. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  540. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  541. begin
  542. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  543. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  544. asml.remove(hp2);
  545. hp2.free;
  546. result := true;
  547. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  548. begin
  549. asml.remove(hp1);
  550. hp1.free;
  551. end;
  552. end
  553. { mov reg1,reg0, lsr/asr imm1
  554. mov reg1,reg1, lsl imm2
  555. mov reg1,reg1, lsr/asr imm3 ...
  556. if imm3>=imm1 and imm2>=imm1
  557. to
  558. mov reg1,reg0, lsl imm2-imm1
  559. mov reg1,reg1, lsr/asr imm3 ...
  560. }
  561. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  562. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  563. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  564. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  565. begin
  566. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  567. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  568. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  569. asml.remove(p);
  570. p.free;
  571. p:=hp2;
  572. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  573. begin
  574. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  575. asml.remove(hp1);
  576. hp1.free;
  577. p:=hp2;
  578. end;
  579. result := true;
  580. end;
  581. end;
  582. end;
  583. { Change the common
  584. mov r0, r0, lsr #24
  585. and r0, r0, #255
  586. and remove the superfluous and
  587. This could be extended to handle more cases.
  588. }
  589. if (taicpu(p).ops=3) and
  590. (taicpu(p).oper[2]^.typ = top_shifterop) and
  591. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  592. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  593. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  594. getnextinstruction(p,hp1) and
  595. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  596. (taicpu(hp1).ops=3) and
  597. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  598. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  599. (taicpu(hp1).oper[2]^.typ = top_const) and
600. { Check if the AND would actually only mask out bits that are already zero because of the shift
601. For LSR #25 and an AndConst of 255 that would go like this:
602. 255 and ((2 shl (32-25))-1)
603. which results in 127, which is one less than a power of 2, meaning all lower bits are set.
  604. LSR #25 and AndConst of 254:
  605. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  606. }
  607. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  608. begin
  609. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  610. asml.remove(hp1);
  611. hp1.free;
  612. end;
  613. {
  614. optimize
  615. mov rX, yyyy
  616. ....
  617. }
  618. if (taicpu(p).ops = 2) and
  619. GetNextInstruction(p,hp1) and
  620. (tai(hp1).typ = ait_instruction) then
  621. begin
  622. {
  623. This changes the very common
  624. mov r0, #0
  625. str r0, [...]
  626. mov r0, #0
  627. str r0, [...]
  628. and removes all superfluous mov instructions
  629. }
  630. if (taicpu(p).oper[1]^.typ = top_const) and
  631. (taicpu(hp1).opcode=A_STR) then
  632. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], [PF_None]) and
  633. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  634. GetNextInstruction(hp1, hp2) and
  635. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  636. (taicpu(hp2).ops = 2) and
  637. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  638. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  639. begin
  640. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  641. GetNextInstruction(hp2,hp1);
  642. asml.remove(hp2);
  643. hp2.free;
  644. if not assigned(hp1) then break;
  645. end
  646. {
  647. This removes the first mov from
  648. mov rX,...
  649. mov rX,...
  650. }
  651. else if taicpu(hp1).opcode=A_MOV then
  652. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  653. (taicpu(hp1).ops = 2) and
  654. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  655. { don't remove the first mov if the second is a mov rX,rX }
  656. not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
  657. begin
  658. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  659. asml.remove(p);
  660. p.free;
  661. p:=hp1;
  662. GetNextInstruction(hp1,hp1);
  663. if not assigned(hp1) then
  664. break;
  665. end;
  666. end;
  667. {
  668. change
  669. mov r1, r0
  670. add r1, r1, #1
  671. to
  672. add r1, r0, #1
  673. Todo: Make it work for mov+cmp too
  674. CAUTION! If this one is successful p might not be a mov instruction anymore!
  675. }
  676. if (taicpu(p).ops = 2) and
  677. (taicpu(p).oper[1]^.typ = top_reg) and
  678. (taicpu(p).oppostfix = PF_NONE) and
  679. GetNextInstruction(p, hp1) and
  680. (tai(hp1).typ = ait_instruction) and
  681. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  682. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  683. {MOV and MVN might only have 2 ops}
  684. (taicpu(hp1).ops = 3) and
685. (taicpu(hp1).condition in [C_NONE, taicpu(p).condition]) and
  686. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  687. (taicpu(hp1).oper[1]^.typ = top_reg) and
  688. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  689. begin
  690. { When we get here we still don't know if the registers match}
  691. for I:=1 to 2 do
  692. {
  693. If the first loop was successful p will be replaced with hp1.
  694. The checks will still be ok, because all required information
  695. will also be in hp1 then.
  696. }
  697. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  698. begin
  699. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  700. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  701. if p<>hp1 then
  702. begin
  703. asml.remove(p);
  704. p.free;
  705. p:=hp1;
  706. end;
  707. end;
  708. end;
  709. { This folds shifterops into following instructions
  710. mov r0, r1, lsl #8
  711. add r2, r3, r0
  712. to
  713. add r2, r3, r1, lsl #8
  714. CAUTION! If this one is successful p might not be a mov instruction anymore!
  715. }
  716. if (taicpu(p).opcode = A_MOV) and
  717. (taicpu(p).ops = 3) and
  718. (taicpu(p).oper[1]^.typ = top_reg) and
  719. (taicpu(p).oper[2]^.typ = top_shifterop) and
  720. (taicpu(p).oppostfix = PF_NONE) and
  721. GetNextInstruction(p, hp1) and
  722. (tai(hp1).typ = ait_instruction) and
  723. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  724. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  725. (taicpu(hp1).oppostfix = PF_NONE) and
  726. (taicpu(hp1).condition = taicpu(p).condition) and
  727. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  728. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  729. A_CMP, A_CMN]) and
  730. (
  731. {Only ONE of the two src operands is allowed to match}
  732. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  733. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  734. ) then
  735. begin
  736. CopyUsedRegs(TmpUsedRegs);
  737. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  738. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  739. I2:=0
  740. else
  741. I2:=1;
  742. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  743. for I:=I2 to taicpu(hp1).ops-1 do
  744. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  745. begin
746. { If the parameter matched on the second op from the RIGHT
747. we have to switch the parameters; this will not happen for CMP
748. where we're only evaluating the rightmost parameter
  749. }
  750. if I <> taicpu(hp1).ops-1 then
  751. begin
  752. {The SUB operators need to be changed when we swap parameters}
  753. case taicpu(hp1).opcode of
  754. A_SUB: tempop:=A_RSB;
  755. A_SBC: tempop:=A_RSC;
  756. A_RSB: tempop:=A_SUB;
  757. A_RSC: tempop:=A_SBC;
  758. else tempop:=taicpu(hp1).opcode;
  759. end;
  760. if taicpu(hp1).ops = 3 then
  761. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  762. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  763. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  764. else
  765. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  766. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  767. taicpu(p).oper[2]^.shifterop^);
  768. end
  769. else
  770. if taicpu(hp1).ops = 3 then
  771. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  772. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  773. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  774. else
  775. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  776. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  777. taicpu(p).oper[2]^.shifterop^);
  778. asml.insertbefore(hp2, p);
  779. asml.remove(p);
  780. asml.remove(hp1);
  781. p.free;
  782. hp1.free;
  783. p:=hp2;
  784. GetNextInstruction(p,hp1);
  785. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  786. break;
  787. end;
  788. ReleaseUsedRegs(TmpUsedRegs);
  789. end;
  790. {
  791. Often we see shifts and then a superfluous mov to another register
  792. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  793. }
  794. if (taicpu(p).opcode = A_MOV) and
  795. GetNextInstruction(p, hp1) then
  796. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  797. end;
  798. A_ADD,
  799. A_ADC,
  800. A_RSB,
  801. A_RSC,
  802. A_SUB,
  803. A_SBC,
  804. A_AND,
  805. A_BIC,
  806. A_EOR,
  807. A_ORR,
  808. A_MLA,
  809. A_MUL:
  810. begin
  811. {
  812. change
  813. and reg2,reg1,const1
  814. and reg2,reg2,const2
  815. to
  816. and reg2,reg1,(const1 and const2)
  817. }
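{ e.g. (illustrative): and r2,r1,#255 followed by and r2,r2,#15
  becomes and r2,r1,#15, because 255 and 15 = 15 }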
  818. if (taicpu(p).opcode = A_AND) and
  819. (taicpu(p).oper[1]^.typ = top_reg) and
  820. (taicpu(p).oper[2]^.typ = top_const) and
  821. GetNextInstruction(p, hp1) and
  822. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  823. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  824. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  825. (taicpu(hp1).oper[2]^.typ = top_const) then
  826. begin
  827. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  828. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  829. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  830. asml.remove(hp1);
  831. hp1.free;
  832. end;
  833. {
  834. change
  835. add reg1, ...
  836. mov reg2, reg1
  837. to
  838. add reg2, ...
  839. }
  840. if GetNextInstruction(p, hp1) then
  841. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  842. end;
  843. A_CMP:
  844. begin
  845. {
  846. change
  847. cmp reg,const1
  848. moveq reg,const1
  849. movne reg,const2
  850. to
  851. cmp reg,const1
  852. movne reg,const2
  853. }
  854. if (taicpu(p).oper[1]^.typ = top_const) and
  855. GetNextInstruction(p, hp1) and
  856. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  857. (taicpu(hp1).oper[1]^.typ = top_const) and
  858. GetNextInstruction(hp1, hp2) and
  859. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
860. (taicpu(hp2).oper[1]^.typ = top_const) then
  861. begin
  862. RemoveRedundantMove(p, hp1, asml);
  863. RemoveRedundantMove(p, hp2, asml);
  864. end;
  865. end;
  866. end;
  867. end;
  868. end;
  869. end;
870. { instructions modifying the CPSR can only be the last instruction }
  871. function MustBeLast(p : tai) : boolean;
  872. begin
  873. Result:=(p.typ=ait_instruction) and
  874. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  875. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  876. (taicpu(p).oppostfix=PF_S));
  877. end;
  878. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  879. var
  880. p,hp1,hp2: tai;
  881. l : longint;
  882. condition : tasmcond;
  883. hp3: tai;
  884. WasLast: boolean;
  885. { UsedRegs, TmpUsedRegs: TRegSet; }
  886. begin
  887. p := BlockStart;
  888. { UsedRegs := []; }
  889. while (p <> BlockEnd) Do
  890. begin
  891. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  892. case p.Typ Of
  893. Ait_Instruction:
  894. begin
  895. case taicpu(p).opcode Of
  896. A_B:
  897. if taicpu(p).condition<>C_None then
  898. begin
  899. { check for
  900. Bxx xxx
  901. <several instructions>
  902. xxx:
  903. }
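{ e.g. (illustrative): a short conditionally skipped block like
    bne .Lskip
    mov r0,#1
    add r1,r1,#1
  .Lskip:
  is turned into conditionally executed instructions:
    moveq r0,#1
    addeq r1,r1,#1
  (.Lskip is a hypothetical label name) }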
  904. l:=0;
  905. WasLast:=False;
  906. GetNextInstruction(p, hp1);
  907. while assigned(hp1) and
  908. (l<=4) and
  909. CanBeCond(hp1) and
  910. { stop on labels }
  911. not(hp1.typ=ait_label) do
  912. begin
  913. inc(l);
  914. if MustBeLast(hp1) then
  915. begin
  916. WasLast:=True;
  917. GetNextInstruction(hp1,hp1);
  918. break;
  919. end
  920. else
  921. GetNextInstruction(hp1,hp1);
  922. end;
  923. if assigned(hp1) then
  924. begin
  925. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  926. begin
  927. if (l<=4) and (l>0) then
  928. begin
  929. condition:=inverse_cond(taicpu(p).condition);
  930. hp2:=p;
  931. GetNextInstruction(p,hp1);
  932. p:=hp1;
  933. repeat
  934. if hp1.typ=ait_instruction then
  935. taicpu(hp1).condition:=condition;
  936. if MustBeLast(hp1) then
  937. begin
  938. GetNextInstruction(hp1,hp1);
  939. break;
  940. end
  941. else
  942. GetNextInstruction(hp1,hp1);
  943. until not(assigned(hp1)) or
  944. not(CanBeCond(hp1)) or
  945. (hp1.typ=ait_label);
  946. { wait with removing else GetNextInstruction could
  947. ignore the label if it was the only usage in the
  948. jump moved away }
  949. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  950. asml.remove(hp2);
  951. hp2.free;
  952. continue;
  953. end;
  954. end
  955. else
956. { do not perform further optimizations if there is an instruction
957. in block #1 which cannot be optimized.
  958. }
  959. if not WasLast then
  960. begin
  961. { check further for
  962. Bcc xxx
  963. <several instructions 1>
  964. B yyy
  965. xxx:
  966. <several instructions 2>
  967. yyy:
  968. }
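{ e.g. (illustrative): an if/else built from two branches such as
    beq .Lelse
    mov r0,#1
    b   .Lend
  .Lelse:
    mov r0,#0
  .Lend:
  becomes
    movne r0,#1
    moveq r0,#0
  (.Lelse/.Lend are hypothetical label names) }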
  969. { hp2 points to jmp yyy }
  970. hp2:=hp1;
  971. { skip hp1 to xxx }
  972. GetNextInstruction(hp1, hp1);
  973. if assigned(hp2) and
  974. assigned(hp1) and
  975. (l<=3) and
  976. (hp2.typ=ait_instruction) and
  977. (taicpu(hp2).is_jmp) and
  978. (taicpu(hp2).condition=C_None) and
  979. { real label and jump, no further references to the
  980. label are allowed }
  981. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  982. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  983. begin
  984. l:=0;
  985. { skip hp1 to <several moves 2> }
  986. GetNextInstruction(hp1, hp1);
  987. while assigned(hp1) and
  988. CanBeCond(hp1) do
  989. begin
  990. inc(l);
  991. GetNextInstruction(hp1, hp1);
  992. end;
  993. { hp1 points to yyy: }
  994. if assigned(hp1) and
  995. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  996. begin
  997. condition:=inverse_cond(taicpu(p).condition);
  998. GetNextInstruction(p,hp1);
  999. hp3:=p;
  1000. p:=hp1;
  1001. repeat
  1002. if hp1.typ=ait_instruction then
  1003. taicpu(hp1).condition:=condition;
  1004. GetNextInstruction(hp1,hp1);
  1005. until not(assigned(hp1)) or
  1006. not(CanBeCond(hp1));
  1007. { hp2 is still at jmp yyy }
  1008. GetNextInstruction(hp2,hp1);
  1009. { hp2 is now at xxx: }
  1010. condition:=inverse_cond(condition);
  1011. GetNextInstruction(hp1,hp1);
  1012. { hp1 is now at <several movs 2> }
  1013. repeat
  1014. taicpu(hp1).condition:=condition;
  1015. GetNextInstruction(hp1,hp1);
  1016. until not(assigned(hp1)) or
  1017. not(CanBeCond(hp1)) or
  1018. (hp1.typ=ait_label);
  1019. {
  1020. asml.remove(hp1.next)
  1021. hp1.next.free;
  1022. asml.remove(hp1);
  1023. hp1.free;
  1024. }
  1025. { remove Bcc }
  1026. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1027. asml.remove(hp3);
  1028. hp3.free;
  1029. { remove jmp }
  1030. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1031. asml.remove(hp2);
  1032. hp2.free;
  1033. continue;
  1034. end;
  1035. end;
  1036. end;
  1037. end;
  1038. end;
  1039. end;
  1040. end;
  1041. end;
  1042. p := tai(p.next)
  1043. end;
  1044. end;
  1045. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  1046. begin
  1047. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  1048. Result:=true
  1049. else
  1050. Result:=inherited RegInInstruction(Reg, p1);
  1051. end;
  1052. const
1053. { set of opcodes which might or do write to memory }
  1054. { TODO : extend armins.dat to contain r/w info }
  1055. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  1056. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  1057. { adjust the register live information when swapping the two instructions p and hp1,
  1058. they must follow one after the other }
  1059. procedure TCpuPreRegallocScheduler.SwapRegLive(p,hp1 : taicpu);
  1060. procedure CheckLiveEnd(reg : tregister);
  1061. var
  1062. supreg : TSuperRegister;
  1063. regtype : TRegisterType;
  1064. begin
  1065. if reg=NR_NO then
  1066. exit;
  1067. regtype:=getregtype(reg);
  1068. supreg:=getsupreg(reg);
  1069. if (cg.rg[regtype].live_end[supreg]=hp1) and
  1070. RegInInstruction(reg,p) then
  1071. cg.rg[regtype].live_end[supreg]:=p;
  1072. end;
  1073. procedure CheckLiveStart(reg : TRegister);
  1074. var
  1075. supreg : TSuperRegister;
  1076. regtype : TRegisterType;
  1077. begin
  1078. if reg=NR_NO then
  1079. exit;
  1080. regtype:=getregtype(reg);
  1081. supreg:=getsupreg(reg);
  1082. if (cg.rg[regtype].live_start[supreg]=p) and
  1083. RegInInstruction(reg,hp1) then
  1084. cg.rg[regtype].live_start[supreg]:=hp1;
  1085. end;
  1086. var
  1087. i : longint;
  1088. r : TSuperRegister;
  1089. begin
  1090. { assumption: p is directly followed by hp1 }
  1091. { if live of any reg used by p starts at p and hp1 uses this register then
  1092. set live start to hp1 }
  1093. for i:=0 to p.ops-1 do
  1094. case p.oper[i]^.typ of
  1095. Top_Reg:
  1096. CheckLiveStart(p.oper[i]^.reg);
  1097. Top_Ref:
  1098. begin
  1099. CheckLiveStart(p.oper[i]^.ref^.base);
  1100. CheckLiveStart(p.oper[i]^.ref^.index);
  1101. end;
  1102. Top_Shifterop:
  1103. CheckLiveStart(p.oper[i]^.shifterop^.rs);
  1104. Top_RegSet:
  1105. for r:=RS_R0 to RS_R15 do
  1106. if r in p.oper[i]^.regset^ then
  1107. CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1108. end;
  1109. { if live of any reg used by hp1 ends at hp1 and p uses this register then
  1110. set live end to p }
  1111. for i:=0 to hp1.ops-1 do
  1112. case hp1.oper[i]^.typ of
  1113. Top_Reg:
  1114. CheckLiveEnd(hp1.oper[i]^.reg);
  1115. Top_Ref:
  1116. begin
  1117. CheckLiveEnd(hp1.oper[i]^.ref^.base);
  1118. CheckLiveEnd(hp1.oper[i]^.ref^.index);
  1119. end;
  1120. Top_Shifterop:
1121. CheckLiveEnd(hp1.oper[i]^.shifterop^.rs);
  1122. Top_RegSet:
  1123. for r:=RS_R0 to RS_R15 do
  1124. if r in hp1.oper[i]^.regset^ then
  1125. CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
  1126. end;
  1127. end;
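{ Moves a load in front of the directly preceding instruction when the
  instruction after the load needs the loaded register, so the load latency
  can be hidden, e.g. (illustrative):
    add r1,r2,r3
    ldr r0,[r4]
    add r5,r0,#1
  becomes
    ldr r0,[r4]
    add r1,r2,r3
    add r5,r0,#1
}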
  1128. function TCpuPreRegallocScheduler.SchedulerPass1Cpu(var p: tai): boolean;
  1129. { TODO : schedule also forward }
  1130. { TODO : schedule distance > 1 }
  1131. var
  1132. hp1,hp2,hp3,hp4,hp5 : tai;
  1133. list : TAsmList;
  1134. begin
  1135. result:=true;
  1136. list:=TAsmList.Create;
  1137. p:=BlockStart;
  1138. while p<>BlockEnd Do
  1139. begin
  1140. if (p.typ=ait_instruction) and
  1141. GetNextInstruction(p,hp1) and
  1142. (hp1.typ=ait_instruction) and
  1143. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
1144. { for now we don't reschedule if the previous instruction potentially changes a memory location }
  1145. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  1146. not(RegModifiedByInstruction(NR_PC,p))
  1147. ) or
  1148. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  1149. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  1150. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  1151. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1152. )
  1153. ) or
1154. { try to prove that the memory accesses don't overlap }
  1155. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1156. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1157. (taicpu(p).oppostfix=PF_None) and
  1158. (taicpu(hp1).oppostfix=PF_None) and
  1159. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1160. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
1161. { get operand sizes and check if the offset distance is large enough to ensure no overlap }
  1162. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1163. )
  1164. )
  1165. ) and
  1166. GetNextInstruction(hp1,hp2) and
  1167. (hp2.typ=ait_instruction) and
  1168. { loaded register used by next instruction? }
  1169. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1170. { loaded register not used by previous instruction? }
  1171. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1172. { same condition? }
  1173. (taicpu(p).condition=taicpu(hp1).condition) and
  1174. { first instruction might not change the register used as base }
  1175. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1176. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1177. ) and
  1178. { first instruction might not change the register used as index }
  1179. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1180. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1181. ) then
  1182. begin
  1183. hp3:=tai(p.Previous);
  1184. hp5:=tai(p.next);
  1185. asml.Remove(p);
  1186. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1187. { before the instruction? }
  1188. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1189. begin
  1190. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1191. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1192. begin
  1193. hp4:=hp3;
  1194. hp3:=tai(hp3.Previous);
  1195. asml.Remove(hp4);
  1196. list.Concat(hp4);
  1197. end
  1198. else
  1199. hp3:=tai(hp3.Previous);
  1200. end;
  1201. list.Concat(p);
  1202. SwapRegLive(taicpu(p),taicpu(hp1));
  1203. { after the instruction? }
  1204. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1205. begin
  1206. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1207. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1208. begin
  1209. hp4:=hp5;
  1210. hp5:=tai(hp5.next);
  1211. asml.Remove(hp4);
  1212. list.Concat(hp4);
  1213. end
  1214. else
  1215. hp5:=tai(hp5.Next);
  1216. end;
  1217. asml.Remove(hp1);
  1218. {$ifdef DEBUG_PREREGSCHEDULER}
  1219. asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1220. {$endif DEBUG_PREREGSCHEDULER}
  1221. asml.InsertBefore(hp1,hp2);
  1222. asml.InsertListBefore(hp2,list);
  1223. p:=tai(p.next)
  1224. end
  1225. else if p.typ=ait_instruction then
  1226. p:=hp1
  1227. else
  1228. p:=tai(p.next);
  1229. end;
  1230. list.Free;
  1231. end;
  1232. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  1233. begin
  1234. { TODO: Add optimizer code }
  1235. end;
  1236. begin
  1237. casmoptimizer:=TCpuAsmOptimizer;
  1238. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1239. End.