aoptcpu.pas 53 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the ARM optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. Interface
  21. uses cgbase, cpubase, aasmtai, aopt, aoptcpub, aoptobj;
  22. Type
  23. { TCpuAsmOptimizer }
  24. TCpuAsmOptimizer = class(TAsmOptimizer)
  25. { uses the same constructor as TAopObj }
  26. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  27. procedure PeepHoleOptPass2;override;
  28. Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
  29. procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  30. function RegUsedAfterInstruction(reg: Tregister; p: tai;
  31. var AllUsedRegs: TAllUsedRegs): Boolean;
  32. End;
  33. TCpuPreRegallocScheduler = class(TAsmOptimizer)
  34. function PeepHoleOptPass1Cpu(var p: tai): boolean;override;
  35. end;
  36. TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
  37. { uses the same constructor as TAopObj }
  38. procedure PeepHoleOptPass2;override;
  39. End;
  40. Implementation
  41. uses
  42. cutils,
  43. verbose,
  44. cgutils,
  45. aasmbase,aasmdata,aasmcpu;
  46. function CanBeCond(p : tai) : boolean;
  47. begin
  48. result:=
  49. (p.typ=ait_instruction) and
  50. (taicpu(p).condition=C_None) and
  51. (taicpu(p).opcode<>A_PLD) and
  52. ((taicpu(p).opcode<>A_BLX) or
  53. (taicpu(p).oper[0]^.typ=top_reg));
  54. end;
  55. function RefsEqual(const r1, r2: treference): boolean;
  56. begin
  57. refsequal :=
  58. (r1.offset = r2.offset) and
  59. (r1.base = r2.base) and
  60. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  61. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  62. (r1.relsymbol = r2.relsymbol) and
  63. (r1.signindex = r2.signindex) and
  64. (r1.shiftimm = r2.shiftimm) and
  65. (r1.addressmode = r2.addressmode) and
  66. (r1.shiftmode = r2.shiftmode);
  67. end;
  68. function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
  69. begin
  70. result :=
  71. (instr.typ = ait_instruction) and
  72. (taicpu(instr).opcode = op) and
  73. ((cond = []) or (taicpu(instr).condition in cond)) and
  74. ((postfix = []) or (taicpu(instr).oppostfix in postfix));
  75. end;
  76. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  77. begin
  78. result := oper1.typ = oper2.typ;
  79. if result then
  80. case oper1.typ of
  81. top_const:
  82. Result:=oper1.val = oper2.val;
  83. top_reg:
  84. Result:=oper1.reg = oper2.reg;
  85. top_conditioncode:
  86. Result:=oper1.cc = oper2.cc;
  87. top_ref:
  88. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  89. else Result:=false;
  90. end
  91. end;
  92. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  93. begin
  94. result := (oper.typ = top_reg) and (oper.reg = reg);
  95. end;
  96. procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
  97. begin
  98. if (taicpu(movp).condition = C_EQ) and
  99. (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
  100. (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
  101. begin
  102. asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
  103. asml.remove(movp);
  104. movp.free;
  105. end;
  106. end;
  107. function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  108. var
  109. p: taicpu;
  110. begin
  111. p := taicpu(hp);
  112. regLoadedWithNewValue := false;
  113. if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
  114. exit;
  115. case p.opcode of
  116. { These operands do not write into a register at all }
  117. A_CMP, A_CMN, A_TST, A_TEQ, A_B, A_BL, A_BX, A_BLX, A_SWI, A_MSR, A_PLD:
  118. exit;
  119. {Take care of post/preincremented store and loads, they will change their base register}
  120. A_STR, A_LDR:
  121. regLoadedWithNewValue :=
  122. (taicpu(p).oper[1]^.typ=top_ref) and
  123. (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  124. (taicpu(p).oper[1]^.ref^.base = reg);
  125. { These four are writing into the first 2 register, UMLAL and SMLAL will also read from them }
  126. A_UMLAL, A_UMULL, A_SMLAL, A_SMULL:
  127. regLoadedWithNewValue :=
  128. (p.oper[1]^.typ = top_reg) and
  129. (p.oper[1]^.reg = reg);
  130. {Loads to oper2 from coprocessor}
  131. {
  132. MCR/MRC is currently not supported in FPC
  133. A_MRC:
  134. regLoadedWithNewValue :=
  135. (p.oper[2]^.typ = top_reg) and
  136. (p.oper[2]^.reg = reg);
  137. }
  138. {Loads to all register in the registerset}
  139. A_LDM:
  140. regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
  141. end;
  142. if regLoadedWithNewValue then
  143. exit;
  144. case p.oper[0]^.typ of
  145. {This is the case}
  146. top_reg:
  147. regLoadedWithNewValue := (p.oper[0]^.reg = reg);
  148. {LDM/STM might write a new value to their index register}
  149. top_ref:
  150. regLoadedWithNewValue :=
  151. (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
  152. (taicpu(p).oper[0]^.ref^.base = reg);
  153. end;
  154. end;
  155. function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
  156. var
  157. p: taicpu;
  158. i: longint;
  159. begin
  160. instructionLoadsFromReg := false;
  161. if not (assigned(hp) and (hp.typ = ait_instruction)) then
  162. exit;
  163. p:=taicpu(hp);
  164. i:=1;
  165. {For these instructions we have to start on oper[0]}
  166. if (p.opcode in [A_STR, A_LDM, A_STM, A_PLD,
  167. A_CMP, A_CMN, A_TST, A_TEQ,
  168. A_B, A_BL, A_BX, A_BLX,
  169. A_SMLAL, A_UMLAL]) then i:=0;
  170. while(i<p.ops) do
  171. begin
  172. case p.oper[I]^.typ of
  173. top_reg:
  174. instructionLoadsFromReg := p.oper[I]^.reg = reg;
  175. top_regset:
  176. instructionLoadsFromReg := (getsupreg(reg) in p.oper[I]^.regset^);
  177. top_shifterop:
  178. instructionLoadsFromReg := p.oper[I]^.shifterop^.rs = reg;
  179. top_ref:
  180. instructionLoadsFromReg :=
  181. (p.oper[I]^.ref^.base = reg) or
  182. (p.oper[I]^.ref^.index = reg);
  183. end;
  184. if instructionLoadsFromReg then exit; {Bailout if we found something}
  185. Inc(I);
  186. end;
  187. end;
  188. function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
  189. var AllUsedRegs: TAllUsedRegs): Boolean;
  190. begin
  191. AllUsedRegs[getregtype(reg)].Update(tai(p.Next));
  192. RegUsedAfterInstruction :=
  193. AllUsedRegs[getregtype(reg)].IsUsed(reg) and
  194. not(regLoadedWithNewValue(reg,p)) and
  195. (
  196. not(GetNextInstruction(p,p)) or
  197. instructionLoadsFromReg(reg,p) or
  198. not(regLoadedWithNewValue(reg,p))
  199. );
  200. end;
  201. procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
  202. var
  203. TmpUsedRegs: TAllUsedRegs;
  204. begin
  205. if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
  206. (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
  207. MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
  208. {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
  209. not (
  210. (taicpu(p).opcode in [A_MLA, A_MUL]) and
  211. (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
  212. ) then
  213. begin
  214. CopyUsedRegs(TmpUsedRegs);
  215. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  216. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,movp,TmpUsedRegs)) then
  217. begin
  218. asml.insertbefore(tai_comment.Create(strpnew('Peephole '+optimizer+' removed superfluous mov')), movp);
  219. taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
  220. asml.remove(movp);
  221. movp.free;
  222. end;
  223. ReleaseUsedRegs(TmpUsedRegs);
  224. end;
  225. end;
  226. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  227. var
  228. hp1,hp2: tai;
  229. i, i2: longint;
  230. TmpUsedRegs: TAllUsedRegs;
  231. tempop: tasmop;
  232. function IsPowerOf2(const value: DWord): boolean; inline;
  233. begin
  234. Result:=(value and (value - 1)) = 0;
  235. end;
  236. begin
  237. result := false;
  238. case p.typ of
  239. ait_instruction:
  240. begin
  241. (* optimization proved not to be safe, see tw4768.pp
  242. {
  243. change
  244. <op> reg,x,y
  245. cmp reg,#0
  246. into
  247. <op>s reg,x,y
  248. }
  249. { this optimization can applied only to the currently enabled operations because
  250. the other operations do not update all flags and FPC does not track flag usage }
  251. if (taicpu(p).opcode in [A_ADC,A_ADD,A_SUB {A_UDIV,A_SDIV,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND}]) and
  252. (taicpu(p).oppostfix = PF_None) and
  253. (taicpu(p).condition = C_None) and
  254. GetNextInstruction(p, hp1) and
  255. MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
  256. (taicpu(hp1).oper[1]^.typ = top_const) and
  257. (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
  258. (taicpu(hp1).oper[1]^.val = 0) { and
  259. GetNextInstruction(hp1, hp2) and
  260. (tai(hp2).typ = ait_instruction) and
  261. // be careful here, following instructions could use other flags
  262. // however after a jump fpc never depends on the value of flags
  263. (taicpu(hp2).opcode = A_B) and
  264. (taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL])} then
  265. begin
  266. taicpu(p).oppostfix:=PF_S;
  267. asml.remove(hp1);
  268. hp1.free;
  269. end
  270. else
  271. *)
  272. case taicpu(p).opcode of
  273. A_STR:
  274. begin
  275. { change
  276. str reg1,ref
  277. ldr reg2,ref
  278. into
  279. str reg1,ref
  280. mov reg2,reg1
  281. }
  282. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  283. (taicpu(p).oppostfix=PF_None) and
  284. GetNextInstruction(p,hp1) and
  285. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
  286. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  287. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  288. begin
  289. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  290. begin
  291. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 1 done')), hp1);
  292. asml.remove(hp1);
  293. hp1.free;
  294. end
  295. else
  296. begin
  297. taicpu(hp1).opcode:=A_MOV;
  298. taicpu(hp1).oppostfix:=PF_None;
  299. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  300. asml.insertbefore(tai_comment.Create(strpnew('Peephole StrLdr2StrMov 2 done')), hp1);
  301. end;
  302. result := true;
  303. end;
  304. end;
  305. A_LDR:
  306. begin
  307. { change
  308. ldr reg1,ref
  309. ldr reg2,ref
  310. into
  311. ldr reg1,ref
  312. mov reg2,reg1
  313. }
  314. if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
  315. GetNextInstruction(p,hp1) and
  316. MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix]) and
  317. RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
  318. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
  319. (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
  320. (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
  321. begin
  322. if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
  323. begin
  324. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2Ldr done')), hp1);
  325. asml.remove(hp1);
  326. hp1.free;
  327. end
  328. else
  329. begin
  330. asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrLdr2LdrMov done')), hp1);
  331. taicpu(hp1).opcode:=A_MOV;
  332. taicpu(hp1).oppostfix:=PF_None;
  333. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  334. end;
  335. result := true;
  336. end;
  337. { Remove superfluous mov after ldr
  338. changes
  339. ldr reg1, ref
  340. mov reg2, reg1
  341. to
  342. ldr reg2, ref
  343. conditions are:
  344. * reg1 must be released after mov
  345. * mov can not contain shifterops
  346. * ldr+mov have the same conditions
  347. * mov does not set flags
  348. }
  349. if GetNextInstruction(p, hp1) then
  350. RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
  351. end;
  352. A_MOV:
  353. begin
  354. { fold
  355. mov reg1,reg0, shift imm1
  356. mov reg1,reg1, shift imm2
  357. }
  358. if (taicpu(p).ops=3) and
  359. (taicpu(p).oper[2]^.typ = top_shifterop) and
  360. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  361. getnextinstruction(p,hp1) and
  362. MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
  363. (taicpu(hp1).ops=3) and
  364. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  365. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  366. (taicpu(hp1).oper[2]^.typ = top_shifterop) and
  367. (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
  368. begin
  369. { fold
  370. mov reg1,reg0, lsl 16
  371. mov reg1,reg1, lsr 16
  372. strh reg1, ...
  373. dealloc reg1
  374. to
  375. strh reg1, ...
  376. dealloc reg1
  377. }
  378. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  379. (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
  380. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
  381. (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
  382. getnextinstruction(hp1,hp2) and
  383. MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
  384. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
  385. begin
  386. CopyUsedRegs(TmpUsedRegs);
  387. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  388. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  389. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
  390. begin
  391. asml.insertbefore(tai_comment.Create(strpnew('Peephole optimizer removed superfluous 16 Bit zero extension')), hp1);
  392. taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
  393. asml.remove(p);
  394. asml.remove(hp1);
  395. p.free;
  396. hp1.free;
  397. p:=hp2;
  398. end;
  399. ReleaseUsedRegs(TmpUsedRegs);
  400. end
  401. { fold
  402. mov reg1,reg0, shift imm1
  403. mov reg1,reg1, shift imm2
  404. to
  405. mov reg1,reg0, shift imm1+imm2
  406. }
  407. else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
  408. { asr makes no use after a lsr, the asr can be foled into the lsr }
  409. ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
  410. begin
  411. inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
  412. { avoid overflows }
  413. if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
  414. case taicpu(p).oper[2]^.shifterop^.shiftmode of
  415. SM_ROR:
  416. taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
  417. SM_ASR:
  418. taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
  419. SM_LSR,
  420. SM_LSL:
  421. begin
  422. hp1:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
  423. InsertLLItem(p.previous, p.next, hp1);
  424. p.free;
  425. p:=hp1;
  426. end;
  427. else
  428. internalerror(2008072803);
  429. end;
  430. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShift2Shift 1 done')), p);
  431. asml.remove(hp1);
  432. hp1.free;
  433. result := true;
  434. end
  435. { fold
  436. mov reg1,reg0, shift imm1
  437. mov reg1,reg1, shift imm2
  438. mov reg1,reg1, shift imm3 ...
  439. }
  440. else if getnextinstruction(hp1,hp2) and
  441. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  442. (taicpu(hp2).ops=3) and
  443. MatchOperand(taicpu(hp2).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  444. MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
  445. (taicpu(hp2).oper[2]^.typ = top_shifterop) and
  446. (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
  447. begin
  448. { mov reg1,reg0, lsl imm1
  449. mov reg1,reg1, lsr/asr imm2
  450. mov reg1,reg1, lsl imm3 ...
  451. if imm3<=imm1 and imm2>=imm3
  452. to
  453. mov reg1,reg0, lsl imm1
  454. mov reg1,reg1, lsr/asr imm2-imm3
  455. }
  456. if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  457. (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  458. (taicpu(hp2).oper[2]^.shifterop^.shiftimm<=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  459. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(hp2).oper[2]^.shifterop^.shiftimm) then
  460. begin
  461. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
  462. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 1 done')), p);
  463. asml.remove(hp2);
  464. hp2.free;
  465. result := true;
  466. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  467. begin
  468. asml.remove(hp1);
  469. hp1.free;
  470. end;
  471. end
  472. { mov reg1,reg0, lsr/asr imm1
  473. mov reg1,reg1, lsl imm2
  474. mov reg1,reg1, lsr/asr imm3 ...
  475. if imm3>=imm1 and imm2>=imm1
  476. to
  477. mov reg1,reg0, lsl imm2-imm1
  478. mov reg1,reg1, lsr/asr imm3 ...
  479. }
  480. else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
  481. (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
  482. (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
  483. (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
  484. begin
  485. dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
  486. taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
  487. asml.insertbefore(tai_comment.Create(strpnew('Peephole ShiftShiftShift2ShiftShift 2 done')), p);
  488. asml.remove(p);
  489. p.free;
  490. p:=hp2;
  491. if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
  492. begin
  493. taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
  494. asml.remove(hp1);
  495. hp1.free;
  496. p:=hp2;
  497. end;
  498. result := true;
  499. end;
  500. end;
  501. end;
  502. { Change the common
  503. mov r0, r0, lsr #24
  504. and r0, r0, #255
  505. and remove the superfluous and
  506. This could be extended to handle more cases.
  507. }
  508. if (taicpu(p).ops=3) and
  509. (taicpu(p).oper[2]^.typ = top_shifterop) and
  510. (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
  511. (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
  512. (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
  513. getnextinstruction(p,hp1) and
  514. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  515. (taicpu(hp1).ops=3) and
  516. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  517. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
  518. (taicpu(hp1).oper[2]^.typ = top_const) and
  519. { Check if the AND actually would only mask out bits beeing already zero because of the shift
  520. For LSR #25 and an AndConst of 255 that whould go like this:
  521. 255 and ((2 shl (32-25))-1)
  522. which results in 127, which is one less a power-of-2, meaning all lower bits are set.
  523. LSR #25 and AndConst of 254:
  524. 254 and ((2 shl (32-25))-1) = 126 -> lowest bit is clear, so we can't remove it.
  525. }
  526. ispowerof2((taicpu(hp1).oper[2]^.val and ((2 shl (32-taicpu(p).oper[2]^.shifterop^.shiftimm))-1))+1) then
  527. begin
  528. asml.insertbefore(tai_comment.Create(strpnew('Peephole LsrAnd2Lsr done')), hp1);
  529. asml.remove(hp1);
  530. hp1.free;
  531. end;
  532. {
  533. optimize
  534. mov rX, yyyy
  535. ....
  536. }
  537. if (taicpu(p).ops = 2) and
  538. GetNextInstruction(p,hp1) and
  539. (tai(hp1).typ = ait_instruction) then
  540. begin
  541. {
  542. This changes the very common
  543. mov r0, #0
  544. str r0, [...]
  545. mov r0, #0
  546. str r0, [...]
  547. and removes all superfluous mov instructions
  548. }
  549. if (taicpu(p).oper[1]^.typ = top_const) and
  550. (taicpu(hp1).opcode=A_STR) then
  551. while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
  552. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
  553. GetNextInstruction(hp1, hp2) and
  554. MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
  555. (taicpu(hp2).ops = 2) and
  556. MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
  557. MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
  558. begin
  559. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovStrMov done')), hp2);
  560. GetNextInstruction(hp2,hp1);
  561. asml.remove(hp2);
  562. hp2.free;
  563. if not assigned(hp1) then break;
  564. end
  565. {
  566. This removes the first mov from
  567. mov rX,...
  568. mov rX,...
  569. }
  570. else if taicpu(hp1).opcode=A_MOV then
  571. while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
  572. (taicpu(hp1).ops = 2) and
  573. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) do
  574. begin
  575. asml.insertbefore(tai_comment.Create(strpnew('Peephole MovMov done')), p);
  576. asml.remove(p);
  577. p.free;
  578. p:=hp1;
  579. GetNextInstruction(hp1,hp1);
  580. if not assigned(hp1) then
  581. break;
  582. end;
  583. end;
  584. {
  585. change
  586. mov r1, r0
  587. add r1, r1, #1
  588. to
  589. add r1, r0, #1
  590. Todo: Make it work for mov+cmp too
  591. CAUTION! If this one is successful p might not be a mov instruction anymore!
  592. }
  593. if (taicpu(p).ops = 2) and
  594. (taicpu(p).oper[1]^.typ = top_reg) and
  595. (taicpu(p).oppostfix = PF_NONE) and
  596. GetNextInstruction(p, hp1) and
  597. (tai(hp1).typ = ait_instruction) and
  598. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  599. A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN]) and
  600. {MOV and MVN might only have 2 ops}
  601. (taicpu(hp1).ops = 3) and
  602. (taicpu(hp1).condition in [C_NONE, taicpu(hp1).condition]) and
  603. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
  604. (taicpu(hp1).oper[1]^.typ = top_reg) and
  605. (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop]) then
  606. begin
  607. { When we get here we still don't know if the registers match}
  608. for I:=1 to 2 do
  609. {
  610. If the first loop was successful p will be replaced with hp1.
  611. The checks will still be ok, because all required information
  612. will also be in hp1 then.
  613. }
  614. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  615. begin
  616. asml.insertbefore(tai_comment.Create(strpnew('Peephole RedundantMovProcess done')), hp1);
  617. taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
  618. if p<>hp1 then
  619. begin
  620. asml.remove(p);
  621. p.free;
  622. p:=hp1;
  623. end;
  624. end;
  625. end;
  626. { This folds shifterops into following instructions
  627. mov r0, r1, lsl #8
  628. add r2, r3, r0
  629. to
  630. add r2, r3, r1, lsl #8
  631. CAUTION! If this one is successful p might not be a mov instruction anymore!
  632. }
  633. if (taicpu(p).opcode = A_MOV) and
  634. (taicpu(p).ops = 3) and
  635. (taicpu(p).oper[1]^.typ = top_reg) and
  636. (taicpu(p).oper[2]^.typ = top_shifterop) and
  637. (taicpu(p).oppostfix = PF_NONE) and
  638. GetNextInstruction(p, hp1) and
  639. (tai(hp1).typ = ait_instruction) and
  640. (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
  641. (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
  642. (taicpu(hp1).oppostfix = PF_NONE) and
  643. (taicpu(hp1).condition = taicpu(p).condition) and
  644. (taicpu(hp1).opcode in [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
  645. A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
  646. A_CMP, A_CMN]) and
  647. (
  648. {Only ONE of the two src operands is allowed to match}
  649. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
  650. MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
  651. ) then
  652. begin
  653. CopyUsedRegs(TmpUsedRegs);
  654. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  655. if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
  656. I2:=0
  657. else
  658. I2:=1;
  659. if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
  660. for I:=I2 to taicpu(hp1).ops-1 do
  661. if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
  662. begin
  663. { If the parameter matched on the second op from the RIGHT
  664. we have to switch the parameters, this will not happen for CMP
  665. were we're only evaluating the most right parameter
  666. }
  667. if I <> taicpu(hp1).ops-1 then
  668. begin
  669. {The SUB operators need to be changed when we swap parameters}
  670. case taicpu(hp1).opcode of
  671. A_SUB: tempop:=A_RSB;
  672. A_SBC: tempop:=A_RSC;
  673. A_RSB: tempop:=A_SUB;
  674. A_RSC: tempop:=A_SBC;
  675. else tempop:=taicpu(hp1).opcode;
  676. end;
  677. if taicpu(hp1).ops = 3 then
  678. hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
  679. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
  680. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  681. else
  682. hp2:=taicpu.op_reg_reg_shifterop(tempop,
  683. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  684. taicpu(p).oper[2]^.shifterop^);
  685. end
  686. else
  687. if taicpu(hp1).ops = 3 then
  688. hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
  689. taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
  690. taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
  691. else
  692. hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
  693. taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
  694. taicpu(p).oper[2]^.shifterop^);
  695. asml.insertbefore(hp2, p);
  696. asml.remove(p);
  697. asml.remove(hp1);
  698. p.free;
  699. hp1.free;
  700. p:=hp2;
  701. GetNextInstruction(p,hp1);
  702. asml.insertbefore(tai_comment.Create(strpnew('Peephole FoldShiftProcess done')), p);
  703. break;
  704. end;
  705. ReleaseUsedRegs(TmpUsedRegs);
  706. end;
  707. {
  708. Often we see shifts and then a superfluous mov to another register
  709. In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
  710. }
  711. if (taicpu(p).opcode = A_MOV) and
  712. GetNextInstruction(p, hp1) then
  713. RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
  714. end;
  715. A_ADD,
  716. A_ADC,
  717. A_RSB,
  718. A_RSC,
  719. A_SUB,
  720. A_SBC,
  721. A_AND,
  722. A_BIC,
  723. A_EOR,
  724. A_ORR,
  725. A_MLA,
  726. A_MUL:
  727. begin
  728. {
  729. change
  730. and reg2,reg1,const1
  731. and reg2,reg2,const2
  732. to
  733. and reg2,reg1,(const1 and const2)
  734. }
  735. if (taicpu(p).opcode = A_AND) and
  736. (taicpu(p).oper[1]^.typ = top_reg) and
  737. (taicpu(p).oper[2]^.typ = top_const) and
  738. GetNextInstruction(p, hp1) and
  739. MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
  740. MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
  741. MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
  742. (taicpu(hp1).oper[2]^.typ = top_const) then
  743. begin
  744. asml.insertbefore(tai_comment.Create(strpnew('Peephole AndAnd2And done')), p);
  745. taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
  746. taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
  747. asml.remove(hp1);
  748. hp1.free;
  749. end;
  750. {
  751. change
  752. add reg1, ...
  753. mov reg2, reg1
  754. to
  755. add reg2, ...
  756. }
  757. if GetNextInstruction(p, hp1) then
  758. RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
  759. end;
  760. A_CMP:
  761. begin
  762. {
  763. change
  764. cmp reg,const1
  765. moveq reg,const1
  766. movne reg,const2
  767. to
  768. cmp reg,const1
  769. movne reg,const2
  770. }
  771. if (taicpu(p).oper[1]^.typ = top_const) and
  772. GetNextInstruction(p, hp1) and
  773. MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  774. (taicpu(hp1).oper[1]^.typ = top_const) and
  775. GetNextInstruction(hp1, hp2) and
  776. MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
  777. (taicpu(hp1).oper[1]^.typ = top_const) then
  778. begin
  779. RemoveRedundantMove(p, hp1, asml);
  780. RemoveRedundantMove(p, hp2, asml);
  781. end;
  782. end;
  783. end;
  784. end;
  785. end;
  786. end;
  787. { instructions modifying the CPSR can be only the last instruction }
  788. function MustBeLast(p : tai) : boolean;
  789. begin
  790. Result:=(p.typ=ait_instruction) and
  791. ((taicpu(p).opcode in [A_BL,A_BLX,A_CMP,A_CMN,A_SWI,A_TEQ,A_TST,A_CMF,A_CMFE {,A_MSR}]) or
  792. ((taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and (taicpu(p).oper[0]^.reg=NR_PC)) or
  793. (taicpu(p).oppostfix=PF_S));
  794. end;
  795. procedure TCpuAsmOptimizer.PeepHoleOptPass2;
  796. var
  797. p,hp1,hp2: tai;
  798. l : longint;
  799. condition : tasmcond;
  800. hp3: tai;
  801. WasLast: boolean;
  802. { UsedRegs, TmpUsedRegs: TRegSet; }
  803. begin
  804. p := BlockStart;
  805. { UsedRegs := []; }
  806. while (p <> BlockEnd) Do
  807. begin
  808. { UpdateUsedRegs(UsedRegs, tai(p.next)); }
  809. case p.Typ Of
  810. Ait_Instruction:
  811. begin
  812. case taicpu(p).opcode Of
  813. A_B:
  814. if taicpu(p).condition<>C_None then
  815. begin
  816. { check for
  817. Bxx xxx
  818. <several instructions>
  819. xxx:
  820. }
  821. l:=0;
  822. WasLast:=False;
  823. GetNextInstruction(p, hp1);
  824. while assigned(hp1) and
  825. (l<=4) and
  826. CanBeCond(hp1) and
  827. { stop on labels }
  828. not(hp1.typ=ait_label) do
  829. begin
  830. inc(l);
  831. if MustBeLast(hp1) then
  832. begin
  833. WasLast:=True;
  834. GetNextInstruction(hp1,hp1);
  835. break;
  836. end
  837. else
  838. GetNextInstruction(hp1,hp1);
  839. end;
  840. if assigned(hp1) then
  841. begin
  842. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  843. begin
  844. if (l<=4) and (l>0) then
  845. begin
  846. condition:=inverse_cond(taicpu(p).condition);
  847. hp2:=p;
  848. GetNextInstruction(p,hp1);
  849. p:=hp1;
  850. repeat
  851. if hp1.typ=ait_instruction then
  852. taicpu(hp1).condition:=condition;
  853. if MustBeLast(hp1) then
  854. begin
  855. GetNextInstruction(hp1,hp1);
  856. break;
  857. end
  858. else
  859. GetNextInstruction(hp1,hp1);
  860. until not(assigned(hp1)) or
  861. not(CanBeCond(hp1)) or
  862. (hp1.typ=ait_label);
  863. { wait with removing else GetNextInstruction could
  864. ignore the label if it was the only usage in the
  865. jump moved away }
  866. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  867. asml.remove(hp2);
  868. hp2.free;
  869. continue;
  870. end;
  871. end
  872. else
  873. { do not perform further optimizations if there is inctructon
  874. in block #1 which can not be optimized.
  875. }
  876. if not WasLast then
  877. begin
  878. { check further for
  879. Bcc xxx
  880. <several instructions 1>
  881. B yyy
  882. xxx:
  883. <several instructions 2>
  884. yyy:
  885. }
  886. { hp2 points to jmp yyy }
  887. hp2:=hp1;
  888. { skip hp1 to xxx }
  889. GetNextInstruction(hp1, hp1);
  890. if assigned(hp2) and
  891. assigned(hp1) and
  892. (l<=3) and
  893. (hp2.typ=ait_instruction) and
  894. (taicpu(hp2).is_jmp) and
  895. (taicpu(hp2).condition=C_None) and
  896. { real label and jump, no further references to the
  897. label are allowed }
  898. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  899. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  900. begin
  901. l:=0;
  902. { skip hp1 to <several moves 2> }
  903. GetNextInstruction(hp1, hp1);
  904. while assigned(hp1) and
  905. CanBeCond(hp1) do
  906. begin
  907. inc(l);
  908. GetNextInstruction(hp1, hp1);
  909. end;
  910. { hp1 points to yyy: }
  911. if assigned(hp1) and
  912. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  913. begin
  914. condition:=inverse_cond(taicpu(p).condition);
  915. GetNextInstruction(p,hp1);
  916. hp3:=p;
  917. p:=hp1;
  918. repeat
  919. if hp1.typ=ait_instruction then
  920. taicpu(hp1).condition:=condition;
  921. GetNextInstruction(hp1,hp1);
  922. until not(assigned(hp1)) or
  923. not(CanBeCond(hp1));
  924. { hp2 is still at jmp yyy }
  925. GetNextInstruction(hp2,hp1);
  926. { hp2 is now at xxx: }
  927. condition:=inverse_cond(condition);
  928. GetNextInstruction(hp1,hp1);
  929. { hp1 is now at <several movs 2> }
  930. repeat
  931. taicpu(hp1).condition:=condition;
  932. GetNextInstruction(hp1,hp1);
  933. until not(assigned(hp1)) or
  934. not(CanBeCond(hp1)) or
  935. (hp1.typ=ait_label);
  936. {
  937. asml.remove(hp1.next)
  938. hp1.next.free;
  939. asml.remove(hp1);
  940. hp1.free;
  941. }
  942. { remove Bcc }
  943. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  944. asml.remove(hp3);
  945. hp3.free;
  946. { remove jmp }
  947. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  948. asml.remove(hp2);
  949. hp2.free;
  950. continue;
  951. end;
  952. end;
  953. end;
  954. end;
  955. end;
  956. end;
  957. end;
  958. end;
  959. p := tai(p.next)
  960. end;
  961. end;
  962. function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
  963. begin
  964. If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
  965. Result:=true
  966. else
  967. Result:=inherited RegInInstruction(Reg, p1);
  968. end;
  969. const
  970. { set of opcode which might or do write to memory }
  971. { TODO : extend armins.dat to contain r/w info }
  972. opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
  973. A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
  974. function TCpuPreRegallocScheduler.PeepHoleOptPass1Cpu(var p: tai): boolean;
  975. { TODO : schedule also forward }
  976. { TODO : schedule distance > 1 }
  977. var
  978. hp1,hp2,hp3,hp4,hp5 : tai;
  979. list : TAsmList;
  980. begin
  981. result:=true;
  982. list:=TAsmList.Create;
  983. p := BlockStart;
  984. { UsedRegs := []; }
  985. while (p <> BlockEnd) Do
  986. begin
  987. if (p.typ=ait_instruction) and
  988. GetNextInstruction(p,hp1) and
  989. (hp1.typ=ait_instruction) and
  990. { for now we don't reschedule if the previous instruction changes potentially a memory location }
  991. ( (not(taicpu(p).opcode in opcode_could_mem_write) and
  992. not(RegModifiedByInstruction(NR_PC,p)) and
  993. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH])
  994. ) or
  995. ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
  996. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  997. ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
  998. (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
  999. (taicpu(hp1).oper[1]^.ref^.offset=0)
  1000. )
  1001. ) or
  1002. { try to prove that the memory accesses don't overlapp }
  1003. ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
  1004. (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
  1005. (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
  1006. (taicpu(p).oppostfix=PF_None) and
  1007. (taicpu(hp1).oppostfix=PF_None) and
  1008. (taicpu(p).oper[1]^.ref^.index=NR_NO) and
  1009. (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
  1010. { get operand sizes and check if the offset distance is large enough to ensure no overlapp }
  1011. (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
  1012. )
  1013. )
  1014. ) and
  1015. GetNextInstruction(hp1,hp2) and
  1016. (hp2.typ=ait_instruction) and
  1017. { loaded register used by next instruction? }
  1018. (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
  1019. { loaded register not used by previous instruction? }
  1020. not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
  1021. { same condition? }
  1022. (taicpu(p).condition=taicpu(hp1).condition) and
  1023. { first instruction might not change the register used as base }
  1024. ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
  1025. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
  1026. ) and
  1027. { first instruction might not change the register used as index }
  1028. ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
  1029. not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
  1030. ) then
  1031. begin
  1032. hp3:=tai(p.Previous);
  1033. hp5:=tai(p.next);
  1034. asml.Remove(p);
  1035. { if there is a reg. dealloc instruction associated with p, move it together with p }
  1036. { before the instruction? }
  1037. while assigned(hp3) and (hp3.typ<>ait_instruction) do
  1038. begin
  1039. if (hp3.typ=ait_regalloc) and (tai_regalloc(hp3).ratype in [ra_dealloc]) and
  1040. RegInInstruction(tai_regalloc(hp3).reg,p) then
  1041. begin
  1042. hp4:=hp3;
  1043. hp3:=tai(hp3.Previous);
  1044. asml.Remove(hp4);
  1045. list.Concat(hp4);
  1046. end
  1047. else
  1048. hp3:=tai(hp3.Previous);
  1049. end;
  1050. list.Concat(p);
  1051. { after the instruction? }
  1052. while assigned(hp5) and (hp5.typ<>ait_instruction) do
  1053. begin
  1054. if (hp5.typ=ait_regalloc) and (tai_regalloc(hp5).ratype in [ra_dealloc]) and
  1055. RegInInstruction(tai_regalloc(hp5).reg,p) then
  1056. begin
  1057. hp4:=hp5;
  1058. hp5:=tai(hp5.next);
  1059. asml.Remove(hp4);
  1060. list.Concat(hp4);
  1061. end
  1062. else
  1063. hp5:=tai(hp5.Next);
  1064. end;
  1065. asml.Remove(hp1);
  1066. {$ifdef DEBUG_PREREGSCHEDULER}
  1067. asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
  1068. {$endif DEBUG_PREREGSCHEDULER}
  1069. asml.InsertBefore(hp1,hp2);
  1070. asml.InsertListBefore(hp2,list);
  1071. end;
  1072. p := tai(p.next)
  1073. end;
  1074. list.Free;
  1075. end;
  1076. procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
  1077. begin
  1078. { TODO: Add optimizer code }
  1079. end;
  1080. begin
  1081. casmoptimizer:=TCpuAsmOptimizer;
  1082. cpreregallocscheduler:=TCpuPreRegallocScheduler;
  1083. End.