aoptarm.pas 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. {
  2. Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
  3. Development Team
  4. This unit implements an ARM optimizer object shared by ARM and AArch64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptarm;
  19. {$i fpcdefs.inc}
  20. { $define DEBUG_PREREGSCHEDULER}
  21. { $define DEBUG_AOPTCPU}
  22. Interface
  23. uses
  24. cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
  25. Type
  { Although ARM and AArch64 do not look very similar at first glance,
    several peephole optimizations can be shared between both targets. }
  TARMAsmOptimizer = class(TAsmOptimizer)
    { Inserts s as an assembler comment in front of p when DEBUG_AOPTCPU
      is defined; compiles to an empty routine otherwise. }
    procedure DebugMsg(const s : string; p : tai);

    { Tries to fold a "mov x,reg" (movp) that follows p into p by making p
      write x directly. Returns true when movp was removed. }
    function RemoveSuperfluousMove(const p: tai;
                                   movp: tai;
                                   const optimizer: string): boolean;

    { Looks for the instruction after Current that involves reg; the scan
      only skips unrelated instructions when cs_opt_level3 is active. }
    function GetNextInstructionUsingReg(Current: tai;
                                        out Next: tai;
                                        reg: TRegister): Boolean;

    { First-pass peephole handlers for the byte/halfword extend
      instructions; each returns true when it changed the code. }
    function OptPass1UXTB(var p: tai): Boolean;
    function OptPass1UXTH(var p: tai): Boolean;
    function OptPass1SXTB(var p: tai): Boolean;
  End;
  { Instruction matchers: each returns true when instr is an instruction
    whose opcode, condition and postfix satisfy the given filters. For the
    set-typed filters an empty set accepts any value. }
  function MatchInstruction(const instr: tai;
                            const op: TCommonAsmOps;
                            const cond: TAsmConds;
                            const postfix: TOpPostfixes): boolean;
  function MatchInstruction(const instr: tai;
                            const op: TAsmOp;
                            const cond: TAsmConds;
                            const postfix: TOpPostfixes): boolean;
{$ifdef AARCH64}
  function MatchInstruction(const instr: tai;
                            const op: TAsmOps;
                            const postfix: TOpPostfixes): boolean;
{$endif AARCH64}
  function MatchInstruction(const instr: tai;
                            const op: TAsmOp;
                            const postfix: TOpPostfixes): boolean;

  { True when both references agree in every compared field and neither
    one carries a volatility flag. }
  function RefsEqual(const r1, r2: treference): boolean;

  { Operand matchers: against a specific register, or against another
    operand of the same kind and value. }
  function MatchOperand(const oper: TOper;
                        const reg: TRegister): boolean; inline;
  function MatchOperand(const oper1: TOper;
                        const oper2: TOper): boolean; inline;
  45. Implementation
  46. uses
  47. cutils,verbose,globtype,globals,
  48. systems,
  49. cpuinfo,
  50. cgobj,procinfo,
  51. aasmbase,aasmdata;
  {$ifndef DEBUG_AOPTCPU}
  { Regular build: debug messages are compiled out, the routine is empty. }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$else DEBUG_AOPTCPU}
  { Debug build: puts s into the assembler list as a comment placed
    directly in front of p. }
  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
  {$endif DEBUG_AOPTCPU}
  { Returns true when instr is an instruction whose opcode is in op, whose
    condition is in cond and whose postfix is in postfix. An empty set for
    any of the three filters accepts every value of that property. }
  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      { the ordinal is range-checked before the membership test
        (presumably because TCommonAsmOps cannot hold larger ordinals -
        confirm against its declaration) }
      if (op<>[]) and
         not((ord(taicpu(instr).opcode)<256) and (taicpu(instr).opcode in op)) then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true when instr is an instruction with exactly opcode op,
    a condition contained in cond and a postfix contained in postfix.
    An empty cond or postfix set accepts every value. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      if (cond<>[]) and not(taicpu(instr).condition in cond) then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  {$ifdef AARCH64}
  { AArch64-only variant without a condition filter: returns true when
    instr is an instruction whose opcode is in op and whose postfix is in
    postfix. An empty set accepts every value. }
  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (op<>[]) and not(taicpu(instr).opcode in op) then
        exit;
      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  {$endif AARCH64}
  { Returns true when instr is an instruction with exactly opcode op and
    a postfix contained in postfix (an empty set accepts every postfix).
    The condition of the instruction is not examined. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
    begin
      if (instr.typ<>ait_instruction) or (taicpu(instr).opcode<>op) then
        result:=false
      else
        result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg=reg;
    end;
  { Compares two references field by field. Returns true only when all
    compared fields are equal and neither reference has a volatility
    flag set. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=false;
      { a reference carrying any volatility flag never compares equal }
      if (r1.volatility<>[]) or (r2.volatility<>[]) then
        exit;
      { address components }
      if (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.offset<>r2.offset) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { symbolic parts }
      if (r1.symbol<>r2.symbol) or
         (r1.relsymbol<>r2.relsymbol) or
         (r1.refaddr<>r2.refaddr) then
        exit;
{$ifdef ARM}
      if r1.signindex<>r2.signindex then
        exit;
{$endif ARM}
      { shifter and addressing mode }
      result:=
        (r1.shiftimm=r2.shiftimm) and
        (r1.addressmode=r2.addressmode) and
        (r1.shiftmode=r2.shiftmode);
    end;
  { Returns true when both operands have the same kind and the same
    payload. Constants, registers, condition codes, real constants and
    references (via RefsEqual) are compared; any other operand kind
    yields false. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      if oper1.typ<>oper2.typ then
        begin
          result:=false;
          exit;
        end;
      case oper1.typ of
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_const:
          result:=oper1.val=oper2.val;
        top_ref:
          result:=RefsEqual(oper1.ref^, oper2.ref^);
        top_conditioncode:
          result:=oper1.cc=oper2.cc;
        top_realconst:
          result:=oper1.val_real=oper2.val_real;
        else
          result:=false;
      end;
    end;
  { Advances from Current and returns in Next the entry at which the scan
    stopped; the function result is the result of the last
    GetNextInstruction call.

    Without cs_opt_level3 the scan stops at the very next entry. With it,
    entries are skipped until one is not an instruction, involves reg,
    is a call/jump or (ARM only) modifies the program counter. }
  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
    Out Next: tai; reg: TRegister): Boolean;
    var
      LookAhead: Boolean;
    begin
      Next:=Current;
      LookAhead:=cs_opt_level3 in current_settings.optimizerswitches;
      repeat
        Result:=GetNextInstruction(Next,Next);
        { end of list, no look-ahead allowed, or not an instruction }
        if not(Result) or
           not(LookAhead) or
           (Next.typ<>ait_instruction) then
          break;
        { found a user of reg, or control flow leaves the straight line }
        if RegInInstruction(reg,Next) or
           is_calljmp(taicpu(Next).opcode) then
          break;
{$ifdef ARM}
        if RegModifiedByInstruction(NR_PC,Next) then
          break;
{$endif ARM}
      until false;
    end;
  { Folds
      <op>  reg0,...      (p)
      mov   reg2,reg0     (movp)
      dealloc reg0
    into
      <op>  reg2,...
    by retargeting p and deleting movp. optimizer is only used to label
    the debug message. Returns true when the mov was removed. }
  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
    var
      AllocMarker,
      DeallocMarker : tai_regalloc;
      PrevInstr : tai;
      LoadedReg,
      TargetReg : TRegister;
    begin
      Result:=false;

      { movp has to be a plain mov with the same condition as p; more
        than two operands would mean a shifter operand, which cannot be
        folded }
      if not(MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None])) or
         (taicpu(movp).ops<>2) then
        exit;

      { the mov must copy the register written by p }
      LoadedReg:=taicpu(p).oper[0]^.reg;
      if not(MatchOperand(taicpu(movp).oper[1]^, LoadedReg)) then
        exit;

      TargetReg:=taicpu(movp).oper[0]^.reg;
      { don't mess with moves to fp }
      if TargetReg=current_procinfo.framepointer then
        exit;
      { the destination register of the mov must not be used between p and movp }
      if RegUsedBetween(TargetReg,p,movp) then
        exit;

{$ifdef ARM}
      { cb[n]z are thumb instructions which require specific registers, with no wide forms }
      if (taicpu(p).opcode=A_CBZ) or (taicpu(p).opcode=A_CBNZ) then
        exit;
      { special requirement for MUL and MLA: oper[0] and oper[1] are not allowed to be the same }
      if (taicpu(p).opcode in [A_MLA, A_MUL]) and
         (taicpu(p).oper[1]^.reg = TargetReg) and
         (current_settings.cputype < cpu_armv6) then
        exit;
{$endif ARM}

      { Only continue for instructions which REALLY load to the first register.
        Otherwise
          str reg0, [reg1]
          mov reg2, reg0
        would be turned into
          str reg2, [reg1]
      }
      if not(RegLoadedWithNewValue(LoadedReg, p)) then
        exit;

      { reg0 has to die after the mov }
      DeallocMarker:=FindRegDeAlloc(LoadedReg,tai(movp.Next));
      if not(assigned(DeallocMarker)) then
        exit;

      DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
      Result:=true;

      { reg0 is not used anymore: drop its allocation if it can be found,
        otherwise end its life directly after p }
      asml.Remove(DeallocMarker);
      AllocMarker:=FindRegAllocBackward(LoadedReg,tai(p.previous));
      if assigned(AllocMarker) then
        begin
          asml.Remove(AllocMarker);
          AllocMarker.free;
          DeallocMarker.free;
        end
      else
        asml.InsertAfter(DeallocMarker,p);

      { try to move the allocation of the target register in front of p }
      GetLastInstruction(movp,PrevInstr);
      AllocMarker:=FindRegAlloc(TargetReg,tai(PrevInstr.Next));
      if assigned(AllocMarker) then
        begin
          asml.Remove(AllocMarker);
          asml.InsertBefore(AllocMarker,p);
          { adjust used regs }
          IncludeRegInUsedRegs(TargetReg,UsedRegs);
        end;

      { let p write the target register directly }
      taicpu(p).loadreg(0,TargetReg);

      { Remove preindexing and postindexing for LDR in some cases.
        For example:
          ldr reg2,[reg1, xxx]!
          mov reg1,reg2
        must be translated to:
          ldr reg1,[reg1, xxx]

        Preindexing must be removed there, since the same register is used
        as the base and as the target. Such a case is not allowed on ARM
        CPUs and produces a crash. }
      if (taicpu(p).opcode = A_LDR) and
         (taicpu(p).oper[1]^.typ = top_ref) and
         (TargetReg = taicpu(p).oper[1]^.ref^.base) then
        taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;

      { finally get rid of the mov }
      asml.remove(movp);
      movp.free;
    end;
  { First-pass peephole optimizations for a zero-extend-byte instruction p.

    Each pattern looks at the next instruction using p's destination
    register (reg2) and tries to merge the two instructions. On success
    the result is true; patterns that delete p advance p to the
    instruction that followed it.

    The patterns that retarget p to write reg3 (UxtbUxth2Uxtb and
    UxtbUxtb2Uxtb) move the write of reg3 up to p's position, so they
    additionally require that reg3 is not used between p and hp1. The
    look-ahead may skip unrelated instructions, and any of those could
    still read or write reg3. }
  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
          uxtb reg2,reg1
          strb reg2,[...]
          dealloc reg2
        to
          strb reg1,[...]

        Note: this pattern accepts whatever opcode p carries; only the
        condition, the postfix and the operand count are checked.
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { the reference in strb must not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxth reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { reg3 is written earlier after the change, so nothing between
          p and hp1 may touch it }
        not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          uxtb reg3,reg2
          dealloc reg2
        to
          uxtb reg3,reg1
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { reg3 is written earlier after the change, so nothing between
          p and hp1 may touch it }
        not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
          uxtb reg2,reg1
          and  reg3,reg2,#0x*FF
          dealloc reg2
        to
          uxtb reg3,reg1

        Any mask whose low eight bits are all set is fine here, because
        the zero-extended value has no bits above bit 7.
      }
      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
          { turn the and into the extend and drop the original one }
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      { last resort: a following "mov x,reg2" can be folded into p }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
        Result:=true;
    end;
  { First-pass peephole optimizations for a zero-extend-halfword
    instruction p.

    The next instruction using p's destination register (reg2) is looked
    up once. When it is a matching strh, uxth or and, that instruction is
    rewritten to read p's source register (reg1) directly, p is removed
    and p is advanced to the instruction that followed it. Otherwise a
    following superfluous mov is folded into p if possible. Returns true
    when the code was changed. }
  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
    var
      hp1, Successor: tai;
      ExtReg, SrcReg: TRegister;
      Folded: Boolean;
    begin
      Result:=false;
      ExtReg:=taicpu(p).oper[0]^.reg;
      { every pattern below needs a follow-up instruction using reg2 }
      if not(GetNextInstructionUsingReg(p,hp1,ExtReg)) then
        exit;

      Folded:=false;
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
         (taicpu(p).ops=2) then
        begin
          SrcReg:=taicpu(p).oper[1]^.reg;
          if MatchInstruction(hp1, A_STR, [C_None], [PF_H]) then
            begin
              {
                change
                  uxth reg2,reg1
                  strh reg2,[...]
                  dealloc reg2
                to
                  strh reg1,[...]
              }
              if RegEndofLife(ExtReg,taicpu(hp1)) and
                 { the reference in strh must not use reg2 }
                 not(RegInRef(ExtReg,taicpu(hp1).oper[1]^.ref^)) and
                 { reg1 must not be modified inbetween }
                 not(RegModifiedBetween(SrcReg,p,hp1)) then
                begin
                  DebugMsg('Peephole UXTHStrh2Strh done', p);
                  taicpu(hp1).loadReg(0,SrcReg);
                  Folded:=true;
                end;
            end
          else if taicpu(p).opcode=A_UXTH then
            begin
              if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) then
                begin
                  {
                    change
                      uxth reg2,reg1
                      uxth reg3,reg2
                      dealloc reg2
                    to
                      uxth reg3,reg1
                  }
                  if (taicpu(hp1).ops=2) and
                     MatchOperand(taicpu(hp1).oper[1]^, ExtReg) and
                     RegEndofLife(ExtReg,taicpu(hp1)) and
                     { reg1 must not be modified inbetween }
                     not(RegModifiedBetween(SrcReg,p,hp1)) then
                    begin
                      DebugMsg('Peephole UxthUxth2Uxth done', p);
                      AllocRegBetween(SrcReg,p,hp1,UsedRegs);
                      taicpu(hp1).opcode:=A_UXTH;
                      taicpu(hp1).loadReg(1,SrcReg);
                      Folded:=true;
                    end;
                end
              else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) then
                begin
                  {
                    change
                      uxth reg2,reg1
                      and  reg3,reg2,#65535
                      dealloc reg2
                    to
                      uxth reg3,reg1
                  }
                  if (taicpu(hp1).ops=3) and
                     (taicpu(hp1).oper[2]^.typ=top_const) and
                     ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
                     MatchOperand(taicpu(hp1).oper[1]^, ExtReg) and
                     RegEndofLife(ExtReg,taicpu(hp1)) and
                     { reg1 must not be modified inbetween }
                     not(RegModifiedBetween(SrcReg,p,hp1)) then
                    begin
                      DebugMsg('Peephole UxthAndImm2Uxth done', p);
                      taicpu(hp1).opcode:=A_UXTH;
                      taicpu(hp1).ops:=2;
                      taicpu(hp1).loadReg(1,SrcReg);
                      Folded:=true;
                    end;
                end;
            end;
        end;

      if Folded then
        begin
          { p has become redundant: continue with its successor }
          GetNextInstruction(p,Successor);
          asml.remove(p);
          p.free;
          p:=Successor;
          Result:=true;
          exit;
        end;

      { last resort: a following "mov x,reg2" can be folded into p }
      Result:=RemoveSuperfluousMove(p, hp1, 'UxthMov2Data');
    end;
  { First-pass peephole optimizations for a sign-extend-byte instruction p.

    Each pattern looks at the next instruction using p's destination
    register (reg2) and tries to merge the two instructions. On success
    the result is true; patterns that delete p advance p to the
    instruction that followed it.

    The patterns that retarget p to write reg3 (SxtbSxth2Sxtb and
    SxtbSxtb2Sxtb) move the write of reg3 up to p's position, so they
    additionally require that reg3 is not used between p and hp1. The
    look-ahead may skip unrelated instructions, and any of those could
    still read or write reg3. }
  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
    var
      hp1, hp2: tai;
    begin
      Result:=false;
      {
        change
          sxtb reg2,reg1
          strb reg2,[...]
          dealloc reg2
        to
          strb reg1,[...]

        Note: this pattern accepts whatever opcode p carries; only the
        condition, the postfix and the operand count are checked.
      }
      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
        { the reference in strb must not use reg2 }
        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbStrb2Strb done', p);
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      {
        change
          sxtb reg2,reg1
          sxth reg3,reg2
          dealloc reg2
        to
          sxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { reg3 is written earlier after the change, so nothing between
          p and hp1 may touch it }
        not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbSxth2Sxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
          sxtb reg2,reg1
          sxtb reg3,reg2
          dealloc reg2
        to
          sxtb reg3,reg1
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
        (taicpu(hp1).ops = 2) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
        { reg3 is written earlier after the change, so nothing between
          p and hp1 may touch it }
        not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
          asml.remove(hp1);
          hp1.free;
          result:=true;
        end
      {
        change
          sxtb reg2,reg1
          and  reg3,reg2,#0xFF
          dealloc reg2
        to
          uxtb reg3,reg1

        The and clears the sign bits produced by sxtb, so the pair is a
        zero extension, not a sign extension. This is only true when the
        mask is exactly 0xFF: a wider mask would keep some of the sign
        bits and the result would match neither uxtb nor sxtb.
      }
      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
        (taicpu(p).ops=2) and
        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
        (taicpu(hp1).ops=3) and
        (taicpu(hp1).oper[2]^.typ=top_const) and
        (taicpu(hp1).oper[2]^.val=$FF) and
        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
        { reg1 must not be modified inbetween }
        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
        begin
          DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
          { turn the and into a zero extension and drop the sxtb }
          taicpu(hp1).opcode:=A_UXTB;
          taicpu(hp1).ops:=2;
          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
          GetNextInstruction(p,hp2);
          asml.remove(p);
          p.free;
          p:=hp2;
          result:=true;
        end
      { last resort: a following "mov x,reg2" can be folded into p }
      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
        RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
        Result:=true;
    end;
  552. end.