aoptcpu.pas 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 assembler optimizer class. It derives from TX86AsmOptimizer and
    overrides the optimizer driver (Optimize) plus the four peephole phases;
    all five methods are implemented in the implementation section of this
    unit. }
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  { Driver: repeatedly runs the peephole phases over each block of the list. }
  27. procedure Optimize; override;
  { Phase run only in the first optimizer pass, before PeepHoleOptPass1. }
  28. procedure PrePeepHoleOpts; override;
  { Main peephole phase (jump and per-opcode optimizations). }
  29. procedure PeepHoleOptPass1; override;
  { Second peephole phase, run after PeepHoleOptPass1. }
  30. procedure PeepHoleOptPass2; override;
  { Final phase, run only in the last optimizer pass. }
  31. procedure PostPeepHoleOpts; override;
  32. end;
  33. Var
  { Global optimizer instance variable; it is not assigned anywhere in the
    visible part of this unit. }
  34. AsmOptimizer : TCpuAsmOptimizer;
  35. Implementation
  36. uses
  37. verbose,globtype,globals,
  38. cpuinfo,
  39. aasmcpu,
  40. aoptutils,
  41. procinfo,
  42. cgutils,
  43. { units we should get rid of: }
  44. symsym,symconst;
  { Converts a tinschange value that names a specific register into the
    matching tsuperregister.

    The enumeration is expected to contain four consecutive groups of eight
    register entries each, ending in CH_REDI, CH_WEDI, CH_RWEDI and CH_MEDI
    (presumably the read, write, read/write and modify variants - confirm
    against the declaration of tinschange). Each group is folded back onto
    the CH_REAX..CH_REDI range by subtracting the ordinal of the last entry
    of the preceding group.

    Any value outside those four groups - below CH_REAX or above CH_MEDI -
    raises InternalError(2016041901) instead of indexing the lookup table
    out of range. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    begin
      { values ordered before CH_REAX do not describe a register; without
        this guard they would pass the "<= CH_REDI" test below and index
        ch2reg out of bounds }
      if (ch < CH_REAX) then
        InternalError(2016041901)
      else if (ch <= CH_REDI) then
        tch2reg := ch2reg[ch]
      else if (ch <= CH_WEDI) then
        tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_REDI))]
      else if (ch <= CH_RWEDI) then
        tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_WEDI))]
      else if (ch <= CH_MEDI) then
        tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_RWEDI))]
      else
        InternalError(2016041901)
    end;
  { Returns true when reg is an integer register whose superregister lies in
    the range RS_EAX..RS_EBX. Despite the name, registers whose superregister
    is above RS_EBX are rejected (presumably the range covers exactly
    EAX, ECX, EDX and EBX, i.e. the registers that have a byte-sized
    subregister - confirm against the RS_xxx declarations). }
  function isgp32reg(reg: TRegister): boolean;
    begin
      if getregtype(reg)=R_INTREGISTER then
        begin
          { warnings are silenced around the range comparison, as in the
            original code }
{$push}{$warnings off}
          isgp32reg:=(getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
{$pop}
        end
      else
        isgp32reg:=false;
    end;
  { Reports whether instruction p has at least one memory-reference operand
    whose segment register is set (i.e. differs from NR_NO). Returns false
    when no operand qualifies, including when p has no operands at all. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      result:=false;
      for opidx:=0 to p.opercnt-1 do
        if (p.oper[opidx]^.typ=top_ref) and
           (p.oper[opidx]^.ref^.segment<>NR_NO) then
          begin
            { first hit is enough }
            result:=true;
            exit;
          end;
    end;
  { Preparatory peephole phase over the items from BlockStart up to (not
    including) BlockEnd.

    Instructions that reference memory through a segment register are left
    untouched. For the others:
      - IMUL and SAR/SHR are handed to PrePeepholeOptIMUL / PrePeepholeOptSxx;
        when such a helper returns true the loop restarts without advancing,
        using whatever the helper left in the cursor.
      - "xor reg,reg" is rewritten in place into "mov reg,0". }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      cur: tai;
    begin
      cur := BlockStart;
      while (cur <> BlockEnd) do
        begin
          if cur.typ = ait_instruction then
            begin
              { never touch instructions using a segment override }
              if InsContainsSegRef(taicpu(cur)) then
                begin
                  cur := tai(cur.next);
                  continue;
                end;
              case taicpu(cur).opcode of
                A_IMUL:
                  if PrePeepholeOptIMUL(cur) then
                    continue;
                A_SAR,A_SHR:
                  if PrePeepholeOptSxx(cur) then
                    continue;
                A_XOR:
                  { temporarily change this to 'mov reg,0' to make it easier
                    for the CSE; according to the original note it is
                    changed back in pass 2 }
                  if (taicpu(cur).oper[0]^.typ = top_reg) and
                     (taicpu(cur).oper[1]^.typ = top_reg) and
                     (taicpu(cur).oper[0]^.reg = taicpu(cur).oper[1]^.reg) then
                    begin
                      taicpu(cur).opcode := A_MOV;
                      taicpu(cur).loadConst(0,0);
                    end;
              end;
            end;
          cur := tai(cur.next);
        end;
    end;
  { First pass of peephole optimizations.

    Walks the items from BlockStart up to (not including) BlockEnd and, for
    every instruction that has no segment-overridden memory operand:
      - jumps: removes unreachable code following an unconditional jmp,
        removes jumps to the label immediately following them, inverts a
        conditional jump over an unconditional one, and shortens jump
        chains through GetFinalDestination;
      - everything else: dispatches on the opcode, partly to the inline
        patterns below (CMP, FLD, PUSH) and partly to OptPass1xxx helpers
        that are not defined in this unit. When a helper returns true the
        loop restarts on the (possibly replaced) current item. }
  procedure TCPUAsmOPtimizer.PeepHoleOptPass1;

    var
      p,hp1,hp2 : tai;
      hp3,hp4: tai;
      v:aint;

    function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     the level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5".

     Returns false when the recursion limit (level > 20) is exceeded, when a
     direct self-loop is detected or when a nested call failed; true
     otherwise (also when nothing could be changed). }
      var p1, p2: tai;
          l: tasmlabel;

      { Skips the items of types SkipInstr and ait_align that follow hp; if
        the item after them is a label, returns true and stores its symbol
        in l. }
      function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
        begin
          FindAnyLabel := false;
          while assigned(hp.next) and
                (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
            hp := tai(hp.next);
          if assigned(hp.next) and
             (tai(hp.next).typ = ait_label) then
            begin
              FindAnyLabel := true;
              l := tai_label(hp.next).labsym;
            end
        end;

      begin
        GetfinalDestination := false;
        if level > 20 then
          exit;
        p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
        if assigned(p1) then
          begin
            SkipLabels(p1,p1);
            if (tai(p1).typ = ait_instruction) and
               (taicpu(p1).is_jmp) then
              if { the next instruction after the label where the jump hp arrives}
                 { is unconditional or of the same type as hp, so continue       }
                 (taicpu(p1).condition in [C_None,hp.condition]) or
                 { the next instruction after the label where the jump hp arrives}
                 { is the opposite of hp (so this one is never taken), but after }
                 { that one there is a branch that will be taken, so perform a   }
                 { little hack: set p1 equal to this instruction (that's what the}
                 { last SkipLabels is for, only works with short bool evaluation)}
                 ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                  SkipLabels(p1,p2) and
                  (p2.typ = ait_instruction) and
                  (taicpu(p2).is_jmp) and
                  (taicpu(p2).condition in [C_None,hp.condition]) and
                  SkipLabels(p1,p1)) then
                begin
                  { quick check for loops of the form "l5: ; jmp l5 }
                  if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                       tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                    exit;
                  if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                    exit;
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                  tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
                end
              else
                if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                  if not FindAnyLabel(p1,l) then
                    begin
{$ifdef finaldestdebug}
                      insertllitem(asml,p1,p1.next,tai_comment.Create(
                        strpnew('previous label inserted'))));
{$endif finaldestdebug}
                      current_asmdata.getjumplabel(l);
                      insertllitem(p1,p1.next,tai_label.Create(l));
                      tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      l.increfs;
                      { this won't work, since the new label isn't in the labeltable }
                      { so it will fail the rangecheck. Labeltable should become a   }
                      { hashtable to support this:                                   }
                      { GetFinalDestination(asml, hp); }
                    end
                  else
                    begin
{$ifdef finaldestdebug}
                      insertllitem(asml,p1,p1.next,tai_comment.Create(
                        strpnew('next label reused'))));
{$endif finaldestdebug}
                      { take the new reference first, then release the one on
                        the old target, exactly like the branch above does;
                        otherwise the old label keeps a stale reference }
                      l.increfs;
                      tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      if not GetFinalDestination(asml, hp,succ(level)) then
                        exit;
                    end;
          end;
        GetFinalDestination := true;
      end;

    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpDateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            ait_instruction:
              begin
                current_filepos:=taicpu(p).fileinfo;
                { leave instructions with a segment override alone }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                { Handle Jmp Optimizations }
                if taicpu(p).is_jmp then
                  begin
                    { the following if-block removes all code between a jmp and the next label,
                      because it can never be executed }
                    if (taicpu(p).opcode = A_JMP) then
                      begin
                        hp2:=p;
                        while GetNextInstruction(hp2, hp1) and
                              (hp1.typ <> ait_label) do
                          if not(hp1.typ in ([ait_label]+skipinstr)) then
                            begin
                              { don't kill start/end of assembler block,
                                no-line-info-start/end etc }
                              if not(hp1.typ in [ait_align,ait_marker]) then
                                begin
                                  asml.remove(hp1);
                                  hp1.free;
                                end
                              else
                                hp2:=hp1;
                            end
                          else break;
                      end;
                    { remove jumps to a label coming right after them }
                    if GetNextInstruction(p, hp1) then
                      begin
                        if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
                           { TODO: FIXME removing the first instruction fails}
                           (p<>blockstart) then
                          begin
                            hp2:=tai(hp1.next);
                            asml.remove(p);
                            p.free;
                            p:=hp2;
                            continue;
                          end
                        else
                          begin
                            if hp1.typ = ait_label then
                              SkipLabels(hp1,hp1);
                            { jcc/jmp @@1; jmp @@2; @@1:  -->  j(not cc) @@2; @@1: }
                            if (tai(hp1).typ=ait_instruction) and
                               (taicpu(hp1).opcode=A_JMP) and
                               GetNextInstruction(hp1, hp2) and
                               FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
                              begin
                                if taicpu(p).opcode=A_Jcc then
                                  begin
                                    taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                    tai_label(hp2).labsym.decrefs;
                                    taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
                                    { when free'ing hp1, the ref. isn't decresed, so we don't
                                      increase it (FK)

                                      taicpu(p).oper[0]^.ref^.symbol.increfs;
                                    }
                                    asml.remove(hp1);
                                    hp1.free;
                                    GetFinalDestination(asml, taicpu(p),0);
                                  end
                                else
                                  begin
                                    GetFinalDestination(asml, taicpu(p),0);
                                    p:=tai(p.next);
                                    continue;
                                  end;
                              end
                            else
                              GetFinalDestination(asml, taicpu(p),0);
                          end;
                      end;
                  end
                else
                  { All other optimizes }
                  begin
                    case taicpu(p).opcode Of
                      A_AND:
                        if OptPass1And(p) then
                          continue;
                      A_CMP:
                        begin
                          { cmp register,$8000                neg register
                            je target                 -->     jo target

                            .... only if register is deallocated before jump.}
                          case Taicpu(p).opsize of
                            S_B: v:=$80;
                            S_W: v:=$8000;
                            S_L: v:=aint($80000000);
                            else
                              internalerror(2013112905);
                          end;
                          if (taicpu(p).oper[0]^.typ=Top_const) and
                             (taicpu(p).oper[0]^.val=v) and
                             (Taicpu(p).oper[1]^.typ=top_reg) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ=ait_instruction) and
                             (taicpu(hp1).opcode=A_Jcc) and
                             (Taicpu(hp1).condition in [C_E,C_NE]) and
                             not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
                            begin
                              Taicpu(p).opcode:=A_NEG;
                              Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                              Taicpu(p).clearop(1);
                              Taicpu(p).ops:=1;
                              if Taicpu(hp1).condition=C_E then
                                Taicpu(hp1).condition:=C_O
                              else
                                Taicpu(hp1).condition:=C_NO;
                              continue;
                            end;
                          {
                          @@2:                              @@2:
                            ....                              ....
                            cmp operand1,0
                            jle/jbe @@1
                            dec operand1             -->      sub operand1,1
                            jmp @@2                           jge/jae @@2
                          @@1:                              @@1:
                            ...                               ....}
                          if (taicpu(p).oper[0]^.typ = top_const) and
                             (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
                             (taicpu(p).oper[0]^.val = 0) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ = ait_instruction) and
                             (taicpu(hp1).is_jmp) and
                             (taicpu(hp1).opcode=A_Jcc) and
                             (taicpu(hp1).condition in [C_LE,C_BE]) and
                             GetNextInstruction(hp1,hp2) and
                             (hp2.typ = ait_instruction) and
                             (taicpu(hp2).opcode = A_DEC) and
                             OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                             GetNextInstruction(hp2, hp3) and
                             (hp3.typ = ait_instruction) and
                             (taicpu(hp3).is_jmp) and
                             (taicpu(hp3).opcode = A_JMP) and
                             GetNextInstruction(hp3, hp4) and
                             FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
                            begin
                              taicpu(hp2).Opcode := A_SUB;
                              taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
                              taicpu(hp2).loadConst(0,1);
                              taicpu(hp2).ops:=2;
                              taicpu(hp3).Opcode := A_Jcc;
                              case taicpu(hp1).condition of
                                C_LE: taicpu(hp3).condition := C_GE;
                                C_BE: taicpu(hp3).condition := C_AE;
                              end;
                              asml.remove(p);
                              asml.remove(hp1);
                              p.free;
                              hp1.free;
                              p := hp2;
                              continue;
                            end
                        end;
                      A_FLD:
                        begin
                          if (taicpu(p).oper[0]^.typ = top_reg) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ = Ait_Instruction) and
                             { only inspect both operands when hp1 really has
                               two of them (same guard as the memory variant
                               below) }
                             (taicpu(hp1).ops = 2) and
                             (taicpu(hp1).oper[0]^.typ = top_reg) and
                             (taicpu(hp1).oper[1]^.typ = top_reg) and
                             (taicpu(hp1).oper[0]^.reg = NR_ST) and
                             (taicpu(hp1).oper[1]^.reg = NR_ST1) then
                            { change                        to
                                fld reg                       fxxx reg,st
                                fxxxp st, st1 (hp1)
                              Remark: non commutative operations must be reversed!
                            }
                            begin
                              case taicpu(hp1).opcode Of
                                A_FMULP,A_FADDP,
                                A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_FADDP: taicpu(hp1).opcode := A_FADD;
                                      A_FMULP: taicpu(hp1).opcode := A_FMUL;
                                      A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                                      A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                                      A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                                      A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                                    end;
                                    taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
                                    taicpu(hp1).oper[1]^.reg := NR_ST;
                                    asml.remove(p);
                                    p.free;
                                    p := hp1;
                                    continue;
                                  end;
                              end;
                            end
                          else
                            if (taicpu(p).oper[0]^.typ = top_ref) and
                               GetNextInstruction(p, hp2) and
                               (hp2.typ = Ait_Instruction) and
                               (taicpu(hp2).ops = 2) and
                               (taicpu(hp2).oper[0]^.typ = top_reg) and
                               (taicpu(hp2).oper[1]^.typ = top_reg) and
                               (taicpu(p).opsize in [S_FS, S_FL]) and
                               (taicpu(hp2).oper[0]^.reg = NR_ST) and
                               (taicpu(hp2).oper[1]^.reg = NR_ST1) then
                              if GetLastInstruction(p, hp1) and
                                 (hp1.typ = Ait_Instruction) and
                                 ((taicpu(hp1).opcode = A_FLD) or
                                  (taicpu(hp1).opcode = A_FST)) and
                                 (taicpu(hp1).opsize = taicpu(p).opsize) and
                                 (taicpu(hp1).oper[0]^.typ = top_ref) and
                                 RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
                                if ((taicpu(hp2).opcode = A_FMULP) or
                                    (taicpu(hp2).opcode = A_FADDP)) then
                                  { change                      to
                                      fld/fst mem1 (hp1)          fld/fst mem1
                                      fld mem1 (p)                fadd/
                                      faddp/                      fmul st, st
                                      fmulp st, st1 (hp2) }
                                  begin
                                    asml.remove(p);
                                    p.free;
                                    p := hp1;
                                    if (taicpu(hp2).opcode = A_FADDP) then
                                      taicpu(hp2).opcode := A_FADD
                                    else
                                      taicpu(hp2).opcode := A_FMUL;
                                    taicpu(hp2).oper[1]^.reg := NR_ST;
                                  end
                                else
                                  { change              to
                                      fld/fst mem1 (hp1)   fld/fst mem1
                                      fld mem1 (p)         fld st}
                                  begin
                                    taicpu(p).changeopsize(S_FL);
                                    taicpu(p).loadreg(0,NR_ST);
                                  end
                              else
                                begin
                                  case taicpu(hp2).opcode Of
                                    A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                                      { change                        to
                                          fld/fst mem1 (hp1)            fld/fst mem1
                                          fld mem2 (p)                  fxxx mem2
                                          fxxxp st, st1 (hp2) }
                                      begin
                                        case taicpu(hp2).opcode Of
                                          A_FADDP: taicpu(p).opcode := A_FADD;
                                          A_FMULP: taicpu(p).opcode := A_FMUL;
                                          A_FSUBP: taicpu(p).opcode := A_FSUBR;
                                          A_FSUBRP: taicpu(p).opcode := A_FSUB;
                                          A_FDIVP: taicpu(p).opcode := A_FDIVR;
                                          A_FDIVRP: taicpu(p).opcode := A_FDIV;
                                        end;
                                        asml.remove(hp2);
                                        hp2.free;
                                      end
                                  end
                                end
                        end;
                      A_FSTP,A_FISTP:
                        if OptPass1FSTP(p) then
                          continue;
                      A_LEA:
                        begin
                          if OptPass1LEA(p) then
                            continue;
                        end;
                      A_MOV:
                        begin
                          If OptPass1MOV(p) then
                            Continue;
                        end;
                      A_MOVSX,
                      A_MOVZX :
                        begin
                          If OptPass1Movx(p) then
                            Continue
                        end;
(* should not be generated anymore by the current code generator
                      A_POP:
                        begin
                          if target_info.system=system_i386_go32v2 then
                            begin
                              { Transform a series of pop/pop/pop/push/push/push to }
                              { 'movl x(%esp),%reg' for go32v2 (not for the rest,   }
                              { because I'm not sure whether they can cope with     }
                              { 'movl x(%esp),%reg' with x > 0, I believe we had    }
                              { such a problem when using esp as frame pointer (JM) }
                              if (taicpu(p).oper[0]^.typ = top_reg) then
                                begin
                                  hp1 := p;
                                  hp2 := p;
                                  l := 0;
                                  while getNextInstruction(hp1,hp1) and
                                        (hp1.typ = ait_instruction) and
                                        (taicpu(hp1).opcode = A_POP) and
                                        (taicpu(hp1).oper[0]^.typ = top_reg) do
                                    begin
                                      hp2 := hp1;
                                      inc(l,4);
                                    end;
                                  getLastInstruction(p,hp3);
                                  l1 := 0;
                                  while (hp2 <> hp3) and
                                        assigned(hp1) and
                                        (hp1.typ = ait_instruction) and
                                        (taicpu(hp1).opcode = A_PUSH) and
                                        (taicpu(hp1).oper[0]^.typ = top_reg) and
                                        (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
                                    begin
                                      { change it to a two op operation }
                                      taicpu(hp2).oper[1]^.typ:=top_none;
                                      taicpu(hp2).ops:=2;
                                      taicpu(hp2).opcode := A_MOV;
                                      taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
                                      reference_reset(tmpref);
                                      tmpRef.base.enum:=R_INTREGISTER;
                                      tmpRef.base.number:=NR_STACK_POINTER_REG;
                                      convert_register_to_enum(tmpref.base);
                                      tmpRef.offset := l;
                                      taicpu(hp2).loadRef(0,tmpRef);
                                      hp4 := hp1;
                                      getNextInstruction(hp1,hp1);
                                      asml.remove(hp4);
                                      hp4.free;
                                      getLastInstruction(hp2,hp2);
                                      dec(l,4);
                                      inc(l1);
                                    end;
                                  if l <> -4 then
                                    begin
                                      inc(l,4);
                                      for l1 := l1 downto 1 do
                                        begin
                                          getNextInstruction(hp2,hp2);
                                          dec(taicpu(hp2).oper[0]^.ref^.offset,l);
                                        end
                                    end
                                end
                            end
                          else
                            begin
                              if (taicpu(p).oper[0]^.typ = top_reg) and
                                 GetNextInstruction(p, hp1) and
                                 (tai(hp1).typ=ait_instruction) and
                                 (taicpu(hp1).opcode=A_PUSH) and
                                 (taicpu(hp1).oper[0]^.typ = top_reg) and
                                 (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
                                begin
                                  { change it to a two op operation }
                                  taicpu(p).oper[1]^.typ:=top_none;
                                  taicpu(p).ops:=2;
                                  taicpu(p).opcode := A_MOV;
                                  taicpu(p).loadoper(1,taicpu(p).oper[0]^);
                                  reference_reset(tmpref);
                                  TmpRef.base.enum := R_ESP;
                                  taicpu(p).loadRef(0,TmpRef);
                                  asml.remove(hp1);
                                  hp1.free;
                                end;
                            end;
                        end;
*)
                      A_PUSH:
                        begin
                          { two consecutive 16 bit constant pushes are merged
                            into one 32 bit push; the constant pushed first
                            ends up in the upper half }
                          if (taicpu(p).opsize = S_W) and
                             (taicpu(p).oper[0]^.typ = Top_Const) and
                             GetNextInstruction(p, hp1) and
                             (tai(hp1).typ = ait_instruction) and
                             (taicpu(hp1).opcode = A_PUSH) and
                             (taicpu(hp1).oper[0]^.typ = Top_Const) and
                             (taicpu(hp1).opsize = S_W) then
                            begin
                              taicpu(p).changeopsize(S_L);
                              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
                              asml.remove(hp1);
                              hp1.free;
                            end;
                        end;
                      A_SHL, A_SAL:
                        if OptPass1SHLSAL(p) then
                          Continue;
                      A_SUB:
                        if OptPass1Sub(p) then
                          continue;
                      A_VMOVAPS,
                      A_VMOVAPD:
                        if OptPass1VMOVAP(p) then
                          continue;
                      A_VDIVSD,
                      A_VDIVSS,
                      A_VSUBSD,
                      A_VSUBSS,
                      A_VMULSD,
                      A_VMULSS,
                      A_VADDSD,
                      A_VADDSS,
                      A_VANDPD,
                      A_VANDPS,
                      A_VORPD,
                      A_VORPS,
                      A_VXORPD,
                      A_VXORPS:
                        if OptPass1VOP(p) then
                          continue;
                      A_MULSD,
                      A_MULSS,
                      A_ADDSD,
                      A_ADDSS:
                        if OptPass1OP(p) then
                          continue;
                      A_MOVAPD,
                      A_MOVAPS:
                        if OptPass1MOVAP(p) then
                          continue;
                      A_VMOVSD,
                      A_VMOVSS,
                      A_MOVSD,
                      A_MOVSS:
                        if OptPass1MOVXX(p) then
                          continue;
                      A_SETcc:
                        if OptPass1SETcc(p) then
                          continue;
                    end;
                  end; { if is_jmp }
              end;
          end;
          updateUsedRegs(UsedRegs,p);
          p:=tai(p.next);
        end;
    end;
  { Second peephole phase over the items from BlockStart up to (not
    including) BlockEnd.

    The used-register state is reset first and updated for the item
    following the current one at the start of every iteration. Instructions
    with a segment-overridden memory operand are skipped; Jcc, FSTP/FISTP,
    IMUL, JMP and MOV are handed to their OptPass helper, and a true result
    restarts the loop without advancing the cursor here. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      cur : tai;
    begin
      cur := BlockStart;
      ClearUsedRegs;
      while (cur <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(cur.next));
          if cur.typ = ait_instruction then
            begin
              if InsContainsSegRef(taicpu(cur)) then
                begin
                  cur := tai(cur.next);
                  continue;
                end;
              case taicpu(cur).opcode of
                A_Jcc:
                  if OptPass2Jcc(cur) then
                    continue;
                { note: the pass 1 helper is reused for fstp/fistp here }
                A_FSTP,A_FISTP:
                  if OptPass1FSTP(cur) then
                    continue;
                A_IMUL:
                  if OptPass2Imul(cur) then
                    continue;
                A_JMP:
                  if OptPass2Jmp(cur) then
                    continue;
                A_MOV:
                  if OptPass2MOV(cur) then
                    continue;
              end;
            end;
          cur := tai(cur.next);
        end;
    end;
  { Final peephole phase over the items from BlockStart up to (not
    including) BlockEnd; OptReferences is called once the walk is done.

    Instructions with a segment-overridden memory operand are skipped.
    CALL, LEA, CMP, MOV and TEST/OR are handed to their PostPeephole helper
    (a true result restarts the loop on the current item). MOVZX is handled
    inline: when optimizing for the Pentium, not for size and without
    register variables, a byte-to-long movzx is split into
    "xorl %reg,%reg; movb src,%reg8".

    The split is only performed when clearing the destination cannot destroy
    the source: for a memory source the destination must be neither base nor
    index of the reference, and for a register source both operands must
    belong to different superregisters (otherwise e.g. "movzbl %al,%eax"
    would become "xorl %eax,%eax; movb %al,%al" and always yield 0). }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1: tai;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    if PostPeepHoleOptCall(p) then
                      Continue;
                  A_LEA:
                    if PostPeepholeOptLea(p) then
                      Continue;
                  A_CMP:
                    if PostPeepholeOptCmp(p) then
                      Continue;
                  A_MOV:
                    if PostPeepholeOptMov(p) then
                      Continue;
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                       { the xor below must not wipe out the source }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                            else if (taicpu(p).oper[0]^.typ = top_ref) and
                                (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                not(cs_opt_size in current_settings.optimizerswitches) and
                                IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                (current_settings.optimizecputype = cpu_Pentium) and
                                (taicpu(p).opsize = S_BL) then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              begin
                                hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                            taicpu(p).oper[1]^.reg);
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).changeopsize(S_B);
                                setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                InsertLLItem(p.previous, p, hp1);
                              end;
                      end;
                  A_TEST, A_OR:
                    if PostPeepholeOptTestOr(p) then
                      Continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
      OptReferences;
    end;
  { Optimizer driver.

    Repeats whole-list passes until the "last loop" condition holds: always
    after the first pass unless cs_opt_level3 is enabled, otherwise once the
    pass counter exceeds 2 with the changed flag clear (it is reset at the
    top of every pass and never set in this routine), or unconditionally
    when the counter reaches 4.

    Within a pass the list is processed block by block: pass_1 prepares the
    block starting at BlockStart, the peephole phases run on it when
    cs_opt_peephole is enabled (PrePeepHoleOpts and a second
    PeepHoleOptPass1 only in pass 0, PostPeepHoleOpts only in the last
    pass), and assembler blocks delimited by mark_AsmBlockStart /
    mark_AsmBlockEnd markers are skipped. dfa is freed at the end. }
  Procedure TCpuAsmOptimizer.Optimize;
    var
      afterAsm: tai;
      passNo: longint;
      thorough, changed, finalPass: boolean;
    begin
      thorough := cs_opt_level3 in current_settings.optimizerswitches;
      passNo := 0;
      changed := false;
      repeat
        finalPass :=
          (not thorough) or
          ((passNo > 2) and not changed) or
          { prevent endless loops }
          (passNo = 4);
        changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNo = 0 then
                  begin
                    { first pass only: preparation plus an extra pass 1 run }
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { more peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            { continue where we left off, BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { we stopped at an assembler block, so skip it }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
                { BlockStart now contains a tai_marker(mark_AsmBlockEnd) }
                if GetNextInstruction(BlockStart, afterAsm) and
                   ((afterAsm.typ <> ait_marker) or
                    (tai_marker(afterAsm).Kind <> mark_AsmBlockStart)) then
                  { no further assembler block follows directly, so set up
                    the next block of "normal" instructions }
                  pass_1
                else
                  { otherwise, skip the next assembler block }
                  BlockStart := afterAsm;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  { Unit initialization: stores the TCpuAsmOptimizer class in the
    casmoptimizer variable, which is declared outside this unit. }
  860. begin
  861. casmoptimizer:=TCpuAsmOptimizer;
  862. end.