{ aoptcpu.pas (30 KB) }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  type
    { Peephole optimizer class of this unit. It derives from TX86AsmOptimizer
      and overrides the driver (Optimize) together with the four pass methods
      that Optimize invokes. }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      { driver: runs the passes below over every block of the assembler list }
      procedure Optimize; override;
      { preparation step, executed only in the very first optimizer round }
      procedure PrePeepHoleOpts; override;
      { first peephole pass: jump handling and per-opcode rewrites }
      procedure PeepHoleOptPass1; override;
      { second peephole pass }
      procedure PeepHoleOptPass2; override;
      { final clean-up, executed only in the last optimizer round }
      procedure PostPeepHoleOpts; override;
    end;

  var
    AsmOptimizer : TCpuAsmOptimizer;
  35. Implementation
  36. uses
  37. verbose,globtype,globals,
  38. cpuinfo,
  39. aasmcpu,
  40. aoptutils,
  41. procinfo,
  42. cgutils,
  43. { units we should get rid off: }
  44. symsym,symconst;
  { Returns true when reg is an integer register (R_INTREGISTER) whose super
    register lies in the range RS_EAX..RS_EBX, both ends included; false for
    every other register.
    NOTE(review): presumably this range is exactly the set of registers that
    also have a low-byte sub-register, which is what the movzx rewrite in
    PostPeepHoleOpts needs - confirm against the RS_* ordering in cpubase. }
  function isgp32reg(reg: TRegister): boolean;
    begin
      if getregtype(reg)=R_INTREGISTER then
        begin
          { warnings are switched off around the range test, as before;
            presumably the lower-bound comparison is flagged as always true -
            TODO confirm }
{$push}{$warnings off}
          Result:=(getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
{$pop}
        end
      else
        Result:=false;
    end;
  { Tells whether instruction p has at least one memory operand (top_ref)
    whose segment field is set, i.e. differs from NR_NO.
    All p.opercnt operand slots are inspected; the scan stops at the first
    hit. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      Result:=false;
      for opidx:=0 to p.opercnt-1 do
        if p.oper[opidx]^.typ=top_ref then
          if p.oper[opidx]^.ref^.segment<>NR_NO then
            begin
              Result:=true;
              break;
            end;
    end;
  { Preparation step that walks all items from BlockStart up to (but not
    including) BlockEnd:
      - instructions with a segment-qualified memory operand are skipped,
      - IMUL and SAR/SHR are handed to PrePeepholeOptIMUL / PrePeepholeOptSxx;
        when such a helper returns true the current item is examined again
        without advancing,
      - "xor reg,reg" is rewritten into "mov $0,reg". }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      p: tai;
    begin
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          if p.typ=ait_instruction then
            if not InsContainsSegRef(taicpu(p)) then
              case taicpu(p).opcode of
                A_IMUL:
                  if PrePeepholeOptIMUL(p) then
                    continue;
                A_SAR,A_SHR:
                  if PrePeepholeOptSxx(p) then
                    continue;
                A_XOR:
                  { only the "clear register" idiom: both operands are the
                    very same register }
                  if (taicpu(p).oper[0]^.typ=top_reg) and
                     (taicpu(p).oper[1]^.typ=top_reg) and
                     (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
                    begin
                      { temporarily turn it into 'mov reg,0' to make things
                        easier for the CSE; according to the original note it
                        is changed back in pass 2 }
                      taicpu(p).opcode:=A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
          { reached for every item that did not "continue" above }
          p:=tai(p.next);
        end;
    end;
  { First pass of peephole optimizations.

    Walks all items from BlockStart up to (but not including) BlockEnd while
    keeping UsedRegs up to date. Instructions that contain a segment-qualified
    memory operand are skipped. For the remaining instructions:
      - jumps: code between an unconditional jmp and the next label is
        removed, a jump to the label that directly follows it is removed,
        "jcc l1; jmp l2; l1:" is turned into an inverted "jcc l2", and jump
        chains are shortened through GetFinalDestination;
      - all other opcodes are dispatched to the OptPass1* helpers, except CMP
        and PUSH which are handled inline.
    Whenever a rewrite wants the current item to be examined again it uses
    "continue" without advancing p. }
  procedure TCPUAsmOPtimizer.PeepHoleOptPass1;

    var
      p,hp1,hp2 : tai;
      hp3,hp4: tai;
      v:aint;

    function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
      { traces successive jumps to their final destination and sets it, e.g.

           je l1                je l3
           <code>               <code>
         l1:       becomes    l1:
           je l2                je l3
           <code>               <code>
         l2:                  l2:
           jmp l3               jmp l3

        the level parameter denotes how deep we have already followed the
        jump, to avoid endless loops with constructs such as "l5: ; jmp l5".

        Returns false when the nesting limit (level > 20) is exceeded or a
        trivial self loop is found, true otherwise. asml is only passed on to
        the recursive calls. }
      var p1, p2: tai;
          l: tasmlabel;

      { Skips the items following hp whose type is in SkipInstr or is
        ait_align; if the item after those is a label, returns true and
        stores its symbol in l. hp itself is not inspected. }
      function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
        begin
          FindAnyLabel := false;
          while assigned(hp.next) and
                (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
            hp := tai(hp.next);
          if assigned(hp.next) and
             (tai(hp.next).typ = ait_label) then
            begin
              FindAnyLabel := true;
              l := tai_label(hp.next).labsym;
            end
        end;

      begin
        GetfinalDestination := false;
        if level > 20 then
          exit;
        p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
        if assigned(p1) then
          begin
            SkipLabels(p1,p1);
            if (tai(p1).typ = ait_instruction) and
               (taicpu(p1).is_jmp) then
              if { the next instruction after the label where the jump hp arrives}
                 { is unconditional or of the same type as hp, so continue       }
                 (taicpu(p1).condition in [C_None,hp.condition]) or
                 { the next instruction after the label where the jump hp arrives}
                 { is the opposite of hp (so this one is never taken), but after }
                 { that one there is a branch that will be taken, so perform a   }
                 { little hack: set p1 equal to this instruction (that's what the}
                 { last SkipLabels is for, only works with short bool evaluation)}
                 ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                  SkipLabels(p1,p2) and
                  (p2.typ = ait_instruction) and
                  (taicpu(p2).is_jmp) and
                  (taicpu(p2).condition in [C_None,hp.condition]) and
                  SkipLabels(p1,p1)) then
                begin
                  { quick check for loops of the form "l5: ; jmp l5" }
                  if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                       tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                    exit;
                  if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                    exit;
                  { retarget hp to wherever p1 ends up, moving one reference
                    from the old label to the new one }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                  tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
                end
              else
                if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                  { the jump at the target is never taken when coming from hp,
                    so hp can go straight to the code behind it }
                  if not FindAnyLabel(p1,l) then
                    begin
{$ifdef finaldestdebug}
                      insertllitem(p1,p1.next,tai_comment.Create(
                        strpnew('previous label inserted')));
{$endif finaldestdebug}
                      { no label behind p1 yet: create one and jump to it }
                      current_asmdata.getjumplabel(l);
                      insertllitem(p1,p1.next,tai_label.Create(l));
                      tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      l.increfs;
                      { this won't work, since the new label isn't in the labeltable }
                      { so it will fail the rangecheck. Labeltable should become a   }
                      { hashtable to support this:                                   }
                      { GetFinalDestination(asml, hp);                               }
                    end
                  else
                    begin
{$ifdef finaldestdebug}
                      insertllitem(p1,p1.next,tai_comment.Create(
                        strpnew('next label reused')));
{$endif finaldestdebug}
                      l.increfs;
                      { hp no longer points at its old target: release that
                        reference as well, exactly like the two other
                        retargeting branches above do }
                      tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      if not GetFinalDestination(asml, hp,succ(level)) then
                        exit;
                    end;
          end;
        GetFinalDestination := true;
      end;

    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpDateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            ait_instruction:
              begin
                current_filepos:=taicpu(p).fileinfo;
                { never touch instructions with a segment-qualified reference }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                { Handle Jmp Optimizations }
                { NOTE(review): the jump target below is read through
                  oper[0]^.ref^ without checking oper[0]^.typ; presumably only
                  jumps to a symbol reach this point - confirm }
                if taicpu(p).is_jmp then
                  begin
                    { the following if-block removes all code between a jmp and the next label,
                      because it can never be executed }
                    if (taicpu(p).opcode = A_JMP) then
                      begin
                        hp2:=p;
                        while GetNextInstruction(hp2, hp1) and
                              (hp1.typ <> ait_label) do
                          if not(hp1.typ in ([ait_label]+skipinstr)) then
                            begin
                              { don't kill start/end of assembler block,
                                no-line-info-start/end etc }
                              if not(hp1.typ in [ait_align,ait_marker]) then
                                begin
                                  asml.remove(hp1);
                                  hp1.free;
                                end
                              else
                                { kept item: continue the scan behind it }
                                hp2:=hp1;
                            end
                          else break;
                      end;
                    { remove jumps to a label coming right after them }
                    if GetNextInstruction(p, hp1) then
                      begin
                        if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
                           { TODO: FIXME removing the first instruction fails}
                           (p<>blockstart) then
                          begin
                            hp2:=tai(hp1.next);
                            asml.remove(p);
                            p.free;
                            p:=hp2;
                            continue;
                          end
                        else
                          begin
                            if hp1.typ = ait_label then
                              SkipLabels(hp1,hp1);
                            { p; jmp l2; <target of p>:  -> jump over a jump }
                            if (tai(hp1).typ=ait_instruction) and
                               (taicpu(hp1).opcode=A_JMP) and
                               GetNextInstruction(hp1, hp2) and
                               FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
                              begin
                                if taicpu(p).opcode=A_Jcc then
                                  begin
                                    { invert the condition, take over the
                                      target of the jmp and drop the jmp }
                                    taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                    tai_label(hp2).labsym.decrefs;
                                    taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
                                    { when free'ing hp1, the ref. isn't decreased, so we don't
                                      increase it (FK)

                                      taicpu(p).oper[0]^.ref^.symbol.increfs;
                                    }
                                    asml.remove(hp1);
                                    hp1.free;
                                    GetFinalDestination(asml, taicpu(p),0);
                                  end
                                else
                                  begin
                                    GetFinalDestination(asml, taicpu(p),0);
                                    p:=tai(p.next);
                                    continue;
                                  end;
                              end
                            else
                              GetFinalDestination(asml, taicpu(p),0);
                          end;
                      end;
                  end
                else
                  { All other optimizes }
                  begin
                    case taicpu(p).opcode Of
                      A_AND:
                        if OptPass1And(p) then
                          continue;
                      A_CMP:
                        begin
                          { cmp register,$8000                neg register
                            je target                 -->     jo target

                            .... only if register is deallocated before jump.}
                          { v is the "most negative value" constant matching
                            the operand size of the cmp }
                          case Taicpu(p).opsize of
                            S_B: v:=$80;
                            S_W: v:=$8000;
                            S_L: v:=aint($80000000);
                            else
                              internalerror(2013112905);
                          end;
                          if (taicpu(p).oper[0]^.typ=Top_const) and
                             (taicpu(p).oper[0]^.val=v) and
                             (Taicpu(p).oper[1]^.typ=top_reg) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ=ait_instruction) and
                             (taicpu(hp1).opcode=A_Jcc) and
                             (Taicpu(hp1).condition in [C_E,C_NE]) and
                             not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
                            begin
                              { turn the two-operand cmp into a one-operand neg }
                              Taicpu(p).opcode:=A_NEG;
                              Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                              Taicpu(p).clearop(1);
                              Taicpu(p).ops:=1;
                              if Taicpu(hp1).condition=C_E then
                                Taicpu(hp1).condition:=C_O
                              else
                                Taicpu(hp1).condition:=C_NO;
                              continue;
                            end;
                          {
                          @@2:                              @@2:
                            ....                              ....
                            cmp operand1,0
                            jle/jbe @@1
                            dec operand1             -->      sub operand1,1
                            jmp @@2                           jge/jae @@2
                          @@1:                              @@1:
                            ...                               ....}
                          if (taicpu(p).oper[0]^.typ = top_const) and
                             (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
                             (taicpu(p).oper[0]^.val = 0) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ = ait_instruction) and
                             (taicpu(hp1).is_jmp) and
                             (taicpu(hp1).opcode=A_Jcc) and
                             (taicpu(hp1).condition in [C_LE,C_BE]) and
                             GetNextInstruction(hp1,hp2) and
                             (hp2.typ = ait_instruction) and
                             (taicpu(hp2).opcode = A_DEC) and
                             OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                             GetNextInstruction(hp2, hp3) and
                             (hp3.typ = ait_instruction) and
                             (taicpu(hp3).is_jmp) and
                             (taicpu(hp3).opcode = A_JMP) and
                             GetNextInstruction(hp3, hp4) and
                             FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
                            begin
                              { dec operand1 -> sub $1,operand1 }
                              taicpu(hp2).Opcode := A_SUB;
                              taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
                              taicpu(hp2).loadConst(0,1);
                              taicpu(hp2).ops:=2;
                              { jmp @@2 -> conditional jump back }
                              taicpu(hp3).Opcode := A_Jcc;
                              case taicpu(hp1).condition of
                                C_LE: taicpu(hp3).condition := C_GE;
                                C_BE: taicpu(hp3).condition := C_AE;
                              end;
                              { the cmp and the forward jcc are no longer needed }
                              asml.remove(p);
                              asml.remove(hp1);
                              p.free;
                              hp1.free;
                              p := hp2;
                              continue;
                            end
                        end;
                      A_FLD:
                        if OptPass1FLD(p) then
                          continue;
                      A_FSTP,A_FISTP:
                        if OptPass1FSTP(p) then
                          continue;
                      A_LEA:
                        begin
                          if OptPass1LEA(p) then
                            continue;
                        end;
                      A_MOV:
                        begin
                          If OptPass1MOV(p) then
                            Continue;
                        end;
                      A_MOVSX,
                      A_MOVZX :
                        begin
                          If OptPass1Movx(p) then
                            Continue
                        end;
                      { A_POP: the former folding of pop/push sequences into
                        "mov x(%esp),%reg" was disabled because such sequences
                        should not be generated anymore by the current code
                        generator; the dead code has been dropped here. }
                      A_PUSH:
                        begin
                          { two consecutive 16 bit constant pushes are merged
                            into a single 32 bit push; the constant pushed
                            first ends up in the upper half }
                          if (taicpu(p).opsize = S_W) and
                             (taicpu(p).oper[0]^.typ = Top_Const) and
                             GetNextInstruction(p, hp1) and
                             (tai(hp1).typ = ait_instruction) and
                             (taicpu(hp1).opcode = A_PUSH) and
                             (taicpu(hp1).oper[0]^.typ = Top_Const) and
                             (taicpu(hp1).opsize = S_W) then
                            begin
                              taicpu(p).changeopsize(S_L);
                              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
                              asml.remove(hp1);
                              hp1.free;
                            end;
                        end;
                      A_SHL, A_SAL:
                        if OptPass1SHLSAL(p) then
                          Continue;
                      A_SUB:
                        if OptPass1Sub(p) then
                          continue;
                      A_VMOVAPS,
                      A_VMOVAPD:
                        if OptPass1VMOVAP(p) then
                          continue;
                      A_VDIVSD,
                      A_VDIVSS,
                      A_VSUBSD,
                      A_VSUBSS,
                      A_VMULSD,
                      A_VMULSS,
                      A_VADDSD,
                      A_VADDSS,
                      A_VANDPD,
                      A_VANDPS,
                      A_VORPD,
                      A_VORPS,
                      A_VXORPD,
                      A_VXORPS:
                        if OptPass1VOP(p) then
                          continue;
                      A_MULSD,
                      A_MULSS,
                      A_ADDSD,
                      A_ADDSS:
                        if OptPass1OP(p) then
                          continue;
                      A_MOVAPD,
                      A_MOVAPS:
                        if OptPass1MOVAP(p) then
                          continue;
                      A_VMOVSD,
                      A_VMOVSS,
                      A_MOVSD,
                      A_MOVSS:
                        if OptPass1MOVXX(p) then
                          continue;
                      A_SETcc:
                        if OptPass1SETcc(p) then
                          continue;
                    end;
                  end; { if is_jmp }
              end;
          end;
          updateUsedRegs(UsedRegs,p);
          p:=tai(p.next);
        end;
    end;
  { Second peephole pass over the items from BlockStart up to (but not
    including) BlockEnd.
    Instructions with a segment-qualified memory operand are skipped; Jcc,
    FSTP/FISTP, IMUL, JMP and MOV are handed to their helper. When a helper
    returns true the current item is examined again, otherwise the walk
    advances to the next item. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      p : tai;
      handled : boolean;
    begin
      p:=BlockStart;
      ClearUsedRegs;
      while p<>BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs,tai(p.next));
          { handled = "a helper asked for p to be looked at again" }
          handled:=false;
          if p.typ=ait_instruction then
            if not InsContainsSegRef(taicpu(p)) then
              case taicpu(p).opcode of
                A_Jcc:
                  handled:=OptPass2Jcc(p);
                A_FSTP,A_FISTP:
                  { the pass 1 helper is reused here }
                  handled:=OptPass1FSTP(p);
                A_IMUL:
                  handled:=OptPass2Imul(p);
                A_JMP:
                  handled:=OptPass2Jmp(p);
                A_MOV:
                  handled:=OptPass2MOV(p);
              end;
          if not handled then
            p:=tai(p.next);
        end;
    end;
  { Final peephole step over the items from BlockStart up to (but not
    including) BlockEnd, followed by a call to OptReferences.

    Instructions with a segment-qualified memory operand are skipped. CALL,
    LEA, CMP, MOV and TEST/OR are handed to their PostPeephole* helper (a
    true result means "examine the current item again"). MOVZX is handled
    inline: a byte-to-long movzx into a register accepted by IsGP32Reg is
    split into "xor reg,reg" + "movb", but only when optimizing for the
    Pentium, not for size, and without register variables. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1: tai;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    if PostPeepHoleOptCall(p) then
                      Continue;
                  A_LEA:
                    if PostPeepholeOptLea(p) then
                      Continue;
                  A_CMP:
                    if PostPeepholeOptCmp(p) then
                      Continue;
                  A_MOV:
                    if PostPeepholeOptMov(p) then
                      Continue;
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          begin
                            if (taicpu(p).oper[0]^.typ = top_reg) then
                              begin
                                case taicpu(p).opsize of
                                  S_BL:
                                    begin
                                      if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                         { source and destination must not share
                                           a super register (e.g. %al/%eax):
                                           the xor inserted below would wipe
                                           the source before the movb reads
                                           it; the memory variant further down
                                           has the equivalent base/index check }
                                         (getsupreg(taicpu(p).oper[0]^.reg) <>
                                          getsupreg(taicpu(p).oper[1]^.reg)) and
                                         not(cs_opt_size in current_settings.optimizerswitches) and
                                         (current_settings.optimizecputype = cpu_Pentium) then
                                          {Change "movzbl %reg1, %reg2" to
                                           "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                           PentiumMMX}
                                        begin
                                          hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                          InsertLLItem(p.previous, p, hp1);
                                          taicpu(p).opcode := A_MOV;
                                          taicpu(p).changeopsize(S_B);
                                          { the mov now writes the byte
                                            sub-register of the destination }
                                          setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                        end;
                                    end;
                                end;
                              end
                            else if (taicpu(p).oper[0]^.typ = top_ref) and
                                (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                not(cs_opt_size in current_settings.optimizerswitches) and
                                IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                (current_settings.optimizecputype = cpu_Pentium) and
                                (taicpu(p).opsize = S_BL) then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              begin
                                hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                            taicpu(p).oper[1]^.reg);
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).changeopsize(S_B);
                                setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                InsertLLItem(p.previous, p, hp1);
                              end;
                          end;
                      end;
                  A_TEST, A_OR:
                    if PostPeepholeOptTestOr(p) then
                      Continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
      OptReferences;
    end;
  { Driver of the optimizer.

    Repeats "optimizer rounds" over the whole assembler list (asml). A round
    starts at asml.first, calls pass_1 and then, block by block, runs the
    peephole passes (only when cs_opt_peephole is set); the stretches between
    the mark_AsmBlockStart and mark_AsmBlockEnd markers are skipped.
    Without cs_opt_level3 exactly one round is executed. With it, rounds are
    repeated until the termination test below fires; note that "changed" is
    never set to true inside this procedure, so that test reduces to
    "round number > 2".
    dfa is freed at the very end. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      nextInstr: tai;
      roundNr: longint;
      level3, changed, finalRound: boolean;
    begin
      level3:=cs_opt_level3 in current_settings.optimizerswitches;
      roundNr:=0;
      changed:=false;
      repeat
        finalRound:=
          not(level3) or
          (not changed and (roundNr>2)) or
          { prevent endless loops }
          (roundNr=4);
        changed:=false;
        { Setup labeltable, always necessary }
        BlockStart:=tai(asml.first);
        pass_1;
        { Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
        { or nil                                                                     }
        while assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                { the preparation step belongs to the first round only }
                if roundNr=0 then
                  PrePeepHoleOpts;
                PeepHoleOptPass1;
                { pass 1 is executed a second time, but only in the first round }
                if roundNr=0 then
                  PeepHoleOptPass1;
              end;
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                PeepHoleOptPass2;
                { the clean-up step belongs to the last round only }
                if finalRound then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of an }
            { assembler block or nil                                         }
            BlockStart:=BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ=ait_marker) and
                  (tai_marker(BlockStart).Kind=mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it: advance until
                  the matching mark_AsmBlockEnd marker is reached }
                repeat
                  BlockStart:=tai(BlockStart.next);
                until (BlockStart.typ=ait_marker) and
                      (tai_marker(BlockStart).Kind=mark_AsmBlockEnd);
                if GetNextInstruction(BlockStart,nextInstr) and
                   not((nextInstr.typ=ait_marker) and
                       (tai_marker(nextInstr).Kind=mark_AsmBlockStart)) then
                  { no further assembler block directly behind this one, so }
                  { prepare the next block of "normal" instructions         }
                  pass_1
                else
                  { otherwise go on with the item that was found, which makes
                    the loop skip the next assembler block as well }
                  BlockStart:=nextInstr;
              end;
          end;
        inc(roundNr);
      until finalRound;
      dfa.free;
    end;
  { Unit initialization: stores the TCpuAsmOptimizer class reference in
    casmoptimizer. }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.