aoptcpu.pas 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  Type
    { Assembler optimizer for this CPU.  It derives from TX86AsmOptimizer and
      overrides the Optimize driver as well as the four peephole stages
      (pre, pass 1, pass 2 and post). }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      procedure Optimize; override;
      procedure PrePeepHoleOpts; override;
      procedure PeepHoleOptPass1; override;
      procedure PeepHoleOptPass2; override;
      procedure PostPeepHoleOpts; override;
    end;
  33. Var
  34. AsmOptimizer : TCpuAsmOptimizer;
  35. Implementation
  36. uses
  37. verbose,globtype,globals,
  38. cpuinfo,
  39. aasmcpu,
  40. aoptutils,
  41. procinfo,
  42. cgutils,
  43. { units we should get rid of: }
  44. symsym,symconst;
  { Returns true when reg is an integer register (R_INTREGISTER) whose super
    register lies in the range RS_EAX..RS_EBX; false otherwise.
    Warnings are switched off around the range test and restored afterwards. }
  function isgp32reg(reg: TRegister): boolean;
    begin
  {$push}{$warnings off}
      if getregtype(reg)<>R_INTREGISTER then
        isgp32reg:=false
      else
        isgp32reg:=(getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
  {$pop}
    end;
  { Returns true if at least one operand of p is a memory reference (top_ref)
    whose segment field is set, i.e. differs from NR_NO; false otherwise. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      result:=false;
      for opidx:=0 to p.opercnt-1 do
        if (p.oper[opidx]^.typ=top_ref) and
           (p.oper[opidx]^.ref^.segment<>NR_NO) then
          begin
            { one hit is enough, stop scanning }
            result:=true;
            break;
          end;
    end;
  { Walks the list items from BlockStart up to (not including) BlockEnd and
    applies the pre-peephole rewrites: IMUL and SAR/SHR are delegated to the
    PrePeepholeOpt* helpers, and "xor reg,reg" is turned into "mov reg,0".
    Instructions that carry a segment override are skipped. }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      curr: tai;
    begin
      curr:=BlockStart;
      while curr<>BlockEnd do
        begin
          if (curr.typ=ait_instruction) and
             not InsContainsSegRef(taicpu(curr)) then
            case taicpu(curr).opcode of
              A_IMUL:
                { the helper reports whether curr must be looked at again }
                if PrePeepholeOptIMUL(curr) then
                  continue;
              A_SAR,A_SHR:
                if PrePeepholeOptSxx(curr) then
                  continue;
              A_XOR:
                if (taicpu(curr).oper[0]^.typ=top_reg) and
                   (taicpu(curr).oper[1]^.typ=top_reg) and
                   (taicpu(curr).oper[0]^.reg=taicpu(curr).oper[1]^.reg) then
                  begin
                    { temporarily change this to 'mov reg,0' to make it easier
                      for the CSE; it will be changed back in pass 2 }
                    taicpu(curr).opcode:=A_MOV;
                    taicpu(curr).loadConst(0,0);
                  end;
            end;
          curr:=tai(curr.next);
        end;
    end;
  102. { First pass of peephole optimizations }
  103. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debug helper: writes the line 'Ok' to standard output and always
    returns true. }
  function WriteOk : Boolean;
    begin
      WriteOk:=true;
      writeln('Ok');
    end;
  109. var
  110. l : longint;
  111. p,hp1,hp2 : tai;
  112. hp3,hp4: tai;
  113. v:aint;
  { Traces successive jumps to their final destination and retargets hp
    accordingly, e.g.

        je l1                    je l3
        <code>                   <code>
      l1:            becomes   l1:
        je l2                    je l3
        <code>                   <code>
      l2:                      l2:
        jmp l3                   jmp l3

    asml  : assembler list, only passed on to the recursive calls.
    hp    : the jump to retarget; oper[0] must be a reference to a label.
    level : how deep the chain has been followed already.  Following stops
            once level exceeds 20, to avoid endless loops with constructs
            such as "l5: ; jmp l5".

    Returns false when the depth limit is hit, when a direct self-loop is
    detected or when a recursive call fails; true otherwise (also when
    nothing could be changed).

    Whenever the target symbol of hp is replaced, the reference count of the
    old label is decreased and that of the new label is increased. }
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    var
      p1, p2: tai;
      l: tasmlabel;

    { Skips all items of the kinds in SkipInstr+[ait_align] that follow hp.
      If the item after them is a label, its symbol is stored in l and true
      is returned; otherwise l is left untouched and false is returned. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetFinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives
                 is unconditional or of the same type as hp, so continue }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives
                 is the opposite of hp (so this one is never taken), but after
                 that one there is a branch that will be taken, so perform a
                 little hack: set p1 equal to this instruction (that's what the
                 last SkipLabels is for, only works with short bool evaluation) }
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable
                      so it will fail the rangecheck. Labeltable should become a
                      hashtable to support this:
                        GetFinalDestination(asml, hp); }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { take the new reference first, then release the old one,
                      so the count never drops too early should both be the
                      same label }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Main loop of the first peephole pass: visits every list item from
    BlockStart up to (not including) BlockEnd.  Jumps are simplified inline,
    CMP and PUSH have inline patterns, all other opcodes are dispatched to
    the OptPass1* helpers.  A helper returning true means that p has to be
    examined again, hence the "continue" without advancing p. }
  begin
    p := BlockStart;
    ClearUsedRegs;
    while p <> BlockEnd do
      begin
        UpdateUsedRegs(UsedRegs, tai(p.next));
        if p.typ = ait_instruction then
          begin
            current_filepos := taicpu(p).fileinfo;
            { instructions with a segment override are never touched }
            if InsContainsSegRef(taicpu(p)) then
              begin
                p := tai(p.next);
                continue;
              end;
            if taicpu(p).is_jmp then
              begin
                { ---- jump optimizations ---- }
                if taicpu(p).opcode = A_JMP then
                  begin
                    { remove all code between an unconditional jmp and the
                      next label, because it can never be executed }
                    hp2 := p;
                    while GetNextInstruction(hp2, hp1) and
                          (hp1.typ <> ait_label) do
                      begin
                        if hp1.typ in ([ait_label]+SkipInstr) then
                          break;
                        { don't kill start/end of assembler block,
                          no-line-info-start/end etc: step over them }
                        if hp1.typ in [ait_align,ait_marker] then
                          hp2 := hp1
                        else
                          begin
                            asml.remove(hp1);
                            hp1.free;
                          end;
                      end;
                  end;
                if GetNextInstruction(p, hp1) then
                  begin
                    { remove jumps to a label coming right after them }
                    if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
                       { TODO: FIXME removing the first instruction fails }
                       (p <> BlockStart) then
                      begin
                        hp2 := tai(hp1.next);
                        asml.remove(p);
                        p.free;
                        p := hp2;
                        continue;
                      end;
                    if hp1.typ = ait_label then
                      SkipLabels(hp1,hp1);
                    { "jcc/jmp l1; jmp l2; l1:" pattern }
                    if (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       GetNextInstruction(hp1, hp2) and
                       FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
                      begin
                        if taicpu(p).opcode = A_Jcc then
                          begin
                            { invert the condition, jump straight to the
                              target of the jmp and drop the jmp }
                            taicpu(p).condition := inverse_cond(taicpu(p).condition);
                            tai_label(hp2).labsym.decrefs;
                            taicpu(p).oper[0]^.ref^.symbol := taicpu(hp1).oper[0]^.ref^.symbol;
                            { when free'ing hp1, the ref. isn't decreased, so
                              we don't increase it (FK):
                                taicpu(p).oper[0]^.ref^.symbol.increfs; }
                            asml.remove(hp1);
                            hp1.free;
                            GetFinalDestination(asml, taicpu(p), 0);
                          end
                        else
                          begin
                            GetFinalDestination(asml, taicpu(p), 0);
                            p := tai(p.next);
                            continue;
                          end;
                      end
                    else
                      GetFinalDestination(asml, taicpu(p), 0);
                  end;
              end
            else
              begin
                { ---- all other optimizations ---- }
                case taicpu(p).opcode of
                  A_AND:
                    if OptPass1And(p) then
                      continue;
                  A_CMP:
                    begin
                      { cmp register,$8000                neg register
                        je target                 -->     jo target
                        .... only if register is deallocated before jump. }
                      case taicpu(p).opsize of
                        S_B: v := $80;
                        S_W: v := $8000;
                        S_L: v := aint($80000000);
                        else
                          internalerror(2013112905);
                      end;
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = v) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_Jcc) and
                         (taicpu(hp1).condition in [C_E,C_NE]) and
                         not(RegInUsedRegs(taicpu(p).oper[1]^.reg, UsedRegs)) then
                        begin
                          taicpu(p).opcode := A_NEG;
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                          taicpu(p).clearop(1);
                          taicpu(p).ops := 1;
                          if taicpu(hp1).condition = C_E then
                            taicpu(hp1).condition := C_O
                          else
                            taicpu(hp1).condition := C_NO;
                          continue;
                        end;
                      {
                        @@2:                              @@2:
                          ....                              ....
                          cmp operand1,0
                          jle/jbe @@1
                          dec operand1             -->      sub operand1,1
                          jmp @@2                           jge/jae @@2
                        @@1:                              @@1:
                          ...                               ....
                      }
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
                         (taicpu(p).oper[0]^.val = 0) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).is_jmp) and
                         (taicpu(hp1).opcode = A_Jcc) and
                         (taicpu(hp1).condition in [C_LE,C_BE]) and
                         GetNextInstruction(hp1,hp2) and
                         (hp2.typ = ait_instruction) and
                         (taicpu(hp2).opcode = A_DEC) and
                         OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                         GetNextInstruction(hp2, hp3) and
                         (hp3.typ = ait_instruction) and
                         (taicpu(hp3).is_jmp) and
                         (taicpu(hp3).opcode = A_JMP) and
                         GetNextInstruction(hp3, hp4) and
                         FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
                        begin
                          { dec operand1 -> sub operand1,1 }
                          taicpu(hp2).opcode := A_SUB;
                          taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
                          taicpu(hp2).loadConst(0,1);
                          taicpu(hp2).ops := 2;
                          { jmp @@2 -> jge/jae @@2 }
                          taicpu(hp3).opcode := A_Jcc;
                          case taicpu(hp1).condition of
                            C_LE: taicpu(hp3).condition := C_GE;
                            C_BE: taicpu(hp3).condition := C_AE;
                          end;
                          { the cmp and the jle/jbe are no longer needed }
                          asml.remove(p);
                          asml.remove(hp1);
                          p.free;
                          hp1.free;
                          p := hp2;
                          continue;
                        end
                    end;
                  A_FLD:
                    if OptPass1FLD(p) then
                      continue;
                  A_FSTP,A_FISTP:
                    if OptPass1FSTP(p) then
                      continue;
                  A_LEA:
                    if OptPass1LEA(p) then
                      continue;
                  A_MOV:
                    if OptPass1MOV(p) then
                      continue;
                  A_MOVSX,
                  A_MOVZX:
                    if OptPass1Movx(p) then
                      continue;
                  { A_POP: a former, disabled optimization lived here; such
                    sequences should not be generated anymore by the current
                    code generator.  For go32v2 it transformed a series of
                    pop/pop/pop/push/push/push into 'movl x(%esp),%reg';
                    for all other targets it turned "pop reg; push reg" into
                    a mov from (%esp) to reg. }
                  A_PUSH:
                    begin
                      { two consecutive 16 bit constant pushes are merged
                        into a single 32 bit constant push }
                      if (taicpu(p).opsize = S_W) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_PUSH) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = S_W) then
                        begin
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_SHL, A_SAL:
                    if OptPass1SHLSAL(p) then
                      continue;
                  A_SUB:
                    if OptPass1Sub(p) then
                      continue;
                  A_VMOVAPS,
                  A_VMOVAPD:
                    if OptPass1VMOVAP(p) then
                      continue;
                  A_VDIVSD,
                  A_VDIVSS,
                  A_VSUBSD,
                  A_VSUBSS,
                  A_VMULSD,
                  A_VMULSS,
                  A_VADDSD,
                  A_VADDSS,
                  A_VANDPD,
                  A_VANDPS,
                  A_VORPD,
                  A_VORPS,
                  A_VXORPD,
                  A_VXORPS:
                    if OptPass1VOP(p) then
                      continue;
                  A_MULSD,
                  A_MULSS,
                  A_ADDSD,
                  A_ADDSS:
                    if OptPass1OP(p) then
                      continue;
                  A_MOVAPD,
                  A_MOVAPS:
                    if OptPass1MOVAP(p) then
                      continue;
                  A_VMOVSD,
                  A_VMOVSS,
                  A_MOVSD,
                  A_MOVSS:
                    if OptPass1MOVXX(p) then
                      continue;
                  A_SETcc:
                    if OptPass1SETcc(p) then
                      continue;
                end;
              end; { if is_jmp }
          end;
        UpdateUsedRegs(UsedRegs, p);
        p := tai(p.next);
      end;
  end;
  { Second peephole pass: walks the list items from BlockStart up to (not
    including) BlockEnd and dispatches Jcc, FSTP/FISTP, IMUL, JMP and MOV to
    their helper routines.  Instructions carrying a segment override are
    skipped.  A helper returning true means that the current item has to be
    examined again, so the cursor is not advanced in that case. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      curr: tai;
    begin
      curr := BlockStart;
      ClearUsedRegs;
      while curr <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(curr.next));
          if (curr.typ = ait_instruction) and
             not InsContainsSegRef(taicpu(curr)) then
            case taicpu(curr).opcode of
              A_Jcc:
                if OptPass2Jcc(curr) then
                  continue;
              A_FSTP,A_FISTP:
                { the pass 1 helper is run once more here }
                if OptPass1FSTP(curr) then
                  continue;
              A_IMUL:
                if OptPass2Imul(curr) then
                  continue;
              A_JMP:
                if OptPass2Jmp(curr) then
                  continue;
              A_MOV:
                if OptPass2MOV(curr) then
                  continue;
            end;
          curr := tai(curr.next);
        end;
    end;
  { Final peephole stage: walks the list items from BlockStart up to (not
    including) BlockEnd, dispatches CALL, LEA, CMP, MOV and TEST/OR to their
    PostPeephole* helpers, rewrites movzbl for the Pentium inline, and calls
    OptReferences at the end.  Instructions carrying a segment override are
    skipped.  A helper returning true means that p has to be examined again,
    so p is not advanced in that case. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1: tai;

    { Rewrites the "movzbl src,%reg32" held in p into
      "xorl %reg32,%reg32; movb src,%reg8": the xor is inserted in front of p
      and p itself becomes the byte-sized mov. }
    procedure SplitMovzblIntoXorMov;
      begin
        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
          taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
        InsertLLItem(p.previous, p, hp1);
        taicpu(p).opcode := A_MOV;
        taicpu(p).changeopsize(S_B);
        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
      end;

    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_CALL:
                    if PostPeepHoleOptCall(p) then
                      continue;
                  A_LEA:
                    if PostPeepholeOptLea(p) then
                      continue;
                  A_CMP:
                    if PostPeepholeOptCmp(p) then
                      continue;
                  A_MOV:
                    if PostPeepholeOptMov(p) then
                      continue;
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like
                      "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"
                      so we can't safely replace the movzx then with xor/mov,
                      since that would change the flags (JM) }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            begin
                              { Change "movzbl %reg1, %reg2" to
                                "xorl %reg2, %reg2; movb %reg1, %reg2" for
                                Pentium and PentiumMMX.
                                The source must not be part of the destination
                                register: the inserted xor would clear it
                                before the mov reads it. }
                              if (taicpu(p).opsize = S_BL) and
                                 IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                 (getsupreg(taicpu(p).oper[0]^.reg) <>
                                  getsupreg(taicpu(p).oper[1]^.reg)) and
                                 not(cs_opt_size in current_settings.optimizerswitches) and
                                 (current_settings.optimizecputype = cpu_Pentium) then
                                SplitMovzblIntoXorMov;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                                  (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                  (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                  not(cs_opt_size in current_settings.optimizerswitches) and
                                  IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                  (current_settings.optimizecputype = cpu_Pentium) and
                                  (taicpu(p).opsize = S_BL) then
                            { changes "movzbl mem, %reg" to
                              "xorl %reg, %reg; movb mem, %reg8" for Pentium and
                              PentiumMMX; the destination may not be used as
                              base or index of the memory operand }
                            SplitMovzblIntoXorMov;
                      end;
                  A_TEST, A_OR:
                    if PostPeepholeOptTestOr(p) then
                      continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
      OptReferences;
    end;
  { Driver of the optimizer.  Runs pass_1 and then, when cs_opt_peephole is
    set, the peephole stages over every block of the list, skipping inline
    assembler blocks.  Without cs_opt_level3 this happens exactly once;
    otherwise the whole sequence is repeated until the loop condition below
    declares a final round.  PrePeepHoleOpts and the second PeepHoleOptPass1
    run only in round 0, PostPeepHoleOpts only in the final round.
    dfa is freed at the end. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      nextItem: tai;
      passNo: longint;
      multiPass, changed, finalPass: boolean;
    begin
      multiPass := cs_opt_level3 in current_settings.optimizerswitches;
      passNo := 0;
      { note: changed is only ever assigned false in this procedure }
      changed := false;
      repeat
        finalPass :=
          not(multiPass) or
          (not changed and (passNo > 2)) or
          { prevent endless loops }
          (passNo = 4);
        changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNo = 0 then
                  PrePeepHoleOpts;
                { Peephole optimizations; performed twice in the first round
                  only }
                PeepHoleOptPass1;
                if passNo = 0 then
                  PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
                { BlockStart now contains a tai_marker(mark_AsmBlockEnd) }
                if GetNextInstruction(BlockStart, nextItem) and
                   ((nextItem.typ <> ait_marker) or
                    (tai_marker(nextItem).Kind <> mark_AsmBlockStart)) then
                  { There is no assembler block anymore after the current
                    one, so optimize the next block of "normal" instructions }
                  pass_1
                else
                  { Otherwise, skip the next assembler block }
                  BlockStart := nextItem;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  { Unit initialization: stores the TCpuAsmOptimizer class reference in
    casmoptimizer. }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.