aoptcpu.pas 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 assembler optimizer. Derives from TX86AsmOptimizer and overrides the
    optimizer driver plus the four peephole stages implemented in this unit. }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { Driver loop: repeatedly runs the stages below over the assembler list. }
    procedure Optimize; override;
    { Stage run only in the first driver pass, before PeepHoleOptPass1. }
    procedure PrePeepHoleOpts; override;
    { First peephole stage (run twice in the first driver pass). }
    procedure PeepHoleOptPass1; override;
    { Second peephole stage, run after PeepHoleOptPass1 in every driver pass. }
    procedure PeepHoleOptPass2; override;
    { Final stage, run only in the last driver pass. }
    procedure PostPeepHoleOpts; override;
  end;
  33. Var
  34. AsmOptimizer : TCpuAsmOptimizer;
  35. Implementation
  36. uses
  37. verbose,globtype,globals,
  38. cpuinfo,
  39. aasmcpu,
  40. aoptutils,
  41. procinfo,
  42. cgutils,
  43. { units we should get rid of: }
  44. symsym,symconst;
  { Returns true when reg is an integer register whose super-register lies in
    the range RS_EAX..RS_EBX (inclusive); false for every other register.
    Only the register type and super-register are inspected, not the
    sub-register size. }
  function isgp32reg(reg: TRegister): boolean;
    begin
      result:=false;
      if getregtype(reg)<>R_INTREGISTER then
        exit;
      { warnings are switched off around the range test, as in the original }
{$push}{$warnings off}
      result:=not((getsupreg(reg)<RS_EAX) or (getsupreg(reg)>RS_EBX));
{$pop}
    end;
  { Reports whether any operand of instruction p is a memory reference
    (top_ref) that carries a segment register (segment <> NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      result:=false;
      opidx:=0;
      while opidx<p.opercnt do
        begin
          if (p.oper[opidx]^.typ=top_ref) and
             (p.oper[opidx]^.ref^.segment<>NR_NO) then
            begin
              { first segment-qualified reference decides the answer }
              result:=true;
              break;
            end;
          inc(opidx);
        end;
    end;
  { Pre-pass over the items BlockStart..BlockEnd.
    - Instructions containing a segment-qualified reference are skipped.
    - IMUL and SAR/SHR are handed to PrePeepholeOptIMUL / PrePeepholeOptSxx;
      when such a helper returns true the (possibly updated) current item is
      examined again instead of advancing.
    - "xor reg,reg" is rewritten to "mov reg,0". }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      curr: tai;
      again: boolean;
    begin
      curr:=BlockStart;
      while curr<>BlockEnd do
        begin
          again:=false;
          if (curr.typ=ait_instruction) and
             not InsContainsSegRef(taicpu(curr)) then
            case taicpu(curr).opcode of
              A_IMUL:
                again:=PrePeepholeOptIMUL(curr);
              A_SAR,A_SHR:
                again:=PrePeepholeOptSxx(curr);
              A_XOR:
                if (taicpu(curr).oper[0]^.typ=top_reg) and
                   (taicpu(curr).oper[1]^.typ=top_reg) and
                   (taicpu(curr).oper[0]^.reg=taicpu(curr).oper[1]^.reg) then
                  begin
                    { temporarily change this to 'mov reg,0' to make it easier
                      for the CSE. Will be changed back in pass 2 }
                    taicpu(curr).opcode:=A_MOV;
                    taicpu(curr).loadConst(0,0);
                  end;
              else
                ;
            end;
          { advance unless a helper asked to look at the current item again }
          if not again then
            curr:=tai(curr.next);
        end;
    end;
  108. { First pass of peephole optimizations }
  109. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging aid: prints 'Ok' on standard output and always yields True. }
  function WriteOk : Boolean;
    begin
      WriteOk:=True;
      writeln('Ok');
    end;
  115. var
  116. p,hp1,hp2 : tai;
  117. hp3,hp4: tai;
  118. v:aint;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    { Traces successive jumps to their final destination and retargets hp
      accordingly, e.g.

          je l1                je l3
          <code>               <code>
        l1:       becomes    l1:
          je l2                je l3
          <code>               <code>
        l2:                  l2:
          jmp l3               jmp l3

      asml  - only used by the (normally disabled) finaldestdebug code and
              passed on to the recursive calls
      hp    - the jump whose target operand (oper[0]) may be rewritten
      level - how deep the jump chain has already been followed; the search
              gives up beyond 20 to avoid endless loops with constructs such
              as "l5: ; jmp l5"

      Returns false when the depth limit is hit, when a jump targeting its own
      label is found, or when a recursive call fails; true otherwise (also
      when nothing could be changed).

      Whenever the target symbol of hp is replaced, the reference count of the
      old label is decreased and that of the new label is increased. }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Skips the items of a type in SkipInstr+[ait_align] that follow hp. When
      the item after them is a label, stores its symbol in l and returns true. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                { first resolve the destination of p1 itself }
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              { p1 is the inverse of hp, so it is never taken when arriving
                from hp: hp can jump straight to whatever follows p1 }
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
{$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
{$endif finaldestdebug}
                    { no label behind p1 yet: create one and jump to it }
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp); }
                  end
                else
                  begin
{$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
{$endif finaldestdebug}
                    l.increfs;
                    { hp no longer references its previous target, so release
                      that reference just like the other retargeting branches }
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Main body of PeepHoleOptPass1: walks the items from BlockStart up to (not
    including) BlockEnd. Jumps get the jump-specific treatment below; all
    other instructions are dispatched on their opcode. Whenever an
    optimization reports a change, the loop "continue"s so that the current
    item p is examined again without advancing. }
  begin
    p := BlockStart;
    ClearUsedRegs;
    while (p <> BlockEnd) Do
      begin
        UpDateUsedRegs(UsedRegs, tai(p.next));
        case p.Typ Of
          ait_instruction:
            begin
              current_filepos:=taicpu(p).fileinfo;
              { instructions with a segment-qualified memory operand are
                never touched }
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              { Handle Jmp Optimizations }
              if taicpu(p).is_jmp then
                begin
                  { the following if-block removes all code between a jmp and the next label,
                    because it can never be executed }
                  if (taicpu(p).opcode = A_JMP) then
                    begin
                      { hp2 is the item after which the search for the next
                        removal candidate resumes }
                      hp2:=p;
                      while GetNextInstruction(hp2, hp1) and
                            (hp1.typ <> ait_label) do
                        if not(hp1.typ in ([ait_label]+skipinstr)) then
                          begin
                            { don't kill start/end of assembler block,
                              no-line-info-start/end etc }
                            if not(hp1.typ in [ait_align,ait_marker]) then
                              begin
                                asml.remove(hp1);
                                hp1.free;
                              end
                            else
                              hp2:=hp1;
                          end
                        else break;
                    end;
                  { remove jumps to a label coming right after them }
                  if GetNextInstruction(p, hp1) then
                    begin
                      if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
                         { TODO: FIXME removing the first instruction fails}
                         (p<>blockstart) then
                        begin
                          { continue with the item following hp1 once the
                            jump has been deleted }
                          hp2:=tai(hp1.next);
                          asml.remove(p);
                          p.free;
                          p:=hp2;
                          continue;
                        end
                      else
                        begin
                          if hp1.typ = ait_label then
                            SkipLabels(hp1,hp1);
                          { pattern: p; jmp X (hp1); <label targeted by p> (hp2) }
                          if (tai(hp1).typ=ait_instruction) and
                             (taicpu(hp1).opcode=A_JMP) and
                             GetNextInstruction(hp1, hp2) and
                             FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
                            begin
                              if taicpu(p).opcode=A_Jcc then
                                begin
                                  { invert the condition of p, let it jump to
                                    the target of hp1 and drop hp1 }
                                  taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                  tai_label(hp2).labsym.decrefs;
                                  taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
                                  { when free'ing hp1, the ref. isn't decreased, so we don't
                                    increase it (FK)
                                    taicpu(p).oper[0]^.ref^.symbol.increfs;
                                  }
                                  asml.remove(hp1);
                                  hp1.free;
                                  GetFinalDestination(asml, taicpu(p),0);
                                end
                              else
                                begin
                                  GetFinalDestination(asml, taicpu(p),0);
                                  p:=tai(p.next);
                                  continue;
                                end;
                            end
                          else
                            GetFinalDestination(asml, taicpu(p),0);
                        end;
                    end;
                end
              else
                { All other optimizes }
                begin
                  case taicpu(p).opcode Of
                    A_AND:
                      if OptPass1And(p) then
                        continue;
                    A_CMP:
                      begin
                        { cmp register,$8000                neg register
                          je target                 -->     jo target
                          .... only if register is deallocated before jump.}
                        { v = constant with only the top bit of the operand
                          size set; note that the internalerror fires for any
                          CMP whose opsize is not S_B, S_W or S_L }
                        case Taicpu(p).opsize of
                          S_B: v:=$80;
                          S_W: v:=$8000;
                          S_L: v:=aint($80000000);
                          else
                            internalerror(2013112905);
                        end;
                        if (taicpu(p).oper[0]^.typ=Top_const) and
                           (taicpu(p).oper[0]^.val=v) and
                           (Taicpu(p).oper[1]^.typ=top_reg) and
                           GetNextInstruction(p, hp1) and
                           (hp1.typ=ait_instruction) and
                           (taicpu(hp1).opcode=A_Jcc) and
                           (Taicpu(hp1).condition in [C_E,C_NE]) and
                           not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
                          begin
                            { turn the two-operand cmp into a one-operand neg
                              on the register }
                            Taicpu(p).opcode:=A_NEG;
                            Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                            Taicpu(p).clearop(1);
                            Taicpu(p).ops:=1;
                            if Taicpu(hp1).condition=C_E then
                              Taicpu(hp1).condition:=C_O
                            else
                              Taicpu(hp1).condition:=C_NO;
                            continue;
                          end;
                        {
                          @@2:                              @@2:
                            ....                              ....
                            cmp operand1,0
                            jle/jbe @@1
                            dec operand1             -->      sub operand1,1
                            jmp @@2                           jge/jae @@2
                          @@1:                              @@1:
                            ...                               ....}
                        { p = cmp, hp1 = jle/jbe, hp2 = dec, hp3 = jmp,
                          hp4 = item where the label targeted by hp1 must be found }
                        if (taicpu(p).oper[0]^.typ = top_const) and
                           (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
                           (taicpu(p).oper[0]^.val = 0) and
                           GetNextInstruction(p, hp1) and
                           (hp1.typ = ait_instruction) and
                           (taicpu(hp1).is_jmp) and
                           (taicpu(hp1).opcode=A_Jcc) and
                           (taicpu(hp1).condition in [C_LE,C_BE]) and
                           GetNextInstruction(hp1,hp2) and
                           (hp2.typ = ait_instruction) and
                           (taicpu(hp2).opcode = A_DEC) and
                           OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                           GetNextInstruction(hp2, hp3) and
                           (hp3.typ = ait_instruction) and
                           (taicpu(hp3).is_jmp) and
                           (taicpu(hp3).opcode = A_JMP) and
                           GetNextInstruction(hp3, hp4) and
                           FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
                          begin
                            { dec operand1 -> sub operand1,1 }
                            taicpu(hp2).Opcode := A_SUB;
                            taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
                            taicpu(hp2).loadConst(0,1);
                            taicpu(hp2).ops:=2;
                            { jmp @@2 -> jge/jae @@2 }
                            taicpu(hp3).Opcode := A_Jcc;
                            case taicpu(hp1).condition of
                              C_LE: taicpu(hp3).condition := C_GE;
                              C_BE: taicpu(hp3).condition := C_AE;
                              else
                                internalerror(2019050903);
                            end;
                            { the cmp and the first conditional jump are no
                              longer needed; carry on with the new sub }
                            asml.remove(p);
                            asml.remove(hp1);
                            p.free;
                            hp1.free;
                            p := hp2;
                            continue;
                          end
                      end;
                    A_FLD:
                      if OptPass1FLD(p) then
                        continue;
                    A_FSTP,A_FISTP:
                      if OptPass1FSTP(p) then
                        continue;
                    A_LEA:
                      begin
                        if OptPass1LEA(p) then
                          continue;
                      end;
                    A_MOV:
                      begin
                        If OptPass1MOV(p) then
                          Continue;
                      end;
                    A_MOVSX,
                    A_MOVZX :
                      begin
                        If OptPass1Movx(p) then
                          Continue
                      end;
(* should not be generated anymore by the current code generator
                    A_POP:
                      begin
                        if target_info.system=system_i386_go32v2 then
                          begin
                            { Transform a series of pop/pop/pop/push/push/push to }
                            { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
                            { because I'm not sure whether they can cope with }
                            { 'movl x(%esp),%reg' with x > 0, I believe we had }
                            { such a problem when using esp as frame pointer (JM) }
                            if (taicpu(p).oper[0]^.typ = top_reg) then
                              begin
                                hp1 := p;
                                hp2 := p;
                                l := 0;
                                while getNextInstruction(hp1,hp1) and
                                      (hp1.typ = ait_instruction) and
                                      (taicpu(hp1).opcode = A_POP) and
                                      (taicpu(hp1).oper[0]^.typ = top_reg) do
                                  begin
                                    hp2 := hp1;
                                    inc(l,4);
                                  end;
                                getLastInstruction(p,hp3);
                                l1 := 0;
                                while (hp2 <> hp3) and
                                      assigned(hp1) and
                                      (hp1.typ = ait_instruction) and
                                      (taicpu(hp1).opcode = A_PUSH) and
                                      (taicpu(hp1).oper[0]^.typ = top_reg) and
                                      (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
                                  begin
                                    { change it to a two op operation }
                                    taicpu(hp2).oper[1]^.typ:=top_none;
                                    taicpu(hp2).ops:=2;
                                    taicpu(hp2).opcode := A_MOV;
                                    taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
                                    reference_reset(tmpref);
                                    tmpRef.base.enum:=R_INTREGISTER;
                                    tmpRef.base.number:=NR_STACK_POINTER_REG;
                                    convert_register_to_enum(tmpref.base);
                                    tmpRef.offset := l;
                                    taicpu(hp2).loadRef(0,tmpRef);
                                    hp4 := hp1;
                                    getNextInstruction(hp1,hp1);
                                    asml.remove(hp4);
                                    hp4.free;
                                    getLastInstruction(hp2,hp2);
                                    dec(l,4);
                                    inc(l1);
                                  end;
                                if l <> -4 then
                                  begin
                                    inc(l,4);
                                    for l1 := l1 downto 1 do
                                      begin
                                        getNextInstruction(hp2,hp2);
                                        dec(taicpu(hp2).oper[0]^.ref^.offset,l);
                                      end
                                  end
                              end
                          end
                        else
                          begin
                            if (taicpu(p).oper[0]^.typ = top_reg) and
                               GetNextInstruction(p, hp1) and
                               (tai(hp1).typ=ait_instruction) and
                               (taicpu(hp1).opcode=A_PUSH) and
                               (taicpu(hp1).oper[0]^.typ = top_reg) and
                               (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
                              begin
                                { change it to a two op operation }
                                taicpu(p).oper[1]^.typ:=top_none;
                                taicpu(p).ops:=2;
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).loadoper(1,taicpu(p).oper[0]^);
                                reference_reset(tmpref);
                                TmpRef.base.enum := R_ESP;
                                taicpu(p).loadRef(0,TmpRef);
                                asml.remove(hp1);
                                hp1.free;
                              end;
                          end;
                      end;
*)
                    A_PUSH:
                      begin
                        { two consecutive 16 bit pushes of constants are
                          merged into one 32 bit push: the constant of the
                          first push becomes the upper half, that of the
                          second push the lower half }
                        if (taicpu(p).opsize = S_W) and
                           (taicpu(p).oper[0]^.typ = Top_Const) and
                           GetNextInstruction(p, hp1) and
                           (tai(hp1).typ = ait_instruction) and
                           (taicpu(hp1).opcode = A_PUSH) and
                           (taicpu(hp1).oper[0]^.typ = Top_Const) and
                           (taicpu(hp1).opsize = S_W) then
                          begin
                            taicpu(p).changeopsize(S_L);
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
                            asml.remove(hp1);
                            hp1.free;
                          end;
                      end;
                    A_SHL, A_SAL:
                      if OptPass1SHLSAL(p) then
                        Continue;
                    A_SUB:
                      if OptPass1Sub(p) then
                        continue;
                    A_VMOVAPS,
                    A_VMOVAPD:
                      if OptPass1VMOVAP(p) then
                        continue;
                    A_VDIVSD,
                    A_VDIVSS,
                    A_VSUBSD,
                    A_VSUBSS,
                    A_VMULSD,
                    A_VMULSS,
                    A_VADDSD,
                    A_VADDSS,
                    A_VANDPD,
                    A_VANDPS,
                    A_VORPD,
                    A_VORPS,
                    A_VXORPD,
                    A_VXORPS:
                      if OptPass1VOP(p) then
                        continue;
                    A_MULSD,
                    A_MULSS,
                    A_ADDSD,
                    A_ADDSS:
                      if OptPass1OP(p) then
                        continue;
                    A_MOVAPD,
                    A_MOVAPS:
                      if OptPass1MOVAP(p) then
                        continue;
                    A_VMOVSD,
                    A_VMOVSS,
                    A_MOVSD,
                    A_MOVSS:
                      if OptPass1MOVXX(p) then
                        continue;
                    A_SETcc:
                      begin
                        if OptPass1SETcc(p) then
                          continue;
                      end
                    else
                      ;
                  end;
                end; { if is_jmp }
            end;
          else
            ;
        end;
        { no optimization restarted the loop: account for p and move on }
        updateUsedRegs(UsedRegs,p);
        p:=tai(p.next);
      end;
  end;
  { Second peephole stage over the items BlockStart..BlockEnd.
    Instructions with a segment-qualified reference are skipped. Jcc,
    FSTP/FISTP, IMUL, JMP and MOV are handed to their helper; when the helper
    returns true the current item is examined again instead of advancing. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      curr: tai;
      again: boolean;
    begin
      curr:=BlockStart;
      ClearUsedRegs;
      while curr<>BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs,tai(curr.next));
          again:=false;
          if (curr.typ=ait_instruction) and
             not InsContainsSegRef(taicpu(curr)) then
            case taicpu(curr).opcode of
              A_Jcc:
                again:=OptPass2Jcc(curr);
              A_FSTP,A_FISTP:
                again:=OptPass1FSTP(curr);
              A_IMUL:
                again:=OptPass2Imul(curr);
              A_JMP:
                again:=OptPass2Jmp(curr);
              A_MOV:
                again:=OptPass2MOV(curr);
              else
                ;
            end;
          { only step forward when nothing asked for a re-examination }
          if not again then
            curr:=tai(curr.next);
        end;
    end;
  { Final peephole stage over the items BlockStart..BlockEnd, followed by a
    call to OptReferences.
    - Instructions with a segment-qualified reference are skipped.
    - CALL, LEA, CMP, MOV and TEST/OR are handed to their PostPeephole helper;
      when the helper returns true the current item is examined again.
    - MOVZX: when optimizing for the Pentium, not for size and without
      register variables, "movzbl src,%reg" is replaced by
      "xorl %reg,%reg; movb src,%reg8". This is only valid when src does not
      depend on %reg, so both the register and the memory form check for
      that. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1: tai;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    if PostPeepHoleOptCall(p) then
                      Continue;
                  A_LEA:
                    if PostPeepholeOptLea(p) then
                      Continue;
                  A_CMP:
                    if PostPeepholeOptCmp(p) then
                      Continue;
                  A_MOV:
                    if PostPeepholeOptMov(p) then
                      Continue;
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                       { the inserted xor clears the whole destination, so the
                                         source must not be a part of that same register
                                         (e.g. "movzbl %al,%eax") }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                          taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                                else
                                  ;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              { the address must not use the register that
                                the inserted xor is about to clear }
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    begin
                      if PostPeepholeOptTestOr(p) then
                        Continue;
                    end;
                  else
                    ;
                end;
              end;
            else
              ;
          end;
          p := tai(p.next)
        end;
      OptReferences;
    end;
  { Optimizer driver.
    Repeats the following until lastLoop is set: pass_1 prepares a block of
    the assembler list starting at BlockStart; when cs_opt_peephole is
    enabled the peephole stages run on it (PrePeepHoleOpts and a second
    PeepHoleOptPass1 only in pass 0, PostPeepHoleOpts only in the last pass);
    afterwards assembler blocks (mark_AsmBlockStart..mark_AsmBlockEnd) are
    skipped and the next block is processed. lastLoop is set right away
    unless cs_opt_level3 is enabled. dfa is freed at the end. }
  Procedure TCpuAsmOptimizer.Optimize;
    Var
      afterAsm: Tai;
      passNo: longint;
      slowopt, changed, lastLoop: boolean;
    Begin
      slowopt:=(cs_opt_level3 in current_settings.optimizerswitches);
      passNo:=0;
      changed:=false;
      repeat
        lastLoop:=
          { prevent endless loops }
          (passNo=4) or
          not(slowopt) or
          ((passNo>2) and not changed);
        changed:=false;
        { Setup labeltable, always necessary }
        blockstart:=tai(asml.first);
        pass_1;
        { Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
        { or nil }
        while Assigned(BlockStart) do
          begin
            { Peephole optimizations }
            if (cs_opt_peephole in current_settings.optimizerswitches) then
              begin
                if passNo=0 then
                  begin
                    PrePeepHoleOpts;
                    { pass 1 is performed twice in the very first pass }
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
              end;
            { More peephole optimizations }
            if (cs_opt_peephole in current_settings.optimizerswitches) then
              begin
                PeepHoleOptPass2;
                if lastLoop then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of an }
            { assembler block or nil }
            BlockStart:=BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ=ait_Marker) and
                  (Tai_Marker(BlockStart).Kind=mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart:=Tai(BlockStart.Next);
                until (BlockStart.Typ=Ait_Marker) and
                      (Tai_Marker(Blockstart).Kind=mark_AsmBlockEnd);
                { Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
                if not GetNextInstruction(BlockStart,afterAsm) or
                   ((afterAsm.typ=ait_Marker) and
                    (Tai_Marker(afterAsm).Kind=mark_AsmBlockStart)) then
                  { another assembler block follows (or nothing at all): skip on }
                  blockStart:=afterAsm
                else
                  { There is no assembler block anymore after the current one, so }
                  { optimize the next block of "normal" instructions }
                  pass_1;
              end;
          end;
        inc(passNo);
      until lastLoop;
      dfa.free;
    End;
  { Unit initialization: stores the TCpuAsmOptimizer class in casmoptimizer. }
  begin
    casmoptimizer:=TCpuAsmOptimizer;
  end.