aoptcpu.pas 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  type
    { Assembler optimizer class of this unit. It derives from
      TX86AsmOptimizer and overrides the driver (Optimize) as well as the
      four peephole stages that the driver invokes for every block of
      instructions. }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      { driver: runs the stages below, possibly several times }
      procedure Optimize; override;
      { stage run once, before the first peephole pass }
      procedure PrePeepHoleOpts; override;
      { first peephole pass (jump and per-instruction optimizations) }
      procedure PeepHoleOptPass1; override;
      { second peephole pass }
      procedure PeepHoleOptPass2; override;
      { stage run only in the last iteration of the driver loop }
      procedure PostPeepHoleOpts; override;
    end;

  var
    AsmOptimizer : TCpuAsmOptimizer;
  35. Implementation
  36. uses
  37. verbose,globtype,globals,
  38. cpuinfo,
  39. aasmcpu,
  40. aoptutils,
  41. aasmcfi,
  42. procinfo,
  43. cgutils,
  44. { units we should get rid off: }
  45. symsym,symconst;
  { Returns true when reg is an integer register whose super register lies
    in the range RS_EAX..RS_EBX. Note that only the register type and the
    super register are inspected; the sub register (size) is not checked. }
  function isgp32reg(reg: TRegister): boolean;
    var
      supreg: tsuperregister;
    begin
      result:=false;
      { anything that is not an integer register is rejected right away }
      if getregtype(reg)<>R_INTREGISTER then
        exit;
      supreg:=getsupreg(reg);
      result:=supreg in [RS_EAX..RS_EBX];
    end;
  { Tells whether instruction p has at least one memory operand (top_ref)
    whose segment register is set, i.e. differs from NR_NO. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      result:=false;
      opidx:=0;
      while opidx<p.opercnt do
        begin
          if (p.oper[opidx]^.typ=top_ref) and
             (p.oper[opidx]^.ref^.segment<>NR_NO) then
            begin
              { one segment-qualified reference is enough }
              result:=true;
              exit;
            end;
          inc(opidx);
        end;
    end;
  { Walks the items from BlockStart up to (not including) BlockEnd and
    applies the pre-peephole rewrites. Instructions that contain a memory
    operand with a segment override are left untouched. }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      p: tai;
      handled: boolean;
    begin
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          { handled = a helper reported a change; in that case p is looked
            at again instead of stepping to the next item }
          handled:=false;
          if (p.typ=ait_instruction) and
             not InsContainsSegRef(taicpu(p)) then
            begin
              case taicpu(p).opcode of
                A_IMUL:
                  handled:=PrePeepholeOptIMUL(p);
                A_SAR,
                A_SHR:
                  handled:=PrePeepholeOptSxx(p);
                A_XOR:
                  begin
                    { "xor reg,reg" is temporarily turned into "mov $0,reg"
                      to make it easier for the CSE; it will be changed
                      back in pass 2 }
                    if (taicpu(p).oper[0]^.typ=top_reg) and
                       (taicpu(p).oper[1]^.typ=top_reg) and
                       (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
                      begin
                        taicpu(p).opcode:=A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                  end;
                else
                  ;
              end;
            end;
          if not handled then
            p:=tai(p.next);
        end;
    end;
  { First pass of peephole optimizations.

    Walks the items from BlockStart up to (not including) BlockEnd.
    Instructions with a segment-qualified memory operand are skipped.
    Jumps get the jump specific treatment (removal of unreachable code
    after a jmp, removal of jumps to the directly following label,
    inversion of "jcc over jmp", jump chain shortening via
    GetFinalDestination); every other instruction is dispatched on its
    opcode, mostly to OptPass1* helpers that are not defined in this unit. }
  procedure TCPUAsmOPtimizer.PeepHoleOptPass1;

    var
      p,hp1,hp2 : tai;
      hp3,hp4: tai;
      { sign-bit constant matching the operand size of the current cmp }
      v:aint;

    function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
      {traces sucessive jumps to their final destination and sets it, e.g.

           je l1                je l3
           <code>               <code>
         l1:         becomes  l1:
           je l2                je l3
           <code>               <code>
         l2:                  l2:
           jmp l3               jmp l3

       the level parameter denotes how deep we have already followed the
       jump, to avoid endless loops with constructs such as "l5: ; jmp l5"

       Returns false when the trace was abandoned (level > 20, or a jump
       that targets the label of hp itself was found), true otherwise. }

      var p1, p2: tai;
          l: tasmlabel;

      { Skips the items following hp whose type is in SkipInstr+[ait_align];
        if the item after those is a label, returns it in l and yields
        true. l is left untouched when the result is false. }
      function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
        begin
          FindAnyLabel := false;
          while assigned(hp.next) and
                (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
            hp := tai(hp.next);
          if assigned(hp.next) and
             (tai(hp.next).typ = ait_label) then
            begin
              FindAnyLabel := true;
              l := tai_label(hp.next).labsym;
            end
        end;

      begin
        GetfinalDestination := false;
        if level > 20 then
          exit;
        p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
        if assigned(p1) then
          begin
            SkipLabels(p1,p1);
            if (tai(p1).typ = ait_instruction) and
               (taicpu(p1).is_jmp) then
              if { the next instruction after the label where the jump hp arrives}
                 { is unconditional or of the same type as hp, so continue       }
                 (taicpu(p1).condition in [C_None,hp.condition]) or
                 { the next instruction after the label where the jump hp arrives}
                 { is the opposite of hp (so this one is never taken), but after }
                 { that one there is a branch that will be taken, so perform a   }
                 { little hack: set p1 equal to this instruction (that's what the}
                 { last SkipLabels is for, only works with short bool evaluation)}
                 ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                  SkipLabels(p1,p2) and
                  (p2.typ = ait_instruction) and
                  (taicpu(p2).is_jmp) and
                  (taicpu(p2).condition in [C_None,hp.condition]) and
                  SkipLabels(p1,p1)) then
                begin
                  { quick check for loops of the form "l5: ; jmp l5 }
                  if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                       tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                    exit;
                  if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                    exit;
                  { retarget hp to the destination of p1 and move the
                    reference from the old label to the new one }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                  tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
                end
              else
                if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                  if not FindAnyLabel(p1,l) then
                    begin
  {$ifdef finaldestdebug}
                      insertllitem(p1,p1.next,tai_comment.Create(
                        strpnew('previous label inserted')));
  {$endif finaldestdebug}
                      current_asmdata.getjumplabel(l);
                      insertllitem(p1,p1.next,tai_label.Create(l));
                      tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      l.increfs;
                      { this won't work, since the new label isn't in the labeltable }
                      { so it will fail the rangecheck. Labeltable should become a   }
                      { hashtable to support this:                                   }
                      { GetFinalDestination(asml, hp);                               }
                    end
                  else
                    begin
  {$ifdef finaldestdebug}
                      insertllitem(p1,p1.next,tai_comment.Create(
                        strpnew('next label reused')));
  {$endif finaldestdebug}
                      l.increfs;
                      { hp no longer references its old target, so release
                        that reference just like the branch above does }
                      tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      if not GetFinalDestination(asml, hp,succ(level)) then
                        exit;
                    end;
          end;
        GetFinalDestination := true;
      end;

    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpDateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            ait_instruction:
              begin
                current_filepos:=taicpu(p).fileinfo;
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                { Handle Jmp Optimizations }
                if taicpu(p).is_jmp then
                  begin
                    { the following if-block removes all code between a jmp and the next label,
                      because it can never be executed }
                    if (taicpu(p).opcode = A_JMP) then
                      begin
                        hp2:=p;
                        while GetNextInstruction(hp2, hp1) and
                              (hp1.typ <> ait_label) do
                          if not(hp1.typ in ([ait_label]+skipinstr)) then
                            begin
                              { don't kill start/end of assembler block,
                                no-line-info-start/end, cfi end, etc }
                              if not(hp1.typ in [ait_align,ait_marker]) and
                                 ((hp1.typ<>ait_cfi) or
                                  (tai_cfi_base(hp1).cfityp<>cfi_endproc)) then
                                begin
                                  asml.remove(hp1);
                                  hp1.free;
                                end
                              else
                                { item is kept: continue the scan behind it }
                                hp2:=hp1;
                            end
                          else break;
                      end;
                    { remove jumps to a label coming right after them }
                    if GetNextInstruction(p, hp1) then
                      begin
                        if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
                           { TODO: FIXME removing the first instruction fails}
                           (p<>blockstart) then
                          begin
                            hp2:=tai(hp1.next);
                            asml.remove(p);
                            p.free;
                            p:=hp2;
                            continue;
                          end
                        else
                          begin
                            if hp1.typ = ait_label then
                              SkipLabels(hp1,hp1);
                            { p followed by "jmp x" followed by the target
                              label of p }
                            if (tai(hp1).typ=ait_instruction) and
                               (taicpu(hp1).opcode=A_JMP) and
                               GetNextInstruction(hp1, hp2) and
                               FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
                              begin
                                if taicpu(p).opcode=A_Jcc then
                                  begin
                                    { invert the condition, let p jump to the
                                      target of the jmp and drop the jmp }
                                    taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                    tai_label(hp2).labsym.decrefs;
                                    taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
                                    { when free'ing hp1, the ref. isn't decresed, so we don't
                                      increase it (FK)

                                      taicpu(p).oper[0]^.ref^.symbol.increfs;
                                    }
                                    asml.remove(hp1);
                                    hp1.free;
                                    GetFinalDestination(asml, taicpu(p),0);
                                  end
                                else
                                  begin
                                    GetFinalDestination(asml, taicpu(p),0);
                                    p:=tai(p.next);
                                    continue;
                                  end;
                              end
                            else
                              GetFinalDestination(asml, taicpu(p),0);
                          end;
                      end;
                  end
                else
                  { All other optimizes }
                  begin
                    case taicpu(p).opcode Of
                      A_AND:
                        if OptPass1And(p) then
                          continue;
                      A_CMP:
                        begin
                          { cmp register,$8000                neg register
                            je target                 -->     jo target

                            .... only if register is deallocated before jump.}
                          case Taicpu(p).opsize of
                            S_B: v:=$80;
                            S_W: v:=$8000;
                            S_L: v:=aint($80000000);
                            else
                              internalerror(2013112905);
                          end;
                          if (taicpu(p).oper[0]^.typ=Top_const) and
                             (taicpu(p).oper[0]^.val=v) and
                             (Taicpu(p).oper[1]^.typ=top_reg) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ=ait_instruction) and
                             (taicpu(hp1).opcode=A_Jcc) and
                             (Taicpu(hp1).condition in [C_E,C_NE]) and
                             not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
                            begin
                              Taicpu(p).opcode:=A_NEG;
                              Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
                              Taicpu(p).clearop(1);
                              Taicpu(p).ops:=1;
                              if Taicpu(hp1).condition=C_E then
                                Taicpu(hp1).condition:=C_O
                              else
                                Taicpu(hp1).condition:=C_NO;
                              continue;
                            end;
                          {
                          @@2:                              @@2:
                            ....                              ....
                            cmp operand1,0
                            jle/jbe @@1
                            dec operand1             -->      sub operand1,1
                            jmp @@2                           jge/jae @@2
                          @@1:                              @@1:
                            ...                               ....}
                          if (taicpu(p).oper[0]^.typ = top_const) and
                             (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
                             (taicpu(p).oper[0]^.val = 0) and
                             GetNextInstruction(p, hp1) and
                             (hp1.typ = ait_instruction) and
                             (taicpu(hp1).is_jmp) and
                             (taicpu(hp1).opcode=A_Jcc) and
                             (taicpu(hp1).condition in [C_LE,C_BE]) and
                             GetNextInstruction(hp1,hp2) and
                             (hp2.typ = ait_instruction) and
                             (taicpu(hp2).opcode = A_DEC) and
                             OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
                             GetNextInstruction(hp2, hp3) and
                             (hp3.typ = ait_instruction) and
                             (taicpu(hp3).is_jmp) and
                             (taicpu(hp3).opcode = A_JMP) and
                             GetNextInstruction(hp3, hp4) and
                             FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
                            begin
                              { dec operand1 -> sub $1,operand1 }
                              taicpu(hp2).Opcode := A_SUB;
                              taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
                              taicpu(hp2).loadConst(0,1);
                              taicpu(hp2).ops:=2;
                              { jmp @@2 -> jge/jae @@2 }
                              taicpu(hp3).Opcode := A_Jcc;
                              case taicpu(hp1).condition of
                                C_LE: taicpu(hp3).condition := C_GE;
                                C_BE: taicpu(hp3).condition := C_AE;
                                else
                                  internalerror(2019050903);
                              end;
                              { the cmp and the conditional jump are dropped }
                              asml.remove(p);
                              asml.remove(hp1);
                              p.free;
                              hp1.free;
                              p := hp2;
                              continue;
                            end
                        end;
                      A_FLD:
                        if OptPass1FLD(p) then
                          continue;
                      A_FSTP,A_FISTP:
                        if OptPass1FSTP(p) then
                          continue;
                      A_LEA:
                        begin
                          if OptPass1LEA(p) then
                            continue;
                        end;
                      A_MOV:
                        begin
                          If OptPass1MOV(p) then
                            Continue;
                        end;
                      A_MOVSX,
                      A_MOVZX :
                        begin
                          If OptPass1Movx(p) then
                            Continue
                        end;
                      { A_POP: the former pop/push -> "movl x(%esp),%reg"
                        rewrite was dropped here. It was already disabled
                        ("should not be generated anymore by the current code
                        generator") and relied on locals (l, l1, tmpref) that
                        this procedure no longer declares. }
                      A_PUSH:
                        begin
                          { two successive "pushw $const" are merged into one
                            "pushl": the first constant becomes the upper
                            word, the second one the lower word }
                          if (taicpu(p).opsize = S_W) and
                             (taicpu(p).oper[0]^.typ = Top_Const) and
                             GetNextInstruction(p, hp1) and
                             (tai(hp1).typ = ait_instruction) and
                             (taicpu(hp1).opcode = A_PUSH) and
                             (taicpu(hp1).oper[0]^.typ = Top_Const) and
                             (taicpu(hp1).opsize = S_W) then
                            begin
                              taicpu(p).changeopsize(S_L);
                              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
                              asml.remove(hp1);
                              hp1.free;
                            end;
                        end;
                      A_SHL, A_SAL:
                        if OptPass1SHLSAL(p) then
                          Continue;
                      A_SUB:
                        if OptPass1Sub(p) then
                          continue;
                      A_VMOVAPS,
                      A_VMOVAPD:
                        if OptPass1VMOVAP(p) then
                          continue;
                      A_VDIVSD,
                      A_VDIVSS,
                      A_VSUBSD,
                      A_VSUBSS,
                      A_VMULSD,
                      A_VMULSS,
                      A_VADDSD,
                      A_VADDSS,
                      A_VANDPD,
                      A_VANDPS,
                      A_VORPD,
                      A_VORPS,
                      A_VXORPD,
                      A_VXORPS:
                        if OptPass1VOP(p) then
                          continue;
                      A_MULSD,
                      A_MULSS,
                      A_ADDSD,
                      A_ADDSS:
                        if OptPass1OP(p) then
                          continue;
                      A_MOVAPD,
                      A_MOVAPS:
                        if OptPass1MOVAP(p) then
                          continue;
                      A_VMOVSD,
                      A_VMOVSS,
                      A_MOVSD,
                      A_MOVSS:
                        if OptPass1MOVXX(p) then
                          continue;
                      A_SETcc:
                        begin
                          if OptPass1SETcc(p) then
                            continue;
                        end
                      else
                        ;
                    end;
                  end; { if is_jmp }
              end;
            else
              ;
          end;
          updateUsedRegs(UsedRegs,p);
          p:=tai(p.next);
        end;
    end;
  { Second peephole pass over the items from BlockStart up to (not
    including) BlockEnd. Each handled opcode is forwarded to a helper; when
    the helper returns true the item p now designates is examined again,
    otherwise the walk moves on to the next item. Instructions with a
    segment-qualified memory operand are not touched. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      p : tai;
      handled : boolean;
    begin
      ClearUsedRegs;
      p:=BlockStart;
      while p<>BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs,tai(p.next));
          handled:=false;
          if (p.typ=ait_instruction) and
             not InsContainsSegRef(taicpu(p)) then
            begin
              case taicpu(p).opcode of
                A_Jcc:
                  handled:=OptPass2Jcc(p);
                A_FSTP,
                A_FISTP:
                  { the pass 1 helper is used here as well }
                  handled:=OptPass1FSTP(p);
                A_IMUL:
                  handled:=OptPass2Imul(p);
                A_JMP:
                  handled:=OptPass2Jmp(p);
                A_MOV:
                  handled:=OptPass2MOV(p);
                else
                  ;
              end;
            end;
          if not handled then
            p:=tai(p.next);
        end;
    end;
  { Post-peephole stage: walks the items from BlockStart up to (not
    including) BlockEnd, forwards call/lea/cmp/mov/test/or to their
    PostPeephole* helpers and rewrites "movzbl" into "xorl + movb" when
    optimizing for the Pentium. OptReferences is invoked once the walk is
    finished. Instructions with a segment-qualified memory operand are
    skipped. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1: tai;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    if PostPeepHoleOptCall(p) then
                      Continue;
                  A_LEA:
                    if PostPeepholeOptLea(p) then
                      Continue;
                  A_CMP:
                    if PostPeepholeOptCmp(p) then
                      Continue;
                  A_MOV:
                    if PostPeepholeOptMov(p) then
                      Continue;
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     { the xor clears the whole destination, so
                                       the source must not live in the same
                                       super register ("movzbl %al,%eax") }
                                     (getsupreg(taicpu(p).oper[0]^.reg) <>
                                      getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                              else
                                ;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              { the address must not depend on the register
                                that is about to be cleared }
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    begin
                      if PostPeepholeOptTestOr(p) then
                        Continue;
                    end;
                  else
                    ;
                end;
              end;
            else
              ;
          end;
          p := tai(p.next)
        end;
      OptReferences;
    end;
  { Driver of the optimizer. Repeats the per-block peephole stages over the
    whole assembler list. Without cs_opt_level3 the loop body runs exactly
    once; with it, the loop runs until "pass > 2 and nothing changed" or
    pass = 4. Note that changed is never set to true in this procedure.
    The dfa object is freed at the end. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      nextitem: tai;
      pass: longint;
      level3, changed, finalround: boolean;
    begin
      level3:=cs_opt_level3 in current_settings.optimizerswitches;
      pass:=0;
      changed:=false;
      repeat
        { pass = 4 is the hard limit that prevents endless loops }
        finalround:=not(level3) or
                    (not(changed) and (pass>2)) or
                    (pass=4);
        changed:=false;
        { Setup labeltable, always necessary }
        blockstart:=tai(asml.first);
        pass_1;
        { Blockend now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                { the very first round additionally runs the pre-peephole
                  stage and runs peephole pass 1 twice }
                if pass=0 then
                  begin
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                  end;
                PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalround then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of
              an assembler block or nil }
            BlockStart:=BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ=ait_marker) and
                  (tai_marker(BlockStart).Kind=mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart:=tai(BlockStart.next);
                until (BlockStart.typ=ait_marker) and
                      (tai_marker(BlockStart).Kind=mark_AsmBlockEnd);
                { Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
                if GetNextInstruction(BlockStart,nextitem) and
                   ((nextitem.typ<>ait_marker) or
                    (tai_marker(nextitem).Kind<>mark_AsmBlockStart)) then
                  { There is no assembler block anymore after the current
                    one, so optimize the next block of "normal"
                    instructions }
                  pass_1
                else
                  { Otherwise, skip the next assembler block }
                  BlockStart:=nextitem;
              end;
          end;
        inc(pass);
      until finalround;
      dfa.free;
    end;
  { unit initialization: assigns this unit's optimizer class to
    casmoptimizer }
  initialization
    casmoptimizer:=TCpuAsmOptimizer;
  end.