aoptcpu.pas 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { CPU-specific assembler optimizer for this target, derived from TX86AsmOptimizer.
    It overrides the Optimize entry point and the four peephole stages declared
    below, and adds one helper used for FPU store/load pairs. }
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  27. procedure Optimize; override;
  { Peephole stages, all overriding the inherited versions. }
  28. procedure PrePeepHoleOpts; override;
  29. procedure PeepHoleOptPass1; override;
  30. procedure PeepHoleOptPass2; override;
  31. procedure PostPeepHoleOpts; override;
  { Tries to remove an fstp/fld (or fistp/fild) pair on the same memory location;
    returns true when the caller should "continue" its loop because p was replaced. }
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean;
  33. end;
  34. Var
  35. AsmOptimizer : TCpuAsmOptimizer;
  36. Implementation
  37. uses
  38. verbose,globtype,globals,
  39. cpuinfo,
  40. aasmcpu,
  41. aoptutils,
  42. procinfo,
  43. cgutils,
  44. { units we should get rid of: }
  45. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    { Looks for a store-and-pop to memory (p) immediately followed by a load of the
      same memory location with the same operand size:
          fstp  mem ; fld  mem      or      fistp mem ; fild mem
      If the pair is an extended-precision (S_FX) access relative to the frame
      pointer and is directly followed by the procedure's exit code, both
      instructions are removed and p is moved to the exit-code instruction.

      Returns true if a "continue" should be done by the caller after this
      optimization (i.e. p was replaced), false if nothing was changed. }
    var
      loadIns, exitIns: tai;
      isStoreLoadPair: boolean;
    begin
      Result := false;

      { p must store to memory and must be followed by a real instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, loadIns) then
        exit;
      if loadIns.typ <> ait_instruction then
        exit;

      { fstp followed by fld, or fistp followed by fild }
      isStoreLoadPair :=
        ((taicpu(loadIns).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and (taicpu(loadIns).opcode = A_FILD));
      if not isStoreLoadPair then
        exit;

      { both must access the very same memory operand with the same size }
      if taicpu(loadIns).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(loadIns).opsize <> taicpu(p).opsize then
        exit;
      if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(loadIns).oper[0]^.ref^) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
      if taicpu(p).opsize <> S_FX then
        exit;

      { the pair has to be directly followed by the exit code ... }
      if not getNextInstruction(loadIns, exitIns) then
        exit;
      if exitIns.typ <> ait_instruction then
        exit;
      if not IsExitCode(exitIns) then
        exit;

      { ... and the location has to be frame-pointer relative without an index }
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      { the function result symbol's offset is only inspected when the symbol exists }
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { drop both instructions and let the caller carry on at the exit code }
      asml.remove(p);
      asml.remove(loadIns);
      p.free;
      loadIns.free;
      p := exitIns;
      removeLastDeallocForFuncRes(p);
      Result := true;

      (* Disabled alternative for the non-exit-code case; it can't be done because
         the store operation rounds:

         { fst can't store an extended value! }
         if (taicpu(p).opsize <> S_FX) and
            (taicpu(p).opsize <> S_IQ) then
           begin
             if (taicpu(p).opcode = A_FSTP) then
               taicpu(p).opcode := A_FST
             else taicpu(p).opcode := A_FIST;
             asml.remove(hp1);
             hp1.free;
           end
      *)
    end;
  { Converts a tinschange register-change value to the super register it refers to.

    The read group (CH_REAX..CH_REDI) indexes the lookup table directly. Values in
    the following groups (up to CH_WEDI, CH_RWEDI and CH_MEDI respectively) are
    folded back onto the read group by subtracting the ordinal of the last member
    of the preceding group. Anything above CH_MEDI raises an internal error.

    NOTE(review): the subtraction only lands on CH_REAX..CH_REDI if the groups are
    declared consecutively and CH_REAX has ordinal 1 - confirm against the
    declaration of tinschange. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      groupBase: longint;
    begin
      { determine how far ch has to be shifted down to reach the read group }
      if ch <= CH_REDI then
        groupBase := 0
      else if ch <= CH_WEDI then
        groupBase := ord(CH_REDI)
      else if ch <= CH_RWEDI then
        groupBase := ord(CH_WEDI)
      else if ch <= CH_MEDI then
        groupBase := ord(CH_RWEDI)
      else
        begin
          InternalError(2016041901);
          exit;
        end;
      tch2reg := ch2reg[tinschange(ord(ch) - groupBase)];
    end;
  { Checks if the register is a 32 bit general purpose register.
    What is actually tested: reg is an integer register whose super register lies
    in the range RS_EAX..RS_EBX. }
  function isgp32reg(reg: TRegister): boolean;
    var
      supreg: tsuperregister;
    begin
      isgp32reg := false;
      { the super register is only looked at for integer registers }
      if getregtype(reg) <> R_INTREGISTER then
        exit;
      supreg := getsupreg(reg);
  {$push}{$warnings off}
      isgp32reg := (supreg >= RS_EAX) and (supreg <= RS_EBX);
  {$pop}
    end;
  { Returns true if p contains a memory operand with a segment set, i.e. if any of
    its operands is a reference (top_ref) whose segment register is not NR_NO. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      idx: longint;
    begin
      result := false;
      idx := 0;
      while idx < p.opercnt do
        begin
          if (p.oper[idx]^.typ = top_ref) and
             (p.oper[idx]^.ref^.segment <> NR_NO) then
            begin
              { one segmented reference is enough }
              result := true;
              exit;
            end;
          inc(idx);
        end;
    end;
  { Returns true if p has to be treated as reading the CPU flags.

    - For an instruction, the answer is taken from the change set recorded in
      InsProp for its opcode: true if that set contains any of the "read" or
      "read/write" flag entries listed below, Ch_RFLAGScc, or Ch_All.
    - A label always yields true.
    - Every other kind of list entry yields false. }
  function InstrReadsFlags(p: tai): boolean;
    begin
      { preset to true so the branches below can simply exit on a hit }
      InstrReadsFlags := true;
      case p.typ of
        ait_instruction:
          if InsProp[taicpu(p).opcode].Ch*
             [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
              Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
              Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
            exit;
        ait_label:
          exit;
      end;
      InstrReadsFlags := false;
    end;
  { Pre-peephole pass: walks the list entries from BlockStart up to (not including)
    BlockEnd and rewrites a few instruction forms before the main passes run.

    Instructions for which InsContainsSegRef is true are skipped untouched.
    Otherwise:
      - A_IMUL with a constant first operand, a register second operand and
        size S_L:
          * constant 1: the 2-operand form is removed, the 3-operand form
            becomes "mov reg1, reg2";
          * constants 3, 5, 9 are replaced by a single LEA;
          * constants 6, 10, 12 are replaced by a two-instruction LEA/ADD
            sequence, but only when optimizecputype <= cpu_386.
        The constant > 1 rewrites are not done when optimizing for size, or when
        the next instruction is a jo/jno (LEA does not set the overflow flag the
        way IMUL does).
      - A_SAR / A_SHR are delegated to PrePeepholeOptSxx.
      - "xor reg, reg" is temporarily turned into "mov $0, reg". }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      p,hp1: tai;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                         ((taicpu(p).ops <= 2) or
                          (taicpu(p).oper[2]^.typ = Top_Reg)) and
                         (taicpu(p).oper[0]^.val <= 12) and
                         not(cs_opt_size in current_settings.optimizerswitches) and
                         (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1,[]);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                 {imul 3, reg1, reg2 to
                                    lea (reg1,reg1,2), reg2
                                  imul 3, reg1 to
                                    lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                 {imul 5, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                  imul 5, reg1 to
                                    lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                 {imul 6, reg1, reg2 to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction of the pair first and
                                         put it behind p; p itself is replaced further down }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the destination must be reg2 (oper[2]): the pair
                                             computes reg2 := 2*reg1, then
                                             reg2 := reg2 + 4*reg1, and must leave reg1 intact }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       reference_reset(tmpref,2,[]);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       { continue behind the second instruction of the pair }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                 {imul 9, reg1, reg2 to
                                    lea (reg1,reg1,8), reg2
                                  imul 9, reg1 to
                                    lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                 {imul 10, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                    add reg2, reg2
                                  imul 10, reg1 to
                                    lea (reg1,reg1,4), reg1
                                    add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { second instruction of the pair goes behind p first }
                                       if (taicpu(p).ops = 3) then
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                       else
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       InsertLLItem(p, p.next, hp1);
                                       TmpRef.base := taicpu(p).oper[1]^.reg;
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 4;
                                       if (taicpu(p).ops = 3) then
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                       else
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end;
                              12: begin
                                 {imul 12, reg1, reg2 to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1}
                                   if (current_settings.optimizecputype <= cpu_386)
                                     then
                                     begin
                                       { second instruction of the pair goes behind p first }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 8;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       reference_reset(tmpref,2,[]);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           TmpRef.ScaleFactor := 2;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end
                            end;
                          end;
                    end;
                  A_SAR,A_SHR:
                    if PrePeepholeOptSxx(p) then
                      continue;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                     { temporarily change this to 'mov reg,0' to make it easier }
                     { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  384. { First pass of peephole optimizations }
  385. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging helper: prints 'Ok' on standard output and always returns true. }
  function WriteOk : Boolean;
    begin
      writeln('Ok');
      WriteOk := True;
    end;
  391. var
  392. l : longint;
  393. p,hp1,hp2 : tai;
  394. hp3,hp4: tai;
  395. v:aint;
  396. TmpRef: TReference;
  397. TmpBool1, TmpBool2: Boolean;
  { Retargets the jump hp to the final destination of a chain of jumps.
    Returns false when the chain is followed more than 20 levels deep, when a
    direct self-loop is detected, or when a recursive call returns false;
    returns true otherwise (including when the target label cannot be found
    through getlabelwithsym, in which case hp is left unchanged). }
  398. function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  399. {traces successive jumps to their final destination and sets it, e.g.
  400. je l1 je l3
  401. <code> <code>
  402. l1: becomes l1:
  403. je l2 je l3
  404. <code> <code>
  405. l2: l2:
  406. jmp l3 jmp l3
  407. the level parameter denotes how deep we have already followed the jump,
  408. to avoid endless loops with constructs such as "l5: ; jmp l5" }
  409. var p1, p2: tai;
  410. l: tasmlabel;
  { Skips the entries following hp whose type is in SkipInstr+[ait_align]; if the
    entry after those is a label, stores its symbol in l and returns true. }
  411. function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
  412. begin
  413. FindAnyLabel := false;
  414. while assigned(hp.next) and
  415. (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
  416. hp := tai(hp.next);
  417. if assigned(hp.next) and
  418. (tai(hp.next).typ = ait_label) then
  419. begin
  420. FindAnyLabel := true;
  421. l := tai_label(hp.next).labsym;
  422. end
  423. end;
  424. begin
  425. GetfinalDestination := false;
  { recursion depth limit }
  426. if level > 20 then
  427. exit;
  { p1 = list entry of the label hp jumps to, if it can be found }
  428. p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
  429. if assigned(p1) then
  430. begin
  431. SkipLabels(p1,p1);
  432. if (tai(p1).typ = ait_instruction) and
  433. (taicpu(p1).is_jmp) then
  434. if { the next instruction after the label where the jump hp arrives}
  435. { is unconditional or of the same type as hp, so continue }
  436. (taicpu(p1).condition in [C_None,hp.condition]) or
  437. { the next instruction after the label where the jump hp arrives}
  438. { is the opposite of hp (so this one is never taken), but after }
  439. { that one there is a branch that will be taken, so perform a }
  440. { little hack: set p1 equal to this instruction (that's what the}
  441. { last SkipLabels is for, only works with short bool evaluation)}
  442. ((taicpu(p1).condition = inverse_cond(hp.condition)) and
  443. SkipLabels(p1,p2) and
  444. (p2.typ = ait_instruction) and
  445. (taicpu(p2).is_jmp) and
  446. (taicpu(p2).condition in [C_None,hp.condition]) and
  447. SkipLabels(p1,p1)) then
  448. begin
  449. { quick check for loops of the form "l5: ; jmp l5 }
  450. if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
  451. tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
  452. exit;
  { resolve p1's own target first, then take it over for hp }
  453. if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
  454. exit;
  { move one reference from the old target label to the new one }
  455. tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
  456. hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
  457. tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
  458. end
  459. else
  { p1 is the inverse conditional jump: hp can never take it, so hp may jump
    straight to whatever follows p1 }
  460. if (taicpu(p1).condition = inverse_cond(hp.condition)) then
  461. if not FindAnyLabel(p1,l) then
  462. begin
  463. {$ifdef finaldestdebug}
  464. insertllitem(asml,p1,p1.next,tai_comment.Create(
  465. strpnew('previous label inserted'))));
  466. {$endif finaldestdebug}
  { no label behind p1 yet: create one and point hp at it }
  467. current_asmdata.getjumplabel(l);
  468. insertllitem(p1,p1.next,tai_label.Create(l));
  469. tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
  470. hp.oper[0]^.ref^.symbol := l;
  471. l.increfs;
  472. { this won't work, since the new label isn't in the labeltable }
  473. { so it will fail the rangecheck. Labeltable should become a }
  474. { hashtable to support this: }
  475. { GetFinalDestination(asml, hp); }
  476. end
  477. else
  478. begin
  479. {$ifdef finaldestdebug}
  480. insertllitem(asml,p1,p1.next,tai_comment.Create(
  481. strpnew('next label reused'))));
  482. {$endif finaldestdebug}
  { NOTE(review): unlike the branch above, the previous target symbol of hp is
    not decref'd here before being replaced - confirm whether that is intended }
  483. l.increfs;
  484. hp.oper[0]^.ref^.symbol := l;
  485. if not GetFinalDestination(asml, hp,succ(level)) then
  486. exit;
  487. end;
  488. end;
  489. GetFinalDestination := true;
  490. end;
  491. begin
  492. p := BlockStart;
  493. ClearUsedRegs;
  494. while (p <> BlockEnd) Do
  495. begin
  496. UpDateUsedRegs(UsedRegs, tai(p.next));
  497. case p.Typ Of
  498. ait_instruction:
  499. begin
  500. current_filepos:=taicpu(p).fileinfo;
  501. if InsContainsSegRef(taicpu(p)) then
  502. begin
  503. p := tai(p.next);
  504. continue;
  505. end;
  506. { Handle Jmp Optimizations }
  507. if taicpu(p).is_jmp then
  508. begin
  509. {the following if-block removes all code between a jmp and the next label,
  510. because it can never be executed}
  511. if (taicpu(p).opcode = A_JMP) then
  512. begin
  513. hp2:=p;
  514. while GetNextInstruction(hp2, hp1) and
  515. (hp1.typ <> ait_label) do
  516. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  517. begin
  518. { don't kill start/end of assembler block,
  519. no-line-info-start/end etc }
  520. if hp1.typ<>ait_marker then
  521. begin
  522. asml.remove(hp1);
  523. hp1.free;
  524. end
  525. else
  526. hp2:=hp1;
  527. end
  528. else break;
  529. end;
  530. { remove jumps to a label coming right after them }
  531. if GetNextInstruction(p, hp1) then
  532. begin
  533. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  534. { TODO: FIXME removing the first instruction fails}
  535. (p<>blockstart) then
  536. begin
  537. hp2:=tai(hp1.next);
  538. asml.remove(p);
  539. p.free;
  540. p:=hp2;
  541. continue;
  542. end
  543. else
  544. begin
  545. if hp1.typ = ait_label then
  546. SkipLabels(hp1,hp1);
  547. if (tai(hp1).typ=ait_instruction) and
  548. (taicpu(hp1).opcode=A_JMP) and
  549. GetNextInstruction(hp1, hp2) and
  550. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  551. begin
  552. if taicpu(p).opcode=A_Jcc then
  553. begin
  554. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  555. tai_label(hp2).labsym.decrefs;
  556. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  557. { when free'ing hp1, the ref. isn't decresed, so we don't
  558. increase it (FK)
  559. taicpu(p).oper[0]^.ref^.symbol.increfs;
  560. }
  561. asml.remove(hp1);
  562. hp1.free;
  563. GetFinalDestination(asml, taicpu(p),0);
  564. end
  565. else
  566. begin
  567. GetFinalDestination(asml, taicpu(p),0);
  568. p:=tai(p.next);
  569. continue;
  570. end;
  571. end
  572. else
  573. GetFinalDestination(asml, taicpu(p),0);
  574. end;
  575. end;
  576. end
  577. else
  578. { All other optimizes }
  579. begin
  580. for l := 0 to taicpu(p).ops-1 Do
  581. if (taicpu(p).oper[l]^.typ = top_ref) then
  582. With taicpu(p).oper[l]^.ref^ Do
  583. begin
  584. if (base = NR_NO) and
  585. (index <> NR_NO) and
  586. (scalefactor in [0,1]) then
  587. begin
  588. base := index;
  589. index := NR_NO
  590. end
  591. end;
  592. case taicpu(p).opcode Of
  593. A_AND:
  594. if OptPass1And(p) then
  595. continue;
  596. A_CMP:
  597. begin
  598. { cmp register,$8000 neg register
  599. je target --> jo target
  600. .... only if register is deallocated before jump.}
  601. case Taicpu(p).opsize of
  602. S_B: v:=$80;
  603. S_W: v:=$8000;
  604. S_L: v:=aint($80000000);
  605. else
  606. internalerror(2013112905);
  607. end;
  608. if (taicpu(p).oper[0]^.typ=Top_const) and
  609. (taicpu(p).oper[0]^.val=v) and
  610. (Taicpu(p).oper[1]^.typ=top_reg) and
  611. GetNextInstruction(p, hp1) and
  612. (hp1.typ=ait_instruction) and
  613. (taicpu(hp1).opcode=A_Jcc) and
  614. (Taicpu(hp1).condition in [C_E,C_NE]) and
  615. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  616. begin
  617. Taicpu(p).opcode:=A_NEG;
  618. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  619. Taicpu(p).clearop(1);
  620. Taicpu(p).ops:=1;
  621. if Taicpu(hp1).condition=C_E then
  622. Taicpu(hp1).condition:=C_O
  623. else
  624. Taicpu(hp1).condition:=C_NO;
  625. continue;
  626. end;
  627. {
  628. @@2: @@2:
  629. .... ....
  630. cmp operand1,0
  631. jle/jbe @@1
  632. dec operand1 --> sub operand1,1
  633. jmp @@2 jge/jae @@2
  634. @@1: @@1:
  635. ... ....}
  636. if (taicpu(p).oper[0]^.typ = top_const) and
  637. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  638. (taicpu(p).oper[0]^.val = 0) and
  639. GetNextInstruction(p, hp1) and
  640. (hp1.typ = ait_instruction) and
  641. (taicpu(hp1).is_jmp) and
  642. (taicpu(hp1).opcode=A_Jcc) and
  643. (taicpu(hp1).condition in [C_LE,C_BE]) and
  644. GetNextInstruction(hp1,hp2) and
  645. (hp2.typ = ait_instruction) and
  646. (taicpu(hp2).opcode = A_DEC) and
  647. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  648. GetNextInstruction(hp2, hp3) and
  649. (hp3.typ = ait_instruction) and
  650. (taicpu(hp3).is_jmp) and
  651. (taicpu(hp3).opcode = A_JMP) and
  652. GetNextInstruction(hp3, hp4) and
  653. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  654. begin
  655. taicpu(hp2).Opcode := A_SUB;
  656. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  657. taicpu(hp2).loadConst(0,1);
  658. taicpu(hp2).ops:=2;
  659. taicpu(hp3).Opcode := A_Jcc;
  660. case taicpu(hp1).condition of
  661. C_LE: taicpu(hp3).condition := C_GE;
  662. C_BE: taicpu(hp3).condition := C_AE;
  663. end;
  664. asml.remove(p);
  665. asml.remove(hp1);
  666. p.free;
  667. hp1.free;
  668. p := hp2;
  669. continue;
  670. end
  671. end;
  672. A_FLD:
  673. begin
  674. if (taicpu(p).oper[0]^.typ = top_reg) and
  675. GetNextInstruction(p, hp1) and
  676. (hp1.typ = Ait_Instruction) and
  677. (taicpu(hp1).oper[0]^.typ = top_reg) and
  678. (taicpu(hp1).oper[1]^.typ = top_reg) and
  679. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  680. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  681. { change to
  682. fld reg fxxx reg,st
  683. fxxxp st, st1 (hp1)
  684. Remark: non commutative operations must be reversed!
  685. }
  686. begin
  687. case taicpu(hp1).opcode Of
  688. A_FMULP,A_FADDP,
  689. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  690. begin
  691. case taicpu(hp1).opcode Of
  692. A_FADDP: taicpu(hp1).opcode := A_FADD;
  693. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  694. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  695. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  696. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  697. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  698. end;
  699. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  700. taicpu(hp1).oper[1]^.reg := NR_ST;
  701. asml.remove(p);
  702. p.free;
  703. p := hp1;
  704. continue;
  705. end;
  706. end;
  707. end
  708. else
  709. if (taicpu(p).oper[0]^.typ = top_ref) and
  710. GetNextInstruction(p, hp2) and
  711. (hp2.typ = Ait_Instruction) and
  712. (taicpu(hp2).ops = 2) and
  713. (taicpu(hp2).oper[0]^.typ = top_reg) and
  714. (taicpu(hp2).oper[1]^.typ = top_reg) and
  715. (taicpu(p).opsize in [S_FS, S_FL]) and
  716. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  717. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  718. if GetLastInstruction(p, hp1) and
  719. (hp1.typ = Ait_Instruction) and
  720. ((taicpu(hp1).opcode = A_FLD) or
  721. (taicpu(hp1).opcode = A_FST)) and
  722. (taicpu(hp1).opsize = taicpu(p).opsize) and
  723. (taicpu(hp1).oper[0]^.typ = top_ref) and
  724. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  725. if ((taicpu(hp2).opcode = A_FMULP) or
  726. (taicpu(hp2).opcode = A_FADDP)) then
  727. { change to
  728. fld/fst mem1 (hp1) fld/fst mem1
  729. fld mem1 (p) fadd/
  730. faddp/ fmul st, st
  731. fmulp st, st1 (hp2) }
  732. begin
  733. asml.remove(p);
  734. p.free;
  735. p := hp1;
  736. if (taicpu(hp2).opcode = A_FADDP) then
  737. taicpu(hp2).opcode := A_FADD
  738. else
  739. taicpu(hp2).opcode := A_FMUL;
  740. taicpu(hp2).oper[1]^.reg := NR_ST;
  741. end
  742. else
  743. { change to
  744. fld/fst mem1 (hp1) fld/fst mem1
  745. fld mem1 (p) fld st}
  746. begin
  747. taicpu(p).changeopsize(S_FL);
  748. taicpu(p).loadreg(0,NR_ST);
  749. end
  750. else
  751. begin
  752. case taicpu(hp2).opcode Of
  753. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  754. { change to
  755. fld/fst mem1 (hp1) fld/fst mem1
  756. fld mem2 (p) fxxx mem2
  757. fxxxp st, st1 (hp2) }
  758. begin
  759. case taicpu(hp2).opcode Of
  760. A_FADDP: taicpu(p).opcode := A_FADD;
  761. A_FMULP: taicpu(p).opcode := A_FMUL;
  762. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  763. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  764. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  765. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  766. end;
  767. asml.remove(hp2);
  768. hp2.free;
  769. end
  770. end
  771. end
  772. end;
  773. A_FSTP,A_FISTP:
  774. if doFpuLoadStoreOpt(p) then
  775. continue;
  776. A_LEA:
  777. begin
  778. if OptPass1LEA(p) then
  779. continue;
  780. end;
  781. A_MOV:
  782. begin
  783. If OptPass1MOV(p) then
  784. Continue;
  785. end;
  786. A_MOVSX,
  787. A_MOVZX :
  788. begin
  789. If OptPass1Movx(p) then
  790. Continue
  791. end;
  792. (* should not be generated anymore by the current code generator
  793. A_POP:
  794. begin
  795. if target_info.system=system_i386_go32v2 then
  796. begin
  797. { Transform a series of pop/pop/pop/push/push/push to }
  798. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  799. { because I'm not sure whether they can cope with }
  800. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  801. { such a problem when using esp as frame pointer (JM) }
  802. if (taicpu(p).oper[0]^.typ = top_reg) then
  803. begin
  804. hp1 := p;
  805. hp2 := p;
  806. l := 0;
  807. while getNextInstruction(hp1,hp1) and
  808. (hp1.typ = ait_instruction) and
  809. (taicpu(hp1).opcode = A_POP) and
  810. (taicpu(hp1).oper[0]^.typ = top_reg) do
  811. begin
  812. hp2 := hp1;
  813. inc(l,4);
  814. end;
  815. getLastInstruction(p,hp3);
  816. l1 := 0;
  817. while (hp2 <> hp3) and
  818. assigned(hp1) and
  819. (hp1.typ = ait_instruction) and
  820. (taicpu(hp1).opcode = A_PUSH) and
  821. (taicpu(hp1).oper[0]^.typ = top_reg) and
  822. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  823. begin
  824. { change it to a two op operation }
  825. taicpu(hp2).oper[1]^.typ:=top_none;
  826. taicpu(hp2).ops:=2;
  827. taicpu(hp2).opcode := A_MOV;
  828. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  829. reference_reset(tmpref);
  830. tmpRef.base.enum:=R_INTREGISTER;
  831. tmpRef.base.number:=NR_STACK_POINTER_REG;
  832. convert_register_to_enum(tmpref.base);
  833. tmpRef.offset := l;
  834. taicpu(hp2).loadRef(0,tmpRef);
  835. hp4 := hp1;
  836. getNextInstruction(hp1,hp1);
  837. asml.remove(hp4);
  838. hp4.free;
  839. getLastInstruction(hp2,hp2);
  840. dec(l,4);
  841. inc(l1);
  842. end;
  843. if l <> -4 then
  844. begin
  845. inc(l,4);
  846. for l1 := l1 downto 1 do
  847. begin
  848. getNextInstruction(hp2,hp2);
  849. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  850. end
  851. end
  852. end
  853. end
  854. else
  855. begin
  856. if (taicpu(p).oper[0]^.typ = top_reg) and
  857. GetNextInstruction(p, hp1) and
  858. (tai(hp1).typ=ait_instruction) and
  859. (taicpu(hp1).opcode=A_PUSH) and
  860. (taicpu(hp1).oper[0]^.typ = top_reg) and
  861. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  862. begin
  863. { change it to a two op operation }
  864. taicpu(p).oper[1]^.typ:=top_none;
  865. taicpu(p).ops:=2;
  866. taicpu(p).opcode := A_MOV;
  867. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  868. reference_reset(tmpref);
  869. TmpRef.base.enum := R_ESP;
  870. taicpu(p).loadRef(0,TmpRef);
  871. asml.remove(hp1);
  872. hp1.free;
  873. end;
  874. end;
  875. end;
  876. *)
  877. A_PUSH:
  878. begin
  879. if (taicpu(p).opsize = S_W) and
  880. (taicpu(p).oper[0]^.typ = Top_Const) and
  881. GetNextInstruction(p, hp1) and
  882. (tai(hp1).typ = ait_instruction) and
  883. (taicpu(hp1).opcode = A_PUSH) and
  884. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  885. (taicpu(hp1).opsize = S_W) then
  886. begin
  887. taicpu(p).changeopsize(S_L);
  888. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  889. asml.remove(hp1);
  890. hp1.free;
  891. end;
  892. end;
  893. A_SHL, A_SAL:
  894. begin
  895. if (taicpu(p).oper[0]^.typ = Top_Const) and
  896. (taicpu(p).oper[1]^.typ = Top_Reg) and
  897. (taicpu(p).opsize = S_L) and
  898. (taicpu(p).oper[0]^.val <= 3) then
  899. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  900. begin
  901. TmpBool1 := True; {should we check the next instruction?}
  902. TmpBool2 := False; {have we found an add/sub which could be
  903. integrated in the lea?}
  904. reference_reset(tmpref,2,[]);
  905. TmpRef.index := taicpu(p).oper[1]^.reg;
  906. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  907. while TmpBool1 and
  908. GetNextInstruction(p, hp1) and
  909. (tai(hp1).typ = ait_instruction) and
  910. ((((taicpu(hp1).opcode = A_ADD) or
  911. (taicpu(hp1).opcode = A_SUB)) and
  912. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  913. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  914. (((taicpu(hp1).opcode = A_INC) or
  915. (taicpu(hp1).opcode = A_DEC)) and
  916. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  917. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  918. (not GetNextInstruction(hp1,hp2) or
  919. not instrReadsFlags(hp2)) Do
  920. begin
  921. TmpBool1 := False;
  922. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  923. begin
  924. TmpBool1 := True;
  925. TmpBool2 := True;
  926. case taicpu(hp1).opcode of
  927. A_ADD:
  928. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  929. A_SUB:
  930. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  931. end;
  932. asml.remove(hp1);
  933. hp1.free;
  934. end
  935. else
  936. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  937. (((taicpu(hp1).opcode = A_ADD) and
  938. (TmpRef.base = NR_NO)) or
  939. (taicpu(hp1).opcode = A_INC) or
  940. (taicpu(hp1).opcode = A_DEC)) then
  941. begin
  942. TmpBool1 := True;
  943. TmpBool2 := True;
  944. case taicpu(hp1).opcode of
  945. A_ADD:
  946. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  947. A_INC:
  948. inc(TmpRef.offset);
  949. A_DEC:
  950. dec(TmpRef.offset);
  951. end;
  952. asml.remove(hp1);
  953. hp1.free;
  954. end;
  955. end;
  956. if TmpBool2 or
  957. ((current_settings.optimizecputype < cpu_Pentium2) and
  958. (taicpu(p).oper[0]^.val <= 3) and
  959. not(cs_opt_size in current_settings.optimizerswitches)) then
  960. begin
  961. if not(TmpBool2) and
  962. (taicpu(p).oper[0]^.val = 1) then
  963. begin
  964. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  965. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  966. end
  967. else
  968. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  969. taicpu(p).oper[1]^.reg);
  970. InsertLLItem(p.previous, p.next, hp1);
  971. p.free;
  972. p := hp1;
  973. end;
  974. end
  975. else
  976. if (current_settings.optimizecputype < cpu_Pentium2) and
  977. (taicpu(p).oper[0]^.typ = top_const) and
  978. (taicpu(p).oper[1]^.typ = top_reg) then
  979. if (taicpu(p).oper[0]^.val = 1) then
  980. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  981. but faster on a 486, and pairable in both U and V pipes on the Pentium
  982. (unlike shl, which is only pairable in the U pipe)}
  983. begin
  984. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  985. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  986. InsertLLItem(p.previous, p.next, hp1);
  987. p.free;
  988. p := hp1;
  989. end
  990. else if (taicpu(p).opsize = S_L) and
  991. (taicpu(p).oper[0]^.val<= 3) then
  992. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  993. "shl $3, %reg" to "lea (,%reg,8), %reg"}
  994. begin
  995. reference_reset(tmpref,2,[]);
  996. TmpRef.index := taicpu(p).oper[1]^.reg;
  997. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  998. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  999. InsertLLItem(p.previous, p.next, hp1);
  1000. p.free;
  1001. p := hp1;
  1002. end
  1003. end;
  1004. A_SETcc :
  1005. { changes
  1006. setcc (funcres) setcc reg
  1007. movb (funcres), reg to leave/ret
  1008. leave/ret }
  1009. begin
  1010. if (taicpu(p).oper[0]^.typ = top_ref) and
  1011. GetNextInstruction(p, hp1) and
  1012. GetNextInstruction(hp1, hp2) and
  1013. IsExitCode(hp2) and
  1014. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1015. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1016. not(assigned(current_procinfo.procdef.funcretsym) and
  1017. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1018. (hp1.typ = ait_instruction) and
  1019. (taicpu(hp1).opcode = A_MOV) and
  1020. (taicpu(hp1).opsize = S_B) and
  1021. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1022. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1023. begin
  1024. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1025. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1026. asml.remove(hp1);
  1027. hp1.free;
  1028. end
  1029. end;
  1030. A_SUB:
  1031. if OptPass1Sub(p) then
  1032. continue;
  1033. A_VMOVAPS,
  1034. A_VMOVAPD:
  1035. if OptPass1VMOVAP(p) then
  1036. continue;
  1037. A_VDIVSD,
  1038. A_VDIVSS,
  1039. A_VSUBSD,
  1040. A_VSUBSS,
  1041. A_VMULSD,
  1042. A_VMULSS,
  1043. A_VADDSD,
  1044. A_VADDSS,
  1045. A_VANDPD,
  1046. A_VANDPS,
  1047. A_VORPD,
  1048. A_VORPS,
  1049. A_VXORPD,
  1050. A_VXORPS:
  1051. if OptPass1VOP(p) then
  1052. continue;
  1053. A_MULSD,
  1054. A_MULSS,
  1055. A_ADDSD,
  1056. A_ADDSS:
  1057. if OptPass1OP(p) then
  1058. continue;
  1059. A_MOVAPD,
  1060. A_MOVAPS:
  1061. if OptPass1MOVAP(p) then
  1062. continue;
  1063. A_VMOVSD,
  1064. A_VMOVSS,
  1065. A_MOVSD,
  1066. A_MOVSS:
  1067. if OptPass1MOVXX(p) then
  1068. continue;
  1069. end;
  1070. end; { if is_jmp }
  1071. end;
  1072. end;
  1073. updateUsedRegs(UsedRegs,p);
  1074. p:=tai(p.next);
  1075. end;
  1076. end;
  { Second peephole pass over the current block, i.e. every list item from
    BlockStart up to (but not including) BlockEnd.

    Each jcc, fstp/fistp, imul, jmp and mov instruction is handed to its
    dedicated pass-2 handler.  Instructions for which InsContainsSegRef
    returns true, and all non-instruction items, are stepped over untouched.

    When a handler returns true the loop looks at "p" again instead of
    advancing to p.next (the handlers receive "p" itself - presumably as a
    var parameter so that they can reposition it; confirm against their
    declarations). }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      p: tai;
      { true when a handler asked for "p" to be examined again }
      revisit: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while p <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          revisit := false;
          { relies on short-circuit evaluation: the taicpu cast is only
            reached for real instructions }
          if (p.typ = ait_instruction) and
             not InsContainsSegRef(taicpu(p)) then
            case taicpu(p).opcode of
              A_Jcc:
                revisit := OptPass2Jcc(p);
              A_FSTP, A_FISTP:
                revisit := DoFpuLoadStoreOpt(p);
              A_IMUL:
                revisit := OptPass2Imul(p);
              A_JMP:
                revisit := OptPass2Jmp(p);
              A_MOV:
                revisit := OptPass2MOV(p);
            end;
          if not revisit then
            p := tai(p.next);
        end;
    end;
  { Final peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd), followed by a call to OptReferences.

    Rewrites performed here:
      * "call X; jmp Y"  ->  "push Y; jmp X"   (only when optimizing for a
        cpu older than cpu_Pentium2, never for PIC code)
      * "call X; ret"    ->  "jmp X"           (only with cs_opt_level4,
        never for PIC code; it destroys stack back traces)
      * "movzbl src, %reg32" -> "xorl %reg32, %reg32; movb src, %reg8"
        (only for cpu_Pentium, without cs_opt_size and without
        cs_opt_regvar)
    cmp, mov and test/or instructions are delegated to PostPeepholeOptCmp,
    PostPeepholeOptMov and PostPeepholeOptTestOr; when one of those returns
    true, "p" is examined again instead of advancing to p.next.

    Instructions for which InsContainsSegRef returns true are skipped. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p, following, newins: tai;
      { true when a delegated handler asked for "p" to be examined again }
      revisit: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while p <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          revisit := false;
          if (p.typ = ait_instruction) and
             not InsContainsSegRef(taicpu(p)) then
            case taicpu(p).opcode of
              A_CALL:
                { both rewrites need non-PIC code and a real instruction
                  directly after the call }
                if not (cs_create_pic in current_settings.moduleswitches) and
                   ((current_settings.optimizecputype < cpu_Pentium2) or
                    (cs_opt_level4 in current_settings.optimizerswitches)) and
                   GetNextInstruction(p, following) and
                   (following.typ = ait_instruction) then
                  begin
                    { don't do this on modern CPUs, this really hurts them
                      due to broken call/ret pairing }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       (taicpu(following).opcode = A_JMP) and
                       (taicpu(following).oper[0]^.typ = top_ref) and
                       (taicpu(following).oper[0]^.ref^.refaddr = addr_full) then
                      begin
                        newins := taicpu.Op_sym(A_PUSH, S_L,
                          taicpu(following).oper[0]^.ref^.symbol);
                        InsertLLItem(p.previous, p, newins);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(following);
                        following.free;
                      end
                    { replace
                        call procname
                        ret
                      by
                        jmp procname
                      this should never hurt except when pic is used, not
                      sure how to handle it then; done only on level 4
                      because it destroys stack back traces }
                    else if (cs_opt_level4 in current_settings.optimizerswitches) and
                            (taicpu(following).opcode = A_RET) and
                            (taicpu(following).ops = 0) then
                      begin
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(following);
                        following.free;
                      end;
                  end;
              A_CMP:
                revisit := PostPeepholeOptCmp(p);
              A_MOV:
                revisit := PostPeepholeOptMov(p);
              A_MOVZX:
                { if register vars are on, it's possible there is code like
                  "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"
                  so we can't safely replace the movzx then with xor/mov,
                  since that would change the flags (JM) }
                if not (cs_opt_regvar in current_settings.optimizerswitches) and
                   (taicpu(p).oper[1]^.typ = top_reg) and
                   (taicpu(p).opsize = S_BL) and
                   IsGP32Reg(taicpu(p).oper[1]^.reg) and
                   not (cs_opt_size in current_settings.optimizerswitches) and
                   (current_settings.optimizecputype = cpu_Pentium) and
                   ((taicpu(p).oper[0]^.typ = top_reg) or
                    { a memory source must not address through the
                      destination register, which is about to be zeroed }
                    ((taicpu(p).oper[0]^.typ = top_ref) and
                     (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                     (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg))) then
                  begin
                    { change "movzbl src, %reg" to
                      "xorl %reg, %reg; movb src, %reg8" for Pentium and
                      PentiumMMX }
                    newins := taicpu.op_reg_reg(A_XOR, S_L,
                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                    InsertLLItem(p.previous, p, newins);
                    taicpu(p).opcode := A_MOV;
                    taicpu(p).changeopsize(S_B);
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBL);
                  end;
              A_TEST, A_OR:
                revisit := PostPeepholeOptTestOr(p);
            end;
          if not revisit then
            p := tai(p.next);
        end;
      OptReferences;
    end;
  { Driver of the assembler optimizer: runs the peephole passes over every
    block of "normal" instructions in asml, stepping over the regions
    delimited by mark_AsmBlockStart / mark_AsmBlockEnd markers.

    Without cs_opt_level3 the whole list is processed exactly once.  With
    cs_opt_level3 the processing is repeated; the loop ends on the first
    iteration where "pass > 2" holds while "changed" is still false, and at
    the latest when pass reaches 4.

    Per block, and only when cs_opt_peephole is set:
      pass 0      : PrePeepHoleOpts, then PeepHoleOptPass1 twice
      later passes: PeepHoleOptPass1 once
      every pass  : PeepHoleOptPass2
      last pass   : PostPeepHoleOpts
    dfa is freed once all passes are done. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      afterAsm: tai;
      pass: longint;
      slowopt, changed, lastLoop: boolean;
    begin
      slowopt := cs_opt_level3 in current_settings.optimizerswitches;
      pass := 0;
      changed := false;
      repeat
        lastLoop := (not slowopt) or
                    ((pass > 2) and not changed) or
                    { prevent endless loops }
                    (pass = 4);
        changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if pass = 0 then
                  begin
                    PrePeepHoleOpts;
                    { pass 1 is performed twice in the first round only }
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { more peephole optimizations }
                PeepHoleOptPass2;
                if lastLoop then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of
              an assembler block or nil }
            BlockStart := BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).kind = mark_AsmBlockEnd);
                { BlockStart now contains a tai_marker(mark_AsmBlockEnd) }
                if (not GetNextInstruction(BlockStart, afterAsm)) or
                   ((afterAsm.typ = ait_marker) and
                    (tai_marker(afterAsm).kind = mark_AsmBlockStart)) then
                  { nothing follows, or another assembler block follows
                    directly: carry on from there }
                  BlockStart := afterAsm
                else
                  { There is no assembler block anymore after the current
                    one, so optimize the next block of "normal"
                    instructions }
                  pass_1;
              end;
          end;
        inc(pass);
      until lastLoop;
      dfa.free;
    end;
  { Unit initialization: stores the TCpuAsmOptimizer class in casmoptimizer
    (presumably the class reference from which the optimizer is
    instantiated elsewhere - confirm at its declaration). }
  initialization
    casmoptimizer := TCpuAsmOptimizer;
  end.