aoptcpu.pas 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer) { i386 assembler optimizer; extends TX86AsmOptimizer with the passes declared below }
  27. procedure Optimize; override; { implementation is not in the visible part of this file }
  28. procedure PrePeepHoleOpts; override; { pre-pass: rewrites some "imul const,reg" forms into mov/lea/add, calls PrePeepholeOptSxx for sar/shr and turns "xor reg,reg" into "mov $0,reg" }
  29. procedure PeepHoleOptPass1; override; { first peephole pass: walks BlockStart..BlockEnd and dispatches per opcode (jumps, and, cmp, fld, fstp/fistp, lea, mov, movsx/movzx, push, shl/sal, ...) }
  30. procedure PeepHoleOptPass2; override; { presumably the second peephole pass - body not visible here, confirm }
  31. procedure PostPeepHoleOpts; override; { presumably the final clean-up pass - body not visible here, confirm }
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean; { removes an "fstp/fistp mem; fld/fild mem" pair in front of exit code; returns true if the caller should "continue" }
  33. end;
  34. Var
  35. AsmOptimizer : TCpuAsmOptimizer;
  36. Implementation
  37. uses
  38. verbose,globtype,globals,
  39. cpuinfo,
  40. aasmcpu,
  41. aoptutils,
  42. procinfo,
  43. cgutils,
  44. { units we should get rid of: }
  45. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Looks for an "fstp mem; fld mem" or "fistp mem; fild mem" pair that starts
    at p and uses the same operand size and the same memory reference.

    When the pair is an extended (S_FX) store/load that is directly followed by
    exit code, and the reference is frame pointer based, has no index and does
    not lie below the local offset of the function result symbol (if there is
    one), both instructions are removed and p is moved to the exit code.

    Returns true if a "continue" should be done by the caller after this
    optimization (i.e. p was replaced), false if nothing was changed. }
  var
    reload, exitins: tai;
  begin
    Result := false;

    { p has to store to memory and must be followed by a real instruction }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;

    { only fstp/fld and fistp/fild pairs are of interest }
    if not (((taicpu(reload).opcode = A_FLD) and
             (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and
             (taicpu(reload).opcode = A_FILD))) then
      exit;

    { the reload must read exactly what was just stored }
    if taicpu(reload).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(reload).opsize <> taicpu(p).opsize then
      exit;
    if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
    if taicpu(p).opsize <> S_FX then
      exit;
    (* can't be done because the store operation rounds
       (this used to be the "else" branch for the non-extended sizes):
         { fst can't store an extended value! }
         if (taicpu(p).opsize <> S_FX) and
            (taicpu(p).opsize <> S_IQ) then
           begin
             if (taicpu(p).opcode = A_FSTP) then
               taicpu(p).opcode := A_FST
             else taicpu(p).opcode := A_FIST;
             asml.remove(hp1);
             hp1.free;
           end
    *)

    { the pair has to be immediately followed by the exit code }
    if not getNextInstruction(reload, exitins) then
      exit;
    if exitins.typ <> ait_instruction then
      exit;
    if not IsExitCode(exitins) then
      exit;

    { only plain frame pointer relative references qualify ... }
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    { ... that are not located below the function result's local offset ... }
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    { ... and that do not use an index register }
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    { drop both the store and the reload, continue with the exit code }
    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := exitins;
    removeLastDeallocForFuncRes(p);
    Result := true;
  end;
  { Maps a register related tinschange value to the super register it refers to.

    Values up to CH_REDI index the table directly. Values up to CH_WEDI,
    CH_RWEDI and CH_MEDI are first rebased by subtracting the ordinal of the
    last member of the preceding group (CH_REDI, CH_WEDI and CH_RWEDI
    respectively). Anything above CH_MEDI raises internal error 2016041901. }
  function tch2reg(ch: tinschange): tsuperregister;
  const
    ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
  begin
    case ch of
      low(tinschange)..CH_REDI:
        Result := ch2reg[ch];
      succ(CH_REDI)..CH_WEDI:
        Result := ch2reg[tinschange(ord(ch) - ord(CH_REDI))];
      succ(CH_WEDI)..CH_RWEDI:
        Result := ch2reg[tinschange(ord(ch) - ord(CH_WEDI))];
      succ(CH_RWEDI)..CH_MEDI:
        Result := ch2reg[tinschange(ord(ch) - ord(CH_RWEDI))];
      else
        InternalError(2016041901);
    end;
  end;
  { Returns true if reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX. Note that only the register type and the super
    register are inspected here, not the sub register size. }
  function isgp32reg(reg: TRegister): boolean;
  begin
    Result := false;
    if getregtype(reg) <> R_INTREGISTER then
      exit;
    { warnings are switched off around the range test, as in the original
      code (presumably because one of the comparisons is always true) }
    {$push}{$warnings off}
    Result := not ((getsupreg(reg) < RS_EAX) or (getsupreg(reg) > RS_EBX));
    {$pop}
  end;
  { Returns true if at least one operand of p is a memory reference that has
    a segment register set, false otherwise. }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    opidx: longint;
  begin
    result:=false;
    for opidx:=0 to p.opercnt-1 do
      begin
        if p.oper[opidx]^.typ<>top_ref then
          continue;
        if p.oper[opidx]^.ref^.segment<>NR_NO then
          begin
            { first segment-qualified reference found, no need to look further }
            result:=true;
            exit;
          end;
      end;
  end;
  { Returns true if p has to be treated as reading the CPU flags:
      - an instruction whose InsProp change set contains any of the
        flag-reading (R/RW) entries, Ch_RFLAGScc or Ch_All;
      - any label.
    Every other tai type (and every other instruction) yields false. }
  function InstrReadsFlags(p: tai): boolean;
  begin
    case p.typ of
      ait_instruction:
        Result :=
          (InsProp[taicpu(p).opcode].Ch *
           [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
            Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
            Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]) <> [];
      ait_label:
        Result := true;
      else
        Result := false;
    end;
  end;
  { Pre-pass that runs over BlockStart..BlockEnd before the real peephole
    passes. Instructions that contain a segment-qualified memory reference are
    skipped. For the others it

      - removes "imul $1, reg" and turns "imul $1, reg1, reg2" into
        "mov reg1, reg2";
      - replaces 32 bit "imul $const, reg[, reg]" for const in 3, 5, 9 (and,
        when optimizing for cpu_386 or lower, 6, 10, 12) by lea/add sequences,
        unless optimizing for size or the next instruction is jo/jno;
      - hands sar/shr to PrePeepholeOptSxx;
      - temporarily rewrites "xor reg, reg" as "mov $0, reg". }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  var
    p,hp1: tai;
    tmpRef: treference;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      begin
                        if (taicpu(p).oper[0]^.val = 1) then
                          begin
                            if (taicpu(p).ops = 2) then
                              {remove "imul $1, reg"}
                              begin
                                hp1 := tai(p.Next);
                                asml.remove(p);
                                p.free;
                                p := hp1;
                                continue;
                              end
                            else
                              {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                              begin
                                hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                                InsertLLItem(p.previous, p.next, hp1);
                                p.free;
                                p := hp1;
                              end;
                          end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { lea/add do not set the overflow flag like imul does,
                            so leave the imul alone if a jo/jno follows }
                          (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1,[]);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction of the sequence
                                         first and insert it right after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the result has to end up in reg2 (oper[2]);
                                             writing it to reg1 would clobber the
                                             source and leave reg2 at 2*reg1 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { now build the FIRST instruction, which
                                         replaces the imul itself }
                                       reference_reset(tmpref,2,[]);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       { continue behind the two new instructions }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction (add) goes in after p }
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p, p.next, hp1);
                                        { first instruction (lea) replaces the imul }
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    if (current_settings.optimizecputype <= cpu_386)
                                      then
                                      begin
                                        { second instruction goes in after p }
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := taicpu(p).oper[2]^.reg;
                                            TmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p, p.next, hp1);
                                        { first instruction replaces the imul }
                                        reference_reset(tmpref,2,[]);
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := taicpu(p).oper[1]^.reg;
                                            TmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                            end;
                          end;
                      end;
                  end;
                A_SAR,A_SHR:
                  if PrePeepholeOptSxx(p) then
                    continue;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  384. { First pass of peephole optimizations }
  385. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debug helper: prints 'Ok' on standard output and always returns True. }
  function WriteOk : Boolean;
  begin
    WriteOk := True;
    writeln('Ok');
  end;
  391. var
  392. l : longint;
  393. p,hp1,hp2 : tai;
  394. hp3,hp4: tai;
  395. v:aint;
  396. TmpRef: TReference;
  397. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   the level parameter denotes how deep we have already followed the jump,
   to avoid endless loops with constructs such as "l5: ; jmp l5"

   hp is the jump whose target label (oper[0]^.ref^.symbol) is retargeted.
   Returns false if the trace was aborted (more than 20 levels deep, a jump
   to its own label was found, or a nested trace failed); true otherwise,
   including the case where the target label could not be located. }
  var p1, p2: tai;
      l: tasmlabel;

    { Skips SkipInstr/ait_align items following hp; if the next item after
      them is a label, returns true and stores its symbol in l. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).labsym;
        end
    end;

  begin
    GetfinalDestination := false;
    if level > 20 then
      exit;
    p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          begin
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                { move hp's reference from the old label to p1's final one }
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                begin
                  { p1 is never taken when arriving from hp, so hp can jump
                    straight to whatever follows p1 }
                  if not FindAnyLabel(p1,l) then
                    begin
  {$ifdef finaldestdebug}
                      insertllitem(p1,p1.next,tai_comment.Create(
                        strpnew('previous label inserted')));
  {$endif finaldestdebug}
                      current_asmdata.getjumplabel(l);
                      insertllitem(p1,p1.next,tai_label.Create(l));
                      tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      l.increfs;
                      { this won't work, since the new label isn't in the labeltable }
                      { so it will fail the rangecheck. Labeltable should become a   }
                      { hashtable to support this:                                   }
                      { GetFinalDestination(asml, hp);                               }
                    end
                  else
                    begin
  {$ifdef finaldestdebug}
                      insertllitem(p1,p1.next,tai_comment.Create(
                        strpnew('next label reused')));
  {$endif finaldestdebug}
                      l.increfs;
                      { hp no longer refers to its old target: release that
                        reference as well, like the other retargeting paths do }
                      tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := l;
                      if not GetFinalDestination(asml, hp,succ(level)) then
                        exit;
                    end;
                end;
          end;
      end;
    GetFinalDestination := true;
  end;
  491. begin
  492. p := BlockStart;
  493. ClearUsedRegs;
  494. while (p <> BlockEnd) Do
  495. begin
  496. UpDateUsedRegs(UsedRegs, tai(p.next));
  497. case p.Typ Of
  498. ait_instruction:
  499. begin
  500. current_filepos:=taicpu(p).fileinfo;
  501. if InsContainsSegRef(taicpu(p)) then
  502. begin
  503. p := tai(p.next);
  504. continue;
  505. end;
  506. { Handle Jmp Optimizations }
  507. if taicpu(p).is_jmp then
  508. begin
  509. {the following if-block removes all code between a jmp and the next label,
  510. because it can never be executed}
  511. if (taicpu(p).opcode = A_JMP) then
  512. begin
  513. hp2:=p;
  514. while GetNextInstruction(hp2, hp1) and
  515. (hp1.typ <> ait_label) do
  516. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  517. begin
  518. { don't kill start/end of assembler block,
  519. no-line-info-start/end etc }
  520. if hp1.typ<>ait_marker then
  521. begin
  522. asml.remove(hp1);
  523. hp1.free;
  524. end
  525. else
  526. hp2:=hp1;
  527. end
  528. else break;
  529. end;
  530. { remove jumps to a label coming right after them }
  531. if GetNextInstruction(p, hp1) then
  532. begin
  533. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  534. { TODO: FIXME removing the first instruction fails}
  535. (p<>blockstart) then
  536. begin
  537. hp2:=tai(hp1.next);
  538. asml.remove(p);
  539. p.free;
  540. p:=hp2;
  541. continue;
  542. end
  543. else
  544. begin
  545. if hp1.typ = ait_label then
  546. SkipLabels(hp1,hp1);
  547. if (tai(hp1).typ=ait_instruction) and
  548. (taicpu(hp1).opcode=A_JMP) and
  549. GetNextInstruction(hp1, hp2) and
  550. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  551. begin
  552. if taicpu(p).opcode=A_Jcc then
  553. begin
  554. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  555. tai_label(hp2).labsym.decrefs;
  556. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  557. { when free'ing hp1, the ref. isn't decresed, so we don't
  558. increase it (FK)
  559. taicpu(p).oper[0]^.ref^.symbol.increfs;
  560. }
  561. asml.remove(hp1);
  562. hp1.free;
  563. GetFinalDestination(asml, taicpu(p),0);
  564. end
  565. else
  566. begin
  567. GetFinalDestination(asml, taicpu(p),0);
  568. p:=tai(p.next);
  569. continue;
  570. end;
  571. end
  572. else
  573. GetFinalDestination(asml, taicpu(p),0);
  574. end;
  575. end;
  576. end
  577. else
  578. { All other optimizes }
  579. begin
  580. for l := 0 to taicpu(p).ops-1 Do
  581. if (taicpu(p).oper[l]^.typ = top_ref) then
  582. With taicpu(p).oper[l]^.ref^ Do
  583. begin
  584. if (base = NR_NO) and
  585. (index <> NR_NO) and
  586. (scalefactor in [0,1]) then
  587. begin
  588. base := index;
  589. index := NR_NO
  590. end
  591. end;
  592. case taicpu(p).opcode Of
  593. A_AND:
  594. if OptPass1And(p) then
  595. continue;
  596. A_CMP:
  597. begin
  598. { cmp register,$8000 neg register
  599. je target --> jo target
  600. .... only if register is deallocated before jump.}
  601. case Taicpu(p).opsize of
  602. S_B: v:=$80;
  603. S_W: v:=$8000;
  604. S_L: v:=aint($80000000);
  605. else
  606. internalerror(2013112905);
  607. end;
  608. if (taicpu(p).oper[0]^.typ=Top_const) and
  609. (taicpu(p).oper[0]^.val=v) and
  610. (Taicpu(p).oper[1]^.typ=top_reg) and
  611. GetNextInstruction(p, hp1) and
  612. (hp1.typ=ait_instruction) and
  613. (taicpu(hp1).opcode=A_Jcc) and
  614. (Taicpu(hp1).condition in [C_E,C_NE]) and
  615. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  616. begin
  617. Taicpu(p).opcode:=A_NEG;
  618. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  619. Taicpu(p).clearop(1);
  620. Taicpu(p).ops:=1;
  621. if Taicpu(hp1).condition=C_E then
  622. Taicpu(hp1).condition:=C_O
  623. else
  624. Taicpu(hp1).condition:=C_NO;
  625. continue;
  626. end;
  627. {
  628. @@2: @@2:
  629. .... ....
  630. cmp operand1,0
  631. jle/jbe @@1
  632. dec operand1 --> sub operand1,1
  633. jmp @@2 jge/jae @@2
  634. @@1: @@1:
  635. ... ....}
  636. if (taicpu(p).oper[0]^.typ = top_const) and
  637. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  638. (taicpu(p).oper[0]^.val = 0) and
  639. GetNextInstruction(p, hp1) and
  640. (hp1.typ = ait_instruction) and
  641. (taicpu(hp1).is_jmp) and
  642. (taicpu(hp1).opcode=A_Jcc) and
  643. (taicpu(hp1).condition in [C_LE,C_BE]) and
  644. GetNextInstruction(hp1,hp2) and
  645. (hp2.typ = ait_instruction) and
  646. (taicpu(hp2).opcode = A_DEC) and
  647. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  648. GetNextInstruction(hp2, hp3) and
  649. (hp3.typ = ait_instruction) and
  650. (taicpu(hp3).is_jmp) and
  651. (taicpu(hp3).opcode = A_JMP) and
  652. GetNextInstruction(hp3, hp4) and
  653. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  654. begin
  655. taicpu(hp2).Opcode := A_SUB;
  656. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  657. taicpu(hp2).loadConst(0,1);
  658. taicpu(hp2).ops:=2;
  659. taicpu(hp3).Opcode := A_Jcc;
  660. case taicpu(hp1).condition of
  661. C_LE: taicpu(hp3).condition := C_GE;
  662. C_BE: taicpu(hp3).condition := C_AE;
  663. end;
  664. asml.remove(p);
  665. asml.remove(hp1);
  666. p.free;
  667. hp1.free;
  668. p := hp2;
  669. continue;
  670. end
  671. end;
  672. A_FLD:
  673. begin
  674. if (taicpu(p).oper[0]^.typ = top_reg) and
  675. GetNextInstruction(p, hp1) and
  676. (hp1.typ = Ait_Instruction) and
  677. (taicpu(hp1).oper[0]^.typ = top_reg) and
  678. (taicpu(hp1).oper[1]^.typ = top_reg) and
  679. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  680. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  681. { change to
  682. fld reg fxxx reg,st
  683. fxxxp st, st1 (hp1)
  684. Remark: non commutative operations must be reversed!
  685. }
  686. begin
  687. case taicpu(hp1).opcode Of
  688. A_FMULP,A_FADDP,
  689. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  690. begin
  691. case taicpu(hp1).opcode Of
  692. A_FADDP: taicpu(hp1).opcode := A_FADD;
  693. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  694. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  695. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  696. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  697. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  698. end;
  699. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  700. taicpu(hp1).oper[1]^.reg := NR_ST;
  701. asml.remove(p);
  702. p.free;
  703. p := hp1;
  704. continue;
  705. end;
  706. end;
  707. end
  708. else
  709. if (taicpu(p).oper[0]^.typ = top_ref) and
  710. GetNextInstruction(p, hp2) and
  711. (hp2.typ = Ait_Instruction) and
  712. (taicpu(hp2).ops = 2) and
  713. (taicpu(hp2).oper[0]^.typ = top_reg) and
  714. (taicpu(hp2).oper[1]^.typ = top_reg) and
  715. (taicpu(p).opsize in [S_FS, S_FL]) and
  716. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  717. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  718. if GetLastInstruction(p, hp1) and
  719. (hp1.typ = Ait_Instruction) and
  720. ((taicpu(hp1).opcode = A_FLD) or
  721. (taicpu(hp1).opcode = A_FST)) and
  722. (taicpu(hp1).opsize = taicpu(p).opsize) and
  723. (taicpu(hp1).oper[0]^.typ = top_ref) and
  724. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  725. if ((taicpu(hp2).opcode = A_FMULP) or
  726. (taicpu(hp2).opcode = A_FADDP)) then
  727. { change to
  728. fld/fst mem1 (hp1) fld/fst mem1
  729. fld mem1 (p) fadd/
  730. faddp/ fmul st, st
  731. fmulp st, st1 (hp2) }
  732. begin
  733. asml.remove(p);
  734. p.free;
  735. p := hp1;
  736. if (taicpu(hp2).opcode = A_FADDP) then
  737. taicpu(hp2).opcode := A_FADD
  738. else
  739. taicpu(hp2).opcode := A_FMUL;
  740. taicpu(hp2).oper[1]^.reg := NR_ST;
  741. end
  742. else
  743. { change to
  744. fld/fst mem1 (hp1) fld/fst mem1
  745. fld mem1 (p) fld st}
  746. begin
  747. taicpu(p).changeopsize(S_FL);
  748. taicpu(p).loadreg(0,NR_ST);
  749. end
  750. else
  751. begin
  752. case taicpu(hp2).opcode Of
  753. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  754. { change to
  755. fld/fst mem1 (hp1) fld/fst mem1
  756. fld mem2 (p) fxxx mem2
  757. fxxxp st, st1 (hp2) }
  758. begin
  759. case taicpu(hp2).opcode Of
  760. A_FADDP: taicpu(p).opcode := A_FADD;
  761. A_FMULP: taicpu(p).opcode := A_FMUL;
  762. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  763. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  764. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  765. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  766. end;
  767. asml.remove(hp2);
  768. hp2.free;
  769. end
  770. end
  771. end
  772. end;
  773. A_FSTP,A_FISTP:
  774. if doFpuLoadStoreOpt(p) then
  775. continue;
  776. A_LEA:
  777. begin
  778. if OptPass1LEA(p) then
  779. continue;
  780. end;
  781. A_MOV:
  782. begin
  783. If OptPass1MOV(p) then
  784. Continue;
  785. end;
  786. A_MOVSX,
  787. A_MOVZX :
  788. begin
  789. If OptPass1Movx(p) then
  790. Continue
  791. end;
  792. (* should not be generated anymore by the current code generator
  793. A_POP:
  794. begin
  795. if target_info.system=system_i386_go32v2 then
  796. begin
  797. { Transform a series of pop/pop/pop/push/push/push to }
  798. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  799. { because I'm not sure whether they can cope with }
  800. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  801. { such a problem when using esp as frame pointer (JM) }
  802. if (taicpu(p).oper[0]^.typ = top_reg) then
  803. begin
  804. hp1 := p;
  805. hp2 := p;
  806. l := 0;
  807. while getNextInstruction(hp1,hp1) and
  808. (hp1.typ = ait_instruction) and
  809. (taicpu(hp1).opcode = A_POP) and
  810. (taicpu(hp1).oper[0]^.typ = top_reg) do
  811. begin
  812. hp2 := hp1;
  813. inc(l,4);
  814. end;
  815. getLastInstruction(p,hp3);
  816. l1 := 0;
  817. while (hp2 <> hp3) and
  818. assigned(hp1) and
  819. (hp1.typ = ait_instruction) and
  820. (taicpu(hp1).opcode = A_PUSH) and
  821. (taicpu(hp1).oper[0]^.typ = top_reg) and
  822. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  823. begin
  824. { change it to a two op operation }
  825. taicpu(hp2).oper[1]^.typ:=top_none;
  826. taicpu(hp2).ops:=2;
  827. taicpu(hp2).opcode := A_MOV;
  828. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  829. reference_reset(tmpref);
  830. tmpRef.base.enum:=R_INTREGISTER;
  831. tmpRef.base.number:=NR_STACK_POINTER_REG;
  832. convert_register_to_enum(tmpref.base);
  833. tmpRef.offset := l;
  834. taicpu(hp2).loadRef(0,tmpRef);
  835. hp4 := hp1;
  836. getNextInstruction(hp1,hp1);
  837. asml.remove(hp4);
  838. hp4.free;
  839. getLastInstruction(hp2,hp2);
  840. dec(l,4);
  841. inc(l1);
  842. end;
  843. if l <> -4 then
  844. begin
  845. inc(l,4);
  846. for l1 := l1 downto 1 do
  847. begin
  848. getNextInstruction(hp2,hp2);
  849. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  850. end
  851. end
  852. end
  853. end
  854. else
  855. begin
  856. if (taicpu(p).oper[0]^.typ = top_reg) and
  857. GetNextInstruction(p, hp1) and
  858. (tai(hp1).typ=ait_instruction) and
  859. (taicpu(hp1).opcode=A_PUSH) and
  860. (taicpu(hp1).oper[0]^.typ = top_reg) and
  861. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  862. begin
  863. { change it to a two op operation }
  864. taicpu(p).oper[1]^.typ:=top_none;
  865. taicpu(p).ops:=2;
  866. taicpu(p).opcode := A_MOV;
  867. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  868. reference_reset(tmpref);
  869. TmpRef.base.enum := R_ESP;
  870. taicpu(p).loadRef(0,TmpRef);
  871. asml.remove(hp1);
  872. hp1.free;
  873. end;
  874. end;
  875. end;
  876. *)
  877. A_PUSH:
  878. begin
  879. if (taicpu(p).opsize = S_W) and
  880. (taicpu(p).oper[0]^.typ = Top_Const) and
  881. GetNextInstruction(p, hp1) and
  882. (tai(hp1).typ = ait_instruction) and
  883. (taicpu(hp1).opcode = A_PUSH) and
  884. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  885. (taicpu(hp1).opsize = S_W) then
  886. begin
  887. taicpu(p).changeopsize(S_L);
  888. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  889. asml.remove(hp1);
  890. hp1.free;
  891. end;
  892. end;
  893. A_SHL, A_SAL:
  894. begin
  895. if (taicpu(p).oper[0]^.typ = Top_Const) and
  896. (taicpu(p).oper[1]^.typ = Top_Reg) and
  897. (taicpu(p).opsize = S_L) and
  898. (taicpu(p).oper[0]^.val <= 3) then
  899. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  900. begin
  901. TmpBool1 := True; {should we check the next instruction?}
  902. TmpBool2 := False; {have we found an add/sub which could be
  903. integrated in the lea?}
  904. reference_reset(tmpref,2,[]);
  905. TmpRef.index := taicpu(p).oper[1]^.reg;
  906. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  907. while TmpBool1 and
  908. GetNextInstruction(p, hp1) and
  909. (tai(hp1).typ = ait_instruction) and
  910. ((((taicpu(hp1).opcode = A_ADD) or
  911. (taicpu(hp1).opcode = A_SUB)) and
  912. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  913. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  914. (((taicpu(hp1).opcode = A_INC) or
  915. (taicpu(hp1).opcode = A_DEC)) and
  916. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  917. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  918. (not GetNextInstruction(hp1,hp2) or
  919. not instrReadsFlags(hp2)) Do
  920. begin
  921. TmpBool1 := False;
  922. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  923. begin
  924. TmpBool1 := True;
  925. TmpBool2 := True;
  926. case taicpu(hp1).opcode of
  927. A_ADD:
  928. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  929. A_SUB:
  930. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  931. end;
  932. asml.remove(hp1);
  933. hp1.free;
  934. end
  935. else
  936. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  937. (((taicpu(hp1).opcode = A_ADD) and
  938. (TmpRef.base = NR_NO)) or
  939. (taicpu(hp1).opcode = A_INC) or
  940. (taicpu(hp1).opcode = A_DEC)) then
  941. begin
  942. TmpBool1 := True;
  943. TmpBool2 := True;
  944. case taicpu(hp1).opcode of
  945. A_ADD:
  946. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  947. A_INC:
  948. inc(TmpRef.offset);
  949. A_DEC:
  950. dec(TmpRef.offset);
  951. end;
  952. asml.remove(hp1);
  953. hp1.free;
  954. end;
  955. end;
  956. if TmpBool2 or
  957. ((current_settings.optimizecputype < cpu_Pentium2) and
  958. (taicpu(p).oper[0]^.val <= 3) and
  959. not(cs_opt_size in current_settings.optimizerswitches)) then
  960. begin
  961. if not(TmpBool2) and
  962. (taicpu(p).oper[0]^.val = 1) then
  963. begin
  964. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  965. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  966. end
  967. else
  968. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  969. taicpu(p).oper[1]^.reg);
  970. InsertLLItem(p.previous, p.next, hp1);
  971. p.free;
  972. p := hp1;
  973. end;
  974. end
  975. else
  976. if (current_settings.optimizecputype < cpu_Pentium2) and
  977. (taicpu(p).oper[0]^.typ = top_const) and
  978. (taicpu(p).oper[1]^.typ = top_reg) then
  979. if (taicpu(p).oper[0]^.val = 1) then
  980. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  981. but faster on a 486, and pairable in both U and V pipes on the Pentium
  982. (unlike shl, which is only pairable in the U pipe)}
  983. begin
  984. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  985. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  986. InsertLLItem(p.previous, p.next, hp1);
  987. p.free;
  988. p := hp1;
  989. end
  990. else if (taicpu(p).opsize = S_L) and
  991. (taicpu(p).oper[0]^.val<= 3) then
  992. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  993. "shl $3, %reg" to "lea (,%reg,8), %reg}
  994. begin
  995. reference_reset(tmpref,2,[]);
  996. TmpRef.index := taicpu(p).oper[1]^.reg;
  997. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  998. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  999. InsertLLItem(p.previous, p.next, hp1);
  1000. p.free;
  1001. p := hp1;
  1002. end
  1003. end;
  1004. A_SETcc :
  1005. { changes
  1006.     setcc (funcres)              setcc reg
  1007.     movb (funcres), reg    to    leave/ret
  1008.     leave/ret }
  1009. begin
  1010. if (taicpu(p).oper[0]^.typ = top_ref) and
  1011. GetNextInstruction(p, hp1) and
  1012. GetNextInstruction(hp1, hp2) and
  1013. IsExitCode(hp2) and
  1014. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1015. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1016. not(assigned(current_procinfo.procdef.funcretsym) and
  1017. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1018. (hp1.typ = ait_instruction) and
  1019. (taicpu(hp1).opcode = A_MOV) and
  1020. (taicpu(hp1).opsize = S_B) and
  1021. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1022. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1023. begin
  1024. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1025. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1026. asml.remove(hp1);
  1027. hp1.free;
  1028. end
  1029. end;
  1030. A_SUB:
  1031. if OptPass1Sub(p) then
  1032. continue;
  1033. A_VMOVAPS,
  1034. A_VMOVAPD:
  1035. if OptPass1VMOVAP(p) then
  1036. continue;
  1037. A_VDIVSD,
  1038. A_VDIVSS,
  1039. A_VSUBSD,
  1040. A_VSUBSS,
  1041. A_VMULSD,
  1042. A_VMULSS,
  1043. A_VADDSD,
  1044. A_VADDSS,
  1045. A_VANDPD,
  1046. A_VANDPS,
  1047. A_VORPD,
  1048. A_VORPS,
  1049. A_VXORPD,
  1050. A_VXORPS:
  1051. if OptPass1VOP(p) then
  1052. continue;
  1053. A_MULSD,
  1054. A_MULSS,
  1055. A_ADDSD,
  1056. A_ADDSS:
  1057. if OptPass1OP(p) then
  1058. continue;
  1059. A_MOVAPD,
  1060. A_MOVAPS:
  1061. if OptPass1MOVAP(p) then
  1062. continue;
  1063. A_VMOVSD,
  1064. A_VMOVSS,
  1065. A_MOVSD,
  1066. A_MOVSS:
  1067. if OptPass1MOVXX(p) then
  1068. continue;
  1069. end;
  1070. end; { if is_jmp }
  1071. end;
  1072. end;
  1073. updateUsedRegs(UsedRegs,p);
  1074. p:=tai(p.next);
  1075. end;
  1076. end;
  { Second peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd).  Each instruction is dispatched on its opcode to
    the matching OptPass2* / DoFpuLoadStoreOpt handler.  A handler that
    returns true has its result honoured by re-entering the loop without
    stepping to p.next here; otherwise the walk advances by one item.
    Instructions for which InsContainsSegRef is true are skipped. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      curr : tai;
      handled : boolean;
    begin
      curr := BlockStart;
      ClearUsedRegs;
      while curr <> BlockEnd do
        begin
          { the used-register state is updated from the item following curr }
          UpdateUsedRegs(UsedRegs, tai(curr.next));
          if curr.typ = ait_instruction then
            begin
              if InsContainsSegRef(taicpu(curr)) then
                begin
                  { leave instructions with segment references untouched }
                  curr := tai(curr.next);
                  continue;
                end;
              case taicpu(curr).opcode of
                A_Jcc:
                  handled := OptPass2Jcc(curr);
                A_FSTP,
                A_FISTP:
                  handled := DoFpuLoadStoreOpt(curr);
                A_IMUL:
                  handled := OptPass2Imul(curr);
                A_JMP:
                  handled := OptPass2Jmp(curr);
                A_MOV:
                  handled := OptPass2MOV(curr);
                else
                  handled := false;
              end;
              { a successful handler means: look at curr again, do not advance }
              if handled then
                continue;
            end;
          curr := tai(curr.next);
        end;
    end;
  { Final peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd), followed by a call to OptReferences.

    CALL, CMP, MOV and TEST/OR are delegated to the PostPeephole* handlers;
    when a handler returns true the loop is re-entered without stepping to
    p.next here.  MOVZX is handled inline: on the Pentium, when not
    optimizing for size and register variables are off, "movzbl src,%reg32"
    is split into "xorl %reg32,%reg32; movb src,%reg8".

    Instructions for which InsContainsSegRef is true are skipped. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1: tai;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          { the used-register state is updated from the item following p }
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_CALL:
                    if PostPeepHoleOptCall(p) then
                      continue;
                  A_CMP:
                    if PostPeepholeOptCmp(p) then
                      continue;
                  A_MOV:
                    if PostPeepholeOptMov(p) then
                      continue;
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        { note: the "else" below pairs with the inner "if", i.e. it is
                          taken when the destination is a register but the source is not }
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  { The source must not live inside the destination
                                    (e.g. "movzbl %al,%eax"): the inserted xor would
                                    clear it before the movb reads it.  This mirrors
                                    the base/index checks of the memory variant below. }
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     (getsupreg(taicpu(p).oper[0]^.reg) <>
                                      getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      { xor goes in front of p; p itself becomes the movb }
                                      InsertLLItem(p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                                  (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                  (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                  not(cs_opt_size in current_settings.optimizerswitches) and
                                  IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                  (current_settings.optimizecputype = cpu_Pentium) and
                                  (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              { build the xor from the full 32 bit register before the
                                destination operand is narrowed to its low byte }
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    if PostPeepholeOptTestOr(p) then
                      continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
      OptReferences;
    end;
  { Driver of the assembler optimizer: walks the whole assembler list
    (asml), block by block, and runs the peephole passes on every block of
    "normal" instructions while stepping over inline assembler blocks
    (delimited by mark_AsmBlockStart / mark_AsmBlockEnd markers).

    Without cs_opt_level3 a single iteration over the list is made.  With
    it, the list is processed repeatedly; the loop stops once passNo > 2
    and no change was recorded, or at passNo = 4 at the latest.  Nothing in
    this routine ever sets the change flag to true.

    dfa is freed at the end. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      following: tai;
      passNo: longint;
      level3, sawChange, finalPass: boolean;
    begin
      level3 := cs_opt_level3 in current_settings.optimizerswitches;
      sawChange := false;
      passNo := 0;
      repeat
        { decide up front whether this iteration is the last one;
          the passNo = 4 test prevents endless loops }
        finalPass := not(level3) or
                     (not(sawChange) and (passNo > 2)) or
                     (passNo = 4);
        sawChange := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                { the very first iteration additionally runs the pre-peephole
                  opts and performs peephole pass 1 twice }
                if passNo = 0 then
                  begin
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                  end;
                PeepHoleOptPass1;
                { more peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it: step forward
                  at least once, then on until the matching end marker }
                BlockStart := tai(BlockStart.next);
                while not((BlockStart.typ = ait_marker) and
                          (tai_marker(BlockStart).Kind = mark_AsmBlockEnd)) do
                  BlockStart := tai(BlockStart.next);
                { BlockStart now contains a tai_marker(mark_AsmBlockEnd) }
                if not(GetNextInstruction(BlockStart, following)) or
                   ((following.typ = ait_marker) and
                    (tai_marker(following).Kind = mark_AsmBlockStart)) then
                  { nothing follows, or another assembler block follows
                    directly: continue from there }
                  BlockStart := following
                else
                  { There is no assembler block anymore after the current one,
                    so optimize the next block of "normal" instructions }
                  pass_1;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  { Unit initialization: assigns this unit's optimizer class to
    casmoptimizer.  Presumably this is the class reference the generic
    optimizer driver instantiates - TODO confirm against the declaring unit. }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.