aoptcpu.pas 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 assembler optimizer class. It derives from TX86AsmOptimizer and
    overrides the optimizer entry point and the individual peephole passes. }
  26. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  27. procedure Optimize; override;
  28. procedure PrePeepHoleOpts; override;
  29. procedure PeepHoleOptPass1; override;
  30. procedure PeepHoleOptPass2; override;
  31. procedure PostPeepHoleOpts; override;
  { Handles an "fstp f; fld f" / "fistp f; fild f" pair starting at p;
    returns true when the caller should "continue" with the updated p. }
  32. function DoFpuLoadStoreOpt(var p : tai) : boolean;
  { Returns true when the list item hp is an instruction that reads reg
    (explicitly through an operand or implicitly). }
  33. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  { Override that simply forwards to RegReadByInstruction. }
  34. function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  35. end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cpuinfo,
  42. aasmcpu,
  43. aoptutils,
  44. procinfo,
  45. cgutils,cgx86,
  46. { units we should get rid of: }
  47. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    { Examines the FPU store at p and the instruction following it.  When p
      stores to a memory reference and is directly followed by a load of the
      same reference with the same operand size ("fstp f; fld f" or
      "fistp f; fild f"), and the pair is an extended (S_FX) store/load that
      is immediately followed by exit code, targets a frame pointer relative
      location without index register and does not lie below the offset of the
      function result symbol (if there is one), then both instructions are
      removed, p is advanced to the exit code instruction and true is returned
      so that the caller can "continue".

      Turning the pair into a plain fst/fist is deliberately not done: the
      store rounds the value, so keeping the unrounded value on the FPU stack
      would change the result (and fst cannot store an extended value). }
    var
      reloadIns, exitIns: tai;
      isStoreReloadPair: boolean;
    begin
      Result := false;

      { p must store to memory ... }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      { ... and be followed by an instruction }
      if not getNextInstruction(p, reloadIns) then
        exit;
      if reloadIns.typ <> ait_instruction then
        exit;

      isStoreReloadPair :=
        ((taicpu(reloadIns).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and (taicpu(reloadIns).opcode = A_FILD));
      if not isStoreReloadPair then
        exit;

      { the reload must read exactly what was just stored }
      if taicpu(reloadIns).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(reloadIns).opsize <> taicpu(p).opsize then
        exit;
      if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reloadIns).oper[0]^.ref^) then
        exit;

      { dropping the store/load pair is only valid for extended because of
        rounding }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(reloadIns, exitIns) then
        exit;
      if exitIns.typ <> ait_instruction then
        exit;
      if not IsExitCode(exitIns) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { everything matched: drop both instructions and continue at the exit code }
      asml.remove(p);
      asml.remove(reloadIns);
      p.free;
      reloadIns.free;
      p := exitIns;
      removeLastDeallocForFuncRes(p);
      Result := true;
    end;
  { Converts a tinschange value that names one specific general purpose
    register (one of the CH_Rxxx, CH_Wxxx, CH_RWxxx or CH_Mxxx values) into
    the corresponding super register.

    The four groups are assumed to be laid out one after the other in the
    enumeration, each in the order EAX,ECX,EDX,EBX,ESP,EBP,ESI,EDI (this is
    what the range checks against CH_REDI/CH_WEDI/CH_RWEDI/CH_MEDI rely on).
    Every value outside CH_REAX..CH_MEDI triggers internal error 2016041901,
    including values below CH_REAX, which would otherwise index the lookup
    table out of range. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    begin
      if (ch < CH_REAX) then
        InternalError(2016041901)
      else if (ch <= CH_REDI) then
        tch2reg := ch2reg[ch]
      { for the other groups, rebase the position inside the group onto
        CH_REAX instead of relying on ord(CH_REAX) being 1 }
      else if (ch <= CH_WEDI) then
        tch2reg := ch2reg[tinschange(ord(CH_REAX) + ord(ch) - ord(CH_REDI) - 1)]
      else if (ch <= CH_RWEDI) then
        tch2reg := ch2reg[tinschange(ord(CH_REAX) + ord(ch) - ord(CH_WEDI) - 1)]
      else if (ch <= CH_MEDI) then
        tch2reg := ch2reg[tinschange(ord(CH_REAX) + ord(ch) - ord(CH_RWEDI) - 1)]
      else
        InternalError(2016041901)
    end;
  { Returns true when reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX.  Warnings are switched off around the range
    test, as in the original code. }
  function isgp32reg(reg: TRegister): boolean;
    begin
      {$push}{$warnings off}
      if getregtype(reg)<>R_INTREGISTER then
        Result:=false
      else
        Result:=(getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
      {$pop}
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    { Override that delegates to RegReadByInstruction: hp "loads from" reg
      exactly when RegReadByInstruction reports that hp reads reg. }
    begin
      InstructionLoadsFromReg:=RegReadByInstruction(reg,hp);
    end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    { Returns true when hp is an instruction that reads reg.

      - anything that is not an instruction never reads reg
      - CALL is treated as reading every register
      - MUL/IMUL/DIV/IDIV are handled explicitly because of their implicit
        accumulator (and, for DIV/IDIV, EDX) operands
      - for all other opcodes: a register used inside a memory reference is
        read; SSE MOVSD gets special treatment; everything else is decided
        from the change set Ch stored in insprop for the opcode }
    var
      ins: taicpu;
      opIdx: longint;
      regChanges: set of tinschange;

    { one-operand MUL/IMUL: reads its operand and, implicitly, the accumulator;
      the high byte sub register is not counted for the byte sized form }
    function ReadByOneOperandMul: boolean;
      begin
        ReadByOneOperandMul :=
          RegInOp(reg,ins.oper[0]^) or
          (
            (getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX) and
            ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
          );
      end;

    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      ins := taicpu(hp);
      case ins.opcode of
        A_CALL:
          Result := true;
        A_IMUL:
          case ins.ops of
            1:
              Result := ReadByOneOperandMul();
            2,3:
              Result := reginop(reg,ins.oper[0]^) or reginop(reg,ins.oper[1]^);
          end;
        A_MUL:
          Result := ReadByOneOperandMul();
        A_IDIV,A_DIV:
          { explicit operand, EAX always, EDX for everything but the byte form }
          Result :=
            RegInOp(reg,ins.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or
                ((getsupreg(reg)=RS_EDX) and (ins.opsize<>S_B))
              )
            );
        else
          begin
            { LEA is reported as not reading segment registers }
            if (ins.opcode=A_LEA) and is_segment_reg(reg) then
              begin
                Result := false;
                exit;
              end;

            { a register that is part of a memory reference is always read }
            for opIdx := 0 to ins.ops-1 do
              if (ins.oper[opIdx]^.typ = top_ref) and
                 RegInRef(reg,ins.oper[opIdx]^.ref^) then
                begin
                  Result := true;
                  exit;
                end;

            { special handling for SSE MOVSD: the destination only counts as
              read in the register to register form }
            if (ins.opcode=A_MOVSD) and (ins.ops>0) then
              begin
                if ins.ops<>2 then
                  internalerror(2017042702);
                Result :=
                  reginop(reg,ins.oper[0]^) or
                  (
                    (ins.oper[1]^.typ=top_reg) and (ins.oper[0]^.typ=top_reg) and
                    reginop(reg,ins.oper[1]^)
                  );
                exit;
              end;

            with insprop[ins.opcode] do
              begin
                { implicit reads of a general purpose register }
                if getregtype(reg)=R_INTREGISTER then
                  begin
                    case getsupreg(reg) of
                      RS_EAX:
                        regChanges := [Ch_REAX,Ch_RWEAX,Ch_MEAX];
                      RS_ECX:
                        regChanges := [Ch_RECX,Ch_RWECX,Ch_MECX];
                      RS_EDX:
                        regChanges := [Ch_REDX,Ch_RWEDX,Ch_MEDX];
                      RS_EBX:
                        regChanges := [Ch_REBX,Ch_RWEBX,Ch_MEBX];
                      RS_ESP:
                        regChanges := [Ch_RESP,Ch_RWESP,Ch_MESP];
                      RS_EBP:
                        regChanges := [Ch_REBP,Ch_RWEBP,Ch_MEBP];
                      RS_ESI:
                        regChanges := [Ch_RESI,Ch_RWESI,Ch_MESI];
                      RS_EDI:
                        regChanges := [Ch_REDI,Ch_RWEDI,Ch_MEDI];
                      else
                        regChanges := [];
                    end;
                    if regChanges*Ch<>[] then
                      begin
                        Result := true;
                        exit;
                      end;
                  end;

                { flags register, either as a whole or as a single flag }
                if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                  begin
                    { conditional instruction and a single flag is asked for:
                      decide from the condition code which flags are tested }
                    if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                      begin
                        case ins.condition of
                          C_A,C_NBE,       { CF=0 and ZF=0  }
                          C_BE,C_NA:       { CF=1 or ZF=1   }
                            Result:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                          C_AE,C_NB,C_NC,  { CF=0           }
                          C_B,C_NAE,C_C:   { CF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGCARRY];
                          C_NE,C_NZ,       { ZF=0           }
                          C_E,C_Z:         { ZF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGZERO];
                          C_G,C_NLE,       { ZF=0 and SF=OF }
                          C_LE,C_NG:       { ZF=1 or SF<>OF }
                            Result:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_GE,C_NL,       { SF=OF          }
                          C_L,C_NGE:       { SF<>OF         }
                            Result:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_NO,            { OF=0           }
                          C_O:             { OF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                          C_NP,C_PO,       { PF=0           }
                          C_P,C_PE:        { PF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGPARITY];
                          C_NS,            { SF=0           }
                          C_S:             { SF=1           }
                            Result:=getsubreg(reg) in [R_SUBFLAGSIGN];
                          else
                            internalerror(2017042701);
                        end;
                        if Result then
                          exit;
                      end;

                    { otherwise consult the per-flag entries of the change set }
                    case getsubreg(reg) of
                      R_SUBW,R_SUBD,R_SUBQ:
                        Result :=
                          [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                           Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                           Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                      R_SUBFLAGCARRY:
                        Result:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGPARITY:
                        Result:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGAUXILIARY:
                        Result:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGZERO:
                        Result:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGSIGN:
                        Result:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGOVERFLOW:
                        Result:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGINTERRUPT:
                        Result:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGDIRECTION:
                        Result:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      else
                        internalerror(2017042601);
                    end;
                    exit;
                  end;

                { opcodes flagged Ch_NoReadIfEqualRegs do not count as reading
                  when both operands are the same register }
                if (Ch_NoReadIfEqualRegs in Ch) and (ins.ops=2) and
                   (ins.oper[0]^.typ=top_reg) and (ins.oper[1]^.typ=top_reg) and
                   (ins.oper[0]^.reg=ins.oper[1]^.reg) then
                  exit;

                { explicit operands that are read, read/written or modified;
                  Result is still false here, evaluation is short-circuited in
                  operand order }
                Result :=
                  (([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,ins.oper[0]^)) or
                  (([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,ins.oper[1]^)) or
                  (([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,ins.oper[2]^));
              end;
          end;
      end;
    end;
  { Returns true if at least one operand of p is a memory reference whose
    segment register is set (i.e. differs from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opNo: longint;
    begin
      result:=false;
      for opNo:=0 to p.opercnt-1 do
        if (p.oper[opNo]^.typ=top_ref) and
           (p.oper[opNo]^.ref^.segment<>NR_NO) then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true if the list item p has to be treated as reading the flags:
    - an instruction whose insprop change set contains any "read" or
      "read/write" flag entry, Ch_RFLAGScc or Ch_All
    - a label (always reported as reading the flags)
    Every other kind of item yields false. }
  function InstrReadsFlags(p: tai): boolean;
    begin
      case p.typ of
        ait_instruction:
          InstrReadsFlags :=
            InsProp[taicpu(p).opcode].Ch*
              [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
               Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
               Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
        ait_label:
          InstrReadsFlags := true;
        else
          InstrReadsFlags := false;
      end;
    end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    { Pre-pass over the block BlockStart..BlockEnd.  Instructions that contain
      a memory operand with a segment override are skipped.  For the others:

      - IMUL with a constant and 32 bit register operands:
          * "imul $1,reg" is removed, "imul $1,reg1,reg2" becomes
            "mov reg1,reg2"
          * multiplications by 3, 5 and 9 become a single LEA
          * multiplications by 6, 10 and 12 become a two instruction
            LEA/ADD sequence, but only when optimizing for a 386 or lower
        The LEA conversions are not done when optimizing for size or when the
        next instruction is a jo/jno, since LEA does not set the overflow flag.
      - SAR/SHR are handed to PrePeepholeOptSxx
      - "xor reg,reg" is temporarily turned into "mov $0,reg"

      Fixes compared to the previous version:
      - "imul $6,reg1,reg2" wrote its second LEA to reg1 instead of reg2
      - the three operand expansions for 6 and 12 overwrite the destination
        before the source is read for the last time; when source and
        destination are the same register the two operand expansion is used
        now
      - the unused local variable l has been removed }
    var
      p,hp1: tai;
      tmpRef: treference;
      { true if the IMUL being expanded has three operands and its source and
        destination registers differ }
      separateDest: boolean;

    { replaces the IMUL at p by "lea (reg1,reg1,scale),dest", where dest is
      reg1 for the two operand form and reg2 for the three operand form;
      p points to the new LEA afterwards }
    procedure ReplaceImulBySingleLea(scale: byte);
      begin
        TmpRef.base := taicpu(p).oper[1]^.reg;
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.ScaleFactor := scale;
        if (taicpu(p).ops = 2) then
          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
        else
          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
        InsertLLItem(p.previous, p.next, hp1);
        p.free;
        p := hp1;
      end;

    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          (not(GetNextInstruction(p, hp1)) or
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1,[]);
                            { the condition above guarantees that a third
                              operand, if present, is a register }
                            separateDest :=
                              (taicpu(p).ops = 3) and
                              (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                            case taicpu(p).oper[0]^.val Of
                              3:
                                {imul 3, reg1, reg2 to
                                   lea (reg1,reg1,2), reg2
                                 imul 3, reg1 to
                                   lea (reg1,reg1,2), reg1}
                                ReplaceImulBySingleLea(2);
                              5:
                                {imul 5, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                 imul 5, reg1 to
                                   lea (reg1,reg1,4), reg1}
                                ReplaceImulBySingleLea(4);
                              6: begin
                                {imul 6, reg1, reg2 to
                                   lea (,reg1,2), reg2
                                   lea (reg2,reg1,4), reg2
                                 imul 6, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { second instruction of the sequence, inserted behind p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if separateDest then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the result has to end up in reg2, not in reg1 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { first instruction of the sequence, replaces p }
                                       reference_reset(tmpref,2,[]);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if separateDest then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9:
                                {imul 9, reg1, reg2 to
                                   lea (reg1,reg1,8), reg2
                                 imul 9, reg1 to
                                   lea (reg1,reg1,8), reg1}
                                ReplaceImulBySingleLea(8);
                              10: begin
                                 {imul 10, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                    add reg2, reg2
                                  imul 10, reg1 to
                                    lea (reg1,reg1,4), reg1
                                    add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p, p.next, hp1);
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                 {imul 12, reg1, reg2 to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction of the sequence, inserted behind p }
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if separateDest then
                                          begin
                                            TmpRef.base := taicpu(p).oper[2]^.reg;
                                            TmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p, p.next, hp1);
                                        { first instruction of the sequence, replaces p }
                                        reference_reset(tmpref,2,[]);
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if separateDest then
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := taicpu(p).oper[1]^.reg;
                                            TmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                            end;
                          end;
                    end;
                  A_SAR,A_SHR:
                    if PrePeepholeOptSxx(p) then
                      continue;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  598. { First pass of peephole optimizations }
  599. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  function WriteOk : Boolean;
    { debugging aid: prints 'Ok' to the standard output and always returns
      true, so it can be used inside a boolean expression }
    begin
      writeln('Ok');
      WriteOk:=True;
    end;
  605. var
  606. l : longint;
  607. p,hp1,hp2 : tai;
  608. hp3,hp4: tai;
  609. v:aint;
  610. TmpRef: TReference;
  611. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     the level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5"

     Returns false when the trace was given up (more than 20 levels deep, a
     jump to itself, or a nested trace that failed) and true otherwise.
     Whenever the jump is retargeted, the reference count of the old label is
     decreased and the one of the new label is increased. }
    var p1, p2: tai;
        l: tasmlabel;

    { looks past skippable items and alignments following hp; returns true
      and the label symbol in l if the next item after them is a label }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                     tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    l.increfs;
                    { the jump no longer refers to its old target, so release
                      that reference like the other retargeting paths do }
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  function DoSubAddOpt(var p: tai): Boolean;
    { Tries to fold the instruction preceding p into p.  p's operand 0 is read
      as a constant and its operand 1 as a register (presumably p is a
      "sub const,reg"; confirm against the caller).  The preceding instruction
      is stored in the enclosing procedure's hp1 and must have the same
      operand size:
        dec reg        -> p's constant is increased by one
        sub const2,reg -> const2 is added to p's constant
        add const2,reg -> const2 is subtracted from p's constant; if the
                          result is zero, p is removed as well
      The folded instruction is removed from the list and freed.  True is
      returned only when p itself was removed; p then refers to the last
      instruction before the removed one, or to the item that followed it if
      there is no such instruction. }

    { unlinks and frees the instruction found in front of p }
    procedure DropPrevious;
      begin
        asml.remove(hp1);
        hp1.free;
      end;

    begin
      Result := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode Of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              DropPrevious;
            end;
        A_SUB:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              DropPrevious;
            end;
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              DropPrevious;
              { both constants cancelled each other out: p is a no-op now }
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  Result := True;
                end
            end;
      end;
    end;
  749. begin
  750. p := BlockStart;
  751. ClearUsedRegs;
  752. while (p <> BlockEnd) Do
  753. begin
  754. UpDateUsedRegs(UsedRegs, tai(p.next));
  755. case p.Typ Of
  756. ait_instruction:
  757. begin
  758. current_filepos:=taicpu(p).fileinfo;
  759. if InsContainsSegRef(taicpu(p)) then
  760. begin
  761. p := tai(p.next);
  762. continue;
  763. end;
  764. { Handle Jmp Optimizations }
  765. if taicpu(p).is_jmp then
  766. begin
  767. {the following if-block removes all code between a jmp and the next label,
  768. because it can never be executed}
  769. if (taicpu(p).opcode = A_JMP) then
  770. begin
  771. hp2:=p;
  772. while GetNextInstruction(hp2, hp1) and
  773. (hp1.typ <> ait_label) do
  774. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  775. begin
  776. { don't kill start/end of assembler block,
  777. no-line-info-start/end etc }
  778. if hp1.typ<>ait_marker then
  779. begin
  780. asml.remove(hp1);
  781. hp1.free;
  782. end
  783. else
  784. hp2:=hp1;
  785. end
  786. else break;
  787. end;
  788. { remove jumps to a label coming right after them }
  789. if GetNextInstruction(p, hp1) then
  790. begin
  791. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  792. { TODO: FIXME removing the first instruction fails}
  793. (p<>blockstart) then
  794. begin
  795. hp2:=tai(hp1.next);
  796. asml.remove(p);
  797. p.free;
  798. p:=hp2;
  799. continue;
  800. end
  801. else
  802. begin
  803. if hp1.typ = ait_label then
  804. SkipLabels(hp1,hp1);
  805. if (tai(hp1).typ=ait_instruction) and
  806. (taicpu(hp1).opcode=A_JMP) and
  807. GetNextInstruction(hp1, hp2) and
  808. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  809. begin
  810. if taicpu(p).opcode=A_Jcc then
  811. begin
  812. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  813. tai_label(hp2).labsym.decrefs;
  814. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  815. { when free'ing hp1, the ref. isn't decreased, so we don't
  816. increase it (FK)
  817. taicpu(p).oper[0]^.ref^.symbol.increfs;
  818. }
  819. asml.remove(hp1);
  820. hp1.free;
  821. GetFinalDestination(asml, taicpu(p),0);
  822. end
  823. else
  824. begin
  825. GetFinalDestination(asml, taicpu(p),0);
  826. p:=tai(p.next);
  827. continue;
  828. end;
  829. end
  830. else
  831. GetFinalDestination(asml, taicpu(p),0);
  832. end;
  833. end;
  834. end
  835. else
  836. { All other optimizes }
  837. begin
  838. for l := 0 to taicpu(p).ops-1 Do
  839. if (taicpu(p).oper[l]^.typ = top_ref) then
  840. With taicpu(p).oper[l]^.ref^ Do
  841. begin
  842. if (base = NR_NO) and
  843. (index <> NR_NO) and
  844. (scalefactor in [0,1]) then
  845. begin
  846. base := index;
  847. index := NR_NO
  848. end
  849. end;
  850. case taicpu(p).opcode Of
  851. A_AND:
  852. if OptPass1And(p) then
  853. continue;
  854. A_CMP:
  855. begin
  856. { cmp register,$8000 neg register
  857. je target --> jo target
  858. .... only if register is deallocated before jump.}
  859. case Taicpu(p).opsize of
  860. S_B: v:=$80;
  861. S_W: v:=$8000;
  862. S_L: v:=aint($80000000);
  863. else
  864. internalerror(2013112905);
  865. end;
  866. if (taicpu(p).oper[0]^.typ=Top_const) and
  867. (taicpu(p).oper[0]^.val=v) and
  868. (Taicpu(p).oper[1]^.typ=top_reg) and
  869. GetNextInstruction(p, hp1) and
  870. (hp1.typ=ait_instruction) and
  871. (taicpu(hp1).opcode=A_Jcc) and
  872. (Taicpu(hp1).condition in [C_E,C_NE]) and
  873. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  874. begin
  875. Taicpu(p).opcode:=A_NEG;
  876. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  877. Taicpu(p).clearop(1);
  878. Taicpu(p).ops:=1;
  879. if Taicpu(hp1).condition=C_E then
  880. Taicpu(hp1).condition:=C_O
  881. else
  882. Taicpu(hp1).condition:=C_NO;
  883. continue;
  884. end;
  885. {
  886. @@2: @@2:
  887. .... ....
  888. cmp operand1,0
  889. jle/jbe @@1
  890. dec operand1 --> sub operand1,1
  891. jmp @@2 jge/jae @@2
  892. @@1: @@1:
  893. ... ....}
  894. if (taicpu(p).oper[0]^.typ = top_const) and
  895. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  896. (taicpu(p).oper[0]^.val = 0) and
  897. GetNextInstruction(p, hp1) and
  898. (hp1.typ = ait_instruction) and
  899. (taicpu(hp1).is_jmp) and
  900. (taicpu(hp1).opcode=A_Jcc) and
  901. (taicpu(hp1).condition in [C_LE,C_BE]) and
  902. GetNextInstruction(hp1,hp2) and
  903. (hp2.typ = ait_instruction) and
  904. (taicpu(hp2).opcode = A_DEC) and
  905. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  906. GetNextInstruction(hp2, hp3) and
  907. (hp3.typ = ait_instruction) and
  908. (taicpu(hp3).is_jmp) and
  909. (taicpu(hp3).opcode = A_JMP) and
  910. GetNextInstruction(hp3, hp4) and
  911. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  912. begin
  913. taicpu(hp2).Opcode := A_SUB;
  914. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  915. taicpu(hp2).loadConst(0,1);
  916. taicpu(hp2).ops:=2;
  917. taicpu(hp3).Opcode := A_Jcc;
  918. case taicpu(hp1).condition of
  919. C_LE: taicpu(hp3).condition := C_GE;
  920. C_BE: taicpu(hp3).condition := C_AE;
  921. end;
  922. asml.remove(p);
  923. asml.remove(hp1);
  924. p.free;
  925. hp1.free;
  926. p := hp2;
  927. continue;
  928. end
  929. end;
  930. A_FLD:
  931. begin
  932. if (taicpu(p).oper[0]^.typ = top_reg) and
  933. GetNextInstruction(p, hp1) and
  934. (hp1.typ = Ait_Instruction) and
  935. (taicpu(hp1).oper[0]^.typ = top_reg) and
  936. (taicpu(hp1).oper[1]^.typ = top_reg) and
  937. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  938. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  939. { change to
  940. fld reg fxxx reg,st
  941. fxxxp st, st1 (hp1)
  942. Remark: non commutative operations must be reversed!
  943. }
  944. begin
  945. case taicpu(hp1).opcode Of
  946. A_FMULP,A_FADDP,
  947. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  948. begin
  949. case taicpu(hp1).opcode Of
  950. A_FADDP: taicpu(hp1).opcode := A_FADD;
  951. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  952. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  953. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  954. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  955. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  956. end;
  957. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  958. taicpu(hp1).oper[1]^.reg := NR_ST;
  959. asml.remove(p);
  960. p.free;
  961. p := hp1;
  962. continue;
  963. end;
  964. end;
  965. end
  966. else
  967. if (taicpu(p).oper[0]^.typ = top_ref) and
  968. GetNextInstruction(p, hp2) and
  969. (hp2.typ = Ait_Instruction) and
  970. (taicpu(hp2).ops = 2) and
  971. (taicpu(hp2).oper[0]^.typ = top_reg) and
  972. (taicpu(hp2).oper[1]^.typ = top_reg) and
  973. (taicpu(p).opsize in [S_FS, S_FL]) and
  974. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  975. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  976. if GetLastInstruction(p, hp1) and
  977. (hp1.typ = Ait_Instruction) and
  978. ((taicpu(hp1).opcode = A_FLD) or
  979. (taicpu(hp1).opcode = A_FST)) and
  980. (taicpu(hp1).opsize = taicpu(p).opsize) and
  981. (taicpu(hp1).oper[0]^.typ = top_ref) and
  982. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  983. if ((taicpu(hp2).opcode = A_FMULP) or
  984. (taicpu(hp2).opcode = A_FADDP)) then
  985. { change to
  986. fld/fst mem1 (hp1) fld/fst mem1
  987. fld mem1 (p) fadd/
  988. faddp/ fmul st, st
  989. fmulp st, st1 (hp2) }
  990. begin
  991. asml.remove(p);
  992. p.free;
  993. p := hp1;
  994. if (taicpu(hp2).opcode = A_FADDP) then
  995. taicpu(hp2).opcode := A_FADD
  996. else
  997. taicpu(hp2).opcode := A_FMUL;
  998. taicpu(hp2).oper[1]^.reg := NR_ST;
  999. end
  1000. else
  1001. { change to
  1002. fld/fst mem1 (hp1) fld/fst mem1
  1003. fld mem1 (p) fld st}
  1004. begin
  1005. taicpu(p).changeopsize(S_FL);
  1006. taicpu(p).loadreg(0,NR_ST);
  1007. end
  1008. else
  1009. begin
  1010. case taicpu(hp2).opcode Of
  1011. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1012. { change to
  1013. fld/fst mem1 (hp1) fld/fst mem1
  1014. fld mem2 (p) fxxx mem2
  1015. fxxxp st, st1 (hp2) }
  1016. begin
  1017. case taicpu(hp2).opcode Of
  1018. A_FADDP: taicpu(p).opcode := A_FADD;
  1019. A_FMULP: taicpu(p).opcode := A_FMUL;
  1020. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1021. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1022. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1023. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1024. end;
  1025. asml.remove(hp2);
  1026. hp2.free;
  1027. end
  1028. end
  1029. end
  1030. end;
  1031. A_FSTP,A_FISTP:
  1032. if doFpuLoadStoreOpt(p) then
  1033. continue;
  1034. A_LEA:
  1035. begin
  1036. {removes seg register prefixes from LEA operations, as they
  1037. don't do anything}
  1038. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1039. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1040. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1041. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1042. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1043. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1044. begin
  1045. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1046. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1047. begin
  1048. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1049. taicpu(p).oper[1]^.reg);
  1050. InsertLLItem(p.previous,p.next, hp1);
  1051. p.free;
  1052. p := hp1;
  1053. continue;
  1054. end
  1055. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1056. begin
  1057. hp1 := tai(p.Next);
  1058. asml.remove(p);
  1059. p.free;
  1060. p := hp1;
  1061. continue;
  1062. end
  1063. { continue to use lea to adjust the stack pointer,
  1064. it is the recommended way, but only if not optimizing for size }
  1065. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1066. (cs_opt_size in current_settings.optimizerswitches) then
  1067. with taicpu(p).oper[0]^.ref^ do
  1068. if (base = taicpu(p).oper[1]^.reg) then
  1069. begin
  1070. l := offset;
  1071. if (l=1) and UseIncDec then
  1072. begin
  1073. taicpu(p).opcode := A_INC;
  1074. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1075. taicpu(p).ops := 1
  1076. end
  1077. else if (l=-1) and UseIncDec then
  1078. begin
  1079. taicpu(p).opcode := A_DEC;
  1080. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1081. taicpu(p).ops := 1;
  1082. end
  1083. else
  1084. begin
  1085. if (l<0) and (l<>-2147483648) then
  1086. begin
  1087. taicpu(p).opcode := A_SUB;
  1088. taicpu(p).loadConst(0,-l);
  1089. end
  1090. else
  1091. begin
  1092. taicpu(p).opcode := A_ADD;
  1093. taicpu(p).loadConst(0,l);
  1094. end;
  1095. end;
  1096. end;
  1097. end
  1098. (*
  1099. This is unsafe, lea doesn't modify the flags but "add"
  1100. does. This breaks webtbs/tw15694.pp. The above
  1101. transformations are also unsafe, but they don't seem to
  1102. be triggered by code that FPC generates (or that at
  1103. least does not occur in the tests...). This needs to be
  1104. fixed by checking for the liveness of the flags register.
  1105. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1106. begin
  1107. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1108. taicpu(p).oper[0]^.ref^.base);
  1109. InsertLLItem(asml,p.previous,p.next, hp1);
  1110. DebugMsg('Peephole Lea2AddBase done',hp1);
  1111. p.free;
  1112. p:=hp1;
  1113. continue;
  1114. end
  1115. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1116. begin
  1117. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1118. taicpu(p).oper[0]^.ref^.index);
  1119. InsertLLItem(asml,p.previous,p.next,hp1);
  1120. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1121. p.free;
  1122. p:=hp1;
  1123. continue;
  1124. end
  1125. *)
  1126. end;
  1127. A_MOV:
  1128. begin
  1129. If OptPass1MOV(p) then
  1130. Continue;
  1131. end;
  1132. A_MOVSX,
  1133. A_MOVZX :
  1134. begin
  1135. If OptPass1Movx(p) then
  1136. Continue
  1137. end;
  1138. (* should not be generated anymore by the current code generator
  1139. A_POP:
  1140. begin
  1141. if target_info.system=system_i386_go32v2 then
  1142. begin
  1143. { Transform a series of pop/pop/pop/push/push/push to }
  1144. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1145. { because I'm not sure whether they can cope with }
  1146. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1147. { such a problem when using esp as frame pointer (JM) }
  1148. if (taicpu(p).oper[0]^.typ = top_reg) then
  1149. begin
  1150. hp1 := p;
  1151. hp2 := p;
  1152. l := 0;
  1153. while getNextInstruction(hp1,hp1) and
  1154. (hp1.typ = ait_instruction) and
  1155. (taicpu(hp1).opcode = A_POP) and
  1156. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1157. begin
  1158. hp2 := hp1;
  1159. inc(l,4);
  1160. end;
  1161. getLastInstruction(p,hp3);
  1162. l1 := 0;
  1163. while (hp2 <> hp3) and
  1164. assigned(hp1) and
  1165. (hp1.typ = ait_instruction) and
  1166. (taicpu(hp1).opcode = A_PUSH) and
  1167. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1168. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1169. begin
  1170. { change it to a two op operation }
  1171. taicpu(hp2).oper[1]^.typ:=top_none;
  1172. taicpu(hp2).ops:=2;
  1173. taicpu(hp2).opcode := A_MOV;
  1174. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1175. reference_reset(tmpref);
  1176. tmpRef.base.enum:=R_INTREGISTER;
  1177. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1178. convert_register_to_enum(tmpref.base);
  1179. tmpRef.offset := l;
  1180. taicpu(hp2).loadRef(0,tmpRef);
  1181. hp4 := hp1;
  1182. getNextInstruction(hp1,hp1);
  1183. asml.remove(hp4);
  1184. hp4.free;
  1185. getLastInstruction(hp2,hp2);
  1186. dec(l,4);
  1187. inc(l1);
  1188. end;
  1189. if l <> -4 then
  1190. begin
  1191. inc(l,4);
  1192. for l1 := l1 downto 1 do
  1193. begin
  1194. getNextInstruction(hp2,hp2);
  1195. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1196. end
  1197. end
  1198. end
  1199. end
  1200. else
  1201. begin
  1202. if (taicpu(p).oper[0]^.typ = top_reg) and
  1203. GetNextInstruction(p, hp1) and
  1204. (tai(hp1).typ=ait_instruction) and
  1205. (taicpu(hp1).opcode=A_PUSH) and
  1206. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1207. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1208. begin
  1209. { change it to a two op operation }
  1210. taicpu(p).oper[1]^.typ:=top_none;
  1211. taicpu(p).ops:=2;
  1212. taicpu(p).opcode := A_MOV;
  1213. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1214. reference_reset(tmpref);
  1215. TmpRef.base.enum := R_ESP;
  1216. taicpu(p).loadRef(0,TmpRef);
  1217. asml.remove(hp1);
  1218. hp1.free;
  1219. end;
  1220. end;
  1221. end;
  1222. *)
  1223. A_PUSH:
  1224. begin
  1225. if (taicpu(p).opsize = S_W) and
  1226. (taicpu(p).oper[0]^.typ = Top_Const) and
  1227. GetNextInstruction(p, hp1) and
  1228. (tai(hp1).typ = ait_instruction) and
  1229. (taicpu(hp1).opcode = A_PUSH) and
  1230. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1231. (taicpu(hp1).opsize = S_W) then
  1232. begin
  1233. taicpu(p).changeopsize(S_L);
  1234. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1235. asml.remove(hp1);
  1236. hp1.free;
  1237. end;
  1238. end;
  1239. A_SHL, A_SAL:
  1240. begin
  1241. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1242. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1243. (taicpu(p).opsize = S_L) and
  1244. (taicpu(p).oper[0]^.val <= 3) then
  1245. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1246. begin
  1247. TmpBool1 := True; {should we check the next instruction?}
  1248. TmpBool2 := False; {have we found an add/sub which could be
  1249. integrated in the lea?}
  1250. reference_reset(tmpref,2,[]);
  1251. TmpRef.index := taicpu(p).oper[1]^.reg;
  1252. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1253. while TmpBool1 and
  1254. GetNextInstruction(p, hp1) and
  1255. (tai(hp1).typ = ait_instruction) and
  1256. ((((taicpu(hp1).opcode = A_ADD) or
  1257. (taicpu(hp1).opcode = A_SUB)) and
  1258. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1259. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1260. (((taicpu(hp1).opcode = A_INC) or
  1261. (taicpu(hp1).opcode = A_DEC)) and
  1262. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1263. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1264. (not GetNextInstruction(hp1,hp2) or
  1265. not instrReadsFlags(hp2)) Do
  1266. begin
  1267. TmpBool1 := False;
  1268. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1269. begin
  1270. TmpBool1 := True;
  1271. TmpBool2 := True;
  1272. case taicpu(hp1).opcode of
  1273. A_ADD:
  1274. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1275. A_SUB:
  1276. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1277. end;
  1278. asml.remove(hp1);
  1279. hp1.free;
  1280. end
  1281. else
  1282. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1283. (((taicpu(hp1).opcode = A_ADD) and
  1284. (TmpRef.base = NR_NO)) or
  1285. (taicpu(hp1).opcode = A_INC) or
  1286. (taicpu(hp1).opcode = A_DEC)) then
  1287. begin
  1288. TmpBool1 := True;
  1289. TmpBool2 := True;
  1290. case taicpu(hp1).opcode of
  1291. A_ADD:
  1292. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1293. A_INC:
  1294. inc(TmpRef.offset);
  1295. A_DEC:
  1296. dec(TmpRef.offset);
  1297. end;
  1298. asml.remove(hp1);
  1299. hp1.free;
  1300. end;
  1301. end;
  1302. if TmpBool2 or
  1303. ((current_settings.optimizecputype < cpu_Pentium2) and
  1304. (taicpu(p).oper[0]^.val <= 3) and
  1305. not(cs_opt_size in current_settings.optimizerswitches)) then
  1306. begin
  1307. if not(TmpBool2) and
  1308. (taicpu(p).oper[0]^.val = 1) then
  1309. begin
  1310. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1311. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1312. end
  1313. else
  1314. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1315. taicpu(p).oper[1]^.reg);
  1316. InsertLLItem(p.previous, p.next, hp1);
  1317. p.free;
  1318. p := hp1;
  1319. end;
  1320. end
  1321. else
  1322. if (current_settings.optimizecputype < cpu_Pentium2) and
  1323. (taicpu(p).oper[0]^.typ = top_const) and
  1324. (taicpu(p).oper[1]^.typ = top_reg) then
  1325. if (taicpu(p).oper[0]^.val = 1) then
  1326. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1327. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1328. (unlike shl, which is only pairable in the U pipe)}
  1329. begin
  1330. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1331. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1332. InsertLLItem(p.previous, p.next, hp1);
  1333. p.free;
  1334. p := hp1;
  1335. end
  1336. else if (taicpu(p).opsize = S_L) and
  1337. (taicpu(p).oper[0]^.val<= 3) then
  1338. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1339. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1340. begin
  1341. reference_reset(tmpref,2,[]);
  1342. TmpRef.index := taicpu(p).oper[1]^.reg;
  1343. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1344. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1345. InsertLLItem(p.previous, p.next, hp1);
  1346. p.free;
  1347. p := hp1;
  1348. end
  1349. end;
  1350. A_SETcc :
  1351. { changes
  1352. setcc (funcres) setcc reg
  1353. movb (funcres), reg to leave/ret
  1354. leave/ret }
  1355. begin
  1356. if (taicpu(p).oper[0]^.typ = top_ref) and
  1357. GetNextInstruction(p, hp1) and
  1358. GetNextInstruction(hp1, hp2) and
  1359. IsExitCode(hp2) and
  1360. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1361. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1362. not(assigned(current_procinfo.procdef.funcretsym) and
  1363. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1364. (hp1.typ = ait_instruction) and
  1365. (taicpu(hp1).opcode = A_MOV) and
  1366. (taicpu(hp1).opsize = S_B) and
  1367. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1368. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1369. begin
  1370. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1371. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1372. asml.remove(hp1);
  1373. hp1.free;
  1374. end
  1375. end;
  1376. A_SUB:
  1377. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1378. { * change "sub/add const1, reg" or "dec reg" followed by
  1379. "sub const2, reg" to one "sub ..., reg" }
  1380. begin
  1381. if (taicpu(p).oper[0]^.typ = top_const) and
  1382. (taicpu(p).oper[1]^.typ = top_reg) then
  1383. if (taicpu(p).oper[0]^.val = 2) and
  1384. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1385. { Don't do the sub/push optimization if the sub }
  1386. { comes from setting up the stack frame (JM) }
  1387. (not getLastInstruction(p,hp1) or
  1388. (hp1.typ <> ait_instruction) or
  1389. (taicpu(hp1).opcode <> A_MOV) or
  1390. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1391. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1392. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1393. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1394. begin
  1395. hp1 := tai(p.next);
  1396. while Assigned(hp1) and
  1397. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1398. not RegReadByInstruction(NR_ESP,hp1) and
  1399. not RegModifiedByInstruction(NR_ESP,hp1) do
  1400. hp1 := tai(hp1.next);
  1401. if Assigned(hp1) and
  1402. (tai(hp1).typ = ait_instruction) and
  1403. (taicpu(hp1).opcode = A_PUSH) and
  1404. (taicpu(hp1).opsize = S_W) then
  1405. begin
  1406. taicpu(hp1).changeopsize(S_L);
  1407. if taicpu(hp1).oper[0]^.typ=top_reg then
  1408. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1409. hp1 := tai(p.next);
  1410. asml.remove(p);
  1411. p.free;
  1412. p := hp1;
  1413. continue
  1414. end;
  1415. if DoSubAddOpt(p) then
  1416. continue;
  1417. end
  1418. else if DoSubAddOpt(p) then
  1419. continue
  1420. end;
  1421. A_VMOVAPS,
  1422. A_VMOVAPD:
  1423. if OptPass1VMOVAP(p) then
  1424. continue;
  1425. A_VDIVSD,
  1426. A_VDIVSS,
  1427. A_VSUBSD,
  1428. A_VSUBSS,
  1429. A_VMULSD,
  1430. A_VMULSS,
  1431. A_VADDSD,
  1432. A_VADDSS:
  1433. if OptPass1VOP(p) then
  1434. continue;
  1435. end;
  1436. end; { if is_jmp }
  1437. end;
  1438. end;
  1439. updateUsedRegs(UsedRegs,p);
  1440. p:=tai(p.next);
  1441. end;
  1442. end;
  { Second peephole pass over the current block, i.e. the list entries from
    BlockStart up to (but not including) BlockEnd.

    Every instruction is dispatched on its opcode to the matching helper
    (OptPass2Jcc, DoFpuLoadStoreOpt, OptPass2Imul, OptPass2Jmp, OptPass2MOV).
    When a helper returns True the loop does not step forward; it examines
    the cursor again (presumably because the helper has replaced or moved
    it -- confirm against the helper implementations). }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      curr : tai;
      Revisit : boolean;
    begin
      curr := BlockStart;
      ClearUsedRegs;
      while (curr <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(curr.next));
          if curr.typ = ait_instruction then
            begin
              { instructions for which InsContainsSegRef reports True are
                skipped untouched }
              if InsContainsSegRef(taicpu(curr)) then
                begin
                  curr := tai(curr.next);
                  continue;
                end;
              case taicpu(curr).opcode of
                A_Jcc:
                  Revisit := OptPass2Jcc(curr);
                A_FSTP, A_FISTP:
                  Revisit := DoFpuLoadStoreOpt(curr);
                A_IMUL:
                  Revisit := OptPass2Imul(curr);
                A_JMP:
                  Revisit := OptPass2Jmp(curr);
                A_MOV:
                  Revisit := OptPass2MOV(curr);
                else
                  Revisit := False;
              end;
              { a successful helper asks for the cursor to be looked at again }
              if Revisit then
                continue;
            end;
          curr := tai(curr.next);
        end;
    end;
  { Final peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd).  Optimize runs it only in its last loop iteration.

    Rewrites performed here:
      * call X; jmp Y   -> push Y; jmp X   (only for CPUs older than Pentium2,
                                            never with PIC)
      * call X; ret     -> jmp X           (only at optimization level 4,
                                            never with PIC)
      * cmp $0,%reg     -> test %reg,%reg
      * A_MOV is delegated to PostPeepholeOptMov
      * movzbl src,%reg -> xorl %reg,%reg; movb src,%reg8  (Pentium only)
      * a test/or that only re-derives the zero flag already produced by the
        preceding arithmetic instruction is removed

    Instructions for which InsContainsSegRef reports True are skipped. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { push the jump target as return address, then turn
                            the call itself into a jump }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call   procname
                          ret
                        by
                          jmp    procname

                        this should never hurt except when pic is used, not sure
                        how to handle it then

                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        { change "cmp $0, %reg" to "test %reg, %reg" }
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { re-examine p: it is now an A_TEST and may be
                            removable by the A_TEST branch below }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     { the inserted xor zeroes the destination
                                       before the byte move reads the source, so
                                       both must live in different registers
                                       (e.g. "movzbl %al,%eax" must be left alone);
                                       this mirrors the base/index checks of the
                                       memory-operand case below }
                                     (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                    { Change "movzbl %reg1, %reg2" to
                                      "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                      PentiumMMX }
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                             (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                             (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                             not(cs_opt_size in current_settings.optimizerswitches) and
                             IsGP32Reg(taicpu(p).oper[1]^.reg) and
                             (current_settings.optimizecputype = cpu_Pentium) and
                             (taicpu(p).opsize = S_BL) then
                            { changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                              Pentium and PentiumMMX }
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    { removes the line marked with (x) from the sequence
                        and/or/xor/add/sub/... $x, %y
                        test/or %y, %y  |  test $-1, %y    (x)
                        j(n)z _Label
                      as the first instruction already adjusts the ZF
                      %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                 { therefore, it's only safe to do this optimization for     }
                                 { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Driver of the assembler-level optimizer.

    The whole list (asml) is processed in one or more passes.  Without
    cs_opt_level3 exactly one pass is made; with it, passes are repeated
    until the loop condition below marks a pass as the final one.  Within a
    pass the list is walked block by block: pass_1 determines BlockEnd for the
    current BlockStart, the peephole stages run on that block, and assembler
    blocks (delimited by mark_AsmBlockStart / mark_AsmBlockEnd markers) are
    stepped over without being optimized. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      NextInstr: tai;
      PassNo: longint;
      MultiPass, Modified, FinalPass: boolean;
    begin
      MultiPass := (cs_opt_level3 in current_settings.optimizerswitches);
      PassNo := 0;
      Modified := false;
      repeat
        FinalPass :=
          not(MultiPass) or
          (not Modified and (PassNo > 2)) or
          { prevent endless loops }
          (PassNo = 4);
        Modified := false;

        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with Kind = mark_AsmBlockStart, }
        { or nil                                                                     }
        while Assigned(BlockStart) do
          begin
            if (cs_opt_peephole in current_settings.optimizerswitches) then
              begin
                if PassNo = 0 then
                  PrePeepHoleOpts;
                { Peephole optimizations }
                PeepHoleOptPass1;
                { pass 1 is run a second time, but only in the very first pass }
                if PassNo = 0 then
                  PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if FinalPass then
                  PostPeepHoleOpts;
              end;

            { Continue where we left off, BlockEnd is either the start of an }
            { assembler block or nil                                         }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
                { BlockStart now contains a tai_marker(mark_AsmBlockEnd) }
                if not GetNextInstruction(BlockStart, NextInstr) or
                   ((NextInstr.typ = ait_marker) and
                    (tai_marker(NextInstr).Kind = mark_AsmBlockStart)) then
                  { Nothing follows, or another assembler block follows }
                  { directly: continue from there                       }
                  BlockStart := NextInstr
                else
                  { There is no assembler block anymore after the current one, so }
                  { optimize the next block of "normal" instructions              }
                  pass_1;
              end;
          end;
        inc(PassNo);
      until FinalPass;
      dfa.free;
    end;
  { Unit initialization: store the TCpuAsmOptimizer class in the global
    casmoptimizer variable (presumably consulted by the target-independent
    optimizer driver to pick the optimizer class -- confirm in its unit). }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.