popt386.pas 107 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386;
  { Reports whether the super register of "reg" is still in use after the
    instruction p.

    UsedRegs is first updated via UpdateUsedRegs with the item following p.
    The result is true when the super register is in the updated set and
    either no next instruction can be found after p, or
    regLoadedWithNewValue reports false for that next instruction
    (presumably meaning the register is not simply overwritten there --
    confirm against daopt386).

    p is passed by value, so advancing it here does not affect the caller;
    UsedRegs is a var parameter and IS modified. }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
  var
    superReg: tsuperregister;
  begin
    superReg := getsupreg(reg);
    UpdateUsedRegs(UsedRegs, tai(p.Next));
    if not (superReg in UsedRegs) then
      RegUsedAfterInstruction := false
    else if not getNextInstruction(p, p) then
      { nothing follows: treat the register as still used }
      RegUsedAfterInstruction := true
    else
      RegUsedAfterInstruction := not regLoadedWithNewValue(superReg, false, p);
  end;
  { Removes a redundant FPU store/reload pair in front of a function exit.

    Pattern (p is the store, all operands are identical memory references
    of the same operand size):
        fstp  mem         fistp mem
        fld   mem   or    fild  mem
    If the operand size is S_FX, the pair is directly followed by LEAVE or
    RET, the reference is based on the current frame pointer without an
    index register, and its offset is not below the offset of the function
    result symbol (when there is one), both instructions are removed, p is
    set to the LEAVE/RET instruction and removeLastDeallocForFuncRes is
    called on it.

    Returns true if a "continue" should be done by the caller after this
    optimization (i.e. when the pair was removed), false otherwise.

    Note: turning "fstp f; fld f" into "fst f" for the other operand sizes
    is deliberately NOT done, because the store operation rounds (and fst
    cannot store an extended value). }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
  var
    reload, exitInstr: tai;
    isStoreReloadPair: boolean;
  begin
    doFpuLoadStoreOpt := false;

    { --- recognise the store / reload pair --- }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;
    isStoreReloadPair :=
      ((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
      ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD));
    if not isStoreReloadPair then
      exit;
    if taicpu(reload).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(reload).opsize <> taicpu(p).opsize then
      exit;
    if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { --- only extended values stored right before the function exit --- }
    if taicpu(p).opsize <> S_FX then
      exit;
    if not getNextInstruction(reload, exitInstr) then
      exit;
    if exitInstr.typ <> ait_instruction then
      exit;
    if (taicpu(exitInstr).opcode <> A_LEAVE) and
       (taicpu(exitInstr).opcode <> A_RET) then
      exit;
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset <
        tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    { --- drop both instructions and continue at the exit instruction --- }
    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := exitInstr;
    removeLastDeallocForFuncRes(asmL, p);
    doFpuLoadStoreOpt := true;
  end;
  { Pre-pass of the peephole optimizer: walks the list items from BlockStart
    up to (not including) BlockEnd and rewrites three instruction patterns:

      * "imul const, reg[, reg2]" (32 bit) with small constants is removed
        (const = 1, two operands), turned into a mov (const = 1, three
        operands) or into lea/add sequences (const in 3,5,6,9,10,12).
        The lea rewrite is only done when not optimizing for size, when the
        target cpu is below cpu_Pentium2 and when the instruction is not
        directly followed by a jo/jno (lea does not set the overflow flag
        the way imul does). The two-instruction forms (6, 10, 12) are
        additionally limited to optimizecputype <= cpu_386.
      * "shr/sar const1, x" followed by "shl const2, x" is turned into a
        shift/and pair, or into a single "and" when const1 = const2.
      * "xor reg, reg" is temporarily turned into "mov $0, reg" to make
        things easier for the CSE; pass 2 changes it back.

    asml       : list that owns the instructions; items are inserted into
                 and removed from it.
    BlockStart : first item to examine.
    BlockEnd   : item at which the walk stops. }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  var
    p, hp1: tai;
    tmpRef: treference;

    { Replaces the imul at p by "lea (reg1,reg1,scale), dest", where reg1 is
      operand 1 of the imul and dest is reg1 for the two operand form and
      operand 2 for the three operand form. Afterwards p points to the new
      lea. Expects tmpRef to have been reset by the caller. }
    procedure ReplaceImulBySingleLea(scale: byte);
    begin
      tmpRef.base := taicpu(p).oper[1]^.reg;
      tmpRef.index := taicpu(p).oper[1]^.reg;
      tmpRef.scalefactor := scale;
      if (taicpu(p).ops = 2) then
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg)
      else
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
      InsertLLItem(asml, p.previous, p.next, hp1);
      p.free;
      p := hp1;
    end;

    { Loads into operand 0 of instr a constant in which the lowest
      shiftCount bits are cleared and all other bits of the operand size
      (byte, word or long) are set. Other operand sizes are left untouched,
      exactly like the case statements this helper replaces. }
    procedure LoadLowBitsClearedMask(instr: taicpu; shiftCount: aint);
    var
      l: aint;
    begin
      l := (1 shl shiftCount) - 1;
      case instr.opsize of
        S_L: instr.loadConst(0, l xor aint($ffffffff));
        S_B: instr.loadConst(0, l xor $ff);
        S_W: instr.loadConst(0, l xor $ffff);
      end;
    end;

  begin
    p := BlockStart;
    while (p <> BlockEnd) do
      begin
        case p.typ of
          ait_instruction:
            begin
              case taicpu(p).opcode of
                A_IMUL:
                  { changes certain "imul const, %reg"'s to lea sequences }
                  begin
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          { remove "imul $1, reg" }
                          begin
                            hp1 := tai(p.next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                          begin
                            hp1 := taicpu.op_reg_reg(A_MOV, S_L,
                              taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.reg);
                            InsertLLItem(asml, p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                        ((taicpu(p).ops <= 2) or
                         (taicpu(p).oper[2]^.typ = top_reg)) and
                        (current_settings.optimizecputype < cpu_Pentium2) and
                        (taicpu(p).oper[0]^.val <= 12) and
                        not(cs_opt_size in current_settings.optimizerswitches) and
                        { not followed by an overflow check }
                        (not(GetNextInstruction(p, hp1)) or
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode = A_Jcc) and
                              (taicpu(hp1).condition in [C_O, C_NO])))) then
                        begin
                          reference_reset(tmpRef);
                          case taicpu(p).oper[0]^.val of
                            3:
                              { imul 3, reg1, reg2 to
                                  lea (reg1,reg1,2), reg2
                                imul 3, reg1 to
                                  lea (reg1,reg1,2), reg1 }
                              ReplaceImulBySingleLea(2);
                            5:
                              { imul 5, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                imul 5, reg1 to
                                  lea (reg1,reg1,4), reg1 }
                              ReplaceImulBySingleLea(4);
                            6:
                              begin
                                { imul 6, reg1, reg2 to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1 }
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { build the SECOND instruction first and
                                      insert it after the imul }
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := taicpu(p).oper[2]^.reg;
                                        tmpRef.scalefactor := 4;
                                        { the result has to end up in reg2
                                          (operand 2); writing it to reg1
                                          would clobber the source and leave
                                          only 2*reg1 in reg2 }
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml, p, p.next, hp1);
                                    { now the FIRST instruction, which takes
                                      the place of the imul }
                                    reference_reset(tmpRef);
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    tmpRef.scalefactor := 2;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := NR_NO;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml, p.previous, p.next, hp1);
                                    p.free;
                                    { continue behind the second new instruction }
                                    p := tai(hp1.next);
                                  end
                              end;
                            9:
                              { imul 9, reg1, reg2 to
                                  lea (reg1,reg1,8), reg2
                                imul 9, reg1 to
                                  lea (reg1,reg1,8), reg1 }
                              ReplaceImulBySingleLea(8);
                            10:
                              begin
                                { imul 10, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                    add reg2, reg2
                                  imul 10, reg1 to
                                    lea (reg1,reg1,4), reg1
                                    add reg1, reg1 }
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { second instruction (add), inserted after
                                      the imul }
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[2]^.reg, taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml, p, p.next, hp1);
                                    { first instruction (lea), replaces the imul }
                                    tmpRef.base := taicpu(p).oper[1]^.reg;
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    tmpRef.scalefactor := 4;
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                        taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                        taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml, p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end;
                            12:
                              begin
                                { imul 12, reg1, reg2 to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1 }
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { second instruction, inserted after the imul }
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := taicpu(p).oper[2]^.reg;
                                        tmpRef.scalefactor := 8;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := NR_NO;
                                        tmpRef.scalefactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml, p, p.next, hp1);
                                    { first instruction, replaces the imul }
                                    reference_reset(tmpRef);
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := NR_NO;
                                        tmpRef.scalefactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        tmpRef.scalefactor := 2;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml, p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  { changes the code sequence
                      shr/sar const1, x
                      shl const2, x
                    to either "sar/and", "shl/and" or just "and" depending on
                    const1 and const2 }
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2:
                            shift right by the difference, then clear the
                            lowest const2 bits }
                        begin
                          taicpu(p).loadConst(0,
                            taicpu(p).oper[0]^.val - taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          LoadLowBitsClearedMask(taicpu(hp1), taicpu(hp1).oper[0]^.val);
                        end
                      else if (taicpu(p).oper[0]^.val < taicpu(hp1).oper[0]^.val) and
                              not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 < const2:
                            clear the lowest const1 bits, then shift left by
                            the difference }
                        begin
                          taicpu(hp1).loadConst(0,
                            taicpu(hp1).oper[0]^.val - taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          LoadLowBitsClearedMask(taicpu(p), taicpu(p).oper[0]^.val);
                        end
                      else
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 = const2: a single "and" is enough
                          (also done when optimizing for size) }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            taicpu(p).opcode := A_AND;
                            LoadLowBitsClearedMask(taicpu(p), taicpu(p).oper[0]^.val);
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2              }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0, 0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  386. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  387. {First pass of peepholeoptimizations}
  388. var
  389. l : longint;
  390. p,hp1,hp2 : tai;
  391. hp3,hp4: tai;
  392. v:aint;
  393. TmpRef: TReference;
  394. UsedRegs, TmpUsedRegs: TRegSet;
  395. TmpBool1, TmpBool2: Boolean;
  { Starting at hp, steps over all following items whose type is in
    SkipInstr, ait_label or ait_align.

    Returns true and sets hp2 to the first item behind that run when such
    an item exists; otherwise returns false and sets hp2 to the last item
    that was reached (the end of the list). hp is passed by value, so the
    same variable may be used for both arguments. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  var
    following: tai;
  begin
    following := tai(hp.next);
    while assigned(following) and
          (following.typ in (SkipInstr + [ait_label, ait_align])) do
      begin
        hp := following;
        following := tai(hp.next);
      end;
    SkipLabels := assigned(following);
    if assigned(following) then
      hp2 := following
    else
      hp2 := hp;
  end;
  { Traces successive jumps to their final destination and sets it, e.g.

        je l1                je l3
        <code>               <code>
      l1:         becomes  l1:
        je l2                je l3
        <code>               <code>
      l2:                  l2:
        jmp l3               jmp l3

    asml  : list the instructions live in (a label may be inserted into it).
    hp    : the jump whose target symbol is to be redirected.
    level : denotes how deep we have already followed the jump, to avoid
            endless loops with constructs such as "l5: ; jmp l5".

    Returns false when the trace was aborted (level above 20, a jump that
    targets its own label, or a nested trace that returned false); returns
    true in all other cases, including when nothing could be changed.

    Whenever hp is retargeted, the reference count of the old label is
    decreased and that of the new label is increased. }
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  var
    p1, p2: tai;
    l: tasmlabel;

    { Looks behind hp, skipping items in SkipInstr and ait_align, for a
      label. Returns true and stores its symbol in l if the first other
      item is a label; returns false (l untouched) otherwise. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr + [ait_align])) do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).labsym;
        end
    end;

  begin
    GetFinalDestination := false;
    if level > 20 then
      exit;
    p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        SkipLabels(p1, p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives }
             { is unconditional or of the same type as hp, so continue        }
             (taicpu(p1).condition in [C_None, hp.condition]) or
             { the next instruction after the label where the jump hp arrives }
             { is the opposite of hp (so this one is never taken), but after  }
             { that one there is a branch that will be taken, so perform a    }
             { little hack: set p1 equal to this instruction (that's what the }
             { last SkipLabels is for, only works with short bool evaluation) }
             ((taicpu(p1).condition = inverse_cond(hp.condition)) and
              SkipLabels(p1, p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None, hp.condition]) and
              SkipLabels(p1, p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5" }
              if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                  tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                exit;
              if not GetFinalDestination(asml, taicpu(p1), succ(level)) then
                exit;
              tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
              hp.oper[0]^.ref^.symbol := taicpu(p1).oper[0]^.ref^.symbol;
              tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
            end
          else
            { the jump at the destination is the inverse of hp and is thus   }
            { never taken when arriving via hp: jump directly behind it      }
            if (taicpu(p1).condition = inverse_cond(hp.condition)) then
              if not FindAnyLabel(p1, l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml, p1, p1.next, tai_comment.Create(
                    strpnew('previous label inserted')));
  {$endif finaldestdebug}
                  current_asmdata.getjumplabel(l);
                  insertllitem(asml, p1, p1.next, tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  {   GetFinalDestination(asml, hp);                             }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml, p1, p1.next, tai_comment.Create(
                    strpnew('next label reused')));
  {$endif finaldestdebug}
                  l.increfs;
                  { hp no longer refers to its old target, so release that   }
                  { reference just like the other retargeting paths do       }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  if not GetFinalDestination(asml, hp, succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  { Folds the instruction preceding p into p, where p is expected to be a
    "sub const, reg" (operand 0 is read as a constant and operand 1 as a
    register -- the caller has to guarantee this).

    If the previous instruction has the same operand size and is
      dec reg         -> p's constant is increased by 1
      sub const2, reg -> const2 is added to p's constant
      add const2, reg -> const2 is subtracted from p's constant
    (always on the same register as p), that previous instruction is
    removed. In the "add" case, if p's constant became 0, p itself is
    removed as well; p is then set to the instruction before the removed
    one (or, if there is none, to the item that followed it) and the
    result is true. In all other cases the result is false.

    Uses hp1 and asml of the enclosing procedure. }
  function DoSubAddOpt(var p: tai): Boolean;

    { True if the previous instruction hp1 has the form "<op> const, reg"
      with reg being the register operand of p. }
    function PrevIsConstOnSameReg: boolean;
    begin
      PrevIsConstOnSameReg :=
        (taicpu(hp1).oper[0]^.typ = top_const) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg);
    end;

    { Unlinks and frees the previous instruction hp1. }
    procedure DropPrev;
    begin
      asml.remove(hp1);
      hp1.free;
    end;

  begin
    DoSubAddOpt := False;
    if not GetLastInstruction(p, hp1) then
      exit;
    if hp1.typ <> ait_instruction then
      exit;
    if taicpu(hp1).opsize <> taicpu(p).opsize then
      exit;

    case taicpu(hp1).opcode of
      A_DEC:
        if (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0, taicpu(p).oper[0]^.val + 1);
            DropPrev;
          end;
      A_SUB:
        if PrevIsConstOnSameReg then
          begin
            taicpu(p).loadConst(0,
              taicpu(p).oper[0]^.val + taicpu(hp1).oper[0]^.val);
            DropPrev;
          end;
      A_ADD:
        if PrevIsConstOnSameReg then
          begin
            taicpu(p).loadConst(0,
              taicpu(p).oper[0]^.val - taicpu(hp1).oper[0]^.val);
            DropPrev;
            if (taicpu(p).oper[0]^.val = 0) then
              begin
                { the add and the sub cancel each other out: drop p too }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                if not GetLastInstruction(hp1, p) then
                  p := hp1;
                DoSubAddOpt := True;
              end
          end;
    end;
  end;
  550. begin
  551. p := BlockStart;
  552. UsedRegs := [];
  553. while (p <> BlockEnd) Do
  554. begin
  555. UpDateUsedRegs(UsedRegs, tai(p.next));
  556. case p.Typ Of
  557. ait_instruction:
  558. begin
  559. { Handle Jmp Optimizations }
  560. if taicpu(p).is_jmp then
  561. begin
  562. {the following if-block removes all code between a jmp and the next label,
  563. because it can never be executed}
  564. if (taicpu(p).opcode = A_JMP) then
  565. begin
  566. while GetNextInstruction(p, hp1) and
  567. (hp1.typ <> ait_label) do
  568. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  569. begin
  570. asml.remove(hp1);
  571. hp1.free;
  572. end
  573. else break;
  574. end;
  575. { remove jumps to a label coming right after them }
  576. if GetNextInstruction(p, hp1) then
  577. begin
  578. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  579. {$warning FIXME removing the first instruction fails}
  580. (p<>blockstart) then
  581. begin
  582. hp2:=tai(hp1.next);
  583. asml.remove(p);
  584. p.free;
  585. p:=hp2;
  586. continue;
  587. end
  588. else
  589. begin
  590. if hp1.typ = ait_label then
  591. SkipLabels(hp1,hp1);
  592. if (tai(hp1).typ=ait_instruction) and
  593. (taicpu(hp1).opcode=A_JMP) and
  594. GetNextInstruction(hp1, hp2) and
  595. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  596. begin
  597. if taicpu(p).opcode=A_Jcc then
  598. begin
  599. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  600. tai_label(hp2).labsym.decrefs;
  601. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  602. { when free'ing hp1, the ref. isn't decresed, so we don't
  603. increase it (FK)
  604. taicpu(p).oper[0]^.ref^.symbol.increfs;
  605. }
  606. asml.remove(hp1);
  607. hp1.free;
  608. GetFinalDestination(asml, taicpu(p),0);
  609. end
  610. else
  611. begin
  612. GetFinalDestination(asml, taicpu(p),0);
  613. p:=tai(p.next);
  614. continue;
  615. end;
  616. end
  617. else
  618. GetFinalDestination(asml, taicpu(p),0);
  619. end;
  620. end;
  621. end
  622. else
  623. { All other optimizes }
  624. begin
  625. for l := 0 to taicpu(p).ops-1 Do
  626. if (taicpu(p).oper[l]^.typ = top_ref) then
  627. With taicpu(p).oper[l]^.ref^ Do
  628. begin
  629. if (base = NR_NO) and
  630. (index <> NR_NO) and
  631. (scalefactor in [0,1]) then
  632. begin
  633. base := index;
  634. index := NR_NO
  635. end
  636. end;
  637. case taicpu(p).opcode Of
  638. A_AND:
  639. begin
  640. if (taicpu(p).oper[0]^.typ = top_const) and
  641. (taicpu(p).oper[1]^.typ = top_reg) and
  642. GetNextInstruction(p, hp1) and
  643. (tai(hp1).typ = ait_instruction) and
  644. (taicpu(hp1).opcode = A_AND) and
  645. (taicpu(hp1).oper[0]^.typ = top_const) and
  646. (taicpu(hp1).oper[1]^.typ = top_reg) and
  647. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) then
  648. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  649. begin
  650. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  651. asml.remove(hp1);
  652. hp1.free;
  653. end
  654. else
  655. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  656. jump, but only if it's a conditional jump (PFV) }
  657. if (taicpu(p).oper[1]^.typ = top_reg) and
  658. GetNextInstruction(p, hp1) and
  659. (hp1.typ = ait_instruction) and
  660. (taicpu(hp1).is_jmp) and
  661. (taicpu(hp1).opcode<>A_JMP) and
  662. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  663. taicpu(p).opcode := A_TEST;
  664. end;
  665. A_CMP:
  666. begin
  667. { cmp register,$8000 neg register
  668. je target --> jo target
  669. .... only if register is deallocated before jump.}
  670. case Taicpu(p).opsize of
  671. S_B: v:=$80;
  672. S_W: v:=$8000;
  673. S_L: v:=aint($80000000);
  674. end;
  675. if (taicpu(p).oper[0]^.typ=Top_const) and
  676. (taicpu(p).oper[0]^.val=v) and
  677. (Taicpu(p).oper[1]^.typ=top_reg) and
  678. GetNextInstruction(p, hp1) and
  679. (hp1.typ=ait_instruction) and
  680. (taicpu(hp1).opcode=A_Jcc) and
  681. (Taicpu(hp1).condition in [C_E,C_NE]) and
  682. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  683. begin
  684. Taicpu(p).opcode:=A_NEG;
  685. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  686. Taicpu(p).clearop(1);
  687. Taicpu(p).ops:=1;
  688. if Taicpu(hp1).condition=C_E then
  689. Taicpu(hp1).condition:=C_O
  690. else
  691. Taicpu(hp1).condition:=C_NO;
  692. continue;
  693. end;
  694. {
  695. @@2: @@2:
  696. .... ....
  697. cmp operand1,0
  698. jle/jbe @@1
  699. dec operand1 --> sub operand1,1
  700. jmp @@2 jge/jae @@2
  701. @@1: @@1:
  702. ... ....}
  703. if (taicpu(p).oper[0]^.typ = top_const) and
  704. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  705. (taicpu(p).oper[0]^.val = 0) and
  706. GetNextInstruction(p, hp1) and
  707. (hp1.typ = ait_instruction) and
  708. (taicpu(hp1).is_jmp) and
  709. (taicpu(hp1).opcode=A_Jcc) and
  710. (taicpu(hp1).condition in [C_LE,C_BE]) and
  711. GetNextInstruction(hp1,hp2) and
  712. (hp2.typ = ait_instruction) and
  713. (taicpu(hp2).opcode = A_DEC) and
  714. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  715. GetNextInstruction(hp2, hp3) and
  716. (hp3.typ = ait_instruction) and
  717. (taicpu(hp3).is_jmp) and
  718. (taicpu(hp3).opcode = A_JMP) and
  719. GetNextInstruction(hp3, hp4) and
  720. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  721. begin
  722. taicpu(hp2).Opcode := A_SUB;
  723. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  724. taicpu(hp2).loadConst(0,1);
  725. taicpu(hp2).ops:=2;
  726. taicpu(hp3).Opcode := A_Jcc;
  727. case taicpu(hp1).condition of
  728. C_LE: taicpu(hp3).condition := C_GE;
  729. C_BE: taicpu(hp3).condition := C_AE;
  730. end;
  731. asml.remove(p);
  732. asml.remove(hp1);
  733. p.free;
  734. hp1.free;
  735. p := hp2;
  736. continue;
  737. end
  738. end;
  739. A_FLD:
  740. begin
  741. if (taicpu(p).oper[0]^.typ = top_reg) and
  742. GetNextInstruction(p, hp1) and
  743. (hp1.typ = Ait_Instruction) and
  744. (taicpu(hp1).oper[0]^.typ = top_reg) and
  745. (taicpu(hp1).oper[1]^.typ = top_reg) and
  746. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  747. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  748. { change to
  749. fld reg fxxx reg,st
  750. fxxxp st, st1 (hp1)
  751. Remark: non commutative operations must be reversed!
  752. }
  753. begin
  754. case taicpu(hp1).opcode Of
  755. A_FMULP,A_FADDP,
  756. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  757. begin
  758. case taicpu(hp1).opcode Of
  759. A_FADDP: taicpu(hp1).opcode := A_FADD;
  760. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  761. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  762. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  763. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  764. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  765. end;
  766. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  767. taicpu(hp1).oper[1]^.reg := NR_ST;
  768. asml.remove(p);
  769. p.free;
  770. p := hp1;
  771. continue;
  772. end;
  773. end;
  774. end
  775. else
  776. if (taicpu(p).oper[0]^.typ = top_ref) and
  777. GetNextInstruction(p, hp2) and
  778. (hp2.typ = Ait_Instruction) and
  779. (taicpu(hp2).ops = 2) and
  780. (taicpu(hp2).oper[0]^.typ = top_reg) and
  781. (taicpu(hp2).oper[1]^.typ = top_reg) and
  782. (taicpu(p).opsize in [S_FS, S_FL]) and
  783. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  784. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  785. if GetLastInstruction(p, hp1) and
  786. (hp1.typ = Ait_Instruction) and
  787. ((taicpu(hp1).opcode = A_FLD) or
  788. (taicpu(hp1).opcode = A_FST)) and
  789. (taicpu(hp1).opsize = taicpu(p).opsize) and
  790. (taicpu(hp1).oper[0]^.typ = top_ref) and
  791. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  792. if ((taicpu(hp2).opcode = A_FMULP) or
  793. (taicpu(hp2).opcode = A_FADDP)) then
  794. { change to
  795. fld/fst mem1 (hp1) fld/fst mem1
  796. fld mem1 (p) fadd/
  797. faddp/ fmul st, st
  798. fmulp st, st1 (hp2) }
  799. begin
  800. asml.remove(p);
  801. p.free;
  802. p := hp1;
  803. if (taicpu(hp2).opcode = A_FADDP) then
  804. taicpu(hp2).opcode := A_FADD
  805. else
  806. taicpu(hp2).opcode := A_FMUL;
  807. taicpu(hp2).oper[1]^.reg := NR_ST;
  808. end
  809. else
  810. { change to
  811. fld/fst mem1 (hp1) fld/fst mem1
  812. fld mem1 (p) fld st}
  813. begin
  814. taicpu(p).changeopsize(S_FL);
  815. taicpu(p).loadreg(0,NR_ST);
  816. end
  817. else
  818. begin
  819. case taicpu(hp2).opcode Of
  820. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  821. { change to
  822. fld/fst mem1 (hp1) fld/fst mem1
  823. fld mem2 (p) fxxx mem2
  824. fxxxp st, st1 (hp2) }
  825. begin
  826. case taicpu(hp2).opcode Of
  827. A_FADDP: taicpu(p).opcode := A_FADD;
  828. A_FMULP: taicpu(p).opcode := A_FMUL;
  829. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  830. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  831. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  832. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  833. end;
  834. asml.remove(hp2);
  835. hp2.free;
  836. end
  837. end
  838. end
  839. end;
  840. A_FSTP,A_FISTP:
  841. if doFpuLoadStoreOpt(asmL,p) then
  842. continue;
  843. A_LEA:
  844. begin
  845. {removes seg register prefixes from LEA operations, as they
  846. don't do anything}
  847. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  848. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  849. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  850. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  851. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  852. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  853. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  854. (taicpu(p).oper[0]^.ref^.offset = 0) then
  855. begin
  856. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  857. taicpu(p).oper[1]^.reg);
  858. InsertLLItem(asml,p.previous,p.next, hp1);
  859. p.free;
  860. p := hp1;
  861. continue;
  862. end
  863. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  864. begin
  865. hp1 := tai(p.Next);
  866. asml.remove(p);
  867. p.free;
  868. p := hp1;
  869. continue;
  870. end
  871. else
  872. with taicpu(p).oper[0]^.ref^ do
  873. if (base = taicpu(p).oper[1]^.reg) then
  874. begin
  875. l := offset;
  876. if (l=1) then
  877. begin
  878. taicpu(p).opcode := A_INC;
  879. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  880. taicpu(p).ops := 1
  881. end
  882. else if (l=-1) then
  883. begin
  884. taicpu(p).opcode := A_DEC;
  885. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  886. taicpu(p).ops := 1;
  887. end
  888. else
  889. begin
  890. taicpu(p).opcode := A_ADD;
  891. taicpu(p).loadConst(0,l);
  892. end;
  893. end;
  894. end;
  895. A_MOV:
  896. begin
  897. TmpUsedRegs := UsedRegs;
  898. if (taicpu(p).oper[1]^.typ = top_reg) and
  899. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  900. GetNextInstruction(p, hp1) and
  901. (tai(hp1).typ = ait_instruction) and
  902. (taicpu(hp1).opcode = A_MOV) and
  903. (taicpu(hp1).oper[0]^.typ = top_reg) and
  904. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  905. begin
  906. {we have "mov x, %treg; mov %treg, y}
  907. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  908. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  909. case taicpu(p).oper[0]^.typ Of
  910. top_reg:
  911. begin
  912. { change "mov %reg, %treg; mov %treg, y"
  913. to "mov %reg, y" }
  914. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  915. asml.remove(hp1);
  916. hp1.free;
  917. continue;
  918. end;
  919. top_ref:
  920. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  921. begin
  922. { change "mov mem, %treg; mov %treg, %reg"
  923. to "mov mem, %reg" }
  924. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  925. asml.remove(hp1);
  926. hp1.free;
  927. continue;
  928. end;
  929. end
  930. end
  931. else
  932. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  933. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  934. penalty}
  935. if (taicpu(p).oper[0]^.typ = top_reg) and
  936. (taicpu(p).oper[1]^.typ = top_reg) and
  937. GetNextInstruction(p,hp1) and
  938. (tai(hp1).typ = ait_instruction) and
  939. (taicpu(hp1).ops >= 1) and
  940. (taicpu(hp1).oper[0]^.typ = top_reg) and
  941. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  942. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  943. begin
  944. if ((taicpu(hp1).opcode = A_OR) or
  945. (taicpu(hp1).opcode = A_TEST)) and
  946. (taicpu(hp1).oper[1]^.typ = top_reg) and
  947. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  948. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  949. begin
  950. TmpUsedRegs := UsedRegs;
  951. { reg1 will be used after the first instruction, }
  952. { so update the allocation info }
  953. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  954. if GetNextInstruction(hp1, hp2) and
  955. (hp2.typ = ait_instruction) and
  956. taicpu(hp2).is_jmp and
  957. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  958. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  959. "test %reg1, %reg1; jxx" }
  960. begin
  961. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  962. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  963. asml.remove(p);
  964. p.free;
  965. p := hp1;
  966. continue
  967. end
  968. else
  969. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  970. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  971. begin
  972. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  973. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  974. end;
  975. end
  976. { else
  977. if (taicpu(p.next)^.opcode
  978. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  979. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  980. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  981. end
  982. else
  983. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  984. x >= RetOffset) as it doesn't do anything (it writes either to a
  985. parameter or to the temporary storage room for the function
  986. result)}
  987. if GetNextInstruction(p, hp1) and
  988. (tai(hp1).typ = ait_instruction) then
  989. if ((taicpu(hp1).opcode = A_LEAVE) or
  990. (taicpu(hp1).opcode = A_RET)) and
  991. (taicpu(p).oper[1]^.typ = top_ref) and
  992. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  993. not(assigned(current_procinfo.procdef.funcretsym) and
  994. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  995. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  996. (taicpu(p).oper[0]^.typ = top_reg) then
  997. begin
  998. asml.remove(p);
  999. p.free;
  1000. p := hp1;
  1001. RemoveLastDeallocForFuncRes(asmL,p);
  1002. end
  1003. else
  1004. if (taicpu(p).oper[0]^.typ = top_reg) and
  1005. (taicpu(p).oper[1]^.typ = top_ref) and
  1006. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1007. (taicpu(hp1).opcode = A_CMP) and
  1008. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1009. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1010. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  1011. begin
  1012. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1013. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1014. end;
  1015. { Next instruction is also a MOV ? }
  1016. if GetNextInstruction(p, hp1) and
  1017. (tai(hp1).typ = ait_instruction) and
  1018. (taicpu(hp1).opcode = A_MOV) and
  1019. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1020. begin
  1021. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1022. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1023. {mov reg1, mem1 or mov mem1, reg1
  1024. mov mem2, reg2 mov reg2, mem2}
  1025. begin
  1026. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1027. {mov reg1, mem1 or mov mem1, reg1
  1028. mov mem2, reg1 mov reg2, mem1}
  1029. begin
  1030. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1031. { Removes the second statement from
  1032. mov reg1, mem1/reg2
  1033. mov mem1/reg2, reg1 }
  1034. begin
  1035. if (taicpu(p).oper[0]^.typ = top_reg) then
  1036. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1037. asml.remove(hp1);
  1038. hp1.free;
  1039. end
  1040. else
  1041. begin
  1042. TmpUsedRegs := UsedRegs;
  1043. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1044. if (taicpu(p).oper[1]^.typ = top_ref) and
  1045. { mov reg1, mem1
  1046. mov mem2, reg1 }
  1047. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1048. GetNextInstruction(hp1, hp2) and
  1049. (hp2.typ = ait_instruction) and
  1050. (taicpu(hp2).opcode = A_CMP) and
  1051. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1052. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1053. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1054. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1055. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1056. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1057. { change to
  1058. mov reg1, mem1 mov reg1, mem1
  1059. mov mem2, reg1 cmp reg1, mem2
  1060. cmp mem1, reg1 }
  1061. begin
  1062. asml.remove(hp2);
  1063. hp2.free;
  1064. taicpu(hp1).opcode := A_CMP;
  1065. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1066. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1067. end;
  1068. end;
  1069. end
  1070. else
  1071. begin
  1072. tmpUsedRegs := UsedRegs;
  1073. if GetNextInstruction(hp1, hp2) and
  1074. (taicpu(p).oper[0]^.typ = top_ref) and
  1075. (taicpu(p).oper[1]^.typ = top_reg) and
  1076. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1077. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1078. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1079. (tai(hp2).typ = ait_instruction) and
  1080. (taicpu(hp2).opcode = A_MOV) and
  1081. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1082. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1083. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1084. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1085. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1086. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1087. { mov mem1, %reg1
  1088. mov %reg1, mem2
  1089. mov mem2, reg2
  1090. to:
  1091. mov mem1, reg2
  1092. mov reg2, mem2}
  1093. begin
  1094. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1095. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1096. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1097. asml.remove(hp2);
  1098. hp2.free;
  1099. end
  1100. else
  1101. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1102. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1103. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1104. { mov mem1, reg1 mov mem1, reg1
  1105. mov reg1, mem2 mov reg1, mem2
  1106. mov mem2, reg2 mov mem2, reg1
  1107. to: to:
  1108. mov mem1, reg1 mov mem1, reg1
  1109. mov mem1, reg2 mov reg1, mem2
  1110. mov reg1, mem2
  1111. or (if mem1 depends on reg1
  1112. and/or if mem2 depends on reg2)
  1113. to:
  1114. mov mem1, reg1
  1115. mov reg1, mem2
  1116. mov reg1, reg2
  1117. }
  1118. begin
  1119. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1120. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1121. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1122. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1123. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1124. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1125. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1126. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1127. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1128. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1129. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1130. end
  1131. else
  1132. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1133. begin
  1134. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1135. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1136. end
  1137. else
  1138. begin
  1139. asml.remove(hp2);
  1140. hp2.free;
  1141. end
  1142. end
  1143. end
  1144. else
  1145. (* {movl [mem1],reg1
  1146. movl [mem1],reg2
  1147. to:
  1148. movl [mem1],reg1
  1149. movl reg1,reg2 }
  1150. if (taicpu(p).oper[0]^.typ = top_ref) and
  1151. (taicpu(p).oper[1]^.typ = top_reg) and
  1152. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1153. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1154. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1155. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1156. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1157. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1158. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1159. else*)
  1160. { movl const1,[mem1]
  1161. movl [mem1],reg1
  1162. to:
  1163. movl const1,reg1
  1164. movl reg1,[mem1] }
  1165. if (taicpu(p).oper[0]^.typ = top_const) and
  1166. (taicpu(p).oper[1]^.typ = top_ref) and
  1167. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1168. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1169. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1170. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1171. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1172. begin
  1173. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1174. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1175. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1176. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1177. end
  1178. end;
  1179. if GetNextInstruction(p, hp1) and
  1180. (Tai(hp1).typ = ait_instruction) and
  1181. ((Taicpu(hp1).opcode = A_BTS) or (Taicpu(hp1).opcode = A_BTR)) and
  1182. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1183. GetNextInstruction(hp1, hp2) and
  1184. (Tai(hp2).typ = ait_instruction) and
  1185. (Taicpu(hp2).opcode = A_OR) and
  1186. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1187. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1188. (Taicpu(p).oper[0]^.typ = top_const) and (Taicpu(p).oper[0]^.val=0) and
  1189. (Taicpu(p).oper[1]^.typ = top_reg) and
  1190. (Taicpu(hp1).oper[1]^.typ = top_reg) and
  1191. (Taicpu(p).oper[1]^.reg=Taicpu(hp1).oper[1]^.reg) and
  1192. (Taicpu(hp2).oper[1]^.typ = top_reg) and
  1193. (Taicpu(p).oper[1]^.reg=Taicpu(hp2).oper[1]^.reg) then
  1194. {mov reg1,0
  1195. bts reg1,operand1 --> mov reg1,operand2
  1196. or reg1,operand2 bts reg1,operand1}
  1197. begin
  1198. Taicpu(hp2).opcode:=A_MOV;
  1199. asml.remove(hp1);
  1200. insertllitem(asml,hp2,hp2.next,hp1);
  1201. asml.remove(p);
  1202. p.free;
  1203. end;
  1204. end;
  1205. A_MOVZX:
  1206. begin
  1207. {removes superfluous And's after movzx's}
  1208. if (taicpu(p).oper[1]^.typ = top_reg) and
  1209. GetNextInstruction(p, hp1) and
  1210. (tai(hp1).typ = ait_instruction) and
  1211. (taicpu(hp1).opcode = A_AND) and
  1212. (taicpu(hp1).oper[0]^.typ = top_const) and
  1213. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1214. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1215. case taicpu(p).opsize Of
  1216. S_BL, S_BW:
  1217. if (taicpu(hp1).oper[0]^.val = $ff) then
  1218. begin
  1219. asml.remove(hp1);
  1220. hp1.free;
  1221. end;
  1222. S_WL:
  1223. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1224. begin
  1225. asml.remove(hp1);
  1226. hp1.free;
  1227. end;
  1228. end;
  1229. {changes some movzx constructs to faster synonyms (all examples
  1230. are given with eax/ax, but are also valid for other registers)}
  1231. if (taicpu(p).oper[1]^.typ = top_reg) then
  1232. if (taicpu(p).oper[0]^.typ = top_reg) then
  1233. case taicpu(p).opsize of
  1234. S_BW:
  1235. begin
  1236. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1237. not(cs_opt_size in current_settings.optimizerswitches) then
  1238. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1239. begin
  1240. taicpu(p).opcode := A_AND;
  1241. taicpu(p).changeopsize(S_W);
  1242. taicpu(p).loadConst(0,$ff);
  1243. end
  1244. else if GetNextInstruction(p, hp1) and
  1245. (tai(hp1).typ = ait_instruction) and
  1246. (taicpu(hp1).opcode = A_AND) and
  1247. (taicpu(hp1).oper[0]^.typ = top_const) and
  1248. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1249. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1250. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1251. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1252. begin
  1253. taicpu(p).opcode := A_MOV;
  1254. taicpu(p).changeopsize(S_W);
  1255. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1256. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1257. end;
  1258. end;
  1259. S_BL:
  1260. begin
  1261. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1262. not(cs_opt_size in current_settings.optimizerswitches) then
  1263. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1264. begin
  1265. taicpu(p).opcode := A_AND;
  1266. taicpu(p).changeopsize(S_L);
  1267. taicpu(p).loadConst(0,$ff)
  1268. end
  1269. else if GetNextInstruction(p, hp1) and
  1270. (tai(hp1).typ = ait_instruction) and
  1271. (taicpu(hp1).opcode = A_AND) and
  1272. (taicpu(hp1).oper[0]^.typ = top_const) and
  1273. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1274. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1275. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1276. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1277. begin
  1278. taicpu(p).opcode := A_MOV;
  1279. taicpu(p).changeopsize(S_L);
  1280. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1281. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1282. end
  1283. end;
  1284. S_WL:
  1285. begin
  1286. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1287. not(cs_opt_size in current_settings.optimizerswitches) then
  1288. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1289. begin
  1290. taicpu(p).opcode := A_AND;
  1291. taicpu(p).changeopsize(S_L);
  1292. taicpu(p).loadConst(0,$ffff);
  1293. end
  1294. else if GetNextInstruction(p, hp1) and
  1295. (tai(hp1).typ = ait_instruction) and
  1296. (taicpu(hp1).opcode = A_AND) and
  1297. (taicpu(hp1).oper[0]^.typ = top_const) and
  1298. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1299. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1300. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1301. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1302. begin
  1303. taicpu(p).opcode := A_MOV;
  1304. taicpu(p).changeopsize(S_L);
  1305. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1306. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1307. end;
  1308. end;
  1309. end
  1310. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1311. begin
  1312. if GetNextInstruction(p, hp1) and
  1313. (tai(hp1).typ = ait_instruction) and
  1314. (taicpu(hp1).opcode = A_AND) and
  1315. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1316. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1317. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1318. begin
  1319. taicpu(p).opcode := A_MOV;
  1320. case taicpu(p).opsize Of
  1321. S_BL:
  1322. begin
  1323. taicpu(p).changeopsize(S_L);
  1324. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1325. end;
  1326. S_WL:
  1327. begin
  1328. taicpu(p).changeopsize(S_L);
  1329. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1330. end;
  1331. S_BW:
  1332. begin
  1333. taicpu(p).changeopsize(S_W);
  1334. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1335. end;
  1336. end;
  1337. end;
  1338. end;
  1339. end;
  1340. (* should not be generated anymore by the current code generator
  1341. A_POP:
  1342. begin
  1343. if target_info.system=system_i386_go32v2 then
  1344. begin
  1345. { Transform a series of pop/pop/pop/push/push/push to }
  1346. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1347. { because I'm not sure whether they can cope with }
  1348. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1349. { such a problem when using esp as frame pointer (JM) }
  1350. if (taicpu(p).oper[0]^.typ = top_reg) then
  1351. begin
  1352. hp1 := p;
  1353. hp2 := p;
  1354. l := 0;
  1355. while getNextInstruction(hp1,hp1) and
  1356. (hp1.typ = ait_instruction) and
  1357. (taicpu(hp1).opcode = A_POP) and
  1358. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1359. begin
  1360. hp2 := hp1;
  1361. inc(l,4);
  1362. end;
  1363. getLastInstruction(p,hp3);
  1364. l1 := 0;
  1365. while (hp2 <> hp3) and
  1366. assigned(hp1) and
  1367. (hp1.typ = ait_instruction) and
  1368. (taicpu(hp1).opcode = A_PUSH) and
  1369. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1370. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1371. begin
  1372. { change it to a two op operation }
  1373. taicpu(hp2).oper[1]^.typ:=top_none;
  1374. taicpu(hp2).ops:=2;
  1375. taicpu(hp2).opcode := A_MOV;
  1376. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1377. reference_reset(tmpref);
  1378. tmpRef.base.enum:=R_INTREGISTER;
  1379. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1380. convert_register_to_enum(tmpref.base);
  1381. tmpRef.offset := l;
  1382. taicpu(hp2).loadRef(0,tmpRef);
  1383. hp4 := hp1;
  1384. getNextInstruction(hp1,hp1);
  1385. asml.remove(hp4);
  1386. hp4.free;
  1387. getLastInstruction(hp2,hp2);
  1388. dec(l,4);
  1389. inc(l1);
  1390. end;
  1391. if l <> -4 then
  1392. begin
  1393. inc(l,4);
  1394. for l1 := l1 downto 1 do
  1395. begin
  1396. getNextInstruction(hp2,hp2);
  1397. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1398. end
  1399. end
  1400. end
  1401. end
  1402. else
  1403. begin
  1404. if (taicpu(p).oper[0]^.typ = top_reg) and
  1405. GetNextInstruction(p, hp1) and
  1406. (tai(hp1).typ=ait_instruction) and
  1407. (taicpu(hp1).opcode=A_PUSH) and
  1408. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1409. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1410. begin
  1411. { change it to a two op operation }
  1412. taicpu(p).oper[1]^.typ:=top_none;
  1413. taicpu(p).ops:=2;
  1414. taicpu(p).opcode := A_MOV;
  1415. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1416. reference_reset(tmpref);
  1417. TmpRef.base.enum := R_ESP;
  1418. taicpu(p).loadRef(0,TmpRef);
  1419. asml.remove(hp1);
  1420. hp1.free;
  1421. end;
  1422. end;
  1423. end;
  1424. *)
  1425. A_PUSH:
  1426. begin
  1427. if (taicpu(p).opsize = S_W) and
  1428. (taicpu(p).oper[0]^.typ = Top_Const) and
  1429. GetNextInstruction(p, hp1) and
  1430. (tai(hp1).typ = ait_instruction) and
  1431. (taicpu(hp1).opcode = A_PUSH) and
  1432. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1433. (taicpu(hp1).opsize = S_W) then
  1434. begin
  1435. taicpu(p).changeopsize(S_L);
  1436. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1437. asml.remove(hp1);
  1438. hp1.free;
  1439. end;
  1440. end;
  1441. A_SHL, A_SAL:
  1442. begin
  1443. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1444. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1445. (taicpu(p).opsize = S_L) and
  1446. (taicpu(p).oper[0]^.val <= 3) then
  1447. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1448. begin
  1449. TmpBool1 := True; {should we check the next instruction?}
  1450. TmpBool2 := False; {have we found an add/sub which could be
  1451. integrated in the lea?}
  1452. reference_reset(tmpref);
  1453. TmpRef.index := taicpu(p).oper[1]^.reg;
  1454. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1455. while TmpBool1 and
  1456. GetNextInstruction(p, hp1) and
  1457. (tai(hp1).typ = ait_instruction) and
  1458. ((((taicpu(hp1).opcode = A_ADD) or
  1459. (taicpu(hp1).opcode = A_SUB)) and
  1460. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1461. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1462. (((taicpu(hp1).opcode = A_INC) or
  1463. (taicpu(hp1).opcode = A_DEC)) and
  1464. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1465. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1466. (not GetNextInstruction(hp1,hp2) or
  1467. not instrReadsFlags(hp2)) Do
  1468. begin
  1469. TmpBool1 := False;
  1470. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1471. begin
  1472. TmpBool1 := True;
  1473. TmpBool2 := True;
  1474. case taicpu(hp1).opcode of
  1475. A_ADD:
  1476. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1477. A_SUB:
  1478. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1479. end;
  1480. asml.remove(hp1);
  1481. hp1.free;
  1482. end
  1483. else
  1484. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1485. (((taicpu(hp1).opcode = A_ADD) and
  1486. (TmpRef.base = NR_NO)) or
  1487. (taicpu(hp1).opcode = A_INC) or
  1488. (taicpu(hp1).opcode = A_DEC)) then
  1489. begin
  1490. TmpBool1 := True;
  1491. TmpBool2 := True;
  1492. case taicpu(hp1).opcode of
  1493. A_ADD:
  1494. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1495. A_INC:
  1496. inc(TmpRef.offset);
  1497. A_DEC:
  1498. dec(TmpRef.offset);
  1499. end;
  1500. asml.remove(hp1);
  1501. hp1.free;
  1502. end;
  1503. end;
  1504. if TmpBool2 or
  1505. ((current_settings.optimizecputype < cpu_Pentium2) and
  1506. (taicpu(p).oper[0]^.val <= 3) and
  1507. not(cs_opt_size in current_settings.optimizerswitches)) then
  1508. begin
  1509. if not(TmpBool2) and
  1510. (taicpu(p).oper[0]^.val = 1) then
  1511. begin
  1512. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1513. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1514. end
  1515. else
  1516. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1517. taicpu(p).oper[1]^.reg);
  1518. InsertLLItem(asml,p.previous, p.next, hp1);
  1519. p.free;
  1520. p := hp1;
  1521. end;
  1522. end
  1523. else
  1524. if (current_settings.optimizecputype < cpu_Pentium2) and
  1525. (taicpu(p).oper[0]^.typ = top_const) and
  1526. (taicpu(p).oper[1]^.typ = top_reg) then
  1527. if (taicpu(p).oper[0]^.val = 1) then
  1528. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1529. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1530. (unlike shl, which is only pairable in the U pipe)}
  1531. begin
  1532. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1533. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1534. InsertLLItem(asml,p.previous, p.next, hp1);
  1535. p.free;
  1536. p := hp1;
  1537. end
  1538. else if (taicpu(p).opsize = S_L) and
  1539. (taicpu(p).oper[0]^.val<= 3) then
  1540. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1541. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1542. begin
  1543. reference_reset(tmpref);
  1544. TmpRef.index := taicpu(p).oper[1]^.reg;
  1545. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1546. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1547. InsertLLItem(asml,p.previous, p.next, hp1);
  1548. p.free;
  1549. p := hp1;
  1550. end
  1551. end;
  1552. A_SETcc :
  1553. { changes
  1554. setcc (funcres) setcc reg
  1555. movb (funcres), reg to leave/ret
  1556. leave/ret }
  1557. begin
  1558. if (taicpu(p).oper[0]^.typ = top_ref) and
  1559. GetNextInstruction(p, hp1) and
  1560. GetNextInstruction(hp1, hp2) and
  1561. (hp2.typ = ait_instruction) and
  1562. ((taicpu(hp2).opcode = A_LEAVE) or
  1563. (taicpu(hp2).opcode = A_RET)) and
  1564. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1565. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1566. not(assigned(current_procinfo.procdef.funcretsym) and
  1567. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1568. (hp1.typ = ait_instruction) and
  1569. (taicpu(hp1).opcode = A_MOV) and
  1570. (taicpu(hp1).opsize = S_B) and
  1571. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1572. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1573. begin
  1574. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1575. asml.remove(hp1);
  1576. hp1.free;
  1577. end
  1578. end;
  1579. A_SUB:
  1580. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1581. { * change "sub/add const1, reg" or "dec reg" followed by
  1582. "sub const2, reg" to one "sub ..., reg" }
  1583. begin
  1584. if (taicpu(p).oper[0]^.typ = top_const) and
  1585. (taicpu(p).oper[1]^.typ = top_reg) then
  1586. if (taicpu(p).oper[0]^.val = 2) and
  1587. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1588. { Don't do the sub/push optimization if the sub }
  1589. { comes from setting up the stack frame (JM) }
  1590. (not getLastInstruction(p,hp1) or
  1591. (hp1.typ <> ait_instruction) or
  1592. (taicpu(hp1).opcode <> A_MOV) or
  1593. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1594. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1595. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1596. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1597. begin
  1598. hp1 := tai(p.next);
  1599. while Assigned(hp1) and
  1600. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1601. not regReadByInstruction(RS_ESP,hp1) and
  1602. not regModifiedByInstruction(RS_ESP,hp1) do
  1603. hp1 := tai(hp1.next);
  1604. if Assigned(hp1) and
  1605. (tai(hp1).typ = ait_instruction) and
  1606. (taicpu(hp1).opcode = A_PUSH) and
  1607. (taicpu(hp1).opsize = S_W) then
  1608. begin
  1609. taicpu(hp1).changeopsize(S_L);
  1610. if taicpu(hp1).oper[0]^.typ=top_reg then
  1611. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1612. hp1 := tai(p.next);
  1613. asml.remove(p);
  1614. p.free;
  1615. p := hp1;
  1616. continue
  1617. end;
  1618. if DoSubAddOpt(p) then
  1619. continue;
  1620. end
  1621. else if DoSubAddOpt(p) then
  1622. continue
  1623. end;
  1624. end;
  1625. end; { if is_jmp }
  1626. end;
  1627. end;
  1628. updateUsedRegs(UsedRegs,p);
  1629. p:=tai(p.next);
  1630. end;
  1631. end;
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  { Tells whether hp1 is an arithmetic/logical instruction that operates
    directly on the register reg:
      - add/sub/or/xor/and/shl/shr/sar whose source operand is a constant or
        a register other than reg, and whose destination operand is reg
      - inc/dec whose single operand is reg
    Every other instruction yields False.  PeepHoleOptPass2 uses this to
    recognise the middle instruction of a
    "mov (ref),reg; op ...,reg; mov reg,(ref)" sequence. }
  var
    sourceOk: boolean;
  begin
    isFoldableArithOp := False;
    case hp1.opcode of
      A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
        begin
          { the source operand is inspected first; the destination operand
            is only looked at once the source has been accepted }
          case hp1.oper[0]^.typ of
            top_const:
              sourceOk := True;
            top_reg:
              sourceOk := (hp1.oper[0]^.reg <> reg);
            else
              sourceOk := False;
          end;
          if sourceOk then
            if hp1.oper[1]^.typ = top_reg then
              isFoldableArithOp := (hp1.oper[1]^.reg = reg);
        end;
      A_INC,A_DEC:
        if hp1.oper[0]^.typ = top_reg then
          isFoldableArithOp := (hp1.oper[0]^.reg = reg);
    end;
  end;
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  { Second peephole pass.  Walks the list entries from BlockStart up to, but
    not including, BlockEnd and rewrites the following patterns in place:
      - (only when USECMOV is defined) a conditional jump over a short run
        of "mov reg,reg" instructions, optionally followed by a jmp and an
        alternative run of movs, into CMOVcc instructions
      - fstp/fistp sequences, delegated to doFpuLoadStoreOpt
      - "mov reg1,reg2; imul y,reg2"  ->  "imul y,reg1,reg2"
      - "mov reg1,reg2; mov/zx/sx (reg2,..),reg2"
                                      ->  "mov/zx/sx (reg1,..),reg2"
      - "mov (ref),reg; op x,reg; mov reg,(ref)"  ->  "op x,(ref)"
    asml is the list the instructions belong to; instructions that become
    superfluous are removed from it and freed. }

  {$ifdef USECMOV}
    function CanBeCMOV(p : tai) : boolean;
    { True when p is a 16 or 32 bit "mov reg,reg" instruction, the only
      form that is turned into a CMOVcc by this pass. }
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
            { we can't use cmov ref,reg because
              ref could be nil and cmov still throws an exception
              if ref=nil but the mov isn't done (FK)
             or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
            }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;
  {$endif USECMOV}

  var
    p,hp1,hp2: tai;
  {$ifdef USECMOV}
    l : longint;
    condition : tasmcond;
    hp3: tai;
  {$endif USECMOV}
    UsedRegs, TmpUsedRegs: TRegSet;
  begin
    p := BlockStart;
    UsedRegs := [];
    while (p <> BlockEnd) Do
      begin
        UpdateUsedRegs(UsedRegs, tai(p.next));
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
  {$ifdef USECMOV}
                A_Jcc:
                  if (current_settings.cputype>=cpu_Pentium2) then
                    begin
                      { check for
                              jCC   xxx
                              [several movs]
                           xxx:
                      }
                      { l counts the movs directly following the jump }
                      l:=0;
                      GetNextInstruction(p, hp1);
                      while assigned(hp1) and
                        CanBeCMOV(hp1) and
                        { stop on labels }
                        not(hp1.typ=ait_label) do
                        begin
                          inc(l);
                          GetNextInstruction(hp1,hp1);
                        end;
                      if assigned(hp1) then
                        begin
                          if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                            begin
                              if (l<=4) and (l>0) then
                                begin
                                  { the movs must execute when the jump is NOT taken }
                                  condition:=inverse_cond(taicpu(p).condition);
                                  hp2:=p;
                                  GetNextInstruction(p,hp1);
                                  p:=hp1;
                                  repeat
                                    taicpu(hp1).opcode:=A_CMOVcc;
                                    taicpu(hp1).condition:=condition;
                                    GetNextInstruction(hp1,hp1);
                                  until not(assigned(hp1)) or
                                    not(CanBeCMOV(hp1));
                                  { wait with removing else GetNextInstruction could
                                    ignore the label if it was the only usage in the
                                    jump moved away }
                                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                  asml.remove(hp2);
                                  hp2.free;
                                  continue;
                                end;
                            end
                          else
                            begin
                              { check further for
                                     jCC   xxx
                                     [several movs 1]
                                     jmp   yyy
                                xxx:
                                     [several movs 2]
                                yyy:
                              }
                              { hp2 points to jmp yyy }
                              hp2:=hp1;
                              { skip hp1 to xxx }
                              GetNextInstruction(hp1, hp1);
                              if assigned(hp2) and
                                assigned(hp1) and
                                { "movs 1" must not be empty: the rewrite loop
                                  below always converts at least one
                                  instruction, so with no movs it would turn
                                  the jmp itself into a cmov and leave p
                                  pointing at it after it has been freed }
                                (l>0) and
                                (l<=3) and
                                (hp2.typ=ait_instruction) and
                                (taicpu(hp2).is_jmp) and
                                (taicpu(hp2).condition=C_None) and
                                { real label and jump, no further references to the
                                  label are allowed }
                                (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                 begin
                                   l:=0;
                                   { skip hp1 to [several movs 2] }
                                   GetNextInstruction(hp1, hp1);
                                   while assigned(hp1) and
                                     CanBeCMOV(hp1) do
                                     begin
                                       inc(l);
                                       GetNextInstruction(hp1, hp1);
                                     end;
                                   { hp1 points to yyy: }
                                   if assigned(hp1) and
                                     { "movs 2" must not be empty either, for the
                                       same reason: the second rewrite loop would
                                       otherwise overwrite the opcode of whatever
                                       follows xxx: }
                                     (l>0) and
                                     FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                     begin
                                        { movs 1 execute when the jump is not taken }
                                        condition:=inverse_cond(taicpu(p).condition);
                                        GetNextInstruction(p,hp1);
                                        hp3:=p;
                                        p:=hp1;
                                        repeat
                                          taicpu(hp1).opcode:=A_CMOVcc;
                                          taicpu(hp1).condition:=condition;
                                          GetNextInstruction(hp1,hp1);
                                        until not(assigned(hp1)) or
                                          not(CanBeCMOV(hp1));
                                        { hp2 is still at jmp yyy }
                                        GetNextInstruction(hp2,hp1);
                                        { hp1 is now at xxx: }
                                        { movs 2 execute when the jump is taken }
                                        condition:=inverse_cond(condition);
                                        GetNextInstruction(hp1,hp1);
                                        { hp1 is now at [several movs 2] }
                                        repeat
                                          taicpu(hp1).opcode:=A_CMOVcc;
                                          taicpu(hp1).condition:=condition;
                                          GetNextInstruction(hp1,hp1);
                                        until not(assigned(hp1)) or
                                          not(CanBeCMOV(hp1));
                                        {
                                        asml.remove(hp1.next)
                                        hp1.next.free;
                                        asml.remove(hp1);
                                        hp1.free;
                                        }
                                        { remove jCC }
                                        tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                        asml.remove(hp3);
                                        hp3.free;
                                        { remove jmp }
                                        tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                        asml.remove(hp2);
                                        hp2.free;
                                        continue;
                                     end;
                                 end;
                            end;
                        end;
                    end;
  {$endif USECMOV}
                A_FSTP,A_FISTP:
                  if doFpuLoadStoreOpt(asmL,p) then
                    continue;
                A_IMUL:
                  begin
                    { p must be "imul const/addr,reg" in its two operand form,
                      or the three operand form with both register operands
                      equal, and be preceded by a "mov reg1,reg" into that
                      same register }
                    if (taicpu(p).ops >= 2) and
                       ((taicpu(p).oper[0]^.typ = top_const) or
                        ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       ((taicpu(p).ops = 2) or
                        ((taicpu(p).oper[2]^.typ = top_reg) and
                         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                       getLastInstruction(p,hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_MOV) and
                       (taicpu(hp1).oper[0]^.typ = top_reg) and
                       (taicpu(hp1).oper[1]^.typ = top_reg) and
                       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                      { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                      begin
                        taicpu(p).ops := 3;
                        taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                        taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  end;
                A_MOV:
                  begin
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_Instruction) and
                       ((taicpu(hp1).opcode = A_MOV) or
                        (taicpu(hp1).opcode = A_MOVZX) or
                        (taicpu(hp1).opcode = A_MOVSX)) and
                       (taicpu(hp1).oper[0]^.typ = top_ref) and
                       (taicpu(hp1).oper[1]^.typ = top_reg) and
                       ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                        (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                       (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                      { mov reg1, reg2
                        mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
                      begin
                        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                        asml.remove(p);
                        p.free;
                        { look at the rewritten load again in the next iteration }
                        p := hp1;
                        continue;
                      end
                    else if (taicpu(p).oper[0]^.typ = top_ref) and
                      GetNextInstruction(p,hp1) and
                      (hp1.typ = ait_instruction) and
                      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
                      GetNextInstruction(hp1,hp2) and
                      (hp2.typ = ait_instruction) and
                      (taicpu(hp2).opcode = A_MOV) and
                      (taicpu(hp2).oper[0]^.typ = top_reg) and
                      (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                      (taicpu(hp2).oper[1]^.typ = top_ref) then
                      begin
                        { work on a copy so the register state tracked for
                          the main loop stays untouched }
                        TmpUsedRegs := UsedRegs;
                        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                        if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                            not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                  hp2, TmpUsedRegs))) then
                          { change   mov            (ref), reg            }
                          {          add/sub/or/... reg2/$const, reg      }
                          {          mov            reg, (ref)            }
                          {          # release reg                        }
                          { to       add/sub/or/... reg2/$const, (ref)    }
                          begin
                            case taicpu(hp1).opcode of
                              { inc/dec have a single operand, which becomes the ref }
                              A_INC,A_DEC:
                                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
                              else
                                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                            end;
                            asml.remove(p);
                            asml.remove(hp2);
                            p.free;
                            hp2.free;
                            p := hp1
                          end;
                      end
                  end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  { Final peephole pass.  Walks the list entries from BlockStart up to, but
    not including, BlockEnd and applies these rewrites in place:
      - "call x; jmp y"  ->  "push y; jmp x"   (optimizecputype below Pentium2)
      - "cmp $0,%reg"    ->  "test %reg,%reg"
      - "movzbl src,%reg" -> "xorl %reg,%reg; movb src,%reg8"
        (only when optimizing for the Pentium, not for size, and register
        variables are off)
      - removes a "test/or %y,%y" that directly follows an arithmetic
        instruction on %y and directly precedes a setcc/jcc/cmovcc, because
        the arithmetic instruction already set the needed flags
    asml is the list the instructions belong to; removed instructions are
    unlinked from it and freed. }
  var
    p,hp1,hp2: tai;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
                A_CALL:
                  { "call x; jmp y": push the jmp target and turn the call
                    itself into a jump, then drop the original jmp }
                  if (current_settings.optimizecputype < cpu_Pentium2) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_JMP) and
                     ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                    begin
                      hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                      InsertLLItem(asml, p.previous, p, hp2);
                      taicpu(p).opcode := A_JMP;
                      taicpu(p).is_jmp := true;
                      asml.remove(hp1);
                      hp1.free;
                    end;
                A_CMP:
                  begin
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = top_reg) then
                      {change "cmp $0, %reg" to "test %reg, %reg"}
                      begin
                        taicpu(p).opcode := A_TEST;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        { p is now a test: run it through the A_TEST case }
                        continue;
                      end;
                  end;
  (*
  Optimization is not safe; xor clears the carry flag.
  See test/tgadint64 in the test suite.
                A_MOV:
                  if (taicpu(p).oper[0]^.typ = Top_Const) and
                     (taicpu(p).oper[0]^.val = 0) and
                     (taicpu(p).oper[1]^.typ = Top_Reg) then
                    { change "mov $0, %reg" into "xor %reg, %reg" }
                    begin
                      taicpu(p).opcode := A_XOR;
                      taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                    end;
  *)
                A_MOVZX:
                  { if register vars are on, it's possible there is code like }
                  {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                  { so we can't safely replace the movzx then with xor/mov,   }
                  { since that would change the flags (JM)                    }
                  if not(cs_opt_regvar in current_settings.optimizerswitches) then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        if (taicpu(p).oper[0]^.typ = top_reg)
                          then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                     { the xor must not destroy the source: when
                                       both operands live in the same super
                                       register, e.g. "movzbl %al,%eax", the
                                       register would be zeroed before the movb
                                       reads it, so leave such a movzx alone }
                                     (getsupreg(taicpu(p).oper[0]^.reg) <>
                                      getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                        taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(asml,p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                        else if (taicpu(p).oper[0]^.typ = top_ref) and
                            { the address must not depend on the register
                              that the xor is about to clear }
                            (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                            (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                            not(cs_opt_size in current_settings.optimizerswitches) and
                            IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                            (current_settings.optimizecputype = cpu_Pentium) and
                            (taicpu(p).opsize = S_BL) then
                          {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                           Pentium and PentiumMMX}
                          begin
                            hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                        taicpu(p).oper[1]^.reg);
                            taicpu(p).opcode := A_MOV;
                            taicpu(p).changeopsize(S_B);
                            setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                            InsertLLItem(asml,p.previous, p, hp1);
                          end;
                    end;
                A_TEST, A_OR:
                  {removes the line marked with (x) from the sequence
                   and/or/xor/add/sub/... $x, %y
                   test/or %y, %y   (x)
                   j(n)z _Label
                      as the first instruction already adjusts the ZF}
                  begin
                    if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                      { hp1 = instruction before the test/or, hp2 = the
                        flag consumer right after it }
                      if GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         (hp2.typ = ait_instruction) and
                         ((taicpu(hp2).opcode = A_SETcc) or
                          (taicpu(hp2).opcode = A_Jcc) or
                          (taicpu(hp2).opcode = A_CMOVcc)) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        { the old single operand becomes the
                                          destination, the constant 1 the source }
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                        end
                  end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  2069. end.