popt386.pas 115 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386;
  { Tells whether instruction hp1 is an arithmetic/logic/shift operation that
    updates register reg in place.
      - add/sub/or/xor/and/shl/shr/sar: operand 0 must be a constant or a
        register other than reg, and operand 1 must be reg itself;
      - inc/dec: the single operand must be reg.
    Any other opcode yields false. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      sourceOk: boolean;
    begin
      result := false;
      case hp1.opcode of
        A_INC, A_DEC:
          begin
            if hp1.oper[0]^.typ = top_reg then
              result := (hp1.oper[0]^.reg = reg);
          end;
        A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
          begin
            { the source operand is inspected first; the destination operand
              is only looked at once the source has been accepted }
            case hp1.oper[0]^.typ of
              top_const:
                sourceOk := true;
              top_reg:
                sourceOk := (hp1.oper[0]^.reg <> reg);
              else
                sourceOk := false;
            end;
            if sourceOk and (hp1.oper[1]^.typ = top_reg) then
              result := (hp1.oper[1]^.reg = reg);
          end;
      end;
    end;
  { Updates UsedRegs with the allocation information found at p.Next and then
    reports whether reg is still in use after p: its super register must be
    in UsedRegs and, if an instruction follows p, that instruction must not
    load the register with a new value (as judged by regLoadedWithNewValue). }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    var
      sr: tsuperregister;
      follower: tai;
    begin
      sr := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      result := false;
      if not (sr in UsedRegs) then
        exit;
      if getNextInstruction(p, follower) then
        result := not regLoadedWithNewValue(sr, false, follower)
      else
        { nothing follows p, so the allocation information stands as it is }
        result := true;
    end;
  { Looks for "fstp mem; fld mem" or "fistp mem; fild mem" pairs (same operand
    size, same memory reference) starting at p.  When the pair stores an
    extended (S_FX) value into a frame-pointer based slot without index
    register, is immediately followed by leave/ret, and the slot does not lie
    below the function result variable, both instructions are deleted and p
    is moved to the leave/ret instruction.

    Returns true if a "continue" should be done by the caller after this
    optimization, i.e. when the pair was removed. }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    var
      hp1, hp2: tai;
    begin
      result := false;

      { --- stage 1: recognise the store/load pair --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if not (((taicpu(hp1).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
              ((taicpu(p).opcode = A_FISTP) and (taicpu(hp1).opcode = A_FILD))) then
        exit;
      if taicpu(hp1).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;
      if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
        exit;

      { --- stage 2: the pair may only be dropped right before leave/ret --- }
      { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(hp1, hp2) then
        exit;
      if hp2.typ <> ait_instruction then
        exit;
      if (taicpu(hp2).opcode <> A_LEAVE) and (taicpu(hp2).opcode <> A_RET) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      asml.remove(p);
      asml.remove(hp1);
      p.free;
      hp1.free;
      p := hp2;
      removeLastDeallocForFuncRes(asmL, p);
      result := true;

      (* can't be done because the store operation rounds
         (this was the alternative for pairs that are not removed above):
          { fst can't store an extended value! }
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
      *)
    end;
  { Returns true if p contains a memory operand with a segment set, i.e. if
    any of its operands is a reference whose segment register is not NR_NO. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      result := false;
      for opIdx := 0 to p.opercnt-1 do
        if (p.oper[opIdx]^.typ = top_ref) and
           (p.oper[opIdx]^.ref^.segment <> NR_NO) then
          begin
            result := true;
            exit;
          end;
    end;
  { Pre-pass of the peephole optimizer.  Walks the list from BlockStart up to
    (but not including) BlockEnd and rewrites single instructions / adjacent
    pairs:
      - "imul const, reg[, reg]" with small constants becomes mov/lea/add
        sequences (removed entirely for "imul $1, reg");
      - "shr/sar const1, x; shl const2, x" becomes shift+and or a single and;
      - "xor reg, reg" is temporarily turned into "mov $0, reg".
    Instructions with a segment override in a memory operand are skipped.

    asml       : list the instructions live in (items are inserted/removed here)
    BlockStart : first item to inspect
    BlockEnd   : item at which the walk stops }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
      { true when a three-operand imul has distinct source and destination
        registers, so the destination can be used as scratch while the source
        is still intact }
      threeOpForm: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        begin
                          if (taicpu(p).oper[0]^.val = 1) then
                            begin
                              if (taicpu(p).ops = 2) then
                                {remove "imul $1, reg"}
                                begin
                                  hp1 := tai(p.Next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue;
                                end
                              else
                                {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                                begin
                                  hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                                  InsertLLItem(asml, p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                            end
                          else if
                            ((taicpu(p).ops <= 2) or
                             (taicpu(p).oper[2]^.typ = Top_Reg)) and
                            (taicpu(p).oper[0]^.val <= 12) and
                            not(cs_opt_size in current_settings.optimizerswitches) and
                            { lea/add do not set the overflow flag the way imul
                              does, so leave the imul alone if a jo/jno follows }
                            (not(GetNextInstruction(p, hp1)) or
                             {GetNextInstruction(p, hp1) and}
                             not((tai(hp1).typ = ait_instruction) and
                                 ((taicpu(hp1).opcode=A_Jcc) and
                                  (taicpu(hp1).condition in [C_O,C_NO])))) then
                            begin
                              reference_reset(tmpref,1);
                              case taicpu(p).oper[0]^.val Of
                                3: begin
                                  {imul 3, reg1, reg2 to
                                     lea (reg1,reg1,2), reg2
                                   imul 3, reg1 to
                                     lea (reg1,reg1,2), reg1}
                                  TmpRef.base := taicpu(p).oper[1]^.reg;
                                  TmpRef.index := taicpu(p).oper[1]^.reg;
                                  TmpRef.ScaleFactor := 2;
                                  if (taicpu(p).ops = 2) then
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                  else
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                  InsertLLItem(asml,p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                                5: begin
                                  {imul 5, reg1, reg2 to
                                     lea (reg1,reg1,4), reg2
                                   imul 5, reg1 to
                                     lea (reg1,reg1,4), reg1}
                                  TmpRef.base := taicpu(p).oper[1]^.reg;
                                  TmpRef.index := taicpu(p).oper[1]^.reg;
                                  TmpRef.ScaleFactor := 4;
                                  if (taicpu(p).ops = 2) then
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                  else
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                  InsertLLItem(asml,p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                                6: begin
                                  {imul 6, reg1, reg2 (reg1 <> reg2) to
                                     lea (,reg1,2), reg2
                                     lea (reg2,reg1,4), reg2
                                   imul 6, reg1 (or imul 6, reg1, reg1) to
                                     lea (reg1,reg1,2), reg1
                                     add reg1, reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { the three-operand sequence reads reg1
                                        after reg2 has been written, so it is
                                        only usable when the registers differ;
                                        otherwise fall back to the in-place
                                        sequence }
                                      threeOpForm :=
                                        (taicpu(p).ops = 3) and
                                        (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                                      { build the SECOND instruction first and
                                        put it after p }
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if threeOpForm then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 4;
                                          { the result must end up in reg2
                                            (oper[2]), not in the source
                                            register }
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(asml,p, p.next, hp1);
                                      { now the FIRST instruction, which
                                        replaces p itself }
                                      reference_reset(tmpref,2);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 2;
                                      if threeOpForm then
                                        begin
                                          TmpRef.base := NR_NO;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                            taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(asml,p.previous, p.next, hp1);
                                      p.free;
                                      { continue behind the two new instructions }
                                      p := tai(hp1.next);
                                    end
                                end;
                                9: begin
                                  {imul 9, reg1, reg2 to
                                     lea (reg1,reg1,8), reg2
                                   imul 9, reg1 to
                                     lea (reg1,reg1,8), reg1}
                                  TmpRef.base := taicpu(p).oper[1]^.reg;
                                  TmpRef.index := taicpu(p).oper[1]^.reg;
                                  TmpRef.ScaleFactor := 8;
                                  if (taicpu(p).ops = 2) then
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                  else
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                  InsertLLItem(asml,p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                                10: begin
                                  {imul 10, reg1, reg2 to
                                     lea (reg1,reg1,4), reg2
                                     add reg2, reg2
                                   imul 10, reg1 to
                                     lea (reg1,reg1,4), reg1
                                     add reg1, reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      InsertLLItem(asml,p, p.next, hp1);
                                      TmpRef.base := taicpu(p).oper[1]^.reg;
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 4;
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(asml,p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                                12: begin
                                  {imul 12, reg1, reg2 (reg1 <> reg2) to
                                     lea (,reg1,4), reg2
                                     lea (reg2,reg1,8), reg2
                                   imul 12, reg1 (or imul 12, reg1, reg1) to
                                     lea (reg1,reg1,2), reg1
                                     lea (,reg1,4), reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { same aliasing restriction as for the
                                        factor 6 above }
                                      threeOpForm :=
                                        (taicpu(p).ops = 3) and
                                        (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                                      { second instruction, inserted after p }
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if threeOpForm then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(asml,p, p.next, hp1);
                                      { first instruction, replaces p }
                                      reference_reset(tmpref,2);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if threeOpForm then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(asml,p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end
                              end;
                            end;
                        end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 > const2 }
                          begin
                            { p keeps shifting right by the difference, hp1
                              becomes the "and" clearing the low const2 bits }
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2 }
                          begin
                            { hp1 keeps shifting left by the difference, p
                              becomes the "and" clearing the low const1 bits }
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2 }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              { a single "and" is enough, the shl is dropped }
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2 }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  419. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  420. {First pass of peepholeoptimizations}
  421. var
  422. l : longint;
  423. p,hp1,hp2 : tai;
  424. hp3,hp4: tai;
  425. v:aint;
  426. TmpRef: TReference;
  427. UsedRegs, TmpUsedRegs: TRegSet;
  428. TmpBool1, TmpBool2: Boolean;
  { Starting at hp, steps over every following item whose type is in
    SkipInstr, or is a label or an alignment.  If an item remains after that
    run, it is returned in hp2 and the result is true; otherwise hp2 is set to
    the last item reached and the result is false. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      cursor: tai;
    begin
      cursor := hp;
      while assigned(cursor.next) do
        begin
          if not (tai(cursor.next).typ in (SkipInstr + [ait_label,ait_align])) then
            break;
          cursor := tai(cursor.next);
        end;
      result := assigned(cursor.next);
      if result then
        hp2 := tai(cursor.next)
      else
        hp2 := cursor;
    end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  { Traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
     l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
     l2:                  l2:
       jmp l3               jmp l3

    asml  : list into which a new label may be inserted
    hp    : the jump whose target (oper[0]^.ref^.symbol) is updated in place;
            label reference counts are adjusted accordingly
    level : denotes how deep we have already followed the jump, to avoid
            endless loops with constructs such as "l5: ; jmp l5"

    Returns false if the depth limit (20) is exceeded, if the jump found at
    the destination targets the same label as hp, or if a nested call
    returned false; true otherwise (also when nothing was changed). }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Skips items in SkipInstr and alignments following hp; if the item after
      them is a label, returns its symbol in l and yields true. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                     tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { take the new reference first, then release the one on
                      the label hp pointed to so far, exactly like the other
                      retargeting branches do }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Tries to merge the instruction preceding p into p.  p is read as
    "<op> const, reg" (oper[0] constant, oper[1] register) -- presumably a
    SUB, going by the arithmetic below; confirm against the caller.
      dec reg        before p : p's constant is increased by one
      sub const, reg before p : the constants are added
      add const, reg before p : the earlier constant is subtracted; if the
                                result is 0, p itself is deleted as well
    In every merged case the preceding instruction is removed and freed.
    Returns true only when p was deleted; p then refers to the instruction
    before the removed one, or to the one after it if there is none before.
    Note: the enclosing routine's hp1 is used as working variable. }
  function DoSubAddOpt(var p: tai): Boolean;
    begin
      result := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;
      case taicpu(hp1).opcode Of
        A_DEC:
          begin
            if taicpu(hp1).oper[0]^.typ <> top_reg then
              exit;
            if taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg then
              exit;
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
            asml.remove(hp1);
            hp1.free;
          end;
        A_SUB:
          begin
            if taicpu(hp1).oper[0]^.typ <> top_const then
              exit;
            if taicpu(hp1).oper[1]^.typ <> top_reg then
              exit;
            if taicpu(hp1).oper[1]^.reg <> taicpu(p).oper[1]^.reg then
              exit;
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
          end;
        A_ADD:
          begin
            if taicpu(hp1).oper[0]^.typ <> top_const then
              exit;
            if taicpu(hp1).oper[1]^.typ <> top_reg then
              exit;
            if taicpu(hp1).oper[1]^.reg <> taicpu(p).oper[1]^.reg then
              exit;
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
            if taicpu(p).oper[0]^.val <> 0 then
              exit;
            { both instructions cancel out: drop p too and step back }
            hp1 := tai(p.next);
            asml.remove(p);
            p.free;
            if not GetLastInstruction(hp1, p) then
              p := hp1;
            result := True;
          end;
      end;
    end;
  583. begin
  584. p := BlockStart;
  585. UsedRegs := [];
  586. while (p <> BlockEnd) Do
  587. begin
  588. UpDateUsedRegs(UsedRegs, tai(p.next));
  589. case p.Typ Of
  590. ait_instruction:
  591. begin
  592. if InsContainsSegRef(taicpu(p)) then
  593. begin
  594. p := tai(p.next);
  595. continue;
  596. end;
  597. { Handle Jmp Optimizations }
  598. if taicpu(p).is_jmp then
  599. begin
  600. {the following if-block removes all code between a jmp and the next label,
  601. because it can never be executed}
  602. if (taicpu(p).opcode = A_JMP) then
  603. begin
  604. while GetNextInstruction(p, hp1) and
  605. (hp1.typ <> ait_label) do
  606. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  607. begin
  608. asml.remove(hp1);
  609. hp1.free;
  610. end
  611. else break;
  612. end;
  613. { remove jumps to a label coming right after them }
  614. if GetNextInstruction(p, hp1) then
  615. begin
  616. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  617. { TODO: FIXME removing the first instruction fails}
  618. (p<>blockstart) then
  619. begin
  620. hp2:=tai(hp1.next);
  621. asml.remove(p);
  622. p.free;
  623. p:=hp2;
  624. continue;
  625. end
  626. else
  627. begin
  628. if hp1.typ = ait_label then
  629. SkipLabels(hp1,hp1);
  630. if (tai(hp1).typ=ait_instruction) and
  631. (taicpu(hp1).opcode=A_JMP) and
  632. GetNextInstruction(hp1, hp2) and
  633. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  634. begin
  635. if taicpu(p).opcode=A_Jcc then
  636. begin
  637. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  638. tai_label(hp2).labsym.decrefs;
  639. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  640. { when free'ing hp1, the ref. isn't decresed, so we don't
  641. increase it (FK)
  642. taicpu(p).oper[0]^.ref^.symbol.increfs;
  643. }
  644. asml.remove(hp1);
  645. hp1.free;
  646. GetFinalDestination(asml, taicpu(p),0);
  647. end
  648. else
  649. begin
  650. GetFinalDestination(asml, taicpu(p),0);
  651. p:=tai(p.next);
  652. continue;
  653. end;
  654. end
  655. else
  656. GetFinalDestination(asml, taicpu(p),0);
  657. end;
  658. end;
  659. end
  660. else
  661. { All other optimizes }
  662. begin
  663. for l := 0 to taicpu(p).ops-1 Do
  664. if (taicpu(p).oper[l]^.typ = top_ref) then
  665. With taicpu(p).oper[l]^.ref^ Do
  666. begin
  667. if (base = NR_NO) and
  668. (index <> NR_NO) and
  669. (scalefactor in [0,1]) then
  670. begin
  671. base := index;
  672. index := NR_NO
  673. end
  674. end;
  675. case taicpu(p).opcode Of
  676. A_AND:
  677. begin
  678. if (taicpu(p).oper[0]^.typ = top_const) and
  679. (taicpu(p).oper[1]^.typ = top_reg) and
  680. GetNextInstruction(p, hp1) and
  681. (tai(hp1).typ = ait_instruction) and
  682. (taicpu(hp1).opcode = A_AND) and
  683. (taicpu(hp1).oper[0]^.typ = top_const) and
  684. (taicpu(hp1).oper[1]^.typ = top_reg) and
  685. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  686. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  687. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  688. begin
  689. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  690. asml.remove(p);
  691. p.free;
  692. p:=hp1;
  693. end
  694. else
  695. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  696. jump, but only if it's a conditional jump (PFV) }
  697. if (taicpu(p).oper[1]^.typ = top_reg) and
  698. GetNextInstruction(p, hp1) and
  699. (hp1.typ = ait_instruction) and
  700. (taicpu(hp1).is_jmp) and
  701. (taicpu(hp1).opcode<>A_JMP) and
  702. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  703. taicpu(p).opcode := A_TEST;
  704. end;
  A_CMP:
    begin
      { cmp register,$8000                neg register
        je target                 -->     jo target

        .... only if register is deallocated before jump.

        v is the "only the sign bit set" constant for the operand size of
        this compare.  Negating exactly that value is what makes NEG set the
        overflow flag, which is why E/NE can be replaced by O/NO. }
      case Taicpu(p).opsize of
        S_B: v:=$80;
        S_W: v:=$8000;
        S_L: v:=aint($80000000);
        else
          { no sign-bit constant is defined here for other operand sizes;
            give v a defined value so it is never read uninitialised.  The
            opsize test below keeps the rewrite from firing in that case. }
          v:=0;
      end;
      if (Taicpu(p).opsize in [S_B,S_W,S_L]) and
         (taicpu(p).oper[0]^.typ=Top_const) and
         (taicpu(p).oper[0]^.val=v) and
         (Taicpu(p).oper[1]^.typ=top_reg) and
         GetNextInstruction(p, hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode=A_Jcc) and
         (Taicpu(hp1).condition in [C_E,C_NE]) and
         not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
        begin
          { turn the two-operand CMP into a one-operand NEG on the register }
          Taicpu(p).opcode:=A_NEG;
          Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
          Taicpu(p).clearop(1);
          Taicpu(p).ops:=1;
          if Taicpu(hp1).condition=C_E then
            Taicpu(hp1).condition:=C_O
          else
            Taicpu(hp1).condition:=C_NO;
          continue;
        end;
      {
        @@2:                              @@2:
          ....                              ....
          cmp operand1,0
          jle/jbe @@1
          dec operand1             -->      sub operand1,1
          jmp @@2                           jge/jae @@2
        @@1:                              @@1:
          ...                               ....}
      if (taicpu(p).oper[0]^.typ = top_const) and
         (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
         (taicpu(p).oper[0]^.val = 0) and
         GetNextInstruction(p, hp1) and
         (hp1.typ = ait_instruction) and
         (taicpu(hp1).is_jmp) and
         (taicpu(hp1).opcode=A_Jcc) and
         (taicpu(hp1).condition in [C_LE,C_BE]) and
         GetNextInstruction(hp1,hp2) and
         (hp2.typ = ait_instruction) and
         (taicpu(hp2).opcode = A_DEC) and
         OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
         GetNextInstruction(hp2, hp3) and
         (hp3.typ = ait_instruction) and
         (taicpu(hp3).is_jmp) and
         (taicpu(hp3).opcode = A_JMP) and
         GetNextInstruction(hp3, hp4) and
         FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
        begin
          { dec operand1  ->  sub $1,operand1 }
          taicpu(hp2).Opcode := A_SUB;
          taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
          taicpu(hp2).loadConst(0,1);
          taicpu(hp2).ops:=2;
          { the unconditional back-jump becomes the inverted conditional jump }
          taicpu(hp3).Opcode := A_Jcc;
          case taicpu(hp1).condition of
            C_LE: taicpu(hp3).condition := C_GE;
            C_BE: taicpu(hp3).condition := C_AE;
          end;
          { the compare and the forward jump are no longer needed }
          asml.remove(p);
          asml.remove(hp1);
          p.free;
          hp1.free;
          p := hp2;
          continue;
        end
    end;
  779. A_FLD:
  780. begin
  781. if (taicpu(p).oper[0]^.typ = top_reg) and
  782. GetNextInstruction(p, hp1) and
  783. (hp1.typ = Ait_Instruction) and
  784. (taicpu(hp1).oper[0]^.typ = top_reg) and
  785. (taicpu(hp1).oper[1]^.typ = top_reg) and
  786. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  787. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  788. { change to
  789. fld reg fxxx reg,st
  790. fxxxp st, st1 (hp1)
  791. Remark: non commutative operations must be reversed!
  792. }
  793. begin
  794. case taicpu(hp1).opcode Of
  795. A_FMULP,A_FADDP,
  796. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  797. begin
  798. case taicpu(hp1).opcode Of
  799. A_FADDP: taicpu(hp1).opcode := A_FADD;
  800. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  801. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  802. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  803. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  804. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  805. end;
  806. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  807. taicpu(hp1).oper[1]^.reg := NR_ST;
  808. asml.remove(p);
  809. p.free;
  810. p := hp1;
  811. continue;
  812. end;
  813. end;
  814. end
  815. else
  816. if (taicpu(p).oper[0]^.typ = top_ref) and
  817. GetNextInstruction(p, hp2) and
  818. (hp2.typ = Ait_Instruction) and
  819. (taicpu(hp2).ops = 2) and
  820. (taicpu(hp2).oper[0]^.typ = top_reg) and
  821. (taicpu(hp2).oper[1]^.typ = top_reg) and
  822. (taicpu(p).opsize in [S_FS, S_FL]) and
  823. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  824. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  825. if GetLastInstruction(p, hp1) and
  826. (hp1.typ = Ait_Instruction) and
  827. ((taicpu(hp1).opcode = A_FLD) or
  828. (taicpu(hp1).opcode = A_FST)) and
  829. (taicpu(hp1).opsize = taicpu(p).opsize) and
  830. (taicpu(hp1).oper[0]^.typ = top_ref) and
  831. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  832. if ((taicpu(hp2).opcode = A_FMULP) or
  833. (taicpu(hp2).opcode = A_FADDP)) then
  834. { change to
  835. fld/fst mem1 (hp1) fld/fst mem1
  836. fld mem1 (p) fadd/
  837. faddp/ fmul st, st
  838. fmulp st, st1 (hp2) }
  839. begin
  840. asml.remove(p);
  841. p.free;
  842. p := hp1;
  843. if (taicpu(hp2).opcode = A_FADDP) then
  844. taicpu(hp2).opcode := A_FADD
  845. else
  846. taicpu(hp2).opcode := A_FMUL;
  847. taicpu(hp2).oper[1]^.reg := NR_ST;
  848. end
  849. else
  850. { change to
  851. fld/fst mem1 (hp1) fld/fst mem1
  852. fld mem1 (p) fld st}
  853. begin
  854. taicpu(p).changeopsize(S_FL);
  855. taicpu(p).loadreg(0,NR_ST);
  856. end
  857. else
  858. begin
  859. case taicpu(hp2).opcode Of
  860. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  861. { change to
  862. fld/fst mem1 (hp1) fld/fst mem1
  863. fld mem2 (p) fxxx mem2
  864. fxxxp st, st1 (hp2) }
  865. begin
  866. case taicpu(hp2).opcode Of
  867. A_FADDP: taicpu(p).opcode := A_FADD;
  868. A_FMULP: taicpu(p).opcode := A_FMUL;
  869. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  870. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  871. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  872. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  873. end;
  874. asml.remove(hp2);
  875. hp2.free;
  876. end
  877. end
  878. end
  879. end;
  880. A_FSTP,A_FISTP:
  881. if doFpuLoadStoreOpt(asmL,p) then
  882. continue;
  883. A_LEA:
  884. begin
  885. {removes seg register prefixes from LEA operations, as they
  886. don't do anything}
  887. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  888. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  889. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  890. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  891. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  892. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  893. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  894. (taicpu(p).oper[0]^.ref^.offset = 0) then
  895. begin
  896. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  897. taicpu(p).oper[1]^.reg);
  898. InsertLLItem(asml,p.previous,p.next, hp1);
  899. p.free;
  900. p := hp1;
  901. continue;
  902. end
  903. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  904. begin
  905. hp1 := tai(p.Next);
  906. asml.remove(p);
  907. p.free;
  908. p := hp1;
  909. continue;
  910. end
  911. else
  912. with taicpu(p).oper[0]^.ref^ do
  913. if (base = taicpu(p).oper[1]^.reg) then
  914. begin
  915. l := offset;
  916. if (l=1) then
  917. begin
  918. taicpu(p).opcode := A_INC;
  919. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  920. taicpu(p).ops := 1
  921. end
  922. else if (l=-1) then
  923. begin
  924. taicpu(p).opcode := A_DEC;
  925. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  926. taicpu(p).ops := 1;
  927. end
  928. else
  929. begin
  930. taicpu(p).opcode := A_ADD;
  931. taicpu(p).loadConst(0,l);
  932. end;
  933. end;
  934. end;
  935. A_MOV:
  936. begin
  937. TmpUsedRegs := UsedRegs;
  938. if (taicpu(p).oper[1]^.typ = top_reg) and
  939. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  940. GetNextInstruction(p, hp1) and
  941. (tai(hp1).typ = ait_instruction) and
  942. (taicpu(hp1).opcode = A_MOV) and
  943. (taicpu(hp1).oper[0]^.typ = top_reg) and
  944. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  945. begin
  946. {we have "mov x, %treg; mov %treg, y}
  947. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  948. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  949. case taicpu(p).oper[0]^.typ Of
  950. top_reg:
  951. begin
  952. { change "mov %reg, %treg; mov %treg, y"
  953. to "mov %reg, y" }
  954. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  955. asml.remove(hp1);
  956. hp1.free;
  957. continue;
  958. end;
  959. top_ref:
  960. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  961. begin
  962. { change "mov mem, %treg; mov %treg, %reg"
  963. to "mov mem, %reg" }
  964. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  965. asml.remove(hp1);
  966. hp1.free;
  967. continue;
  968. end;
  969. end
  970. end
  971. else
  972. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  973. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  974. penalty}
  975. if (taicpu(p).oper[0]^.typ = top_reg) and
  976. (taicpu(p).oper[1]^.typ = top_reg) and
  977. GetNextInstruction(p,hp1) and
  978. (tai(hp1).typ = ait_instruction) and
  979. (taicpu(hp1).ops >= 1) and
  980. (taicpu(hp1).oper[0]^.typ = top_reg) and
  981. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  982. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  983. begin
  984. if ((taicpu(hp1).opcode = A_OR) or
  985. (taicpu(hp1).opcode = A_TEST)) and
  986. (taicpu(hp1).oper[1]^.typ = top_reg) and
  987. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  988. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  989. begin
  990. TmpUsedRegs := UsedRegs;
  991. { reg1 will be used after the first instruction, }
  992. { so update the allocation info }
  993. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  994. if GetNextInstruction(hp1, hp2) and
  995. (hp2.typ = ait_instruction) and
  996. taicpu(hp2).is_jmp and
  997. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  998. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  999. "test %reg1, %reg1; jxx" }
  1000. begin
  1001. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1002. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1003. asml.remove(p);
  1004. p.free;
  1005. p := hp1;
  1006. continue
  1007. end
  1008. else
  1009. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1010. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1011. begin
  1012. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1013. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1014. end;
  1015. end
  1016. { else
  1017. if (taicpu(p.next)^.opcode
  1018. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1019. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1020. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1021. end
  1022. else
  1023. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1024. x >= RetOffset) as it doesn't do anything (it writes either to a
  1025. parameter or to the temporary storage room for the function
  1026. result)}
  1027. if GetNextInstruction(p, hp1) and
  1028. (tai(hp1).typ = ait_instruction) then
  1029. if ((taicpu(hp1).opcode = A_LEAVE) or
  1030. (taicpu(hp1).opcode = A_RET)) and
  1031. (taicpu(p).oper[1]^.typ = top_ref) and
  1032. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1033. not(assigned(current_procinfo.procdef.funcretsym) and
  1034. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1035. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1036. (taicpu(p).oper[0]^.typ = top_reg) then
  1037. begin
  1038. asml.remove(p);
  1039. p.free;
  1040. p := hp1;
  1041. RemoveLastDeallocForFuncRes(asmL,p);
  1042. end
  1043. else
  1044. if (taicpu(p).oper[0]^.typ = top_reg) and
  1045. (taicpu(p).oper[1]^.typ = top_ref) and
  1046. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1047. (taicpu(hp1).opcode = A_CMP) and
  1048. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1049. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1050. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  1051. begin
  1052. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1053. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1054. end;
  1055. { Next instruction is also a MOV ? }
  1056. if GetNextInstruction(p, hp1) and
  1057. (tai(hp1).typ = ait_instruction) and
  1058. (taicpu(hp1).opcode = A_MOV) and
  1059. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1060. begin
  1061. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1062. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1063. {mov reg1, mem1 or mov mem1, reg1
  1064. mov mem2, reg2 mov reg2, mem2}
  1065. begin
  1066. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1067. {mov reg1, mem1 or mov mem1, reg1
  1068. mov mem2, reg1 mov reg2, mem1}
  1069. begin
  1070. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1071. { Removes the second statement from
  1072. mov reg1, mem1/reg2
  1073. mov mem1/reg2, reg1 }
  1074. begin
  1075. if (taicpu(p).oper[0]^.typ = top_reg) then
  1076. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1077. asml.remove(hp1);
  1078. hp1.free;
  1079. end
  1080. else
  1081. begin
  1082. TmpUsedRegs := UsedRegs;
  1083. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1084. if (taicpu(p).oper[1]^.typ = top_ref) and
  1085. { mov reg1, mem1
  1086. mov mem2, reg1 }
  1087. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1088. GetNextInstruction(hp1, hp2) and
  1089. (hp2.typ = ait_instruction) and
  1090. (taicpu(hp2).opcode = A_CMP) and
  1091. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1092. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1093. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1094. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1095. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1096. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1097. { change to
  1098. mov reg1, mem1 mov reg1, mem1
  1099. mov mem2, reg1 cmp reg1, mem2
  1100. cmp mem1, reg1 }
  1101. begin
  1102. asml.remove(hp2);
  1103. hp2.free;
  1104. taicpu(hp1).opcode := A_CMP;
  1105. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1106. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1107. end;
  1108. end;
  1109. end
  1110. else
  1111. begin
  1112. tmpUsedRegs := UsedRegs;
  1113. if GetNextInstruction(hp1, hp2) and
  1114. (taicpu(p).oper[0]^.typ = top_ref) and
  1115. (taicpu(p).oper[1]^.typ = top_reg) and
  1116. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1117. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1118. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1119. (tai(hp2).typ = ait_instruction) and
  1120. (taicpu(hp2).opcode = A_MOV) and
  1121. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1122. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1123. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1124. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1125. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1126. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1127. { mov mem1, %reg1
  1128. mov %reg1, mem2
  1129. mov mem2, reg2
  1130. to:
  1131. mov mem1, reg2
  1132. mov reg2, mem2}
  1133. begin
  1134. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1135. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1136. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1137. asml.remove(hp2);
  1138. hp2.free;
  1139. end
  1140. else
  1141. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1142. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1143. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1144. { mov mem1, reg1 mov mem1, reg1
  1145. mov reg1, mem2 mov reg1, mem2
  1146. mov mem2, reg2 mov mem2, reg1
  1147. to: to:
  1148. mov mem1, reg1 mov mem1, reg1
  1149. mov mem1, reg2 mov reg1, mem2
  1150. mov reg1, mem2
  1151. or (if mem1 depends on reg1
  1152. and/or if mem2 depends on reg2)
  1153. to:
  1154. mov mem1, reg1
  1155. mov reg1, mem2
  1156. mov reg1, reg2
  1157. }
  1158. begin
  1159. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1160. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1161. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1162. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1163. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1164. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1165. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1166. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1167. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1168. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1169. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1170. end
  1171. else
  1172. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1173. begin
  1174. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1175. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1176. end
  1177. else
  1178. begin
  1179. asml.remove(hp2);
  1180. hp2.free;
  1181. end
  1182. end
  1183. end
  1184. else
  1185. (* {movl [mem1],reg1
  1186. movl [mem1],reg2
  1187. to:
  1188. movl [mem1],reg1
  1189. movl reg1,reg2 }
  1190. if (taicpu(p).oper[0]^.typ = top_ref) and
  1191. (taicpu(p).oper[1]^.typ = top_reg) and
  1192. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1193. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1194. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1195. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1196. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1197. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1198. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1199. else*)
  1200. { movl const1,[mem1]
  1201. movl [mem1],reg1
  1202. to:
  1203. movl const1,reg1
  1204. movl reg1,[mem1] }
  1205. if (taicpu(p).oper[0]^.typ = top_const) and
  1206. (taicpu(p).oper[1]^.typ = top_ref) and
  1207. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1208. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1209. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1210. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1211. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1212. begin
  1213. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1214. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1215. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1216. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1217. end
  1218. end;
  { mov reg1,0
    bts reg1,operand1             -->      mov reg1,operand2
    or  reg1,operand2                      bts reg1,operand1

    Only BTS is accepted.  With BTR the original sequence leaves reg1 equal
    to operand2 (clearing a bit of zero changes nothing), whereas the
    reordered sequence would clear that bit in operand2, so the rewrite
    would change the result. }
  if (Taicpu(p).oper[0]^.typ = top_const) and (Taicpu(p).oper[0]^.val=0) and
     (Taicpu(p).oper[1]^.typ = top_reg) and
     GetNextInstruction(p, hp1) and
     (Tai(hp1).typ = ait_instruction) and
     (Taicpu(hp1).opcode = A_BTS) and
     (Taicpu(hp1).opsize = Taicpu(p).opsize) and
     (Taicpu(hp1).oper[1]^.typ = top_reg) and
     (Taicpu(p).oper[1]^.reg=Taicpu(hp1).oper[1]^.reg) and
     GetNextInstruction(hp1, hp2) and
     (Tai(hp2).typ = ait_instruction) and
     (Taicpu(hp2).opcode = A_OR) and
     (Taicpu(hp2).opsize = Taicpu(p).opsize) and
     (Taicpu(hp2).oper[1]^.typ = top_reg) and
     (Taicpu(p).oper[1]^.reg=Taicpu(hp2).oper[1]^.reg) then
    begin
      { the OR becomes the load of operand2 ... }
      Taicpu(hp2).opcode:=A_MOV;
      { ... and the BTS is moved behind it }
      asml.remove(hp1);
      insertllitem(asml,hp2,hp2.next,hp1);
      asml.remove(p);
      p.free;
      { p was just freed: point it at the instruction that now stands in its
        place so that p is never left dangling (every other rewrite in this
        routine that frees p reassigns it as well) }
      p:=hp2;
    end;
  1244. end;
  1245. A_MOVSX,
  1246. A_MOVZX :
  1247. begin
  1248. if (taicpu(p).oper[1]^.typ = top_reg) and
  1249. GetNextInstruction(p,hp1) and
  1250. (hp1.typ = ait_instruction) and
  1251. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1252. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1253. GetNextInstruction(hp1,hp2) and
  1254. (hp2.typ = ait_instruction) and
  1255. (taicpu(hp2).opcode = A_MOV) and
  1256. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1257. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) then
  1258. { change movsX/movzX reg/ref, reg2 }
  1259. { add/sub/or/... reg3/$const, reg2 }
  1260. { mov reg2 reg/ref }
  1261. { to add/sub/or/... reg3/$const, reg/ref }
  1262. begin
  1263. { by example:
  1264. movswl %si,%eax movswl %si,%eax p
  1265. decl %eax addl %edx,%eax hp1
  1266. movw %ax,%si movw %ax,%si hp2
  1267. ->
  1268. movswl %si,%eax movswl %si,%eax p
  1269. decw %eax addw %edx,%eax hp1
  1270. movw %ax,%si movw %ax,%si hp2
  1271. }
  1272. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1273. {
  1274. ->
  1275. movswl %si,%eax movswl %si,%eax p
  1276. decw %si addw %dx,%si hp1
  1277. movw %ax,%si movw %ax,%si hp2
  1278. }
  1279. case taicpu(hp1).ops of
  1280. 1:
  1281. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1282. 2:
  1283. begin
  1284. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1285. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1286. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1287. end;
  1288. else
  1289. internalerror(2008042701);
  1290. end;
  1291. {
  1292. ->
  1293. decw %si addw %dx,%si p
  1294. }
  1295. asml.remove(p);
  1296. asml.remove(hp2);
  1297. p.free;
  1298. hp2.free;
  1299. p := hp1
  1300. end
  1301. { removes superfluous And's after movzx's }
  1302. else if taicpu(p).opcode=A_MOVZX then
  1303. begin
  1304. if (taicpu(p).oper[1]^.typ = top_reg) and
  1305. GetNextInstruction(p, hp1) and
  1306. (tai(hp1).typ = ait_instruction) and
  1307. (taicpu(hp1).opcode = A_AND) and
  1308. (taicpu(hp1).oper[0]^.typ = top_const) and
  1309. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1310. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1311. case taicpu(p).opsize Of
  1312. S_BL, S_BW:
  1313. if (taicpu(hp1).oper[0]^.val = $ff) then
  1314. begin
  1315. asml.remove(hp1);
  1316. hp1.free;
  1317. end;
  1318. S_WL:
  1319. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1320. begin
  1321. asml.remove(hp1);
  1322. hp1.free;
  1323. end;
  1324. end;
  1325. {changes some movzx constructs to faster synonyms (all examples
  1326. are given with eax/ax, but are also valid for other registers)}
  1327. if (taicpu(p).oper[1]^.typ = top_reg) then
  1328. if (taicpu(p).oper[0]^.typ = top_reg) then
  1329. case taicpu(p).opsize of
  1330. S_BW:
  1331. begin
  1332. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1333. not(cs_opt_size in current_settings.optimizerswitches) then
  1334. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1335. begin
  1336. taicpu(p).opcode := A_AND;
  1337. taicpu(p).changeopsize(S_W);
  1338. taicpu(p).loadConst(0,$ff);
  1339. end
  1340. else if GetNextInstruction(p, hp1) and
  1341. (tai(hp1).typ = ait_instruction) and
  1342. (taicpu(hp1).opcode = A_AND) and
  1343. (taicpu(hp1).oper[0]^.typ = top_const) and
  1344. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1345. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1346. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1347. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1348. begin
  1349. taicpu(p).opcode := A_MOV;
  1350. taicpu(p).changeopsize(S_W);
  1351. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1352. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1353. end;
  1354. end;
  1355. S_BL:
  1356. begin
  1357. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1358. not(cs_opt_size in current_settings.optimizerswitches) then
  1359. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1360. begin
  1361. taicpu(p).opcode := A_AND;
  1362. taicpu(p).changeopsize(S_L);
  1363. taicpu(p).loadConst(0,$ff)
  1364. end
  1365. else if GetNextInstruction(p, hp1) and
  1366. (tai(hp1).typ = ait_instruction) and
  1367. (taicpu(hp1).opcode = A_AND) and
  1368. (taicpu(hp1).oper[0]^.typ = top_const) and
  1369. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1370. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1371. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1372. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1373. begin
  1374. taicpu(p).opcode := A_MOV;
  1375. taicpu(p).changeopsize(S_L);
  1376. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1377. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1378. end
  1379. end;
  1380. S_WL:
  1381. begin
  1382. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1383. not(cs_opt_size in current_settings.optimizerswitches) then
  1384. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1385. begin
  1386. taicpu(p).opcode := A_AND;
  1387. taicpu(p).changeopsize(S_L);
  1388. taicpu(p).loadConst(0,$ffff);
  1389. end
  1390. else if GetNextInstruction(p, hp1) and
  1391. (tai(hp1).typ = ait_instruction) and
  1392. (taicpu(hp1).opcode = A_AND) and
  1393. (taicpu(hp1).oper[0]^.typ = top_const) and
  1394. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1395. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1396. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1397. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1398. begin
  1399. taicpu(p).opcode := A_MOV;
  1400. taicpu(p).changeopsize(S_L);
  1401. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1402. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1403. end;
  1404. end;
  1405. end
  1406. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1407. begin
  1408. if GetNextInstruction(p, hp1) and
  1409. (tai(hp1).typ = ait_instruction) and
  1410. (taicpu(hp1).opcode = A_AND) and
  1411. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1412. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1413. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1414. begin
  1415. taicpu(p).opcode := A_MOV;
  1416. case taicpu(p).opsize Of
  1417. S_BL:
  1418. begin
  1419. taicpu(p).changeopsize(S_L);
  1420. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1421. end;
  1422. S_WL:
  1423. begin
  1424. taicpu(p).changeopsize(S_L);
  1425. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1426. end;
  1427. S_BW:
  1428. begin
  1429. taicpu(p).changeopsize(S_W);
  1430. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1431. end;
  1432. end;
  1433. end;
  1434. end;
  1435. end;
  1436. end;
  1437. (* should not be generated anymore by the current code generator
  1438. A_POP:
  1439. begin
  1440. if target_info.system=system_i386_go32v2 then
  1441. begin
  1442. { Transform a series of pop/pop/pop/push/push/push to }
  1443. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1444. { because I'm not sure whether they can cope with }
  1445. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1446. { such a problem when using esp as frame pointer (JM) }
  1447. if (taicpu(p).oper[0]^.typ = top_reg) then
  1448. begin
  1449. hp1 := p;
  1450. hp2 := p;
  1451. l := 0;
  1452. while getNextInstruction(hp1,hp1) and
  1453. (hp1.typ = ait_instruction) and
  1454. (taicpu(hp1).opcode = A_POP) and
  1455. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1456. begin
  1457. hp2 := hp1;
  1458. inc(l,4);
  1459. end;
  1460. getLastInstruction(p,hp3);
  1461. l1 := 0;
  1462. while (hp2 <> hp3) and
  1463. assigned(hp1) and
  1464. (hp1.typ = ait_instruction) and
  1465. (taicpu(hp1).opcode = A_PUSH) and
  1466. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1467. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1468. begin
  1469. { change it to a two op operation }
  1470. taicpu(hp2).oper[1]^.typ:=top_none;
  1471. taicpu(hp2).ops:=2;
  1472. taicpu(hp2).opcode := A_MOV;
  1473. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1474. reference_reset(tmpref);
  1475. tmpRef.base.enum:=R_INTREGISTER;
  1476. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1477. convert_register_to_enum(tmpref.base);
  1478. tmpRef.offset := l;
  1479. taicpu(hp2).loadRef(0,tmpRef);
  1480. hp4 := hp1;
  1481. getNextInstruction(hp1,hp1);
  1482. asml.remove(hp4);
  1483. hp4.free;
  1484. getLastInstruction(hp2,hp2);
  1485. dec(l,4);
  1486. inc(l1);
  1487. end;
  1488. if l <> -4 then
  1489. begin
  1490. inc(l,4);
  1491. for l1 := l1 downto 1 do
  1492. begin
  1493. getNextInstruction(hp2,hp2);
  1494. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1495. end
  1496. end
  1497. end
  1498. end
  1499. else
  1500. begin
  1501. if (taicpu(p).oper[0]^.typ = top_reg) and
  1502. GetNextInstruction(p, hp1) and
  1503. (tai(hp1).typ=ait_instruction) and
  1504. (taicpu(hp1).opcode=A_PUSH) and
  1505. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1506. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1507. begin
  1508. { change it to a two op operation }
  1509. taicpu(p).oper[1]^.typ:=top_none;
  1510. taicpu(p).ops:=2;
  1511. taicpu(p).opcode := A_MOV;
  1512. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1513. reference_reset(tmpref);
  1514. TmpRef.base.enum := R_ESP;
  1515. taicpu(p).loadRef(0,TmpRef);
  1516. asml.remove(hp1);
  1517. hp1.free;
  1518. end;
  1519. end;
  1520. end;
  1521. *)
  A_PUSH:
    begin
      { Fold two consecutive 16 bit constant pushes into one 32 bit push:
        the first constant ends up in the upper word, the second one in
        the lower word of the combined value. }
      if (taicpu(p).opsize = S_W) then
        if (taicpu(p).oper[0]^.typ = Top_Const) then
          if GetNextInstruction(p, hp1) then
            if (tai(hp1).typ = ait_instruction) then
              if (taicpu(hp1).opcode = A_PUSH) and
                 (taicpu(hp1).oper[0]^.typ = Top_Const) and
                 (taicpu(hp1).opsize = S_W) then
                begin
                  taicpu(p).changeopsize(S_L);
                  taicpu(p).loadConst(0,
                    (taicpu(p).oper[0]^.val shl 16) +
                    word(taicpu(hp1).oper[0]^.val));
                  { the second push is now part of the first one }
                  asml.remove(hp1);
                  hp1.free;
                end;
    end;
  A_SHL, A_SAL:
    begin
      if (taicpu(p).oper[0]^.typ = Top_Const) and
         (taicpu(p).oper[1]^.typ = Top_Reg) and
         (taicpu(p).opsize = S_L) and
         (taicpu(p).oper[0]^.val <= 3) then
        {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
        begin
          TmpBool1 := True;  {should we check the next instruction?}
          TmpBool2 := False; {have we found an add/sub which could be
                              integrated in the lea?}
          { the shift itself becomes "index * scalefactor" of the reference }
          reference_reset(tmpref,2);
          TmpRef.index := taicpu(p).oper[1]^.reg;
          TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
          { absorb following add/sub/inc/dec instructions on the same register,
            as long as the instruction after them does not read the flags }
          while TmpBool1 and
                GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                ((((taicpu(hp1).opcode = A_ADD) or
                   (taicpu(hp1).opcode = A_SUB)) and
                  (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
                 (((taicpu(hp1).opcode = A_INC) or
                   (taicpu(hp1).opcode = A_DEC)) and
                  (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                  (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
                (not GetNextInstruction(hp1,hp2) or
                 not instrReadsFlags(hp2)) Do
            begin
              TmpBool1 := False;
              if (taicpu(hp1).oper[0]^.typ = Top_Const) then
                begin
                  { add/sub $const: fold the constant into the offset }
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                    A_SUB:
                      dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end
              else
                { "add %reg2, %reg" can only become the base register if the
                  base slot is still free and %reg2 is not the shifted
                  register itself: "add %reg, %reg" adds the already shifted
                  value, while a lea base would read the unshifted one }
                if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                   (((taicpu(hp1).opcode = A_ADD) and
                     (TmpRef.base = NR_NO) and
                     (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg)) or
                    (taicpu(hp1).opcode = A_INC) or
                    (taicpu(hp1).opcode = A_DEC)) then
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    case taicpu(hp1).opcode of
                      A_ADD:
                        TmpRef.base := taicpu(hp1).oper[0]^.reg;
                      A_INC:
                        inc(TmpRef.offset);
                      A_DEC:
                        dec(TmpRef.offset);
                    end;
                    asml.remove(hp1);
                    hp1.free;
                  end;
            end;
          { replace the shift if something was folded into it, or if an
            add/lea is preferred for the selected cpu and we do not
            optimize for size }
          if TmpBool2 or
             ((current_settings.optimizecputype < cpu_Pentium2) and
              (taicpu(p).oper[0]^.val <= 3) and
              not(cs_opt_size in current_settings.optimizerswitches)) then
            begin
              if not(TmpBool2) and
                 (taicpu(p).oper[0]^.val = 1) then
                begin
                  { plain "shl $1, %reg" -> "add %reg, %reg" }
                  hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                end
              else
                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                  taicpu(p).oper[1]^.reg);
              InsertLLItem(asml,p.previous, p.next, hp1);
              p.free;
              p := hp1;
            end;
        end
      else
        { Only reached when the first test above failed.  A 32 bit shift by a
          constant <= 3 of a register is therefore always handled above, so
          the former "shl $2/$3 -> lea" alternative in this branch could
          never be taken and has been dropped. }
        if (current_settings.optimizecputype < cpu_Pentium2) and
           (taicpu(p).oper[0]^.typ = top_const) and
           (taicpu(p).oper[1]^.typ = top_reg) then
          if (taicpu(p).oper[0]^.val = 1) then
            {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
             but faster on a 486, and pairable in both U and V pipes on the Pentium
             (unlike shl, which is only pairable in the U pipe)}
            begin
              hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
              InsertLLItem(asml,p.previous, p.next, hp1);
              p.free;
              p := hp1;
            end
    end;
  1649. A_SETcc :
  1650. { changes
  1651. setcc (funcres) setcc reg
  1652. movb (funcres), reg to leave/ret
  1653. leave/ret }
  1654. begin
  1655. if (taicpu(p).oper[0]^.typ = top_ref) and
  1656. GetNextInstruction(p, hp1) and
  1657. GetNextInstruction(hp1, hp2) and
  1658. (hp2.typ = ait_instruction) and
  1659. ((taicpu(hp2).opcode = A_LEAVE) or
  1660. (taicpu(hp2).opcode = A_RET)) and
  1661. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1662. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1663. not(assigned(current_procinfo.procdef.funcretsym) and
  1664. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1665. (hp1.typ = ait_instruction) and
  1666. (taicpu(hp1).opcode = A_MOV) and
  1667. (taicpu(hp1).opsize = S_B) and
  1668. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1669. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1670. begin
  1671. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1672. asml.remove(hp1);
  1673. hp1.free;
  1674. end
  1675. end;
  1676. A_SUB:
  1677. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1678. { * change "sub/add const1, reg" or "dec reg" followed by
  1679. "sub const2, reg" to one "sub ..., reg" }
  1680. begin
  1681. if (taicpu(p).oper[0]^.typ = top_const) and
  1682. (taicpu(p).oper[1]^.typ = top_reg) then
  1683. if (taicpu(p).oper[0]^.val = 2) and
  1684. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1685. { Don't do the sub/push optimization if the sub }
  1686. { comes from setting up the stack frame (JM) }
  1687. (not getLastInstruction(p,hp1) or
  1688. (hp1.typ <> ait_instruction) or
  1689. (taicpu(hp1).opcode <> A_MOV) or
  1690. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1691. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1692. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1693. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1694. begin
  1695. hp1 := tai(p.next);
  1696. while Assigned(hp1) and
  1697. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1698. not regReadByInstruction(RS_ESP,hp1) and
  1699. not regModifiedByInstruction(RS_ESP,hp1) do
  1700. hp1 := tai(hp1.next);
  1701. if Assigned(hp1) and
  1702. (tai(hp1).typ = ait_instruction) and
  1703. (taicpu(hp1).opcode = A_PUSH) and
  1704. (taicpu(hp1).opsize = S_W) then
  1705. begin
  1706. taicpu(hp1).changeopsize(S_L);
  1707. if taicpu(hp1).oper[0]^.typ=top_reg then
  1708. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1709. hp1 := tai(p.next);
  1710. asml.remove(p);
  1711. p.free;
  1712. p := hp1;
  1713. continue
  1714. end;
  1715. if DoSubAddOpt(p) then
  1716. continue;
  1717. end
  1718. else if DoSubAddOpt(p) then
  1719. continue
  1720. end;
  1721. end;
  1722. end; { if is_jmp }
  1723. end;
  1724. end;
  1725. updateUsedRegs(UsedRegs,p);
  1726. p:=tai(p.next);
  1727. end;
  1728. end;
  { Second peephole pass over the tai items from BlockStart up to, but not
    including, BlockEnd.  Items are changed, inserted into and removed from
    asml in place.

    Instructions for which InsContainsSegRef returns true are skipped.
    The remaining instructions are dispatched on their opcode:
      * Jcc          - "jb/jnb @@1; inc/dec x; @@1:" becomes cmc + adc/sbb or
                       plain adc/sbb; when current_settings.cputype is at
                       least cpu_Pentium2, short runs of reg->reg movs that a
                       conditional jump skips are turned into CMOVcc
      * FSTP, FISTP  - handed to doFpuLoadStoreOpt
      * IMUL         - "mov reg1,reg2; imul y,reg2" -> "imul y,reg1,reg2"
      * MOV          - folds a preceding reg->reg mov into the address of the
                       following load, and folds "load; arith; store" on the
                       same reference into one arithmetic instruction on
                       memory }
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);

    { True when p is an assigned instruction of the form "mov reg,reg" with
      operand size S_L or S_W, i.e. a mov that the CMOVcc rewrite below is
      allowed to convert. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
            { we can't use cmov ref,reg because
              ref could be nil and cmov still throws an exception
              if ref=nil but the mov isn't done (FK)
             or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
            }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      UsedRegs, TmpUsedRegs: TRegSet;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      UsedRegs := [];
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb @@1                            cmc
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1:
                        ... and ...
                        jnb @@1
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump itself is recycled as the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  { inc/dec x  ->  adc/sbb $0,x }
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { NOTE(review): unlike the CMOV rewrites below,
                                    the label's reference count is not decreased
                                    here before the jump is freed - confirm this
                                    is intended }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if (current_settings.cputype>=cpu_Pentium2) then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { the rewrite below uses repeat..until and so
                                      always converts the first item of a group:
                                      an empty first group would turn the jmp
                                      itself into a cmov and leave p pointing at
                                      the freed jmp }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                     begin
                                       l:=0;
                                       { skip hp1 to <several moves 2> }
                                       GetNextInstruction(hp1, hp1);
                                       while assigned(hp1) and
                                         CanBeCMOV(hp1) do
                                         begin
                                           inc(l);
                                           GetNextInstruction(hp1, hp1);
                                         end;
                                       { hp1 points to yyy: }
                                       { an empty second group must not be
                                         rewritten either: the item following
                                         xxx: is then not a convertible mov }
                                       if (l>0) and
                                         assigned(hp1) and
                                         FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                         begin
                                            condition:=inverse_cond(taicpu(p).condition);
                                            GetNextInstruction(p,hp1);
                                            hp3:=p;
                                            p:=hp1;
                                            repeat
                                              taicpu(hp1).opcode:=A_CMOVcc;
                                              taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCMOV(hp1));
                                            { hp2 is still at jmp yyy }
                                            GetNextInstruction(hp2,hp1);
                                            { hp1 is now at xxx: }
                                            condition:=inverse_cond(condition);
                                            GetNextInstruction(hp1,hp1);
                                            { hp1 is now at <several movs 2> }
                                            repeat
                                              taicpu(hp1).opcode:=A_CMOVcc;
                                              taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCMOV(hp1));
                                            {
                                            asml.remove(hp1.next)
                                            hp1.next.free;
                                            asml.remove(hp1);
                                            hp1.free;
                                            }
                                            { remove jCC }
                                            tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp3);
                                            hp3.free;
                                            { remove jmp }
                                            tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp2);
                                            hp2.free;
                                            continue;
                                         end;
                                     end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if doFpuLoadStoreOpt(asmL,p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        { mov reg1, reg2
                          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                        GetNextInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
                        GetNextInstruction(hp1,hp2) and
                        (hp2.typ = ait_instruction) and
                        (taicpu(hp2).opcode = A_MOV) and
                        (taicpu(hp2).oper[0]^.typ = top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          TmpUsedRegs := UsedRegs;
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                   hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              { inc/dec have a single operand, all others
                                take the destination as operand 1 }
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC:
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass over the tai items from BlockStart up to, but not
    including, BlockEnd.  Items are changed, inserted into and removed from
    asml in place.

    Instructions for which InsContainsSegRef returns true are skipped.
    The remaining instructions are dispatched on their opcode:
      * CALL      - "call x; jmp y" -> "push y; jmp x" (only when optimizing
                    for a cpu below cpu_Pentium2 and not generating PIC)
      * CMP       - "cmp $0,%reg" -> "test %reg,%reg"
      * MOVZX     - "movzbl src,%reg" -> "xorl %reg,%reg; movb src,%reg8"
                    (only when optimizing for cpu_Pentium, not for size, and
                    with register variables off)
      * TEST, OR  - removes "test/or %y,%y" when the preceding arithmetic
                    instruction on %y is followed by a setcc/jcc/cmovcc }
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1,hp2: tai;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    { change "call x; jmp y" to "push y; jmp x" }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(asml, p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is looked at again, now as a test instruction }
                          continue;
                        end;
                    end;
(*
Optimization is not safe; xor clears the carry flag.
See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                      end;
*)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                       { the inserted xor clears the whole
                                         destination before the movb reads the
                                         source, so the source must not be a
                                         part of the destination register }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                            else if (taicpu(p).oper[0]^.typ = top_ref) and
                                (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                not(cs_opt_size in current_settings.optimizerswitches) and
                                IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                (current_settings.optimizecputype = cpu_Pentium) and
                                (taicpu(p).opsize = S_BL) then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              begin
                                hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                            taicpu(p).oper[1]^.reg);
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).changeopsize(S_B);
                                setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                InsertLLItem(asml,p.previous, p, hp1);
                              end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                     and/or/xor/add/sub/... $x, %y
                     test/or %y, %y   (x)
                     j(n)z _Label
                        as the first instruction already adjusts the ZF}
                    begin
                      if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        if GetLastInstruction(p, hp1) and
                           (tai(hp1).typ = ait_instruction) and
                           GetNextInstruction(p,hp2) and
                           (hp2.typ = ait_instruction) and
                           ((taicpu(hp2).opcode = A_SETcc) or
                            (taicpu(hp2).opcode = A_Jcc) or
                            (taicpu(hp2).opcode = A_CMOVcc)) then
                          case taicpu(hp1).opcode Of
                            A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                    ((taicpu(hp1).opcode <> A_ADD) and
                                     (taicpu(hp1).opcode <> A_SUB))) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_DEC, A_INC, A_NEG:
                              begin
                                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC, A_INC:
                                        {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                        begin
                                          case taicpu(hp1).opcode Of
                                            A_DEC: taicpu(hp1).opcode := A_SUB;
                                            A_INC: taicpu(hp1).opcode := A_ADD;
                                          end;
                                          taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                          taicpu(hp1).loadConst(0,1);
                                          taicpu(hp1).ops:=2;
                                        end
                                    end;
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end
                          end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  2208. end.