popt386.pas 116 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386,
  34. cgx86;
  { Tells whether hp1 is an arithmetic/logical/shift instruction whose
    destination is the register reg and whose source does not involve reg:
      - add/sub/or/xor/and/shl/shr/sar  <const | other reg>, reg
      - inc/dec                         reg
    Any other opcode yields False. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      foldable: boolean;
    begin
      foldable := False;
      case hp1.opcode of
        A_INC,A_DEC:
          { single-operand forms: the only operand must be reg itself }
          foldable :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the source operand is tested first on purpose: the short-circuit
            keeps oper[1] untouched when the source already disqualifies
            the instruction }
          foldable :=
            ((hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg))) and
            (hp1.oper[1]^.typ = top_reg) and
            (hp1.oper[1]^.reg = reg);
      end;
      isFoldableArithOp := foldable;
    end;
  { Returns True when reg is still marked as used after instruction p and the
    instruction following p (if there is one) does not load it with a new
    value.  UsedRegs is updated from the items that follow p as a side
    effect. }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    var
      superReg: tsuperregister;
    begin
      superReg := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      if not (superReg in UsedRegs) then
        { not allocated any more after p }
        RegUsedAfterInstruction := false
      else if not getNextInstruction(p,p) then
        { nothing follows, so the register stays in use }
        RegUsedAfterInstruction := true
      else
        { p now designates the following instruction }
        RegUsedAfterInstruction := not regLoadedWithNewValue(superReg,false,p);
    end;
  { Looks for "fstp mem; fld mem" or "fistp mem; fild mem" pairs that use the
    same memory reference and operand size.  When the pair has extended size
    (S_FX), addresses a frame-pointer based location without index that does
    not lie below the function result variable, and is immediately followed
    by leave/ret, both instructions are removed and p is moved to that
    leave/ret.
    Returns true if a "continue" should be done after this optimization. }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    var
      reload, follower: tai;
    begin
      doFpuLoadStoreOpt := false;

      { the store must go to memory and be followed by an instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not(getNextInstruction(p, reload)) or
         (reload.typ <> ait_instruction) then
        exit;

      { only matching store/load pairs qualify }
      if not(((taicpu(reload).opcode = A_FLD) and
              (taicpu(p).opcode = A_FSTP)) or
             ((taicpu(p).opcode = A_FISTP) and
              (taicpu(reload).opcode = A_FILD))) then
        exit;

      { same kind of operand, same size, same location }
      if (taicpu(reload).oper[0]^.typ <> top_ref) or
         (taicpu(reload).opsize <> taicpu(p).opsize) or
         not(refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^)) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not(getNextInstruction(reload, follower)) or
         (follower.typ <> ait_instruction) then
        exit;
      if (taicpu(follower).opcode <> A_LEAVE) and
         (taicpu(follower).opcode <> A_RET) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { drop the store/load pair and resume at the leave/ret }
      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := follower;
      removeLastDeallocForFuncRes(asmL, p);
      doFPULoadStoreOpt := true;

      (* Disabled: turning the other sizes (neither S_FX nor S_IQ) into a
         plain fst/fist and dropping the reload can't be done because the
         store operation rounds. *)
    end;
  { Returns true as soon as one of the operands of p is a memory reference
    whose segment register is set, false if no operand is. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      result:=false;
      opIdx:=0;
      while (opIdx<p.opercnt) and not(result) do
        begin
          result:=(p.oper[opIdx]^.typ=top_ref) and
                  (p.oper[opIdx]^.ref^.segment<>NR_NO);
          inc(opIdx);
        end;
    end;
  { Pre-pass of the peephole optimizer.  Walks the list items from BlockStart
    up to (but not including) BlockEnd and rewrites, in place:
      - "imul $const, reg[, reg2]" (32 bit) into mov/lea/add sequences for
        the constants 1, 3, 5, 6, 9, 10 and 12;
      - "shr/sar $c1, x; shl $c2, x" into a shift and/or an "and" mask;
      - "xor reg, reg" into "mov $0, reg" (see the comment at that case).
    Instructions containing a reference with a segment set are skipped.

    asml       : the list that owns the items; used to insert/remove items
    BlockStart : first item to examine
    BlockEnd   : item at which the walk stops }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) do
        begin
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(asml, p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { lea/add do not reproduce imul's overflow flag, so
                            leave the imul alone when a jo/jno follows }
                          (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val of
                              3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   { NOTE(review): the three-operand expansion
                                     assumes reg1 <> reg2 - confirm that the
                                     code generator never emits
                                     "imul $6, reg, reg" in three-operand form }
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction of the
                                         pair first and insert it after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the result belongs in the imul's
                                             destination (oper[2]); writing it
                                             to oper[1] would clobber the
                                             source and leave reg2 = 2*reg1 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p, p.next, hp1);
                                       { now the FIRST instruction, which takes
                                         the place of the imul itself }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction: the add, inserted after p }
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction: the lea, replacing p }
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    { NOTE(review): as for 6, the three-operand
                                      expansion assumes reg1 <> reg2 }
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction, inserted after p }
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := taicpu(p).oper[2]^.reg;
                                            TmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction, replacing p }
                                        reference_reset(tmpref,2);
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := taicpu(p).oper[1]^.reg;
                                            TmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl     const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 > const2:
                            shift right by the difference, then clear the
                            low const2 bits }
                          begin
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 < const2:
                            clear the low const1 bits, then shift left by
                            the difference }
                          begin
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 = const2:
                            a single "and" clearing the low bits is enough }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  420. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  421. {First pass of peepholeoptimizations}
  422. var
  423. l : longint;
  424. p,hp1,hp2 : tai;
  425. hp3,hp4: tai;
  426. v:aint;
  427. TmpRef: TReference;
  428. UsedRegs, TmpUsedRegs: TRegSet;
  429. TmpBool1, TmpBool2: Boolean;
  { Starting at hp, steps over every following item that is a label, an
    alignment or belongs to SkipInstr.  On success hp2 receives the first
    item after that run and the result is True; when the list ends first,
    hp2 receives the last item reached and the result is False. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      cursor: tai;
    begin
      cursor := hp;
      while assigned(cursor.next) and
            (tai(cursor.next).typ in SkipInstr + [ait_label,ait_align]) do
        cursor := tai(cursor.next);
      SkipLabels := assigned(cursor.next);
      if assigned(cursor.next) then
        hp2 := tai(cursor.next)
      else
        hp2 := cursor;
    end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   the level parameter denotes how deep we have already followed the jump,
   to avoid endless loops with constructs such as "l5: ; jmp l5"

   asml  : list in which a new label may have to be inserted
   hp    : the jump whose target symbol is updated in place
   level : current recursion depth; the trace is abandoned above 20

   Returns false when the trace was abandoned (depth limit hit or a direct
   self-loop detected), true otherwise.  The reference counts of the labels
   involved are kept in step with every retargeting of hp. }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Skips the SkipInstr/align items following hp; when the next item is a
      label, returns it in l and yields True. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { take the new reference first, then release the one on
                      the old target, so the old label's count does not stay
                      too high once hp no longer points at it }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds the instruction preceding p into p when that instruction is a
    "dec reg", "sub $const, reg" or "add $const, reg" of the same size on the
    register in p's second operand: p's constant is adjusted (+1, +const or
    -const respectively) and the preceding instruction is deleted.
    If the add case leaves p with the constant 0, p itself is deleted as
    well, p is moved to the instruction before the deleted one (or to the
    item after it when there is none) and True is returned.
    Works on the enclosing procedure's hp1 and asml.
    NOTE(review): presumably only called with p being "sub $const, reg" -
    the operands of p are not checked here; verify against the caller. }
  function DoSubAddOpt(var p: tai): Boolean;

    { unlinks and frees the preceding instruction held in hp1 }
    procedure DiscardPrevious;
      begin
        asml.remove(hp1);
        hp1.free;
      end;

    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;
      case taicpu(hp1).opcode of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              DiscardPrevious;
            end;
        A_SUB:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              DiscardPrevious;
            end;
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              DiscardPrevious;
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  { add and sub cancelled out completely: drop p as well }
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end
            end;
      end;
    end;
  584. begin
  585. p := BlockStart;
  586. UsedRegs := [];
  587. while (p <> BlockEnd) Do
  588. begin
  589. UpDateUsedRegs(UsedRegs, tai(p.next));
  590. case p.Typ Of
  591. ait_instruction:
  592. begin
  593. if InsContainsSegRef(taicpu(p)) then
  594. begin
  595. p := tai(p.next);
  596. continue;
  597. end;
  598. { Handle Jmp Optimizations }
  599. if taicpu(p).is_jmp then
  600. begin
  601. {the following if-block removes all code between a jmp and the next label,
  602. because it can never be executed}
  603. if (taicpu(p).opcode = A_JMP) then
  604. begin
  605. hp2:=p;
  606. while GetNextInstruction(hp2, hp1) and
  607. (hp1.typ <> ait_label) do
  608. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  609. begin
  610. { don't kill start/end of assembler block,
  611. no-line-info-start/end etc }
  612. if hp1.typ<>ait_marker then
  613. begin
  614. asml.remove(hp1);
  615. hp1.free;
  616. end
  617. else
  618. hp2:=hp1;
  619. end
  620. else break;
  621. end;
  622. { remove jumps to a label coming right after them }
  623. if GetNextInstruction(p, hp1) then
  624. begin
  625. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  626. { TODO: FIXME removing the first instruction fails}
  627. (p<>blockstart) then
  628. begin
  629. hp2:=tai(hp1.next);
  630. asml.remove(p);
  631. p.free;
  632. p:=hp2;
  633. continue;
  634. end
  635. else
  636. begin
  637. if hp1.typ = ait_label then
  638. SkipLabels(hp1,hp1);
  639. if (tai(hp1).typ=ait_instruction) and
  640. (taicpu(hp1).opcode=A_JMP) and
  641. GetNextInstruction(hp1, hp2) and
  642. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  643. begin
  644. if taicpu(p).opcode=A_Jcc then
  645. begin
  646. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  647. tai_label(hp2).labsym.decrefs;
  648. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  649. { when free'ing hp1, the ref. isn't decresed, so we don't
  650. increase it (FK)
  651. taicpu(p).oper[0]^.ref^.symbol.increfs;
  652. }
  653. asml.remove(hp1);
  654. hp1.free;
  655. GetFinalDestination(asml, taicpu(p),0);
  656. end
  657. else
  658. begin
  659. GetFinalDestination(asml, taicpu(p),0);
  660. p:=tai(p.next);
  661. continue;
  662. end;
  663. end
  664. else
  665. GetFinalDestination(asml, taicpu(p),0);
  666. end;
  667. end;
  668. end
  669. else
  670. { All other optimizes }
  671. begin
  672. for l := 0 to taicpu(p).ops-1 Do
  673. if (taicpu(p).oper[l]^.typ = top_ref) then
  674. With taicpu(p).oper[l]^.ref^ Do
  675. begin
  676. if (base = NR_NO) and
  677. (index <> NR_NO) and
  678. (scalefactor in [0,1]) then
  679. begin
  680. base := index;
  681. index := NR_NO
  682. end
  683. end;
  684. case taicpu(p).opcode Of
  685. A_AND:
  686. begin
  687. if (taicpu(p).oper[0]^.typ = top_const) and
  688. (taicpu(p).oper[1]^.typ = top_reg) and
  689. GetNextInstruction(p, hp1) and
  690. (tai(hp1).typ = ait_instruction) and
  691. (taicpu(hp1).opcode = A_AND) and
  692. (taicpu(hp1).oper[0]^.typ = top_const) and
  693. (taicpu(hp1).oper[1]^.typ = top_reg) and
  694. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  695. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  696. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  697. begin
  698. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  699. asml.remove(p);
  700. p.free;
  701. p:=hp1;
  702. end
  703. else
  704. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  705. jump, but only if it's a conditional jump (PFV) }
  706. if (taicpu(p).oper[1]^.typ = top_reg) and
  707. GetNextInstruction(p, hp1) and
  708. (hp1.typ = ait_instruction) and
  709. (taicpu(hp1).is_jmp) and
  710. (taicpu(hp1).opcode<>A_JMP) and
  711. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  712. taicpu(p).opcode := A_TEST;
  713. end;
  714. A_CMP:
  715. begin
  716. { cmp register,$8000 neg register
  717. je target --> jo target
  718. .... only if register is deallocated before jump.}
  719. case Taicpu(p).opsize of
  720. S_B: v:=$80;
  721. S_W: v:=$8000;
  722. S_L: v:=aint($80000000);
  723. end;
  724. if (taicpu(p).oper[0]^.typ=Top_const) and
  725. (taicpu(p).oper[0]^.val=v) and
  726. (Taicpu(p).oper[1]^.typ=top_reg) and
  727. GetNextInstruction(p, hp1) and
  728. (hp1.typ=ait_instruction) and
  729. (taicpu(hp1).opcode=A_Jcc) and
  730. (Taicpu(hp1).condition in [C_E,C_NE]) and
  731. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  732. begin
  733. Taicpu(p).opcode:=A_NEG;
  734. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  735. Taicpu(p).clearop(1);
  736. Taicpu(p).ops:=1;
  737. if Taicpu(hp1).condition=C_E then
  738. Taicpu(hp1).condition:=C_O
  739. else
  740. Taicpu(hp1).condition:=C_NO;
  741. continue;
  742. end;
  743. {
  744. @@2: @@2:
  745. .... ....
  746. cmp operand1,0
  747. jle/jbe @@1
  748. dec operand1 --> sub operand1,1
  749. jmp @@2 jge/jae @@2
  750. @@1: @@1:
  751. ... ....}
  752. if (taicpu(p).oper[0]^.typ = top_const) and
  753. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  754. (taicpu(p).oper[0]^.val = 0) and
  755. GetNextInstruction(p, hp1) and
  756. (hp1.typ = ait_instruction) and
  757. (taicpu(hp1).is_jmp) and
  758. (taicpu(hp1).opcode=A_Jcc) and
  759. (taicpu(hp1).condition in [C_LE,C_BE]) and
  760. GetNextInstruction(hp1,hp2) and
  761. (hp2.typ = ait_instruction) and
  762. (taicpu(hp2).opcode = A_DEC) and
  763. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  764. GetNextInstruction(hp2, hp3) and
  765. (hp3.typ = ait_instruction) and
  766. (taicpu(hp3).is_jmp) and
  767. (taicpu(hp3).opcode = A_JMP) and
  768. GetNextInstruction(hp3, hp4) and
  769. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  770. begin
  771. taicpu(hp2).Opcode := A_SUB;
  772. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  773. taicpu(hp2).loadConst(0,1);
  774. taicpu(hp2).ops:=2;
  775. taicpu(hp3).Opcode := A_Jcc;
  776. case taicpu(hp1).condition of
  777. C_LE: taicpu(hp3).condition := C_GE;
  778. C_BE: taicpu(hp3).condition := C_AE;
  779. end;
  780. asml.remove(p);
  781. asml.remove(hp1);
  782. p.free;
  783. hp1.free;
  784. p := hp2;
  785. continue;
  786. end
  787. end;
  788. A_FLD:
  789. begin
  790. if (taicpu(p).oper[0]^.typ = top_reg) and
  791. GetNextInstruction(p, hp1) and
  792. (hp1.typ = Ait_Instruction) and
  793. (taicpu(hp1).oper[0]^.typ = top_reg) and
  794. (taicpu(hp1).oper[1]^.typ = top_reg) and
  795. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  796. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  797. { change to
  798. fld reg fxxx reg,st
  799. fxxxp st, st1 (hp1)
  800. Remark: non commutative operations must be reversed!
  801. }
  802. begin
  803. case taicpu(hp1).opcode Of
  804. A_FMULP,A_FADDP,
  805. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  806. begin
  807. case taicpu(hp1).opcode Of
  808. A_FADDP: taicpu(hp1).opcode := A_FADD;
  809. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  810. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  811. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  812. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  813. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  814. end;
  815. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  816. taicpu(hp1).oper[1]^.reg := NR_ST;
  817. asml.remove(p);
  818. p.free;
  819. p := hp1;
  820. continue;
  821. end;
  822. end;
  823. end
  824. else
  825. if (taicpu(p).oper[0]^.typ = top_ref) and
  826. GetNextInstruction(p, hp2) and
  827. (hp2.typ = Ait_Instruction) and
  828. (taicpu(hp2).ops = 2) and
  829. (taicpu(hp2).oper[0]^.typ = top_reg) and
  830. (taicpu(hp2).oper[1]^.typ = top_reg) and
  831. (taicpu(p).opsize in [S_FS, S_FL]) and
  832. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  833. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  834. if GetLastInstruction(p, hp1) and
  835. (hp1.typ = Ait_Instruction) and
  836. ((taicpu(hp1).opcode = A_FLD) or
  837. (taicpu(hp1).opcode = A_FST)) and
  838. (taicpu(hp1).opsize = taicpu(p).opsize) and
  839. (taicpu(hp1).oper[0]^.typ = top_ref) and
  840. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  841. if ((taicpu(hp2).opcode = A_FMULP) or
  842. (taicpu(hp2).opcode = A_FADDP)) then
  843. { change to
  844. fld/fst mem1 (hp1) fld/fst mem1
  845. fld mem1 (p) fadd/
  846. faddp/ fmul st, st
  847. fmulp st, st1 (hp2) }
  848. begin
  849. asml.remove(p);
  850. p.free;
  851. p := hp1;
  852. if (taicpu(hp2).opcode = A_FADDP) then
  853. taicpu(hp2).opcode := A_FADD
  854. else
  855. taicpu(hp2).opcode := A_FMUL;
  856. taicpu(hp2).oper[1]^.reg := NR_ST;
  857. end
  858. else
  859. { change to
  860. fld/fst mem1 (hp1) fld/fst mem1
  861. fld mem1 (p) fld st}
  862. begin
  863. taicpu(p).changeopsize(S_FL);
  864. taicpu(p).loadreg(0,NR_ST);
  865. end
  866. else
  867. begin
  868. case taicpu(hp2).opcode Of
  869. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  870. { change to
  871. fld/fst mem1 (hp1) fld/fst mem1
  872. fld mem2 (p) fxxx mem2
  873. fxxxp st, st1 (hp2) }
  874. begin
  875. case taicpu(hp2).opcode Of
  876. A_FADDP: taicpu(p).opcode := A_FADD;
  877. A_FMULP: taicpu(p).opcode := A_FMUL;
  878. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  879. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  880. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  881. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  882. end;
  883. asml.remove(hp2);
  884. hp2.free;
  885. end
  886. end
  887. end
  888. end;
  889. A_FSTP,A_FISTP:
  890. if doFpuLoadStoreOpt(asmL,p) then
  891. continue;
  892. A_LEA:
  893. begin
  894. {removes seg register prefixes from LEA operations, as they
  895. don't do anything}
  896. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  897. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  898. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  899. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  900. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  901. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  902. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  903. (taicpu(p).oper[0]^.ref^.offset = 0) then
  904. begin
  905. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  906. taicpu(p).oper[1]^.reg);
  907. InsertLLItem(asml,p.previous,p.next, hp1);
  908. p.free;
  909. p := hp1;
  910. continue;
  911. end
  912. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  913. begin
  914. hp1 := tai(p.Next);
  915. asml.remove(p);
  916. p.free;
  917. p := hp1;
  918. continue;
  919. end
  920. else
  921. with taicpu(p).oper[0]^.ref^ do
  922. if (base = taicpu(p).oper[1]^.reg) then
  923. begin
  924. l := offset;
  925. if (l=1) and UseIncDec then
  926. begin
  927. taicpu(p).opcode := A_INC;
  928. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  929. taicpu(p).ops := 1
  930. end
  931. else if (l=-1) and UseIncDec then
  932. begin
  933. taicpu(p).opcode := A_DEC;
  934. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  935. taicpu(p).ops := 1;
  936. end
  937. else
  938. begin
  939. taicpu(p).opcode := A_ADD;
  940. taicpu(p).loadConst(0,l);
  941. end;
  942. end;
  943. end;
  944. A_MOV:
  945. begin
  946. TmpUsedRegs := UsedRegs;
  947. if (taicpu(p).oper[1]^.typ = top_reg) and
  948. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  949. GetNextInstruction(p, hp1) and
  950. (tai(hp1).typ = ait_instruction) and
  951. (taicpu(hp1).opcode = A_MOV) and
  952. (taicpu(hp1).oper[0]^.typ = top_reg) and
  953. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  954. begin
  955. {we have "mov x, %treg; mov %treg, y"}
  956. if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and
  957. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  958. {we've got "mov x, %treg; mov %treg, y", where %treg is not used afterwards }
  959. case taicpu(p).oper[0]^.typ Of
  960. top_reg:
  961. begin
  962. { change "mov %reg, %treg; mov %treg, y"
  963. to "mov %reg, y" }
  964. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  965. asml.remove(hp1);
  966. hp1.free;
  967. continue;
  968. end;
  969. top_ref:
  970. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  971. begin
  972. { change "mov mem, %treg; mov %treg, %reg"
  973. to "mov mem, %reg" }
  974. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  975. asml.remove(hp1);
  976. hp1.free;
  977. continue;
  978. end;
  979. end
  980. end
  981. else
  982. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  983. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  984. penalty}
  985. if (taicpu(p).oper[0]^.typ = top_reg) and
  986. (taicpu(p).oper[1]^.typ = top_reg) and
  987. GetNextInstruction(p,hp1) and
  988. (tai(hp1).typ = ait_instruction) and
  989. (taicpu(hp1).ops >= 1) and
  990. (taicpu(hp1).oper[0]^.typ = top_reg) and
  991. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  992. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  993. begin
  994. if ((taicpu(hp1).opcode = A_OR) or
  995. (taicpu(hp1).opcode = A_TEST)) and
  996. (taicpu(hp1).oper[1]^.typ = top_reg) and
  997. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  998. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  999. begin
  1000. TmpUsedRegs := UsedRegs;
  1001. { reg1 will be used after the first instruction, }
  1002. { so update the allocation info }
  1003. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1004. if GetNextInstruction(hp1, hp2) and
  1005. (hp2.typ = ait_instruction) and
  1006. taicpu(hp2).is_jmp and
  1007. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1008. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  1009. "test %reg1, %reg1; jxx" }
  1010. begin
  1011. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1012. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1013. asml.remove(p);
  1014. p.free;
  1015. p := hp1;
  1016. continue
  1017. end
  1018. else
  1019. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1020. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1021. begin
  1022. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1023. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1024. end;
  1025. end
  1026. { else
  1027. if (taicpu(p.next)^.opcode
  1028. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1029. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1030. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1031. end
  1032. else
  1033. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1034. x >= RetOffset) as it doesn't do anything (it writes either to a
  1035. parameter or to the temporary storage room for the function
  1036. result)}
  1037. if GetNextInstruction(p, hp1) and
  1038. (tai(hp1).typ = ait_instruction) then
  1039. if ((taicpu(hp1).opcode = A_LEAVE) or
  1040. (taicpu(hp1).opcode = A_RET)) and
  1041. (taicpu(p).oper[1]^.typ = top_ref) and
  1042. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1043. not(assigned(current_procinfo.procdef.funcretsym) and
  1044. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1045. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1046. (taicpu(p).oper[0]^.typ = top_reg) then
  1047. begin
  1048. asml.remove(p);
  1049. p.free;
  1050. p := hp1;
  1051. RemoveLastDeallocForFuncRes(asmL,p);
  1052. end
  1053. else
  1054. if (taicpu(p).oper[0]^.typ = top_reg) and
  1055. (taicpu(p).oper[1]^.typ = top_ref) and
  1056. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1057. (taicpu(hp1).opcode = A_CMP) and
  1058. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1059. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1060. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1061. begin
  1062. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1063. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1064. end;
  1065. { Next instruction is also a MOV ? }
  1066. if GetNextInstruction(p, hp1) and
  1067. (tai(hp1).typ = ait_instruction) and
  1068. (taicpu(hp1).opcode = A_MOV) and
  1069. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1070. begin
  1071. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1072. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1073. {mov reg1, mem1 or mov mem1, reg1
  1074. mov mem2, reg2 mov reg2, mem2}
  1075. begin
  1076. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1077. {mov reg1, mem1 or mov mem1, reg1
  1078. mov mem2, reg1 mov reg2, mem1}
  1079. begin
  1080. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1081. { Removes the second statement from
  1082. mov reg1, mem1/reg2
  1083. mov mem1/reg2, reg1 }
  1084. begin
  1085. if (taicpu(p).oper[0]^.typ = top_reg) then
  1086. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1087. asml.remove(hp1);
  1088. hp1.free;
  1089. end
  1090. else
  1091. begin
  1092. TmpUsedRegs := UsedRegs;
  1093. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1094. if (taicpu(p).oper[1]^.typ = top_ref) and
  1095. { mov reg1, mem1
  1096. mov mem2, reg1 }
  1097. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1098. GetNextInstruction(hp1, hp2) and
  1099. (hp2.typ = ait_instruction) and
  1100. (taicpu(hp2).opcode = A_CMP) and
  1101. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1102. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1103. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1104. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1105. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1106. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1107. { change to
  1108. mov reg1, mem1 mov reg1, mem1
  1109. mov mem2, reg1 cmp reg1, mem2
  1110. cmp mem1, reg1 }
  1111. begin
  1112. asml.remove(hp2);
  1113. hp2.free;
  1114. taicpu(hp1).opcode := A_CMP;
  1115. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1116. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1117. end;
  1118. end;
  1119. end
  1120. else
  1121. begin
  1122. tmpUsedRegs := UsedRegs;
  1123. if GetNextInstruction(hp1, hp2) and
  1124. (taicpu(p).oper[0]^.typ = top_ref) and
  1125. (taicpu(p).oper[1]^.typ = top_reg) and
  1126. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1127. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1128. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1129. (tai(hp2).typ = ait_instruction) and
  1130. (taicpu(hp2).opcode = A_MOV) and
  1131. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1132. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1133. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1134. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1135. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1136. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1137. { mov mem1, %reg1
  1138. mov %reg1, mem2
  1139. mov mem2, reg2
  1140. to:
  1141. mov mem1, reg2
  1142. mov reg2, mem2}
  1143. begin
  1144. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1145. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1146. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1147. asml.remove(hp2);
  1148. hp2.free;
  1149. end
  1150. else
  1151. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1152. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1153. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1154. { mov mem1, reg1 mov mem1, reg1
  1155. mov reg1, mem2 mov reg1, mem2
  1156. mov mem2, reg2 mov mem2, reg1
  1157. to: to:
  1158. mov mem1, reg1 mov mem1, reg1
  1159. mov mem1, reg2 mov reg1, mem2
  1160. mov reg1, mem2
  1161. or (if mem1 depends on reg1
  1162. and/or if mem2 depends on reg2)
  1163. to:
  1164. mov mem1, reg1
  1165. mov reg1, mem2
  1166. mov reg1, reg2
  1167. }
  1168. begin
  1169. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1170. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1171. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1172. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1173. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1174. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1175. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1176. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1177. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1178. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1179. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1180. end
  1181. else
  1182. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1183. begin
  1184. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1185. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1186. end
  1187. else
  1188. begin
  1189. asml.remove(hp2);
  1190. hp2.free;
  1191. end
  1192. end
  1193. end
  1194. else
  1195. (* {movl [mem1],reg1
  1196. movl [mem1],reg2
  1197. to:
  1198. movl [mem1],reg1
  1199. movl reg1,reg2 }
  1200. if (taicpu(p).oper[0]^.typ = top_ref) and
  1201. (taicpu(p).oper[1]^.typ = top_reg) and
  1202. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1203. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1204. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1205. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1206. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1207. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1208. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1209. else*)
  1210. { movl const1,[mem1]
  1211. movl [mem1],reg1
  1212. to:
  1213. movl const1,reg1
  1214. movl reg1,[mem1] }
  1215. if (taicpu(p).oper[0]^.typ = top_const) and
  1216. (taicpu(p).oper[1]^.typ = top_ref) and
  1217. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1218. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1219. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1220. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1221. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1222. begin
  1223. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1224. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1225. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1226. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1227. end
  1228. end;
  1229. if GetNextInstruction(p, hp1) and
  1230. (Tai(hp1).typ = ait_instruction) and
  1231. ((Taicpu(hp1).opcode = A_BTS) or (Taicpu(hp1).opcode = A_BTR)) and
  1232. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1233. GetNextInstruction(hp1, hp2) and
  1234. (Tai(hp2).typ = ait_instruction) and
  1235. (Taicpu(hp2).opcode = A_OR) and
  1236. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1237. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1238. (Taicpu(p).oper[0]^.typ = top_const) and (Taicpu(p).oper[0]^.val=0) and
  1239. (Taicpu(p).oper[1]^.typ = top_reg) and
  1240. (Taicpu(hp1).oper[1]^.typ = top_reg) and
  1241. (Taicpu(p).oper[1]^.reg=Taicpu(hp1).oper[1]^.reg) and
  1242. (Taicpu(hp2).oper[1]^.typ = top_reg) and
  1243. (Taicpu(p).oper[1]^.reg=Taicpu(hp2).oper[1]^.reg) then
  1244. {mov reg1,0
  1245. bts reg1,operand1 --> mov reg1,operand2
  1246. or reg1,operand2 bts reg1,operand1}
  1247. begin
  1248. Taicpu(hp2).opcode:=A_MOV;
  1249. asml.remove(hp1);
  1250. insertllitem(asml,hp2,hp2.next,hp1);
  1251. asml.remove(p);
  1252. p.free;
  1253. end;
  1254. end;
  1255. A_MOVSX,
  1256. A_MOVZX :
  1257. begin
  1258. if (taicpu(p).oper[1]^.typ = top_reg) and
  1259. GetNextInstruction(p,hp1) and
  1260. (hp1.typ = ait_instruction) and
  1261. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1262. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1263. GetNextInstruction(hp1,hp2) and
  1264. (hp2.typ = ait_instruction) and
  1265. (taicpu(hp2).opcode = A_MOV) and
  1266. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1267. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1268. (((taicpu(hp1).ops=2) and
  1269. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1270. ((taicpu(hp1).ops=1) and
  1271. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1272. { reg2 must not be used after the sequence considered, so
  1273. it must be either deallocated or loaded with a new value }
  1274. (GetNextInstruction(hp2,hp3) and
  1275. (FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or
  1276. RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then
  1277. { change movsX/movzX reg/ref, reg2 }
  1278. { add/sub/or/... reg3/$const, reg2 }
  1279. { mov reg2, reg/ref }
  1280. { to add/sub/or/... reg3/$const, reg/ref }
  1281. begin
  1282. { by example:
  1283. movswl %si,%eax movswl %si,%eax p
  1284. decl %eax addl %edx,%eax hp1
  1285. movw %ax,%si movw %ax,%si hp2
  1286. ->
  1287. movswl %si,%eax movswl %si,%eax p
  1288. decw %eax addw %edx,%eax hp1
  1289. movw %ax,%si movw %ax,%si hp2
  1290. }
  1291. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1292. {
  1293. ->
  1294. movswl %si,%eax movswl %si,%eax p
  1295. decw %si addw %dx,%si hp1
  1296. movw %ax,%si movw %ax,%si hp2
  1297. }
  1298. case taicpu(hp1).ops of
  1299. 1:
  1300. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1301. 2:
  1302. begin
  1303. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1304. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1305. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1306. end;
  1307. else
  1308. internalerror(2008042701);
  1309. end;
  1310. {
  1311. ->
  1312. decw %si addw %dx,%si p
  1313. }
  1314. asml.remove(p);
  1315. asml.remove(hp2);
  1316. p.free;
  1317. hp2.free;
  1318. p := hp1
  1319. end
  1320. { removes superfluous And's after movzx's }
  1321. else if taicpu(p).opcode=A_MOVZX then
  1322. begin
  1323. if (taicpu(p).oper[1]^.typ = top_reg) and
  1324. GetNextInstruction(p, hp1) and
  1325. (tai(hp1).typ = ait_instruction) and
  1326. (taicpu(hp1).opcode = A_AND) and
  1327. (taicpu(hp1).oper[0]^.typ = top_const) and
  1328. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1329. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1330. case taicpu(p).opsize Of
  1331. S_BL, S_BW:
  1332. if (taicpu(hp1).oper[0]^.val = $ff) then
  1333. begin
  1334. asml.remove(hp1);
  1335. hp1.free;
  1336. end;
  1337. S_WL:
  1338. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1339. begin
  1340. asml.remove(hp1);
  1341. hp1.free;
  1342. end;
  1343. end;
  1344. {changes some movzx constructs to faster synonyms (all examples
  1345. are given with eax/ax, but are also valid for other registers)}
  1346. if (taicpu(p).oper[1]^.typ = top_reg) then
  1347. if (taicpu(p).oper[0]^.typ = top_reg) then
  1348. case taicpu(p).opsize of
  1349. S_BW:
  1350. begin
  1351. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1352. not(cs_opt_size in current_settings.optimizerswitches) then
  1353. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1354. begin
  1355. taicpu(p).opcode := A_AND;
  1356. taicpu(p).changeopsize(S_W);
  1357. taicpu(p).loadConst(0,$ff);
  1358. end
  1359. else if GetNextInstruction(p, hp1) and
  1360. (tai(hp1).typ = ait_instruction) and
  1361. (taicpu(hp1).opcode = A_AND) and
  1362. (taicpu(hp1).oper[0]^.typ = top_const) and
  1363. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1364. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1365. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1366. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1367. begin
  1368. taicpu(p).opcode := A_MOV;
  1369. taicpu(p).changeopsize(S_W);
  1370. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1371. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1372. end;
  1373. end;
  1374. S_BL:
  1375. begin
  1376. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1377. not(cs_opt_size in current_settings.optimizerswitches) then
  1378. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1379. begin
  1380. taicpu(p).opcode := A_AND;
  1381. taicpu(p).changeopsize(S_L);
  1382. taicpu(p).loadConst(0,$ff)
  1383. end
  1384. else if GetNextInstruction(p, hp1) and
  1385. (tai(hp1).typ = ait_instruction) and
  1386. (taicpu(hp1).opcode = A_AND) and
  1387. (taicpu(hp1).oper[0]^.typ = top_const) and
  1388. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1389. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1390. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1391. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1392. begin
  1393. taicpu(p).opcode := A_MOV;
  1394. taicpu(p).changeopsize(S_L);
  1395. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1396. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1397. end
  1398. end;
  1399. S_WL:
  1400. begin
  1401. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1402. not(cs_opt_size in current_settings.optimizerswitches) then
  1403. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1404. begin
  1405. taicpu(p).opcode := A_AND;
  1406. taicpu(p).changeopsize(S_L);
  1407. taicpu(p).loadConst(0,$ffff);
  1408. end
  1409. else if GetNextInstruction(p, hp1) and
  1410. (tai(hp1).typ = ait_instruction) and
  1411. (taicpu(hp1).opcode = A_AND) and
  1412. (taicpu(hp1).oper[0]^.typ = top_const) and
  1413. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1414. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1415. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1416. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1417. begin
  1418. taicpu(p).opcode := A_MOV;
  1419. taicpu(p).changeopsize(S_L);
  1420. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1421. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1422. end;
  1423. end;
  1424. end
  1425. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1426. begin
  1427. if GetNextInstruction(p, hp1) and
  1428. (tai(hp1).typ = ait_instruction) and
  1429. (taicpu(hp1).opcode = A_AND) and
  1430. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1431. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1432. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1433. begin
  1434. taicpu(p).opcode := A_MOV;
  1435. case taicpu(p).opsize Of
  1436. S_BL:
  1437. begin
  1438. taicpu(p).changeopsize(S_L);
  1439. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1440. end;
  1441. S_WL:
  1442. begin
  1443. taicpu(p).changeopsize(S_L);
  1444. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1445. end;
  1446. S_BW:
  1447. begin
  1448. taicpu(p).changeopsize(S_W);
  1449. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1450. end;
  1451. end;
  1452. end;
  1453. end;
  1454. end;
  1455. end;
  1456. (* should not be generated anymore by the current code generator
  1457. A_POP:
  1458. begin
  1459. if target_info.system=system_i386_go32v2 then
  1460. begin
  1461. { Transform a series of pop/pop/pop/push/push/push to }
  1462. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1463. { because I'm not sure whether they can cope with }
  1464. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1465. { such a problem when using esp as frame pointer (JM) }
  1466. if (taicpu(p).oper[0]^.typ = top_reg) then
  1467. begin
  1468. hp1 := p;
  1469. hp2 := p;
  1470. l := 0;
  1471. while getNextInstruction(hp1,hp1) and
  1472. (hp1.typ = ait_instruction) and
  1473. (taicpu(hp1).opcode = A_POP) and
  1474. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1475. begin
  1476. hp2 := hp1;
  1477. inc(l,4);
  1478. end;
  1479. getLastInstruction(p,hp3);
  1480. l1 := 0;
  1481. while (hp2 <> hp3) and
  1482. assigned(hp1) and
  1483. (hp1.typ = ait_instruction) and
  1484. (taicpu(hp1).opcode = A_PUSH) and
  1485. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1486. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1487. begin
  1488. { change it to a two op operation }
  1489. taicpu(hp2).oper[1]^.typ:=top_none;
  1490. taicpu(hp2).ops:=2;
  1491. taicpu(hp2).opcode := A_MOV;
  1492. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1493. reference_reset(tmpref);
  1494. tmpRef.base.enum:=R_INTREGISTER;
  1495. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1496. convert_register_to_enum(tmpref.base);
  1497. tmpRef.offset := l;
  1498. taicpu(hp2).loadRef(0,tmpRef);
  1499. hp4 := hp1;
  1500. getNextInstruction(hp1,hp1);
  1501. asml.remove(hp4);
  1502. hp4.free;
  1503. getLastInstruction(hp2,hp2);
  1504. dec(l,4);
  1505. inc(l1);
  1506. end;
  1507. if l <> -4 then
  1508. begin
  1509. inc(l,4);
  1510. for l1 := l1 downto 1 do
  1511. begin
  1512. getNextInstruction(hp2,hp2);
  1513. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1514. end
  1515. end
  1516. end
  1517. end
  1518. else
  1519. begin
  1520. if (taicpu(p).oper[0]^.typ = top_reg) and
  1521. GetNextInstruction(p, hp1) and
  1522. (tai(hp1).typ=ait_instruction) and
  1523. (taicpu(hp1).opcode=A_PUSH) and
  1524. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1525. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1526. begin
  1527. { change it to a two op operation }
  1528. taicpu(p).oper[1]^.typ:=top_none;
  1529. taicpu(p).ops:=2;
  1530. taicpu(p).opcode := A_MOV;
  1531. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1532. reference_reset(tmpref);
  1533. TmpRef.base.enum := R_ESP;
  1534. taicpu(p).loadRef(0,TmpRef);
  1535. asml.remove(hp1);
  1536. hp1.free;
  1537. end;
  1538. end;
  1539. end;
  1540. *)
  1541. A_PUSH:
  1542. begin
  1543. if (taicpu(p).opsize = S_W) and
  1544. (taicpu(p).oper[0]^.typ = Top_Const) and
  1545. GetNextInstruction(p, hp1) and
  1546. (tai(hp1).typ = ait_instruction) and
  1547. (taicpu(hp1).opcode = A_PUSH) and
  1548. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1549. (taicpu(hp1).opsize = S_W) then
  1550. begin
  1551. taicpu(p).changeopsize(S_L);
  1552. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1553. asml.remove(hp1);
  1554. hp1.free;
  1555. end;
  1556. end;
  1557. A_SHL, A_SAL:
  1558. begin
  1559. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1560. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1561. (taicpu(p).opsize = S_L) and
  1562. (taicpu(p).oper[0]^.val <= 3) then
  1563. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1564. begin
  1565. TmpBool1 := True; {should we check the next instruction?}
  1566. TmpBool2 := False; {have we found an add/sub which could be
  1567. integrated in the lea?}
  1568. reference_reset(tmpref,2);
  1569. TmpRef.index := taicpu(p).oper[1]^.reg;
  1570. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1571. while TmpBool1 and
  1572. GetNextInstruction(p, hp1) and
  1573. (tai(hp1).typ = ait_instruction) and
  1574. ((((taicpu(hp1).opcode = A_ADD) or
  1575. (taicpu(hp1).opcode = A_SUB)) and
  1576. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1577. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1578. (((taicpu(hp1).opcode = A_INC) or
  1579. (taicpu(hp1).opcode = A_DEC)) and
  1580. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1581. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1582. (not GetNextInstruction(hp1,hp2) or
  1583. not instrReadsFlags(hp2)) Do
  1584. begin
  1585. TmpBool1 := False;
  1586. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1587. begin
  1588. TmpBool1 := True;
  1589. TmpBool2 := True;
  1590. case taicpu(hp1).opcode of
  1591. A_ADD:
  1592. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1593. A_SUB:
  1594. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1595. end;
  1596. asml.remove(hp1);
  1597. hp1.free;
  1598. end
  1599. else
  1600. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1601. (((taicpu(hp1).opcode = A_ADD) and
  1602. (TmpRef.base = NR_NO)) or
  1603. (taicpu(hp1).opcode = A_INC) or
  1604. (taicpu(hp1).opcode = A_DEC)) then
  1605. begin
  1606. TmpBool1 := True;
  1607. TmpBool2 := True;
  1608. case taicpu(hp1).opcode of
  1609. A_ADD:
  1610. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1611. A_INC:
  1612. inc(TmpRef.offset);
  1613. A_DEC:
  1614. dec(TmpRef.offset);
  1615. end;
  1616. asml.remove(hp1);
  1617. hp1.free;
  1618. end;
  1619. end;
  1620. if TmpBool2 or
  1621. ((current_settings.optimizecputype < cpu_Pentium2) and
  1622. (taicpu(p).oper[0]^.val <= 3) and
  1623. not(cs_opt_size in current_settings.optimizerswitches)) then
  1624. begin
  1625. if not(TmpBool2) and
  1626. (taicpu(p).oper[0]^.val = 1) then
  1627. begin
  1628. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1629. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1630. end
  1631. else
  1632. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1633. taicpu(p).oper[1]^.reg);
  1634. InsertLLItem(asml,p.previous, p.next, hp1);
  1635. p.free;
  1636. p := hp1;
  1637. end;
  1638. end
  1639. else
  1640. if (current_settings.optimizecputype < cpu_Pentium2) and
  1641. (taicpu(p).oper[0]^.typ = top_const) and
  1642. (taicpu(p).oper[1]^.typ = top_reg) then
  1643. if (taicpu(p).oper[0]^.val = 1) then
  1644. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1645. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1646. (unlike shl, which is only Tairable in the U pipe)}
  1647. begin
  1648. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1649. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1650. InsertLLItem(asml,p.previous, p.next, hp1);
  1651. p.free;
  1652. p := hp1;
  1653. end
  1654. else if (taicpu(p).opsize = S_L) and
  1655. (taicpu(p).oper[0]^.val<= 3) then
  1656. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1657. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1658. begin
  1659. reference_reset(tmpref,2);
  1660. TmpRef.index := taicpu(p).oper[1]^.reg;
  1661. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1662. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1663. InsertLLItem(asml,p.previous, p.next, hp1);
  1664. p.free;
  1665. p := hp1;
  1666. end
  1667. end;
  1668. A_SETcc :
  1669. { changes
  1670. setcc (funcres) setcc reg
  1671. movb (funcres), reg to leave/ret
  1672. leave/ret }
  1673. begin
  1674. if (taicpu(p).oper[0]^.typ = top_ref) and
  1675. GetNextInstruction(p, hp1) and
  1676. GetNextInstruction(hp1, hp2) and
  1677. (hp2.typ = ait_instruction) and
  1678. ((taicpu(hp2).opcode = A_LEAVE) or
  1679. (taicpu(hp2).opcode = A_RET)) and
  1680. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1681. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1682. not(assigned(current_procinfo.procdef.funcretsym) and
  1683. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1684. (hp1.typ = ait_instruction) and
  1685. (taicpu(hp1).opcode = A_MOV) and
  1686. (taicpu(hp1).opsize = S_B) and
  1687. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1688. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1689. begin
  1690. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1691. asml.remove(hp1);
  1692. hp1.free;
  1693. end
  1694. end;
  1695. A_SUB:
  1696. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1697. { * change "sub/add const1, reg" or "dec reg" followed by
  1698. "sub const2, reg" to one "sub ..., reg" }
  1699. begin
  1700. if (taicpu(p).oper[0]^.typ = top_const) and
  1701. (taicpu(p).oper[1]^.typ = top_reg) then
  1702. if (taicpu(p).oper[0]^.val = 2) and
  1703. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1704. { Don't do the sub/push optimization if the sub }
  1705. { comes from setting up the stack frame (JM) }
  1706. (not getLastInstruction(p,hp1) or
  1707. (hp1.typ <> ait_instruction) or
  1708. (taicpu(hp1).opcode <> A_MOV) or
  1709. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1710. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1711. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1712. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1713. begin
  1714. hp1 := tai(p.next);
  1715. while Assigned(hp1) and
  1716. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1717. not regReadByInstruction(RS_ESP,hp1) and
  1718. not regModifiedByInstruction(RS_ESP,hp1) do
  1719. hp1 := tai(hp1.next);
  1720. if Assigned(hp1) and
  1721. (tai(hp1).typ = ait_instruction) and
  1722. (taicpu(hp1).opcode = A_PUSH) and
  1723. (taicpu(hp1).opsize = S_W) then
  1724. begin
  1725. taicpu(hp1).changeopsize(S_L);
  1726. if taicpu(hp1).oper[0]^.typ=top_reg then
  1727. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1728. hp1 := tai(p.next);
  1729. asml.remove(p);
  1730. p.free;
  1731. p := hp1;
  1732. continue
  1733. end;
  1734. if DoSubAddOpt(p) then
  1735. continue;
  1736. end
  1737. else if DoSubAddOpt(p) then
  1738. continue
  1739. end;
  1740. end;
  1741. end; { if is_jmp }
  1742. end;
  1743. end;
  1744. updateUsedRegs(UsedRegs,p);
  1745. p:=tai(p.next);
  1746. end;
  1747. end;
  { Second peephole pass. Walks the instruction list from BlockStart up to
    (but not including) BlockEnd and rewrites the following patterns in place:

      * jb/jnae over one inc/dec   -> cmc; adc/sbb operand,0
      * jae/jnb over one inc/dec   -> adc/sbb operand,0
      * jCC over a short run of reg->reg movs (optionally followed by
        "jmp yyy; xxx: <movs>; yyy:") -> CMOVcc, only when
        current_settings.cputype >= cpu_Pentium2
      * fstp/fistp                 -> delegated to doFpuLoadStoreOpt
      * mov reg1,reg2; imul y,reg2 -> imul y,reg1,reg2
      * mov reg1,reg2; mov/zx/sx (reg2,..),reg2 -> mov/zx/sx (reg1,..),reg2
      * mov (ref),reg; <arith> x,reg; mov reg,(ref) -> <arith> x,(ref)

    Instructions for which InsContainsSegRef returns true are skipped.

    asml       : list the instructions live in; removed items are freed.
    BlockStart : first item to examine.
    BlockEnd   : item at which the walk stops (not examined). }
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);

    { Returns true when p is a 16 or 32 bit "mov reg,reg" instruction, i.e.
      a mov that the jCC handling below is willing to turn into a CMOVcc. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
          { we can't use cmov ref,reg because
            ref could be nil and cmov still throws an exception
            if ref=nil but the mov isn't done (FK)
            or ((taicpu(p).oper[0]^.typ = top_ref) and
            (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
          }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      UsedRegs, TmpUsedRegs: TRegSet;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      UsedRegs := [];
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb @@1                        cmc
                        inc/dec operand      -->      adc/sbb operand,0
                        @@1:
                        ... and ...
                        jnb @@1
                        inc/dec operand      -->      adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump itself becomes the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  { inc/dec operand -> adc/sbb $0,operand }
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { no cmc needed here: drop the jump }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if (current_settings.cputype>=cpu_Pentium2) then
                        begin
                          { check for
                              jCC xxx
                              <several movs>
                            xxx:
                          }
                          { l counts the movs directly following the jCC }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                      jCC xxx
                                      <several movs 1>
                                      jmp yyy
                                    xxx:
                                      <several movs 2>
                                    yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { <several movs 1> must not be empty: the
                                      repeat loop below always rewrites the
                                      instruction following the jCC, which
                                      would otherwise be "jmp yyy" itself; p
                                      would then be left pointing at that jmp
                                      after it has been freed }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      { l now counts <several movs 2> }
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        { <several movs 2> must not be empty
                                          either: the second repeat loop below
                                          would otherwise treat the yyy: label
                                          as an instruction and overwrite it }
                                        (l>0) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if doFpuLoadStoreOpt(asmL,p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        {mov reg1, reg2
                         mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                        GetNextInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
                        GetNextInstruction(hp1,hp2) and
                        (hp2.typ = ait_instruction) and
                        (taicpu(hp2).opcode = A_MOV) and
                        (taicpu(hp2).oper[0]^.typ = top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          TmpUsedRegs := UsedRegs;
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                    hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC:
                                  { single operand form: the ref replaces operand 0 }
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass. Walks the instruction list from BlockStart up to
    (but not including) BlockEnd and applies the following rewrites:

      * call x; jmp y      -> push y; jmp x
        (only when optimizecputype < cpu_Pentium2 and PIC is off)
      * cmp $0,%reg        -> test %reg,%reg
      * movzbl src,%reg32  -> xorl %reg32,%reg32; movb src,%reg8
        (only when optimizecputype = cpu_Pentium, not optimizing for size
         and cs_opt_regvar is off)
      * removes "test/or %y,%y" when the preceding arithmetic instruction
        on %y already set the flags the following setcc/jcc/cmovcc reads

    Instructions for which InsContainsSegRef returns true are skipped.

    asml       : list the instructions live in; removed items are freed.
    BlockStart : first item to examine.
    BlockEnd   : item at which the walk stops (not examined). }
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1,hp2: tai;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    { don't do this on modern CPUs, this really hurts them due to
                      broken call/ret pairing }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        { push the jmp target in front of the call, then turn
                          the call into a jmp and drop the original jmp }
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(asml, p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is not advanced: the new test is examined again
                            by the A_TEST case on the next iteration }
                          continue;
                        end;
                    end;
  (*
  Optimization is not safe; xor clears the carry flag.
  See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                      end;
  *)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                       { the xor below clears the whole destination
                                         before the byte is copied, so the source
                                         must not be part of the destination
                                         (e.g. "movzbl %al,%eax"); the memory form
                                         further down has the matching base/index
                                         check }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                 taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                            (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                            (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                            not(cs_opt_size in current_settings.optimizerswitches) and
                            IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                            (current_settings.optimizecputype = cpu_Pentium) and
                            (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                       taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(asml,p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                       and/or/xor/add/sub/... $x, %y
                       test/or %y, %y   (x)
                       j(n)z _Label
                     as the first instruction already adjusts the ZF}
                    begin
                      if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        if GetLastInstruction(p, hp1) and
                           (tai(hp1).typ = ait_instruction) and
                           GetNextInstruction(p,hp2) and
                           (hp2.typ = ait_instruction) and
                           ((taicpu(hp2).opcode = A_SETcc) or
                            (taicpu(hp2).opcode = A_Jcc) or
                            (taicpu(hp2).opcode = A_CMOVcc)) then
                          case taicpu(hp1).opcode Of
                            A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                    ((taicpu(hp1).opcode <> A_ADD) and
                                     (taicpu(hp1).opcode <> A_SUB))) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_DEC, A_INC, A_NEG:
                              begin
                                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC, A_INC:
                                        {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                        begin
                                          case taicpu(hp1).opcode Of
                                            A_DEC: taicpu(hp1).opcode := A_SUB;
                                            A_INC: taicpu(hp1).opcode := A_ADD;
                                          end;
                                          taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                          taicpu(hp1).loadConst(0,1);
                                          taicpu(hp1).ops:=2;
                                        end
                                    end;
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end
                          end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  2229. end.