aoptcpu.pas 109 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  type
    { Peephole optimizer class for this CPU; extends the shared x86
      optimizer TX86AsmOptimizer. }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      { Optimizer stages overridden from the ancestor class. }
      procedure Optimize; override;
      procedure PrePeepHoleOpts; override;
      procedure PeepHoleOptPass1; override;
      procedure PeepHoleOptPass2; override;
      procedure PostPeepHoleOpts; override;

      { Tries to remove a redundant FPU store/reload pair starting at p;
        returns true if the caller should "continue" its scan loop. }
      function DoFpuLoadStoreOpt(var p : tai) : boolean;

      { Ancestor hook; implemented by forwarding to RegReadByInstruction. }
      function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;

      { Returns true if the instruction hp reads (any part of) reg. }
      function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Looks for the pair
        fstp  mem            fistp mem
        fld   mem     or     fild  mem
    (same operand size, same memory reference) and removes both
    instructions when all of the following hold:
      - the operand size is S_FX,
      - the instruction following the pair is exit code (IsExitCode),
      - the reference is based on the frame pointer, has no index, and its
        offset is not below the offset of the function result symbol
        (if the procedure has one).

    On success p is set to the exit-code instruction,
    removeLastDeallocForFuncRes is called for it and true is returned,
    meaning the caller should do a "continue".  Otherwise nothing is changed
    and false is returned.

    Note: turning "fstp f; fld f" into "fst f" for the other operand sizes
    is deliberately not done, because the store operation rounds. }
  var
    reload, exitIns: tai;
  begin
    Result := false;

    { --- recognise the store/reload pair --- }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;
    if not (((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD))) then
      exit;
    if taicpu(reload).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(reload).opsize <> taicpu(p).opsize then
      exit;
    if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { --- the pair may only be dropped for extended values that are stored
          right before the exit code --- }
    if taicpu(p).opsize <> S_FX then
      exit;
    if not getNextInstruction(reload, exitIns) then
      exit;
    if exitIns.typ <> ait_instruction then
      exit;
    if not IsExitCode(exitIns) then
      exit;
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    { --- remove both instructions and continue at the exit code --- }
    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := exitIns;
    removeLastDeallocForFuncRes(p);
    Result := true;
  end;
  { Converts a tinschange register-access value (CH_Rxxx, CH_Wxxx, CH_RWxxx or
    CH_Mxxx) to the super register it refers to.

    The arithmetic assumes that the four groups follow each other directly in
    tinschange, each listing the registers in the order EAX, ECX, EDX, EBX,
    ESP, EBP, ESI, EDI (NOTE(review): confirm against the tinschange
    declaration).  Values above CH_MEDI trigger InternalError(2016041901). }
  function tch2reg(ch: tinschange): tsuperregister;
  const
    ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
  var
    groupBase: longint;
  begin
    { determine how far ch has to be shifted down to land in CH_REAX..CH_REDI }
    if ch <= CH_REDI then
      groupBase := 0
    else if ch <= CH_WEDI then
      groupBase := ord(CH_REDI)
    else if ch <= CH_RWEDI then
      groupBase := ord(CH_WEDI)
    else if ch <= CH_MEDI then
      groupBase := ord(CH_RWEDI)
    else
      begin
        InternalError(2016041901);
        exit;
      end;
    Result := ch2reg[tinschange(ord(ch) - groupBase)];
  end;
  { Returns true if reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX.  The sub-register (size) of reg is not
    inspected here. }
  function isgp32reg(reg: TRegister): boolean;
  var
    supreg: tsuperregister;
  begin
    Result := false;
    if getregtype(reg) <> R_INTREGISTER then
      exit;
    supreg := getsupreg(reg);
  {$push}{$warnings off}
    { warnings are switched off for the range comparison below }
    Result := (supreg >= RS_EAX) and (supreg <= RS_EBX);
  {$pop}
  end;
  { Override of the ancestor hook: simply delegates to RegReadByInstruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  begin
    InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
  end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  { Returns true if the instruction hp reads (any part of) reg.

    - Anything that is not an instruction never reads reg.
    - A_CALL is treated as reading every register.
    - MUL/IMUL/DIV/IDIV get explicit handling for their implicit EAX/EDX
      operands.
    - For all other opcodes a register used inside a memory reference is
      always read; after that the per-opcode property table insprop[] decides,
      both for implicitly used integer registers, for the flags register
      (including the individual flag sub-registers) and for the explicit
      operands. }
  var
    instr: taicpu;
    opidx: longint;
    implicitRead: boolean;

    { True if reg is (part of) EAX in a way that is read as the implicit
      operand of a one-operand multiply: for a byte-sized multiply the
      high-byte sub-register (R_SUBH) does not count. }
    function IsImplicitMulOperand: boolean;
    begin
      Result := (getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX) and
                ((getsubreg(reg)<>R_SUBH) or (instr.opsize<>S_B));
    end;

  begin
    Result := false;
    if hp.typ <> ait_instruction then
      exit;
    instr := taicpu(hp);

    case instr.opcode of
      A_CALL:
        Result := true;

      A_IMUL:
        case instr.ops of
          1:
            Result := RegInOp(reg,instr.oper[0]^) or IsImplicitMulOperand;
          2,3:
            Result := RegInOp(reg,instr.oper[0]^) or
                      RegInOp(reg,instr.oper[1]^);
        end;

      A_MUL:
        Result := RegInOp(reg,instr.oper[0]^) or IsImplicitMulOperand;

      A_IDIV,A_DIV:
        { EAX is always an implicit operand, EDX only for non-byte sizes }
        Result := RegInOp(reg,instr.oper[0]^) or
                  (
                    (getregtype(reg)=R_INTREGISTER) and
                    (
                      (getsupreg(reg)=RS_EAX) or
                      ((getsupreg(reg)=RS_EDX) and (instr.opsize<>S_B))
                    )
                  );

      else
        begin
          { LEA never reports a segment register as read; Result is still false }
          if (instr.opcode=A_LEA) and is_segment_reg(reg) then
            exit;

          { a register that is part of a memory reference is always read }
          for opidx := 0 to instr.ops-1 do
            if (instr.oper[opidx]^.typ = top_ref) and
               RegInRef(reg,instr.oper[opidx]^.ref^) then
              begin
                Result := true;
                exit;
              end;

          { special handling for SSE MOVSD }
          if (instr.opcode=A_MOVSD) and (instr.ops>0) then
            begin
              if instr.ops<>2 then
                internalerror(2017042702);
              Result := RegInOp(reg,instr.oper[0]^) or
                        (
                          (instr.oper[1]^.typ=top_reg) and
                          (instr.oper[0]^.typ=top_reg) and
                          RegInOp(reg,instr.oper[1]^)
                        );
              exit;
            end;

          with insprop[instr.opcode] do
            begin
              { integer registers that the opcode reads implicitly }
              if getregtype(reg)=R_INTREGISTER then
                begin
                  implicitRead := false;
                  case getsupreg(reg) of
                    RS_EAX:
                      implicitRead := [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[];
                    RS_ECX:
                      implicitRead := [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[];
                    RS_EDX:
                      implicitRead := [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[];
                    RS_EBX:
                      implicitRead := [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[];
                    RS_ESP:
                      implicitRead := [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[];
                    RS_EBP:
                      implicitRead := [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[];
                    RS_ESI:
                      implicitRead := [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[];
                    RS_EDI:
                      implicitRead := [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[];
                  end;
                  if implicitRead then
                    begin
                      Result := true;
                      exit;
                    end;
                end;

              { the flags register, or one of its single-flag sub-registers }
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { conditional instruction queried for one specific flag:
                    decide from the flags its condition code tests }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case instr.condition of
                        C_A,C_NBE,       { CF=0 and ZF=0 }
                        C_BE,C_NA:       { CF=1 or ZF=1 }
                          Result := getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC,  { CF=0 }
                        C_B,C_NAE,C_C:   { CF=1 }
                          Result := getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ,       { ZF=0 }
                        C_E,C_Z:         { ZF=1 }
                          Result := getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE,       { ZF=0 and SF=OF }
                        C_LE,C_NG:       { ZF=1 or SF<>OF }
                          Result := getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL,       { SF=OF }
                        C_L,C_NGE:       { SF<>OF }
                          Result := getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO,            { OF=0 }
                        C_O:             { OF=1 }
                          Result := getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO,       { PF=0 }
                        C_P,C_PE:        { PF=1 }
                          Result := getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS,            { SF=0 }
                        C_S:             { SF=1 }
                          Result := getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if Result then
                        exit;
                    end;

                  { otherwise consult the read/read-write flag properties }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      Result :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      Result := [Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      Result := [Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      Result := [Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      Result := [Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      Result := [Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      Result := [Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      Result := [Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      Result := [Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;

              { opcodes flagged Ch_NoReadIfEqualRegs do not count as reading
                when both register operands are identical; Result stays false }
              if (Ch_NoReadIfEqualRegs in Ch) and (instr.ops=2) and
                 (instr.oper[0]^.typ=top_reg) and (instr.oper[1]^.typ=top_reg) and
                 (instr.oper[0]^.reg=instr.oper[1]^.reg) then
                exit;

              { explicit operands the opcode reads, read-writes or modifies;
                short-circuit evaluation keeps an operand from being touched
                unless the matching property is set }
              Result :=
                (([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and RegInOp(reg,instr.oper[0]^)) or
                (([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and RegInOp(reg,instr.oper[1]^)) or
                (([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and RegInOp(reg,instr.oper[2]^));
            end;
        end;
    end;
  end;
  { Returns true if at least one operand of p is a memory reference whose
    segment register is set (i.e. differs from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    opidx: longint;
  begin
    Result := false;
    for opidx := 0 to p.opercnt-1 do
      if (p.oper[opidx]^.typ=top_ref) and
         (p.oper[opidx]^.ref^.segment<>NR_NO) then
        begin
          Result := true;
          exit;
        end;
  end;
  { Returns true if p has to be treated as reading the flags:
      - an instruction whose insprop[] entry contains any flag-read or
        flag-read/write property, Ch_RFLAGScc or Ch_All;
      - any label (NOTE(review): presumably a conservative answer because
        the code reaching the label is not known here).
    Every other kind of tai yields false. }
  function InstrReadsFlags(p: tai): boolean;
  begin
    case p.typ of
      ait_instruction:
        InstrReadsFlags :=
          InsProp[taicpu(p).opcode].Ch*
            [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
             Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
             Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
      ait_label:
        InstrReadsFlags := true;
      else
        InstrReadsFlags := false;
    end;
  end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  { Walks the instructions from BlockStart up to (not including) BlockEnd and
    applies three rewrites; instructions that contain a memory operand with a
    segment override are skipped untouched:

      A_IMUL     "imul $const, reg[, reg2]" with a 32 bit operand size is
                 removed (const = 1, two operands), turned into a mov
                 (const = 1, three operands) or replaced by lea/add
                 sequences for the constants 3, 5, 6, 9, 10 and 12 (not when
                 optimizing for size, and not when the next instruction is a
                 jump on the overflow flag).
      A_SAR/SHR  "shr/sar $c1, x; shl $c2, x" is combined into a shift plus
                 "and", or into a single "and".
      A_XOR      "xor reg, reg" is temporarily turned into "mov $0, reg".

    Fixes relative to the previous revision (both only reachable when
    optimizecputype <= cpu_386):
      - "imul $6, reg1, reg2": the second lea wrote its result to reg1
        instead of reg2, leaving 2*reg1 in reg2 and clobbering reg1.
      - "imul $6/$12, reg1, reg2" with reg1 = reg2: the three-operand
        sequences read reg1 after having overwritten it; the in-place
        (two-operand) sequence is used for that case now. }
  var
    p,hp1: tai;
    l: aint;
    tmpRef: treference;
    { true for a three-operand imul whose destination differs from its source }
    distinctDest: boolean;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          {remove "imul $1, reg"}
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                       ((taicpu(p).ops <= 2) or
                        (taicpu(p).oper[2]^.typ = Top_Reg)) and
                       (taicpu(p).oper[0]^.val <= 12) and
                       not(cs_opt_size in current_settings.optimizerswitches) and
                       { lea/add do not set the overflow flag like imul does,
                         so keep the imul if a jo/jno follows }
                       (not(GetNextInstruction(p, hp1)) or
                         {GetNextInstruction(p, hp1) and}
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpref,1,[]);
                          { the three-operand sequences for 6 and 12 write the
                            destination before the last read of the source, so
                            they are only valid when both registers differ }
                          distinctDest := (taicpu(p).ops = 3) and
                            (taicpu(p).oper[1]^.reg <> taicpu(p).oper[2]^.reg);
                          case taicpu(p).oper[0]^.val Of
                            3: begin
                               {imul 3, reg1, reg2 to
                                  lea (reg1,reg1,2), reg2
                                imul 3, reg1 to
                                  lea (reg1,reg1,2), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 2;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            5: begin
                               {imul 5, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                imul 5, reg1 to
                                  lea (reg1,reg1,4), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 4;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            6: begin
                               {imul 6, reg1, reg2 (reg1 <> reg2) to
                                  lea (,reg1,2), reg2
                                  lea (reg2,reg1,4), reg2
                                imul 6, reg1 (or reg1 = reg2) to
                                  lea (reg1,reg1,2), reg1
                                  add reg1, reg1}
                                 if (current_settings.optimizecputype <= cpu_386) then
                                   begin
                                     { build the SECOND instruction first and
                                       insert it right after p }
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     if distinctDest then
                                       begin
                                         TmpRef.base := taicpu(p).oper[2]^.reg;
                                         TmpRef.ScaleFactor := 4;
                                         { the result has to end up in reg2 }
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(p, p.next, hp1);
                                     { now the FIRST instruction, which takes
                                       the place of p }
                                     reference_reset(tmpref,2,[]);
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 2;
                                     if distinctDest then
                                       begin
                                         TmpRef.base := NR_NO;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                           taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         TmpRef.base := taicpu(p).oper[1]^.reg;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(p.previous, p.next, hp1);
                                     p.free;
                                     p := tai(hp1.next);
                                   end
                               end;
                            9: begin
                               {imul 9, reg1, reg2 to
                                  lea (reg1,reg1,8), reg2
                                imul 9, reg1 to
                                  lea (reg1,reg1,8), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 8;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            10: begin
                               {imul 10, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                  add reg2, reg2
                                imul 10, reg1 to
                                  lea (reg1,reg1,4), reg1
                                  add reg1, reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { second instruction (add), inserted after p }
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p, p.next, hp1);
                                      { first instruction (lea), replaces p }
                                      TmpRef.base := taicpu(p).oper[1]^.reg;
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 4;
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                            12: begin
                               {imul 12, reg1, reg2 (reg1 <> reg2) to
                                  lea (,reg1,4), reg2
                                  lea (reg2,reg1,8), reg2
                                imul 12, reg1 (or reg1 = reg2) to
                                  lea (reg1,reg1,2), reg1
                                  lea (,reg1,4), reg1}
                                  if (current_settings.optimizecputype <= cpu_386)
                                    then
                                    begin
                                      { second instruction, inserted after p }
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if distinctDest then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p, p.next, hp1);
                                      { first instruction, replaces p }
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if distinctDest then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl     const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl     const2, %reg
                          with const1 > const2 }
                        begin
                          taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          { mask that clears the low const2 bits }
                          l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                            S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                          end;
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl     const2, %reg
                          with const1 < const2 }
                        begin
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          { mask that clears the low const1 bits }
                          l := (1 shl (taicpu(p).oper[0]^.val))-1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(p).loadConst(0,l Xor $ff);
                            S_W: taicpu(p).loadConst(0,l Xor $ffff);
                          end;
                        end
                      else
                        { shr/sar const1, %reg
                          shl     const2, %reg
                          with const1 = const2 }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                            end;
                            { the shl is no longer needed }
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                   { temporarily change this to 'mov reg,0' to make it easier }
                   { for the CSE. Will be changed back in pass 2              }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  { Starting after hp, skips every following item whose type is in SkipInstr
    or is a label or an alignment.

    Returns true and stores the first item that is not skipped in hp2.  If the
    list ends before such an item is found, returns false and hp2 is set to
    the last item that was reached.  hp is passed by value, so the same
    variable may be used for both arguments. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  var
    candidate: tai;
  begin
    candidate := tai(hp.next);
    while assigned(candidate) and
          (candidate.typ in SkipInstr + [ait_label,ait_align]) do
      begin
        hp := candidate;
        candidate := tai(hp.next);
      end;
    Result := assigned(candidate);
    if Result then
      hp2 := candidate
    else
      hp2 := hp;
  end;
  674. { First pass of peephole optimizations }
  675. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging aid: writes the line 'Ok' to standard output and always
    returns True. }
  function WriteOk : Boolean;
  begin
    Result:=True;
    writeln('Ok');
  end;
  681. var
  682. l : longint;
  683. p,hp1,hp2 : tai;
  684. hp3,hp4: tai;
  685. v:aint;
  686. TmpRef: TReference;
  687. TmpBool1, TmpBool2: Boolean;
  688. function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  689. {traces sucessive jumps to their final destination and sets it, e.g.
  690. je l1 je l3
  691. <code> <code>
  692. l1: becomes l1:
  693. je l2 je l3
  694. <code> <code>
  695. l2: l2:
  696. jmp l3 jmp l3
  697. the level parameter denotes how deeep we have already followed the jump,
  698. to avoid endless loops with constructs such as "l5: ; jmp l5" }
  699. var p1, p2: tai;
  700. l: tasmlabel;
  701. function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
  702. begin
  703. FindAnyLabel := false;
  704. while assigned(hp.next) and
  705. (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
  706. hp := tai(hp.next);
  707. if assigned(hp.next) and
  708. (tai(hp.next).typ = ait_label) then
  709. begin
  710. FindAnyLabel := true;
  711. l := tai_label(hp.next).labsym;
  712. end
  713. end;
  714. begin
  715. GetfinalDestination := false;
  716. if level > 20 then
  717. exit;
  718. p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
  719. if assigned(p1) then
  720. begin
  721. SkipLabels(p1,p1);
  722. if (tai(p1).typ = ait_instruction) and
  723. (taicpu(p1).is_jmp) then
  724. if { the next instruction after the label where the jump hp arrives}
  725. { is unconditional or of the same type as hp, so continue }
  726. (taicpu(p1).condition in [C_None,hp.condition]) or
  727. { the next instruction after the label where the jump hp arrives}
  728. { is the opposite of hp (so this one is never taken), but after }
  729. { that one there is a branch that will be taken, so perform a }
  730. { little hack: set p1 equal to this instruction (that's what the}
  731. { last SkipLabels is for, only works with short bool evaluation)}
  732. ((taicpu(p1).condition = inverse_cond(hp.condition)) and
  733. SkipLabels(p1,p2) and
  734. (p2.typ = ait_instruction) and
  735. (taicpu(p2).is_jmp) and
  736. (taicpu(p2).condition in [C_None,hp.condition]) and
  737. SkipLabels(p1,p1)) then
  738. begin
  739. { quick check for loops of the form "l5: ; jmp l5 }
  740. if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
  741. tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
  742. exit;
  743. if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
  744. exit;
  745. tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
  746. hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
  747. tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
  748. end
  749. else
  750. if (taicpu(p1).condition = inverse_cond(hp.condition)) then
  751. if not FindAnyLabel(p1,l) then
  752. begin
  753. {$ifdef finaldestdebug}
  754. insertllitem(asml,p1,p1.next,tai_comment.Create(
  755. strpnew('previous label inserted'))));
  756. {$endif finaldestdebug}
  757. current_asmdata.getjumplabel(l);
  758. insertllitem(p1,p1.next,tai_label.Create(l));
  759. tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
  760. hp.oper[0]^.ref^.symbol := l;
  761. l.increfs;
  762. { this won't work, since the new label isn't in the labeltable }
  763. { so it will fail the rangecheck. Labeltable should become a }
  764. { hashtable to support this: }
  765. { GetFinalDestination(asml, hp); }
  766. end
  767. else
  768. begin
  769. {$ifdef finaldestdebug}
  770. insertllitem(asml,p1,p1.next,tai_comment.Create(
  771. strpnew('next label reused'))));
  772. {$endif finaldestdebug}
  773. l.increfs;
  774. hp.oper[0]^.ref^.symbol := l;
  775. if not GetFinalDestination(asml, hp,succ(level)) then
  776. exit;
  777. end;
  778. end;
  779. GetFinalDestination := true;
  780. end;
  781. function DoSubAddOpt(var p: tai): Boolean;
  782. begin
  783. DoSubAddOpt := False;
  784. if GetLastInstruction(p, hp1) and
  785. (hp1.typ = ait_instruction) and
  786. (taicpu(hp1).opsize = taicpu(p).opsize) then
  787. case taicpu(hp1).opcode Of
  788. A_DEC:
  789. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  790. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  791. begin
  792. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  793. asml.remove(hp1);
  794. hp1.free;
  795. end;
  796. A_SUB:
  797. if (taicpu(hp1).oper[0]^.typ = top_const) and
  798. (taicpu(hp1).oper[1]^.typ = top_reg) and
  799. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  800. begin
  801. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  802. asml.remove(hp1);
  803. hp1.free;
  804. end;
  805. A_ADD:
  806. if (taicpu(hp1).oper[0]^.typ = top_const) and
  807. (taicpu(hp1).oper[1]^.typ = top_reg) and
  808. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  809. begin
  810. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  811. asml.remove(hp1);
  812. hp1.free;
  813. if (taicpu(p).oper[0]^.val = 0) then
  814. begin
  815. hp1 := tai(p.next);
  816. asml.remove(p);
  817. p.free;
  818. if not GetLastInstruction(hp1, p) then
  819. p := hp1;
  820. DoSubAddOpt := True;
  821. end
  822. end;
  823. end;
  824. end;
  825. begin
  826. p := BlockStart;
  827. ClearUsedRegs;
  828. while (p <> BlockEnd) Do
  829. begin
  830. UpDateUsedRegs(UsedRegs, tai(p.next));
  831. case p.Typ Of
  832. ait_instruction:
  833. begin
  834. current_filepos:=taicpu(p).fileinfo;
  835. if InsContainsSegRef(taicpu(p)) then
  836. begin
  837. p := tai(p.next);
  838. continue;
  839. end;
  840. { Handle Jmp Optimizations }
  841. if taicpu(p).is_jmp then
  842. begin
  843. {the following if-block removes all code between a jmp and the next label,
  844. because it can never be executed}
  845. if (taicpu(p).opcode = A_JMP) then
  846. begin
  847. hp2:=p;
  848. while GetNextInstruction(hp2, hp1) and
  849. (hp1.typ <> ait_label) do
  850. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  851. begin
  852. { don't kill start/end of assembler block,
  853. no-line-info-start/end etc }
  854. if hp1.typ<>ait_marker then
  855. begin
  856. asml.remove(hp1);
  857. hp1.free;
  858. end
  859. else
  860. hp2:=hp1;
  861. end
  862. else break;
  863. end;
  864. { remove jumps to a label coming right after them }
  865. if GetNextInstruction(p, hp1) then
  866. begin
  867. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  868. { TODO: FIXME removing the first instruction fails}
  869. (p<>blockstart) then
  870. begin
  871. hp2:=tai(hp1.next);
  872. asml.remove(p);
  873. p.free;
  874. p:=hp2;
  875. continue;
  876. end
  877. else
  878. begin
  879. if hp1.typ = ait_label then
  880. SkipLabels(hp1,hp1);
  881. if (tai(hp1).typ=ait_instruction) and
  882. (taicpu(hp1).opcode=A_JMP) and
  883. GetNextInstruction(hp1, hp2) and
  884. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  885. begin
  886. if taicpu(p).opcode=A_Jcc then
  887. begin
  888. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  889. tai_label(hp2).labsym.decrefs;
  890. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  891. { when free'ing hp1, the ref. isn't decresed, so we don't
  892. increase it (FK)
  893. taicpu(p).oper[0]^.ref^.symbol.increfs;
  894. }
  895. asml.remove(hp1);
  896. hp1.free;
  897. GetFinalDestination(asml, taicpu(p),0);
  898. end
  899. else
  900. begin
  901. GetFinalDestination(asml, taicpu(p),0);
  902. p:=tai(p.next);
  903. continue;
  904. end;
  905. end
  906. else
  907. GetFinalDestination(asml, taicpu(p),0);
  908. end;
  909. end;
  910. end
  911. else
  912. { All other optimizes }
  913. begin
  914. for l := 0 to taicpu(p).ops-1 Do
  915. if (taicpu(p).oper[l]^.typ = top_ref) then
  916. With taicpu(p).oper[l]^.ref^ Do
  917. begin
  918. if (base = NR_NO) and
  919. (index <> NR_NO) and
  920. (scalefactor in [0,1]) then
  921. begin
  922. base := index;
  923. index := NR_NO
  924. end
  925. end;
  926. case taicpu(p).opcode Of
  927. A_AND:
  928. if OptPass1And(p) then
  929. continue;
  930. A_CMP:
  931. begin
  932. { cmp register,$8000 neg register
  933. je target --> jo target
  934. .... only if register is deallocated before jump.}
  935. case Taicpu(p).opsize of
  936. S_B: v:=$80;
  937. S_W: v:=$8000;
  938. S_L: v:=aint($80000000);
  939. else
  940. internalerror(2013112905);
  941. end;
  942. if (taicpu(p).oper[0]^.typ=Top_const) and
  943. (taicpu(p).oper[0]^.val=v) and
  944. (Taicpu(p).oper[1]^.typ=top_reg) and
  945. GetNextInstruction(p, hp1) and
  946. (hp1.typ=ait_instruction) and
  947. (taicpu(hp1).opcode=A_Jcc) and
  948. (Taicpu(hp1).condition in [C_E,C_NE]) and
  949. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  950. begin
  951. Taicpu(p).opcode:=A_NEG;
  952. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  953. Taicpu(p).clearop(1);
  954. Taicpu(p).ops:=1;
  955. if Taicpu(hp1).condition=C_E then
  956. Taicpu(hp1).condition:=C_O
  957. else
  958. Taicpu(hp1).condition:=C_NO;
  959. continue;
  960. end;
  961. {
  962. @@2: @@2:
  963. .... ....
  964. cmp operand1,0
  965. jle/jbe @@1
  966. dec operand1 --> sub operand1,1
  967. jmp @@2 jge/jae @@2
  968. @@1: @@1:
  969. ... ....}
  970. if (taicpu(p).oper[0]^.typ = top_const) and
  971. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  972. (taicpu(p).oper[0]^.val = 0) and
  973. GetNextInstruction(p, hp1) and
  974. (hp1.typ = ait_instruction) and
  975. (taicpu(hp1).is_jmp) and
  976. (taicpu(hp1).opcode=A_Jcc) and
  977. (taicpu(hp1).condition in [C_LE,C_BE]) and
  978. GetNextInstruction(hp1,hp2) and
  979. (hp2.typ = ait_instruction) and
  980. (taicpu(hp2).opcode = A_DEC) and
  981. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  982. GetNextInstruction(hp2, hp3) and
  983. (hp3.typ = ait_instruction) and
  984. (taicpu(hp3).is_jmp) and
  985. (taicpu(hp3).opcode = A_JMP) and
  986. GetNextInstruction(hp3, hp4) and
  987. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  988. begin
  989. taicpu(hp2).Opcode := A_SUB;
  990. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  991. taicpu(hp2).loadConst(0,1);
  992. taicpu(hp2).ops:=2;
  993. taicpu(hp3).Opcode := A_Jcc;
  994. case taicpu(hp1).condition of
  995. C_LE: taicpu(hp3).condition := C_GE;
  996. C_BE: taicpu(hp3).condition := C_AE;
  997. end;
  998. asml.remove(p);
  999. asml.remove(hp1);
  1000. p.free;
  1001. hp1.free;
  1002. p := hp2;
  1003. continue;
  1004. end
  1005. end;
  1006. A_FLD:
  1007. begin
  1008. if (taicpu(p).oper[0]^.typ = top_reg) and
  1009. GetNextInstruction(p, hp1) and
  1010. (hp1.typ = Ait_Instruction) and
  1011. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1012. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1013. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  1014. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  1015. { change to
  1016. fld reg fxxx reg,st
  1017. fxxxp st, st1 (hp1)
  1018. Remark: non commutative operations must be reversed!
  1019. }
  1020. begin
  1021. case taicpu(hp1).opcode Of
  1022. A_FMULP,A_FADDP,
  1023. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1024. begin
  1025. case taicpu(hp1).opcode Of
  1026. A_FADDP: taicpu(hp1).opcode := A_FADD;
  1027. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  1028. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  1029. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  1030. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  1031. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  1032. end;
  1033. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  1034. taicpu(hp1).oper[1]^.reg := NR_ST;
  1035. asml.remove(p);
  1036. p.free;
  1037. p := hp1;
  1038. continue;
  1039. end;
  1040. end;
  1041. end
  1042. else
  1043. if (taicpu(p).oper[0]^.typ = top_ref) and
  1044. GetNextInstruction(p, hp2) and
  1045. (hp2.typ = Ait_Instruction) and
  1046. (taicpu(hp2).ops = 2) and
  1047. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1048. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1049. (taicpu(p).opsize in [S_FS, S_FL]) and
  1050. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  1051. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  1052. if GetLastInstruction(p, hp1) and
  1053. (hp1.typ = Ait_Instruction) and
  1054. ((taicpu(hp1).opcode = A_FLD) or
  1055. (taicpu(hp1).opcode = A_FST)) and
  1056. (taicpu(hp1).opsize = taicpu(p).opsize) and
  1057. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1058. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  1059. if ((taicpu(hp2).opcode = A_FMULP) or
  1060. (taicpu(hp2).opcode = A_FADDP)) then
  1061. { change to
  1062. fld/fst mem1 (hp1) fld/fst mem1
  1063. fld mem1 (p) fadd/
  1064. faddp/ fmul st, st
  1065. fmulp st, st1 (hp2) }
  1066. begin
  1067. asml.remove(p);
  1068. p.free;
  1069. p := hp1;
  1070. if (taicpu(hp2).opcode = A_FADDP) then
  1071. taicpu(hp2).opcode := A_FADD
  1072. else
  1073. taicpu(hp2).opcode := A_FMUL;
  1074. taicpu(hp2).oper[1]^.reg := NR_ST;
  1075. end
  1076. else
  1077. { change to
  1078. fld/fst mem1 (hp1) fld/fst mem1
  1079. fld mem1 (p) fld st}
  1080. begin
  1081. taicpu(p).changeopsize(S_FL);
  1082. taicpu(p).loadreg(0,NR_ST);
  1083. end
  1084. else
  1085. begin
  1086. case taicpu(hp2).opcode Of
  1087. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1088. { change to
  1089. fld/fst mem1 (hp1) fld/fst mem1
  1090. fld mem2 (p) fxxx mem2
  1091. fxxxp st, st1 (hp2) }
  1092. begin
  1093. case taicpu(hp2).opcode Of
  1094. A_FADDP: taicpu(p).opcode := A_FADD;
  1095. A_FMULP: taicpu(p).opcode := A_FMUL;
  1096. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1097. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1098. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1099. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1100. end;
  1101. asml.remove(hp2);
  1102. hp2.free;
  1103. end
  1104. end
  1105. end
  1106. end;
  1107. A_FSTP,A_FISTP:
  1108. if doFpuLoadStoreOpt(p) then
  1109. continue;
  1110. A_LEA:
  1111. begin
  1112. {removes seg register prefixes from LEA operations, as they
  1113. don't do anything}
  1114. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1115. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1116. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1117. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1118. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1119. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1120. begin
  1121. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1122. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1123. begin
  1124. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1125. taicpu(p).oper[1]^.reg);
  1126. InsertLLItem(p.previous,p.next, hp1);
  1127. p.free;
  1128. p := hp1;
  1129. continue;
  1130. end
  1131. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1132. begin
  1133. hp1 := tai(p.Next);
  1134. asml.remove(p);
  1135. p.free;
  1136. p := hp1;
  1137. continue;
  1138. end
  1139. { continue to use lea to adjust the stack pointer,
  1140. it is the recommended way, but only if not optimizing for size }
  1141. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1142. (cs_opt_size in current_settings.optimizerswitches) then
  1143. with taicpu(p).oper[0]^.ref^ do
  1144. if (base = taicpu(p).oper[1]^.reg) then
  1145. begin
  1146. l := offset;
  1147. if (l=1) and UseIncDec then
  1148. begin
  1149. taicpu(p).opcode := A_INC;
  1150. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1151. taicpu(p).ops := 1
  1152. end
  1153. else if (l=-1) and UseIncDec then
  1154. begin
  1155. taicpu(p).opcode := A_DEC;
  1156. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1157. taicpu(p).ops := 1;
  1158. end
  1159. else
  1160. begin
  1161. if (l<0) and (l<>-2147483648) then
  1162. begin
  1163. taicpu(p).opcode := A_SUB;
  1164. taicpu(p).loadConst(0,-l);
  1165. end
  1166. else
  1167. begin
  1168. taicpu(p).opcode := A_ADD;
  1169. taicpu(p).loadConst(0,l);
  1170. end;
  1171. end;
  1172. end;
  1173. end
  1174. (*
  1175. This is unsafe, lea doesn't modify the flags but "add"
  1176. does. This breaks webtbs/tw15694.pp. The above
  1177. transformations are also unsafe, but they don't seem to
  1178. be triggered by code that FPC generators (or that at
  1179. least does not occur in the tests...). This needs to be
  1180. fixed by checking for the liveness of the flags register.
  1181. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1182. begin
  1183. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1184. taicpu(p).oper[0]^.ref^.base);
  1185. InsertLLItem(asml,p.previous,p.next, hp1);
  1186. DebugMsg('Peephole Lea2AddBase done',hp1);
  1187. p.free;
  1188. p:=hp1;
  1189. continue;
  1190. end
  1191. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1192. begin
  1193. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1194. taicpu(p).oper[0]^.ref^.index);
  1195. InsertLLItem(asml,p.previous,p.next,hp1);
  1196. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1197. p.free;
  1198. p:=hp1;
  1199. continue;
  1200. end
  1201. *)
  1202. end;
  1203. A_MOV:
  1204. begin
  1205. If OptPass1MOV(p) then
  1206. Continue;
  1207. end;
  1208. A_MOVSX,
  1209. A_MOVZX :
  1210. begin
  1211. if (taicpu(p).oper[1]^.typ = top_reg) and
  1212. GetNextInstruction(p,hp1) and
  1213. (hp1.typ = ait_instruction) and
  1214. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1215. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1216. GetNextInstruction(hp1,hp2) and
  1217. MatchInstruction(hp2,A_MOV,[]) and
  1218. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1219. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1220. (((taicpu(hp1).ops=2) and
  1221. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1222. ((taicpu(hp1).ops=1) and
  1223. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1224. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1225. { change movsX/movzX reg/ref, reg2 }
  1226. { add/sub/or/... reg3/$const, reg2 }
  1227. { mov reg2 reg/ref }
  1228. { to add/sub/or/... reg3/$const, reg/ref }
  1229. begin
  1230. { by example:
  1231. movswl %si,%eax movswl %si,%eax p
  1232. decl %eax addl %edx,%eax hp1
  1233. movw %ax,%si movw %ax,%si hp2
  1234. ->
  1235. movswl %si,%eax movswl %si,%eax p
  1236. decw %eax addw %edx,%eax hp1
  1237. movw %ax,%si movw %ax,%si hp2
  1238. }
  1239. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1240. {
  1241. ->
  1242. movswl %si,%eax movswl %si,%eax p
  1243. decw %si addw %dx,%si hp1
  1244. movw %ax,%si movw %ax,%si hp2
  1245. }
  1246. case taicpu(hp1).ops of
  1247. 1:
  1248. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1249. 2:
  1250. begin
  1251. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1252. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1253. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1254. end;
  1255. else
  1256. internalerror(2008042701);
  1257. end;
  1258. {
  1259. ->
  1260. decw %si addw %dx,%si p
  1261. }
  1262. asml.remove(p);
  1263. asml.remove(hp2);
  1264. p.free;
  1265. hp2.free;
  1266. p := hp1
  1267. end
  1268. { removes superfluous And's after movzx's }
  1269. else if taicpu(p).opcode=A_MOVZX then
  1270. begin
  1271. if (taicpu(p).oper[1]^.typ = top_reg) and
  1272. GetNextInstruction(p, hp1) and
  1273. (tai(hp1).typ = ait_instruction) and
  1274. (taicpu(hp1).opcode = A_AND) and
  1275. (taicpu(hp1).oper[0]^.typ = top_const) and
  1276. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1277. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1278. case taicpu(p).opsize Of
  1279. S_BL, S_BW:
  1280. if (taicpu(hp1).oper[0]^.val = $ff) then
  1281. begin
  1282. asml.remove(hp1);
  1283. hp1.free;
  1284. end;
  1285. S_WL:
  1286. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1287. begin
  1288. asml.remove(hp1);
  1289. hp1.free;
  1290. end;
  1291. end;
  1292. {changes some movzx constructs to faster synonims (all examples
  1293. are given with eax/ax, but are also valid for other registers)}
  1294. if (taicpu(p).oper[1]^.typ = top_reg) then
  1295. if (taicpu(p).oper[0]^.typ = top_reg) then
  1296. case taicpu(p).opsize of
  1297. S_BW:
  1298. begin
  1299. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1300. not(cs_opt_size in current_settings.optimizerswitches) then
  1301. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1302. begin
  1303. taicpu(p).opcode := A_AND;
  1304. taicpu(p).changeopsize(S_W);
  1305. taicpu(p).loadConst(0,$ff);
  1306. end
  1307. else if GetNextInstruction(p, hp1) and
  1308. (tai(hp1).typ = ait_instruction) and
  1309. (taicpu(hp1).opcode = A_AND) and
  1310. (taicpu(hp1).oper[0]^.typ = top_const) and
  1311. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1312. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1313. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1314. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1315. begin
  1316. taicpu(p).opcode := A_MOV;
  1317. taicpu(p).changeopsize(S_W);
  1318. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1319. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1320. end;
  1321. end;
  1322. S_BL:
  1323. begin
  1324. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1325. not(cs_opt_size in current_settings.optimizerswitches) then
  1326. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1327. begin
  1328. taicpu(p).opcode := A_AND;
  1329. taicpu(p).changeopsize(S_L);
  1330. taicpu(p).loadConst(0,$ff)
  1331. end
  1332. else if GetNextInstruction(p, hp1) and
  1333. (tai(hp1).typ = ait_instruction) and
  1334. (taicpu(hp1).opcode = A_AND) and
  1335. (taicpu(hp1).oper[0]^.typ = top_const) and
  1336. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1337. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1338. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1339. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1340. begin
  1341. taicpu(p).opcode := A_MOV;
  1342. taicpu(p).changeopsize(S_L);
  1343. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1344. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1345. end
  1346. end;
  1347. S_WL:
  1348. begin
  1349. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1350. not(cs_opt_size in current_settings.optimizerswitches) then
  1351. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1352. begin
  1353. taicpu(p).opcode := A_AND;
  1354. taicpu(p).changeopsize(S_L);
  1355. taicpu(p).loadConst(0,$ffff);
  1356. end
  1357. else if GetNextInstruction(p, hp1) and
  1358. (tai(hp1).typ = ait_instruction) and
  1359. (taicpu(hp1).opcode = A_AND) and
  1360. (taicpu(hp1).oper[0]^.typ = top_const) and
  1361. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1362. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1363. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1364. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1365. begin
  1366. taicpu(p).opcode := A_MOV;
  1367. taicpu(p).changeopsize(S_L);
  1368. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1369. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1370. end;
  1371. end;
  1372. end
  1373. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1374. begin
  1375. if GetNextInstruction(p, hp1) and
  1376. (tai(hp1).typ = ait_instruction) and
  1377. (taicpu(hp1).opcode = A_AND) and
  1378. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1379. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1380. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1381. begin
  1382. taicpu(p).opcode := A_MOV;
  1383. case taicpu(p).opsize Of
  1384. S_BL:
  1385. begin
  1386. taicpu(p).changeopsize(S_L);
  1387. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1388. end;
  1389. S_WL:
  1390. begin
  1391. taicpu(p).changeopsize(S_L);
  1392. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1393. end;
  1394. S_BW:
  1395. begin
  1396. taicpu(p).changeopsize(S_W);
  1397. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1398. end;
  1399. end;
  1400. end;
  1401. end;
  1402. end;
  1403. end;
  1404. (* should not be generated anymore by the current code generator
  1405. A_POP:
  1406. begin
  1407. if target_info.system=system_i386_go32v2 then
  1408. begin
  1409. { Transform a series of pop/pop/pop/push/push/push to }
  1410. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1411. { because I'm not sure whether they can cope with }
  1412. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1413. { such a problem when using esp as frame pointer (JM) }
  1414. if (taicpu(p).oper[0]^.typ = top_reg) then
  1415. begin
  1416. hp1 := p;
  1417. hp2 := p;
  1418. l := 0;
  1419. while getNextInstruction(hp1,hp1) and
  1420. (hp1.typ = ait_instruction) and
  1421. (taicpu(hp1).opcode = A_POP) and
  1422. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1423. begin
  1424. hp2 := hp1;
  1425. inc(l,4);
  1426. end;
  1427. getLastInstruction(p,hp3);
  1428. l1 := 0;
  1429. while (hp2 <> hp3) and
  1430. assigned(hp1) and
  1431. (hp1.typ = ait_instruction) and
  1432. (taicpu(hp1).opcode = A_PUSH) and
  1433. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1434. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1435. begin
  1436. { change it to a two op operation }
  1437. taicpu(hp2).oper[1]^.typ:=top_none;
  1438. taicpu(hp2).ops:=2;
  1439. taicpu(hp2).opcode := A_MOV;
  1440. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1441. reference_reset(tmpref);
  1442. tmpRef.base.enum:=R_INTREGISTER;
  1443. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1444. convert_register_to_enum(tmpref.base);
  1445. tmpRef.offset := l;
  1446. taicpu(hp2).loadRef(0,tmpRef);
  1447. hp4 := hp1;
  1448. getNextInstruction(hp1,hp1);
  1449. asml.remove(hp4);
  1450. hp4.free;
  1451. getLastInstruction(hp2,hp2);
  1452. dec(l,4);
  1453. inc(l1);
  1454. end;
  1455. if l <> -4 then
  1456. begin
  1457. inc(l,4);
  1458. for l1 := l1 downto 1 do
  1459. begin
  1460. getNextInstruction(hp2,hp2);
  1461. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1462. end
  1463. end
  1464. end
  1465. end
  1466. else
  1467. begin
  1468. if (taicpu(p).oper[0]^.typ = top_reg) and
  1469. GetNextInstruction(p, hp1) and
  1470. (tai(hp1).typ=ait_instruction) and
  1471. (taicpu(hp1).opcode=A_PUSH) and
  1472. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1473. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1474. begin
  1475. { change it to a two op operation }
  1476. taicpu(p).oper[1]^.typ:=top_none;
  1477. taicpu(p).ops:=2;
  1478. taicpu(p).opcode := A_MOV;
  1479. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1480. reference_reset(tmpref);
  1481. TmpRef.base.enum := R_ESP;
  1482. taicpu(p).loadRef(0,TmpRef);
  1483. asml.remove(hp1);
  1484. hp1.free;
  1485. end;
  1486. end;
  1487. end;
  1488. *)
  1489. A_PUSH:
  1490. begin
  1491. if (taicpu(p).opsize = S_W) and
  1492. (taicpu(p).oper[0]^.typ = Top_Const) and
  1493. GetNextInstruction(p, hp1) and
  1494. (tai(hp1).typ = ait_instruction) and
  1495. (taicpu(hp1).opcode = A_PUSH) and
  1496. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1497. (taicpu(hp1).opsize = S_W) then
  1498. begin
  1499. taicpu(p).changeopsize(S_L);
  1500. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1501. asml.remove(hp1);
  1502. hp1.free;
  1503. end;
  1504. end;
  1505. A_SHL, A_SAL:
  1506. begin
  1507. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1508. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1509. (taicpu(p).opsize = S_L) and
  1510. (taicpu(p).oper[0]^.val <= 3) then
  1511. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1512. begin
  1513. TmpBool1 := True; {should we check the next instruction?}
  1514. TmpBool2 := False; {have we found an add/sub which could be
  1515. integrated in the lea?}
  1516. reference_reset(tmpref,2,[]);
  1517. TmpRef.index := taicpu(p).oper[1]^.reg;
  1518. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1519. while TmpBool1 and
  1520. GetNextInstruction(p, hp1) and
  1521. (tai(hp1).typ = ait_instruction) and
  1522. ((((taicpu(hp1).opcode = A_ADD) or
  1523. (taicpu(hp1).opcode = A_SUB)) and
  1524. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1525. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1526. (((taicpu(hp1).opcode = A_INC) or
  1527. (taicpu(hp1).opcode = A_DEC)) and
  1528. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1529. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1530. (not GetNextInstruction(hp1,hp2) or
  1531. not instrReadsFlags(hp2)) Do
  1532. begin
  1533. TmpBool1 := False;
  1534. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1535. begin
  1536. TmpBool1 := True;
  1537. TmpBool2 := True;
  1538. case taicpu(hp1).opcode of
  1539. A_ADD:
  1540. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1541. A_SUB:
  1542. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1543. end;
  1544. asml.remove(hp1);
  1545. hp1.free;
  1546. end
  1547. else
  1548. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1549. (((taicpu(hp1).opcode = A_ADD) and
  1550. (TmpRef.base = NR_NO)) or
  1551. (taicpu(hp1).opcode = A_INC) or
  1552. (taicpu(hp1).opcode = A_DEC)) then
  1553. begin
  1554. TmpBool1 := True;
  1555. TmpBool2 := True;
  1556. case taicpu(hp1).opcode of
  1557. A_ADD:
  1558. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1559. A_INC:
  1560. inc(TmpRef.offset);
  1561. A_DEC:
  1562. dec(TmpRef.offset);
  1563. end;
  1564. asml.remove(hp1);
  1565. hp1.free;
  1566. end;
  1567. end;
  1568. if TmpBool2 or
  1569. ((current_settings.optimizecputype < cpu_Pentium2) and
  1570. (taicpu(p).oper[0]^.val <= 3) and
  1571. not(cs_opt_size in current_settings.optimizerswitches)) then
  1572. begin
  1573. if not(TmpBool2) and
  1574. (taicpu(p).oper[0]^.val = 1) then
  1575. begin
  1576. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1577. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1578. end
  1579. else
  1580. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1581. taicpu(p).oper[1]^.reg);
  1582. InsertLLItem(p.previous, p.next, hp1);
  1583. p.free;
  1584. p := hp1;
  1585. end;
  1586. end
  1587. else
  1588. if (current_settings.optimizecputype < cpu_Pentium2) and
  1589. (taicpu(p).oper[0]^.typ = top_const) and
  1590. (taicpu(p).oper[1]^.typ = top_reg) then
  1591. if (taicpu(p).oper[0]^.val = 1) then
  1592. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1593. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1594. (unlike shl, which is only Tairable in the U pipe)}
  1595. begin
  1596. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1597. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1598. InsertLLItem(p.previous, p.next, hp1);
  1599. p.free;
  1600. p := hp1;
  1601. end
  1602. else if (taicpu(p).opsize = S_L) and
  1603. (taicpu(p).oper[0]^.val<= 3) then
  1604. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1605. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1606. begin
  1607. reference_reset(tmpref,2,[]);
  1608. TmpRef.index := taicpu(p).oper[1]^.reg;
  1609. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1610. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1611. InsertLLItem(p.previous, p.next, hp1);
  1612. p.free;
  1613. p := hp1;
  1614. end
  1615. end;
  1616. A_SETcc :
  1617. { changes
  1618. setcc (funcres) setcc reg
  1619. movb (funcres), reg to leave/ret
  1620. leave/ret }
  1621. begin
  1622. if (taicpu(p).oper[0]^.typ = top_ref) and
  1623. GetNextInstruction(p, hp1) and
  1624. GetNextInstruction(hp1, hp2) and
  1625. IsExitCode(hp2) and
  1626. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1627. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1628. not(assigned(current_procinfo.procdef.funcretsym) and
  1629. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1630. (hp1.typ = ait_instruction) and
  1631. (taicpu(hp1).opcode = A_MOV) and
  1632. (taicpu(hp1).opsize = S_B) and
  1633. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1634. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1635. begin
  1636. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1637. asml.remove(hp1);
  1638. hp1.free;
  1639. end
  1640. end;
  1641. A_SUB:
  1642. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1643. { * change "sub/add const1, reg" or "dec reg" followed by
  1644. "sub const2, reg" to one "sub ..., reg" }
  1645. begin
  1646. if (taicpu(p).oper[0]^.typ = top_const) and
  1647. (taicpu(p).oper[1]^.typ = top_reg) then
  1648. if (taicpu(p).oper[0]^.val = 2) and
  1649. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1650. { Don't do the sub/push optimization if the sub }
  1651. { comes from setting up the stack frame (JM) }
  1652. (not getLastInstruction(p,hp1) or
  1653. (hp1.typ <> ait_instruction) or
  1654. (taicpu(hp1).opcode <> A_MOV) or
  1655. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1656. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1657. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1658. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1659. begin
  1660. hp1 := tai(p.next);
  1661. while Assigned(hp1) and
  1662. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1663. not RegReadByInstruction(NR_ESP,hp1) and
  1664. not RegModifiedByInstruction(NR_ESP,hp1) do
  1665. hp1 := tai(hp1.next);
  1666. if Assigned(hp1) and
  1667. (tai(hp1).typ = ait_instruction) and
  1668. (taicpu(hp1).opcode = A_PUSH) and
  1669. (taicpu(hp1).opsize = S_W) then
  1670. begin
  1671. taicpu(hp1).changeopsize(S_L);
  1672. if taicpu(hp1).oper[0]^.typ=top_reg then
  1673. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1674. hp1 := tai(p.next);
  1675. asml.remove(p);
  1676. p.free;
  1677. p := hp1;
  1678. continue
  1679. end;
  1680. if DoSubAddOpt(p) then
  1681. continue;
  1682. end
  1683. else if DoSubAddOpt(p) then
  1684. continue
  1685. end;
  1686. A_VMOVAPS,
  1687. A_VMOVAPD:
  1688. if OptPass1VMOVAP(p) then
  1689. continue;
  1690. A_VDIVSD,
  1691. A_VDIVSS,
  1692. A_VSUBSD,
  1693. A_VSUBSS,
  1694. A_VMULSD,
  1695. A_VMULSS,
  1696. A_VADDSD,
  1697. A_VADDSS:
  1698. if OptPass1VOP(p) then
  1699. continue;
  1700. end;
  1701. end; { if is_jmp }
  1702. end;
  1703. end;
  1704. updateUsedRegs(UsedRegs,p);
  1705. p:=tai(p.next);
  1706. end;
  1707. end;
  { Second peephole pass over the instructions from BlockStart up to (but not
    including) BlockEnd.

    Patterns handled here:
      * Jcc over a single INC/DEC          -> (CMC +) ADC/SBB operand,0
      * Jcc over a short run of reg->reg MOVs, optionally followed by a
        "jmp yyy / xxx: / <movs> / yyy:" else-part -> CMOVcc (only when the
        selected cpu type has CPUX86_HAS_CMOV)
      * FSTP/FISTP, IMUL, MOV              -> delegated to DoFpuLoadStoreOpt,
        OptPass2Imul and OptPass2MOV
      * JMP to a label followed by RET     -> RET

    Instructions for which InsContainsSegRef is true are left untouched. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;

{$ifdef DEBUG_AOPTCPU}
    { Debug builds: insert s as an assembler comment in front of p }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
{$else DEBUG_AOPTCPU}
    { Non-debug builds: no-op }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
{$endif DEBUG_AOPTCPU}

    { True if p is a 16 or 32 bit "mov reg,reg" and therefore may be turned
      into a CMOVcc }
    function CanBeCMOV(p : tai) : boolean;
      begin
         CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
           (taicpu(p).opcode=A_MOV) and
           (taicpu(p).opsize in [S_L,S_W]) and
           ((taicpu(p).oper[0]^.typ = top_reg)
             { we can't use cmov ref,reg because
               ref could be nil and cmov still throws an exception
               if ref=nil but the mov isn't done (FK)
              or ((taicpu(p).oper[0]^.typ = top_ref) and
               (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
             }
           ) and
           (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    { Turns the run of CMOV-able MOVs that starts at first into CMOVcc
      instructions using cond.  The caller must make sure that first itself
      satisfies CanBeCMOV: the first element is converted unconditionally. }
    procedure MovsToCMovs(first : tai; cond : tasmcond);
      begin
        repeat
          taicpu(first).opcode:=A_CMOVcc;
          taicpu(first).condition:=cond;
          GetNextInstruction(first,first);
        until not(assigned(first)) or
          not(CanBeCMOV(first));
      end;

    { Rewrites the one-operand "inc/dec x" in ins into "op $0,x" }
    procedure IncDecToCarryOp(ins : taicpu; op : tasmop);
      begin
        ins.ops:=2;
        ins.loadoper(1,ins.oper[0]^);
        ins.loadconst(0,0);
        ins.opcode:=op;
      end;

    var
      p,hp1,hp2,hp3: tai;
      l : longint;
      condition : tasmcond;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_Jcc:
                    begin
                      {   jb @@1                            cmc
                          inc/dec operand           -->     adc/sbb operand,0
                        @@1:

                        ... and ...

                          jnb @@1
                          inc/dec operand           -->     adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (tasmlabel(taicpu(p).oper[0]^.ref^.symbol)=tai_label(hp2).labsym) then
                        begin
                          case taicpu(hp1).opcode of
                            A_INC:
                              carryadd_opcode:=A_ADC;
                            A_DEC:
                              carryadd_opcode:=A_SBB;
                            else
                              carryadd_opcode:=A_NONE;
                          end;
                          if carryadd_opcode<>A_NONE then
                            begin
                              if taicpu(p).condition in [C_NAE,C_B] then
                                begin
                                  { the inc/dec is executed when the carry is
                                    clear, so the jump becomes a cmc }
                                  taicpu(p).clearop(0);
                                  taicpu(p).ops:=0;
                                  taicpu(p).is_jmp:=false;
                                  taicpu(p).opcode:=A_CMC;
                                  taicpu(p).condition:=C_NONE;
                                  IncDecToCarryOp(taicpu(hp1),carryadd_opcode);
                                  continue;
                                end;
                              if taicpu(p).condition in [C_AE,C_NB] then
                                begin
                                  { the inc/dec is executed when the carry is
                                    set, the jump can simply go }
                                  asml.remove(p);
                                  p.free;
                                  IncDecToCarryOp(taicpu(hp1),carryadd_opcode);
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;

                      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      MovsToCMovs(hp1,condition);
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { <several movs 1> must not be empty: otherwise
                                      the instruction after the jCC is the "jmp yyy"
                                      itself, which would be rewritten to CMOVcc and
                                      p would be left pointing at it after it has
                                      been freed }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { <several movs 2> must not be empty either:
                                        otherwise the item after xxx: is not a MOV
                                        (it may even be the yyy: label) and must not
                                        be treated as a taicpu }
                                      if (l>0) and
                                        assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          MovsToCMovs(hp1,condition);
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          MovsToCMovs(hp1,condition);
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if DoFpuLoadStoreOpt(p) then
                      continue;
                  A_IMUL:
                    if OptPass2Imul(p) then
                      continue;
                  A_JMP:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             ret
                      into
                             ret
                    }
                    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
                      begin
                        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
                        if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and
                           (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
                          begin
                            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                            taicpu(p).opcode:=A_RET;
                            taicpu(p).is_jmp:=false;
                            { copy the (optional) constant operand of the ret }
                            taicpu(p).ops:=taicpu(hp1).ops;
                            case taicpu(hp1).ops of
                              0:
                                taicpu(p).clearop(0);
                              1:
                                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
                              else
                                internalerror(2016041301);
                            end;
                            continue;
                          end;
                      end;
                  A_MOV:
                    if OptPass2MOV(p) then
                      continue;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass over the instructions from BlockStart up to (but not
    including) BlockEnd.

    Patterns handled here:
      * call x; jmp L      -> push L; jmp x      (only when optimizing for
                                                  cpus older than Pentium2
                                                  and not generating PIC)
      * call x; ret        -> jmp x              (only at -O4, not for PIC)
      * cmp $0,%reg        -> test %reg,%reg
      * mov                -> delegated to PostPeepholeOptMov
      * movzbl x,%reg      -> xorl %reg,%reg; movb x,%reg8   (Pentium only)
      * test/or %y,%y or test $-1,%y after an instruction that already sets
        the zero flag for %y -> removed

    Instructions for which InsContainsSegRef is true are left untouched. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { push the jump target as return address and turn
                            the call itself into a jump }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call   procname
                          ret
                        by
                          jmp    procname

                        this should never hurt except when pic is used, not sure
                        how to handle it then

                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        { change "cmp $0, %reg" to "test %reg, %reg" }
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { no advance: p is looked at again, now as a TEST }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    {                                                           }
                    { Changes "movzbl %reg1, %reg2" and "movzbl mem, %reg2" to  }
                    {   "xorl %reg2, %reg2; movb %reg1/mem, %reg2l"             }
                    { for Pentium and PentiumMMX.  The xor is executed first,   }
                    { so the source must not depend on %reg2: a register source }
                    { may not be a subregister of %reg2 and a memory source may }
                    { not use %reg2 as base or index.                           }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) and
                       not(cs_opt_size in current_settings.optimizerswitches) and
                       (current_settings.optimizecputype = cpu_Pentium) and
                       (taicpu(p).opsize = S_BL) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       IsGP32Reg(taicpu(p).oper[1]^.reg) and
                       (
                        ((taicpu(p).oper[0]^.typ = top_reg) and
                         (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg))) or
                        ((taicpu(p).oper[0]^.typ = top_ref) and
                         (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                         (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg))
                       ) then
                      begin
                        { the xor is built while oper[1] still holds the full
                          32 bit register; setsubreg narrows it afterwards }
                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                          taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                        InsertLLItem(p.previous, p, hp1);
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).changeopsize(S_B);
                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                      end;
                  A_TEST, A_OR:
                    { removes the line marked with (x) from the sequence
                        and/or/xor/add/sub/... $x, %y
                        test/or %y, %y | test $-1, %y    (x)
                        j(n)z _Label
                      as the first instruction already adjusts the ZF
                      %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                 { therefore, it's only safe to do this optimization for     }
                                 { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode of
                                    A_DEC, A_INC:
                                      { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                                      begin
                                        case taicpu(hp1).opcode of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadconst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Runs the peephole passes over the whole of asml, one block of "normal"
    instructions at a time; inline assembler blocks (delimited by
    mark_AsmBlockStart / mark_AsmBlockEnd markers) are skipped.

    Without cs_opt_level3 the list is processed once; with it the whole
    process is repeated until the iteration counter exceeds 2.
    PostPeepHoleOpts is only run in the final iteration. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      afterAsm: tai;
      iteration: longint;
      thorough, modified, finalIteration: boolean;
    begin
      thorough := cs_opt_level3 in current_settings.optimizerswitches;
      iteration := 0;
      modified := false;
      repeat
        { "iteration = 4" prevents endless loops }
        finalIteration := not(thorough) or
                          ((iteration > 2) and not(modified)) or
                          (iteration = 4);
        modified := false;

        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if iteration = 0 then
                  begin
                    PrePeepHoleOpts;
                    { the first peephole pass is performed twice in the
                      first iteration only }
                    PeepHoleOptPass1;
                  end;
                PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalIteration then
                  PostPeepHoleOpts;
              end;

            { Continue where we left off, BlockEnd is either the start of
              an assembler block or nil }
            BlockStart := BlockEnd;
            while assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);
                { BlockStart now contains a tai_marker(mark_AsmBlockEnd) }
                if not(GetNextInstruction(BlockStart, afterAsm)) or
                   ((afterAsm.typ = ait_marker) and
                    (tai_marker(afterAsm).Kind = mark_AsmBlockStart)) then
                  { nothing follows, or another assembler block follows
                    directly: move on to it }
                  BlockStart := afterAsm
                else
                  { "normal" instructions follow the assembler block, so
                    prepare the next block for optimization }
                  pass_1;
              end;
          end;
        inc(iteration);
      until finalIteration;
      dfa.free;
    end;
  begin
    { unit initialization: make casmoptimizer refer to the optimizer class
      implemented in this unit }
    casmoptimizer := TCpuAsmOptimizer;
  end.