aoptcpu.pas 109 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { i386 assembler optimizer: derives from TX86AsmOptimizer and overrides its
    optimization entry points with the passes implemented in this unit. }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { overridden optimizer entry points }
    procedure Optimize; override;
    procedure PrePeepHoleOpts; override;
    procedure PeepHoleOptPass1; override;
    procedure PeepHoleOptPass2; override;
    procedure PostPeepHoleOpts; override;
    { handles an fstp/fld or fistp/fild pair on the same memory reference;
      returns true if the caller should "continue" after this optimization }
    function DoFpuLoadStoreOpt(var p : tai) : boolean;
    { returns true if the instruction hp reads the register reg }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    { override that simply forwards to RegReadByInstruction }
    function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Looks at a store-and-pop to memory (fstp/fistp) in p that is directly
    followed by the matching load (fld/fild) of the same operand size from the
    same reference.

    When the operand is extended (S_FX), the instruction after the pair is
    exit code, and the reference is frame-pointer based without an index (and
    not below the function result's offset, if there is a function result),
    both instructions are removed and p is moved to the exit code.

    Returns true if the caller should "continue" after this optimization,
    i.e. when the pair was removed and p was changed. }
    var
      loadIns, exitIns: tai;
      isFloatPair, isIntPair: boolean;
    begin
      Result := false;

      { --- stage 1: recognise "fstp mem; fld mem" / "fistp mem; fild mem" --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, loadIns) then
        exit;
      if loadIns.typ <> ait_instruction then
        exit;
      isFloatPair := (taicpu(loadIns).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP);
      isIntPair := (taicpu(p).opcode = A_FISTP) and (taicpu(loadIns).opcode = A_FILD);
      if not (isFloatPair or isIntPair) then
        exit;
      if taicpu(loadIns).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(loadIns).opsize <> taicpu(p).opsize then
        exit;
      if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(loadIns).oper[0]^.ref^) then
        exit;

      { --- stage 2: the pair may only be dropped for extended values --- }
      { replacing fstp f;fld f is only valid for extended because of rounding;
        for the same reason the pair is not turned into a plain fst/fist for
        the smaller sizes (the store operation rounds) }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(loadIns, exitIns) then
        exit;
      if exitIns.typ <> ait_instruction then
        exit;
      if not IsExitCode(exitIns) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { unlink and destroy both instructions, continue at the exit code }
      asml.remove(p);
      asml.remove(loadIns);
      p.free;
      loadIns.free;
      p := exitIns;
      removeLastDeallocForFuncRes(p);
      Result := true;
    end;
  { Maps a register-specific instruction change (CH_R*, CH_W*, CH_RW*, CH_M*
    for EAX..EDI) to the super register it refers to. Any value above CH_MEDI
    triggers InternalError(2016041901). }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      bias: longint;
    begin
      { each group of eight changes is folded back onto CH_REAX..CH_REDI by
        subtracting the ordinal of the last member of the preceding group }
      if ch <= CH_REDI then
        bias := 0
      else if ch <= CH_WEDI then
        bias := ord(CH_REDI)
      else if ch <= CH_RWEDI then
        bias := ord(CH_WEDI)
      else if ch <= CH_MEDI then
        bias := ord(CH_RWEDI)
      else
        begin
          InternalError(2016041901);
          exit;
        end;
      tch2reg := ch2reg[tinschange(ord(ch) - bias)];
    end;
  { Returns true if reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX. }
  function isgp32reg(reg: TRegister): boolean;
    var
      sup: tsuperregister;
    begin
      Result := false;
      if getregtype(reg) <> R_INTREGISTER then
        exit;
      sup := getsupreg(reg);
      { warnings are silenced around the range comparison, as in the original }
{$push}{$warnings off}
      Result := (sup >= RS_EAX) and (sup <= RS_EBX);
{$pop}
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  { Override that delegates to RegReadByInstruction. }
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
    end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  { Returns true if hp is an instruction that reads reg.

    - anything that is not an instruction: false
    - call: always true
    - imul/idiv/div/mul: handled explicitly (operands plus the implicit
      EAX/EDX usage coded below)
    - everything else: true if reg occurs in a memory reference operand, or
      if the instruction's property table (insprop) lists a read/modify of a
      fixed register, of an operand containing reg, or of the flags. }
    var
      ins: taicpu;
      opIdx, chIdx: longint;
      change: tinschange;
    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      ins := taicpu(hp);
      case ins.opcode of
        A_CALL:
          Result := true;
        A_IMUL:
          case ins.ops of
            1:
              { one-operand form: implicit EAX source }
              Result := (reg = NR_EAX) or RegInOp(reg, ins.oper[0]^);
            2,3:
              Result := reginop(reg, ins.oper[0]^) or
                        reginop(reg, ins.oper[1]^);
          end;
        A_IDIV,A_DIV,A_MUL:
          Result := RegInOp(reg, ins.oper[0]^) or
                    (getsupreg(reg) in [RS_EAX,RS_EDX]);
        else
          begin
            { a register used to form an address is always read }
            for opIdx := 0 to ins.ops-1 do
              if (ins.oper[opIdx]^.typ = top_ref) and
                 RegInRef(reg, ins.oper[opIdx]^.ref^) then
                begin
                  Result := true;
                  exit;
                end;
            { otherwise consult the instruction property table }
            for chIdx := 1 to maxinschanges do
              begin
                change := insprop[ins.opcode].ch[chIdx];
                case change of
                  CH_REAX..CH_REDI,CH_RWEAX..CH_MEDI:
                    Result := getsupreg(reg) = tch2reg(change);
                  CH_RWOP1,CH_ROP1,CH_MOP1:
                    Result := reginop(reg, ins.oper[0]^);
                  Ch_RWOP2,Ch_ROP2,Ch_MOP2:
                    Result := reginop(reg, ins.oper[1]^);
                  Ch_RWOP3,Ch_ROP3,Ch_MOP3:
                    Result := reginop(reg, ins.oper[2]^);
                  Ch_RFlags,Ch_RWFlags:
                    Result := reg = NR_DEFAULTFLAGS;
                end;
                if Result then
                  exit;
              end;
          end;
      end;
    end;
  { Returns true if any operand slot of p (0..opercnt-1) is a memory reference
    whose segment register is set. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      idx: longint;
    begin
      Result := false;
      for idx := 0 to p.opercnt-1 do
        if (p.oper[idx]^.typ = top_ref) and
           (p.oper[idx]^.ref^.segment <> NR_NO) then
          begin
            Result := true;
            exit;
          end;
    end;
  function InstrReadsFlags(p: tai): boolean;
  { Returns true if p is a label, or an instruction whose property table
    contains Ch_RFlags, Ch_RWFlags or Ch_All; false for everything else. }
    var
      chIdx: longint;
    begin
      Result := false;
      if p.typ = ait_label then
        Result := true
      else if p.typ = ait_instruction then
        for chIdx := 1 to maxinschanges do
          if InsProp[taicpu(p).opcode].Ch[chIdx] in [Ch_RFlags,Ch_RWFlags,Ch_All] then
            begin
              Result := true;
              break;
            end;
    end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  { Pre-pass over the instructions from BlockStart up to (not including)
    BlockEnd. Instructions that have a memory operand with a segment set are
    skipped. The following rewrites are applied:

    * imul $const, reg [, reg2] (32 bit):
        - const = 1: the instruction is removed (2 operands) or turned into
          "mov reg, reg2" (3 operands);
        - const in 3, 5, 9: replaced by a single lea;
        - const in 6, 10, 12: replaced by a two-instruction lea/add sequence,
          but only if current_settings.optimizecputype <= cpu_386.
      The lea rewrites are not done when optimizing for size, nor when the
      next instruction is a jo/jno (the replacement does not set the
      overflow flag the way imul does).
    * shr/sar $c1, x directly followed by shl $c2, x (same size, same
      operand): replaced by a shift/and pair, or by a single and if c1 = c2.
    * xor reg, reg: temporarily changed to "mov $0, reg" to make it easier
      for the CSE (changed back in pass 2). }
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                { leave instructions with segment overrides untouched }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          (not(GetNextInstruction(p, hp1)) or
                            {GetNextInstruction(p, hp1) and}
                            not((tai(hp1).typ = ait_instruction) and
                                ((taicpu(hp1).opcode=A_Jcc) and
                                 (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                {imul 3, reg1, reg2 to
                                   lea (reg1,reg1,2), reg2
                                 imul 3, reg1 to
                                   lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                {imul 5, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                 imul 5, reg1 to
                                   lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                {imul 6, reg1, reg2 to
                                   lea (,reg1,2), reg2
                                   lea (reg2,reg1,4), reg2
                                 imul 6, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   add reg1, reg1}
                                 { NOTE(review): the 3-operand sequence assumes
                                   reg1 <> reg2; if both are the same register
                                   the first lea overwrites the source -
                                   confirm whether the code generator can emit
                                   that form. The same holds for imul 12. }
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction first and
                                         insert it after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { fix: the destination must be reg2
                                             (oper[2]); writing to reg1 clobbered
                                             the source and left reg2 = 2*reg1 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { now build the FIRST instruction and put
                                         it in place of p }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       { continue after the two new instructions }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                {imul 9, reg1, reg2 to
                                   lea (reg1,reg1,8), reg2
                                 imul 9, reg1 to
                                   lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                {imul 10, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                   add reg2, reg2
                                 imul 10, reg1 to
                                   lea (reg1,reg1,4), reg1
                                   add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { second instruction (add) goes after p }
                                       if (taicpu(p).ops = 3) then
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                       else
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       InsertLLItem(p, p.next, hp1);
                                       { first instruction (lea) replaces p }
                                       TmpRef.base := taicpu(p).oper[1]^.reg;
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 4;
                                       if (taicpu(p).ops = 3) then
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                       else
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                  end;
                              12: begin
                                {imul 12, reg1, reg2 to
                                   lea (,reg1,4), reg2
                                   lea (reg2,reg1,8), reg2
                                 imul 12, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   lea (,reg1,4), reg1}
                                   if (current_settings.optimizecputype <= cpu_386)
                                     then
                                     begin
                                       { second instruction goes after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 8;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { first instruction replaces p }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           TmpRef.ScaleFactor := 2;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                  end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 > const2 }
                          begin
                            { keep the right shift by the difference, turn the
                              shl into an and that clears the low const2 bits }
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2 }
                          begin
                            { keep the shl by the difference, turn the right
                              shift into an and that clears the low const1 bits }
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2 }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              { a single and is enough; the shl is removed }
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Starting at hp, steps over every following entry whose type is in
    SkipInstr, or is a label or an alignment. If an entry of another kind
    follows, it is returned in hp2 and the result is true. If the list ends
    first, hp2 is set to the last entry reached and the result is false. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      nxt: tai;
    begin
      nxt := tai(hp.next);
      while assigned(nxt) and
            (nxt.typ in SkipInstr + [ait_label,ait_align]) do
        begin
          hp := nxt;
          nxt := tai(hp.next);
        end;
      Result := assigned(nxt);
      if Result then
        hp2 := nxt
      else
        hp2 := hp;
    end;
  538. { First pass of peephole optimizations }
  539. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  540. function WriteOk : Boolean;
  541. begin
  542. writeln('Ok');
  543. Result:=True;
  544. end;
  545. var
  546. l : longint;
  547. p,hp1,hp2 : tai;
  548. hp3,hp4: tai;
  549. v:aint;
  550. TmpRef: TReference;
  551. TmpUsedRegs: TAllUsedRegs;
  552. TmpBool1, TmpBool2: Boolean;
  553. function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  554. {traces sucessive jumps to their final destination and sets it, e.g.
  555. je l1 je l3
  556. <code> <code>
  557. l1: becomes l1:
  558. je l2 je l3
  559. <code> <code>
  560. l2: l2:
  561. jmp l3 jmp l3
  562. the level parameter denotes how deeep we have already followed the jump,
  563. to avoid endless loops with constructs such as "l5: ; jmp l5" }
  564. var p1, p2: tai;
  565. l: tasmlabel;
  566. function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
  567. begin
  568. FindAnyLabel := false;
  569. while assigned(hp.next) and
  570. (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
  571. hp := tai(hp.next);
  572. if assigned(hp.next) and
  573. (tai(hp.next).typ = ait_label) then
  574. begin
  575. FindAnyLabel := true;
  576. l := tai_label(hp.next).labsym;
  577. end
  578. end;
  579. begin
  580. GetfinalDestination := false;
  581. if level > 20 then
  582. exit;
  583. p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
  584. if assigned(p1) then
  585. begin
  586. SkipLabels(p1,p1);
  587. if (tai(p1).typ = ait_instruction) and
  588. (taicpu(p1).is_jmp) then
  589. if { the next instruction after the label where the jump hp arrives}
  590. { is unconditional or of the same type as hp, so continue }
  591. (taicpu(p1).condition in [C_None,hp.condition]) or
  592. { the next instruction after the label where the jump hp arrives}
  593. { is the opposite of hp (so this one is never taken), but after }
  594. { that one there is a branch that will be taken, so perform a }
  595. { little hack: set p1 equal to this instruction (that's what the}
  596. { last SkipLabels is for, only works with short bool evaluation)}
  597. ((taicpu(p1).condition = inverse_cond(hp.condition)) and
  598. SkipLabels(p1,p2) and
  599. (p2.typ = ait_instruction) and
  600. (taicpu(p2).is_jmp) and
  601. (taicpu(p2).condition in [C_None,hp.condition]) and
  602. SkipLabels(p1,p1)) then
  603. begin
  604. { quick check for loops of the form "l5: ; jmp l5 }
  605. if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
  606. tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
  607. exit;
  608. if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
  609. exit;
  610. tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
  611. hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
  612. tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
  613. end
  614. else
  615. if (taicpu(p1).condition = inverse_cond(hp.condition)) then
  616. if not FindAnyLabel(p1,l) then
  617. begin
  618. {$ifdef finaldestdebug}
  619. insertllitem(asml,p1,p1.next,tai_comment.Create(
  620. strpnew('previous label inserted'))));
  621. {$endif finaldestdebug}
  622. current_asmdata.getjumplabel(l);
  623. insertllitem(p1,p1.next,tai_label.Create(l));
  624. tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
  625. hp.oper[0]^.ref^.symbol := l;
  626. l.increfs;
  627. { this won't work, since the new label isn't in the labeltable }
  628. { so it will fail the rangecheck. Labeltable should become a }
  629. { hashtable to support this: }
  630. { GetFinalDestination(asml, hp); }
  631. end
  632. else
  633. begin
  634. {$ifdef finaldestdebug}
  635. insertllitem(asml,p1,p1.next,tai_comment.Create(
  636. strpnew('next label reused'))));
  637. {$endif finaldestdebug}
  638. l.increfs;
  639. hp.oper[0]^.ref^.symbol := l;
  640. if not GetFinalDestination(asml, hp,succ(level)) then
  641. exit;
  642. end;
  643. end;
  644. GetFinalDestination := true;
  645. end;
  646. function DoSubAddOpt(var p: tai): Boolean;
  647. begin
  648. DoSubAddOpt := False;
  649. if GetLastInstruction(p, hp1) and
  650. (hp1.typ = ait_instruction) and
  651. (taicpu(hp1).opsize = taicpu(p).opsize) then
  652. case taicpu(hp1).opcode Of
  653. A_DEC:
  654. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  655. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  656. begin
  657. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  658. asml.remove(hp1);
  659. hp1.free;
  660. end;
  661. A_SUB:
  662. if (taicpu(hp1).oper[0]^.typ = top_const) and
  663. (taicpu(hp1).oper[1]^.typ = top_reg) and
  664. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  665. begin
  666. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  667. asml.remove(hp1);
  668. hp1.free;
  669. end;
  670. A_ADD:
  671. if (taicpu(hp1).oper[0]^.typ = top_const) and
  672. (taicpu(hp1).oper[1]^.typ = top_reg) and
  673. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  674. begin
  675. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  676. asml.remove(hp1);
  677. hp1.free;
  678. if (taicpu(p).oper[0]^.val = 0) then
  679. begin
  680. hp1 := tai(p.next);
  681. asml.remove(p);
  682. p.free;
  683. if not GetLastInstruction(hp1, p) then
  684. p := hp1;
  685. DoSubAddOpt := True;
  686. end
  687. end;
  688. end;
  689. end;
  690. begin
  691. p := BlockStart;
  692. ClearUsedRegs;
  693. while (p <> BlockEnd) Do
  694. begin
  695. UpDateUsedRegs(UsedRegs, tai(p.next));
  696. case p.Typ Of
  697. ait_instruction:
  698. begin
  699. current_filepos:=taicpu(p).fileinfo;
  700. if InsContainsSegRef(taicpu(p)) then
  701. begin
  702. p := tai(p.next);
  703. continue;
  704. end;
  705. { Handle Jmp Optimizations }
  706. if taicpu(p).is_jmp then
  707. begin
  708. {the following if-block removes all code between a jmp and the next label,
  709. because it can never be executed}
  710. if (taicpu(p).opcode = A_JMP) then
  711. begin
  712. hp2:=p;
  713. while GetNextInstruction(hp2, hp1) and
  714. (hp1.typ <> ait_label) do
  715. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  716. begin
  717. { don't kill start/end of assembler block,
  718. no-line-info-start/end etc }
  719. if hp1.typ<>ait_marker then
  720. begin
  721. asml.remove(hp1);
  722. hp1.free;
  723. end
  724. else
  725. hp2:=hp1;
  726. end
  727. else break;
  728. end;
  729. { remove jumps to a label coming right after them }
  730. if GetNextInstruction(p, hp1) then
  731. begin
  732. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  733. { TODO: FIXME removing the first instruction fails}
  734. (p<>blockstart) then
  735. begin
  736. hp2:=tai(hp1.next);
  737. asml.remove(p);
  738. p.free;
  739. p:=hp2;
  740. continue;
  741. end
  742. else
  743. begin
  744. if hp1.typ = ait_label then
  745. SkipLabels(hp1,hp1);
  746. if (tai(hp1).typ=ait_instruction) and
  747. (taicpu(hp1).opcode=A_JMP) and
  748. GetNextInstruction(hp1, hp2) and
  749. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  750. begin
  751. if taicpu(p).opcode=A_Jcc then
  752. begin
  753. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  754. tai_label(hp2).labsym.decrefs;
  755. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  756. { when free'ing hp1, the ref. isn't decreased, so we don't
  757. increase it (FK)
  758. taicpu(p).oper[0]^.ref^.symbol.increfs;
  759. }
  760. asml.remove(hp1);
  761. hp1.free;
  762. GetFinalDestination(asml, taicpu(p),0);
  763. end
  764. else
  765. begin
  766. GetFinalDestination(asml, taicpu(p),0);
  767. p:=tai(p.next);
  768. continue;
  769. end;
  770. end
  771. else
  772. GetFinalDestination(asml, taicpu(p),0);
  773. end;
  774. end;
  775. end
  776. else
  777. { All other optimizes }
  778. begin
  779. for l := 0 to taicpu(p).ops-1 Do
  780. if (taicpu(p).oper[l]^.typ = top_ref) then
  781. With taicpu(p).oper[l]^.ref^ Do
  782. begin
  783. if (base = NR_NO) and
  784. (index <> NR_NO) and
  785. (scalefactor in [0,1]) then
  786. begin
  787. base := index;
  788. index := NR_NO
  789. end
  790. end;
  791. case taicpu(p).opcode Of
  792. A_AND:
  793. begin
  794. if (taicpu(p).oper[0]^.typ = top_const) and
  795. (taicpu(p).oper[1]^.typ = top_reg) and
  796. GetNextInstruction(p, hp1) and
  797. (tai(hp1).typ = ait_instruction) and
  798. (taicpu(hp1).opcode = A_AND) and
  799. (taicpu(hp1).oper[0]^.typ = top_const) and
  800. (taicpu(hp1).oper[1]^.typ = top_reg) and
  801. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  802. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  803. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  804. begin
  805. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  806. asml.remove(p);
  807. p.free;
  808. p:=hp1;
  809. end
  810. else
  811. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  812. jump, but only if it's a conditional jump (PFV) }
  813. if (taicpu(p).oper[1]^.typ = top_reg) and
  814. GetNextInstruction(p, hp1) and
  815. (hp1.typ = ait_instruction) and
  816. (taicpu(hp1).is_jmp) and
  817. (taicpu(hp1).opcode<>A_JMP) and
  818. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  819. taicpu(p).opcode := A_TEST;
  820. end;
  821. A_CMP:
  822. begin
  823. { cmp register,$8000 neg register
  824. je target --> jo target
  825. .... only if register is deallocated before jump.}
  826. case Taicpu(p).opsize of
  827. S_B: v:=$80;
  828. S_W: v:=$8000;
  829. S_L: v:=aint($80000000);
  830. else
  831. internalerror(2013112905);
  832. end;
  833. if (taicpu(p).oper[0]^.typ=Top_const) and
  834. (taicpu(p).oper[0]^.val=v) and
  835. (Taicpu(p).oper[1]^.typ=top_reg) and
  836. GetNextInstruction(p, hp1) and
  837. (hp1.typ=ait_instruction) and
  838. (taicpu(hp1).opcode=A_Jcc) and
  839. (Taicpu(hp1).condition in [C_E,C_NE]) and
  840. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  841. begin
  842. Taicpu(p).opcode:=A_NEG;
  843. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  844. Taicpu(p).clearop(1);
  845. Taicpu(p).ops:=1;
  846. if Taicpu(hp1).condition=C_E then
  847. Taicpu(hp1).condition:=C_O
  848. else
  849. Taicpu(hp1).condition:=C_NO;
  850. continue;
  851. end;
  852. {
  853. @@2: @@2:
  854. .... ....
  855. cmp operand1,0
  856. jle/jbe @@1
  857. dec operand1 --> sub operand1,1
  858. jmp @@2 jge/jae @@2
  859. @@1: @@1:
  860. ... ....}
  861. if (taicpu(p).oper[0]^.typ = top_const) and
  862. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  863. (taicpu(p).oper[0]^.val = 0) and
  864. GetNextInstruction(p, hp1) and
  865. (hp1.typ = ait_instruction) and
  866. (taicpu(hp1).is_jmp) and
  867. (taicpu(hp1).opcode=A_Jcc) and
  868. (taicpu(hp1).condition in [C_LE,C_BE]) and
  869. GetNextInstruction(hp1,hp2) and
  870. (hp2.typ = ait_instruction) and
  871. (taicpu(hp2).opcode = A_DEC) and
  872. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  873. GetNextInstruction(hp2, hp3) and
  874. (hp3.typ = ait_instruction) and
  875. (taicpu(hp3).is_jmp) and
  876. (taicpu(hp3).opcode = A_JMP) and
  877. GetNextInstruction(hp3, hp4) and
  878. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  879. begin
  880. taicpu(hp2).Opcode := A_SUB;
  881. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  882. taicpu(hp2).loadConst(0,1);
  883. taicpu(hp2).ops:=2;
  884. taicpu(hp3).Opcode := A_Jcc;
  885. case taicpu(hp1).condition of
  886. C_LE: taicpu(hp3).condition := C_GE;
  887. C_BE: taicpu(hp3).condition := C_AE;
  888. end;
  889. asml.remove(p);
  890. asml.remove(hp1);
  891. p.free;
  892. hp1.free;
  893. p := hp2;
  894. continue;
  895. end
  896. end;
  897. A_FLD:
  898. begin
  899. if (taicpu(p).oper[0]^.typ = top_reg) and
  900. GetNextInstruction(p, hp1) and
  901. (hp1.typ = Ait_Instruction) and
  902. (taicpu(hp1).oper[0]^.typ = top_reg) and
  903. (taicpu(hp1).oper[1]^.typ = top_reg) and
  904. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  905. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  906. { change to
  907. fld reg fxxx reg,st
  908. fxxxp st, st1 (hp1)
  909. Remark: non commutative operations must be reversed!
  910. }
  911. begin
  912. case taicpu(hp1).opcode Of
  913. A_FMULP,A_FADDP,
  914. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  915. begin
  916. case taicpu(hp1).opcode Of
  917. A_FADDP: taicpu(hp1).opcode := A_FADD;
  918. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  919. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  920. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  921. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  922. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  923. end;
  924. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  925. taicpu(hp1).oper[1]^.reg := NR_ST;
  926. asml.remove(p);
  927. p.free;
  928. p := hp1;
  929. continue;
  930. end;
  931. end;
  932. end
  933. else
  934. if (taicpu(p).oper[0]^.typ = top_ref) and
  935. GetNextInstruction(p, hp2) and
  936. (hp2.typ = Ait_Instruction) and
  937. (taicpu(hp2).ops = 2) and
  938. (taicpu(hp2).oper[0]^.typ = top_reg) and
  939. (taicpu(hp2).oper[1]^.typ = top_reg) and
  940. (taicpu(p).opsize in [S_FS, S_FL]) and
  941. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  942. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  943. if GetLastInstruction(p, hp1) and
  944. (hp1.typ = Ait_Instruction) and
  945. ((taicpu(hp1).opcode = A_FLD) or
  946. (taicpu(hp1).opcode = A_FST)) and
  947. (taicpu(hp1).opsize = taicpu(p).opsize) and
  948. (taicpu(hp1).oper[0]^.typ = top_ref) and
  949. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  950. if ((taicpu(hp2).opcode = A_FMULP) or
  951. (taicpu(hp2).opcode = A_FADDP)) then
  952. { change to
  953. fld/fst mem1 (hp1) fld/fst mem1
  954. fld mem1 (p) fadd/
  955. faddp/ fmul st, st
  956. fmulp st, st1 (hp2) }
  957. begin
  958. asml.remove(p);
  959. p.free;
  960. p := hp1;
  961. if (taicpu(hp2).opcode = A_FADDP) then
  962. taicpu(hp2).opcode := A_FADD
  963. else
  964. taicpu(hp2).opcode := A_FMUL;
  965. taicpu(hp2).oper[1]^.reg := NR_ST;
  966. end
  967. else
  968. { change to
  969. fld/fst mem1 (hp1) fld/fst mem1
  970. fld mem1 (p) fld st}
  971. begin
  972. taicpu(p).changeopsize(S_FL);
  973. taicpu(p).loadreg(0,NR_ST);
  974. end
  975. else
  976. begin
  977. case taicpu(hp2).opcode Of
  978. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  979. { change to
  980. fld/fst mem1 (hp1) fld/fst mem1
  981. fld mem2 (p) fxxx mem2
  982. fxxxp st, st1 (hp2) }
  983. begin
  984. case taicpu(hp2).opcode Of
  985. A_FADDP: taicpu(p).opcode := A_FADD;
  986. A_FMULP: taicpu(p).opcode := A_FMUL;
  987. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  988. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  989. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  990. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  991. end;
  992. asml.remove(hp2);
  993. hp2.free;
  994. end
  995. end
  996. end
  997. end;
  998. A_FSTP,A_FISTP:
  999. if doFpuLoadStoreOpt(p) then
  1000. continue;
  1001. A_LEA:
  1002. begin
  1003. {removes seg register prefixes from LEA operations, as they
  1004. don't do anything}
  1005. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1006. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1007. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1008. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1009. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1010. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1011. begin
  1012. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1013. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1014. begin
  1015. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1016. taicpu(p).oper[1]^.reg);
  1017. InsertLLItem(p.previous,p.next, hp1);
  1018. p.free;
  1019. p := hp1;
  1020. continue;
  1021. end
  1022. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1023. begin
  1024. hp1 := tai(p.Next);
  1025. asml.remove(p);
  1026. p.free;
  1027. p := hp1;
  1028. continue;
  1029. end
  1030. { continue to use lea to adjust the stack pointer,
  1031. it is the recommended way, but only if not optimizing for size }
  1032. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1033. (cs_opt_size in current_settings.optimizerswitches) then
  1034. with taicpu(p).oper[0]^.ref^ do
  1035. if (base = taicpu(p).oper[1]^.reg) then
  1036. begin
  1037. l := offset;
  1038. if (l=1) and UseIncDec then
  1039. begin
  1040. taicpu(p).opcode := A_INC;
  1041. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1042. taicpu(p).ops := 1
  1043. end
  1044. else if (l=-1) and UseIncDec then
  1045. begin
  1046. taicpu(p).opcode := A_DEC;
  1047. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1048. taicpu(p).ops := 1;
  1049. end
  1050. else
  1051. begin
  1052. if (l<0) and (l<>-2147483648) then
  1053. begin
  1054. taicpu(p).opcode := A_SUB;
  1055. taicpu(p).loadConst(0,-l);
  1056. end
  1057. else
  1058. begin
  1059. taicpu(p).opcode := A_ADD;
  1060. taicpu(p).loadConst(0,l);
  1061. end;
  1062. end;
  1063. end;
  1064. end
  1065. (*
  1066. This is unsafe, lea doesn't modify the flags but "add"
  1067. does. This breaks webtbs/tw15694.pp. The above
  1068. transformations are also unsafe, but they don't seem to
  1069. be triggered by code that FPC generates (or that at
  1070. least does not occur in the tests...). This needs to be
  1071. fixed by checking for the liveness of the flags register.
  1072. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1073. begin
  1074. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1075. taicpu(p).oper[0]^.ref^.base);
  1076. InsertLLItem(asml,p.previous,p.next, hp1);
  1077. DebugMsg('Peephole Lea2AddBase done',hp1);
  1078. p.free;
  1079. p:=hp1;
  1080. continue;
  1081. end
  1082. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1083. begin
  1084. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1085. taicpu(p).oper[0]^.ref^.index);
  1086. InsertLLItem(asml,p.previous,p.next,hp1);
  1087. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1088. p.free;
  1089. p:=hp1;
  1090. continue;
  1091. end
  1092. *)
  1093. end;
  1094. A_MOV:
  1095. begin
  1096. If OptPass1MOV(p) then
  1097. Continue;
  1098. end;
  1099. A_MOVSX,
  1100. A_MOVZX :
  1101. begin
  1102. if (taicpu(p).oper[1]^.typ = top_reg) and
  1103. GetNextInstruction(p,hp1) and
  1104. (hp1.typ = ait_instruction) and
  1105. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1106. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1107. GetNextInstruction(hp1,hp2) and
  1108. MatchInstruction(hp2,A_MOV,[]) and
  1109. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1110. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1111. (((taicpu(hp1).ops=2) and
  1112. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1113. ((taicpu(hp1).ops=1) and
  1114. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1115. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1116. { change movsX/movzX reg/ref, reg2 }
  1117. { add/sub/or/... reg3/$const, reg2 }
  1118. { mov reg2 reg/ref }
  1119. { to add/sub/or/... reg3/$const, reg/ref }
  1120. begin
  1121. { by example:
  1122. movswl %si,%eax movswl %si,%eax p
  1123. decl %eax addl %edx,%eax hp1
  1124. movw %ax,%si movw %ax,%si hp2
  1125. ->
  1126. movswl %si,%eax movswl %si,%eax p
  1127. decw %eax addw %edx,%eax hp1
  1128. movw %ax,%si movw %ax,%si hp2
  1129. }
  1130. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1131. {
  1132. ->
  1133. movswl %si,%eax movswl %si,%eax p
  1134. decw %si addw %dx,%si hp1
  1135. movw %ax,%si movw %ax,%si hp2
  1136. }
  1137. case taicpu(hp1).ops of
  1138. 1:
  1139. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1140. 2:
  1141. begin
  1142. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1143. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1144. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1145. end;
  1146. else
  1147. internalerror(2008042701);
  1148. end;
  1149. {
  1150. ->
  1151. decw %si addw %dx,%si p
  1152. }
  1153. asml.remove(p);
  1154. asml.remove(hp2);
  1155. p.free;
  1156. hp2.free;
  1157. p := hp1
  1158. end
  1159. { removes superfluous And's after movzx's }
  1160. else if taicpu(p).opcode=A_MOVZX then
  1161. begin
  1162. if (taicpu(p).oper[1]^.typ = top_reg) and
  1163. GetNextInstruction(p, hp1) and
  1164. (tai(hp1).typ = ait_instruction) and
  1165. (taicpu(hp1).opcode = A_AND) and
  1166. (taicpu(hp1).oper[0]^.typ = top_const) and
  1167. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1168. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1169. case taicpu(p).opsize Of
  1170. S_BL, S_BW:
  1171. if (taicpu(hp1).oper[0]^.val = $ff) then
  1172. begin
  1173. asml.remove(hp1);
  1174. hp1.free;
  1175. end;
  1176. S_WL:
  1177. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1178. begin
  1179. asml.remove(hp1);
  1180. hp1.free;
  1181. end;
  1182. end;
  1183. {changes some movzx constructs to faster synonyms (all examples
  1184. are given with eax/ax, but are also valid for other registers)}
  1185. if (taicpu(p).oper[1]^.typ = top_reg) then
  1186. if (taicpu(p).oper[0]^.typ = top_reg) then
  1187. case taicpu(p).opsize of
  1188. S_BW:
  1189. begin
  1190. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1191. not(cs_opt_size in current_settings.optimizerswitches) then
  1192. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1193. begin
  1194. taicpu(p).opcode := A_AND;
  1195. taicpu(p).changeopsize(S_W);
  1196. taicpu(p).loadConst(0,$ff);
  1197. end
  1198. else if GetNextInstruction(p, hp1) and
  1199. (tai(hp1).typ = ait_instruction) and
  1200. (taicpu(hp1).opcode = A_AND) and
  1201. (taicpu(hp1).oper[0]^.typ = top_const) and
  1202. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1203. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1204. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1205. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1206. begin
  1207. taicpu(p).opcode := A_MOV;
  1208. taicpu(p).changeopsize(S_W);
  1209. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1210. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1211. end;
  1212. end;
  1213. S_BL:
  1214. begin
  1215. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1216. not(cs_opt_size in current_settings.optimizerswitches) then
  1217. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1218. begin
  1219. taicpu(p).opcode := A_AND;
  1220. taicpu(p).changeopsize(S_L);
  1221. taicpu(p).loadConst(0,$ff)
  1222. end
  1223. else if GetNextInstruction(p, hp1) and
  1224. (tai(hp1).typ = ait_instruction) and
  1225. (taicpu(hp1).opcode = A_AND) and
  1226. (taicpu(hp1).oper[0]^.typ = top_const) and
  1227. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1228. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1229. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1230. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1231. begin
  1232. taicpu(p).opcode := A_MOV;
  1233. taicpu(p).changeopsize(S_L);
  1234. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1235. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1236. end
  1237. end;
  1238. S_WL:
  1239. begin
  1240. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1241. not(cs_opt_size in current_settings.optimizerswitches) then
  1242. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1243. begin
  1244. taicpu(p).opcode := A_AND;
  1245. taicpu(p).changeopsize(S_L);
  1246. taicpu(p).loadConst(0,$ffff);
  1247. end
  1248. else if GetNextInstruction(p, hp1) and
  1249. (tai(hp1).typ = ait_instruction) and
  1250. (taicpu(hp1).opcode = A_AND) and
  1251. (taicpu(hp1).oper[0]^.typ = top_const) and
  1252. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1253. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1254. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1255. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1256. begin
  1257. taicpu(p).opcode := A_MOV;
  1258. taicpu(p).changeopsize(S_L);
  1259. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1260. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1261. end;
  1262. end;
  1263. end
  1264. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1265. begin
  1266. if GetNextInstruction(p, hp1) and
  1267. (tai(hp1).typ = ait_instruction) and
  1268. (taicpu(hp1).opcode = A_AND) and
  1269. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1270. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1271. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1272. begin
  1273. taicpu(p).opcode := A_MOV;
  1274. case taicpu(p).opsize Of
  1275. S_BL:
  1276. begin
  1277. taicpu(p).changeopsize(S_L);
  1278. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1279. end;
  1280. S_WL:
  1281. begin
  1282. taicpu(p).changeopsize(S_L);
  1283. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1284. end;
  1285. S_BW:
  1286. begin
  1287. taicpu(p).changeopsize(S_W);
  1288. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1289. end;
  1290. end;
  1291. end;
  1292. end;
  1293. end;
  1294. end;
  1295. (* should not be generated anymore by the current code generator
  1296. A_POP:
  1297. begin
  1298. if target_info.system=system_i386_go32v2 then
  1299. begin
  1300. { Transform a series of pop/pop/pop/push/push/push to }
  1301. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1302. { because I'm not sure whether they can cope with }
  1303. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1304. { such a problem when using esp as frame pointer (JM) }
  1305. if (taicpu(p).oper[0]^.typ = top_reg) then
  1306. begin
  1307. hp1 := p;
  1308. hp2 := p;
  1309. l := 0;
  1310. while getNextInstruction(hp1,hp1) and
  1311. (hp1.typ = ait_instruction) and
  1312. (taicpu(hp1).opcode = A_POP) and
  1313. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1314. begin
  1315. hp2 := hp1;
  1316. inc(l,4);
  1317. end;
  1318. getLastInstruction(p,hp3);
  1319. l1 := 0;
  1320. while (hp2 <> hp3) and
  1321. assigned(hp1) and
  1322. (hp1.typ = ait_instruction) and
  1323. (taicpu(hp1).opcode = A_PUSH) and
  1324. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1325. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1326. begin
  1327. { change it to a two op operation }
  1328. taicpu(hp2).oper[1]^.typ:=top_none;
  1329. taicpu(hp2).ops:=2;
  1330. taicpu(hp2).opcode := A_MOV;
  1331. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1332. reference_reset(tmpref);
  1333. tmpRef.base.enum:=R_INTREGISTER;
  1334. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1335. convert_register_to_enum(tmpref.base);
  1336. tmpRef.offset := l;
  1337. taicpu(hp2).loadRef(0,tmpRef);
  1338. hp4 := hp1;
  1339. getNextInstruction(hp1,hp1);
  1340. asml.remove(hp4);
  1341. hp4.free;
  1342. getLastInstruction(hp2,hp2);
  1343. dec(l,4);
  1344. inc(l1);
  1345. end;
  1346. if l <> -4 then
  1347. begin
  1348. inc(l,4);
  1349. for l1 := l1 downto 1 do
  1350. begin
  1351. getNextInstruction(hp2,hp2);
  1352. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1353. end
  1354. end
  1355. end
  1356. end
  1357. else
  1358. begin
  1359. if (taicpu(p).oper[0]^.typ = top_reg) and
  1360. GetNextInstruction(p, hp1) and
  1361. (tai(hp1).typ=ait_instruction) and
  1362. (taicpu(hp1).opcode=A_PUSH) and
  1363. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1364. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1365. begin
  1366. { change it to a two op operation }
  1367. taicpu(p).oper[1]^.typ:=top_none;
  1368. taicpu(p).ops:=2;
  1369. taicpu(p).opcode := A_MOV;
  1370. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1371. reference_reset(tmpref);
  1372. TmpRef.base.enum := R_ESP;
  1373. taicpu(p).loadRef(0,TmpRef);
  1374. asml.remove(hp1);
  1375. hp1.free;
  1376. end;
  1377. end;
  1378. end;
  1379. *)
  1380. A_PUSH:
  1381. begin
  1382. if (taicpu(p).opsize = S_W) and
  1383. (taicpu(p).oper[0]^.typ = Top_Const) and
  1384. GetNextInstruction(p, hp1) and
  1385. (tai(hp1).typ = ait_instruction) and
  1386. (taicpu(hp1).opcode = A_PUSH) and
  1387. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1388. (taicpu(hp1).opsize = S_W) then
  1389. begin
  1390. taicpu(p).changeopsize(S_L);
  1391. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1392. asml.remove(hp1);
  1393. hp1.free;
  1394. end;
  1395. end;
  1396. A_SHL, A_SAL:
  1397. begin
  1398. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1399. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1400. (taicpu(p).opsize = S_L) and
  1401. (taicpu(p).oper[0]^.val <= 3) then
  1402. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1403. begin
  1404. TmpBool1 := True; {should we check the next instruction?}
  1405. TmpBool2 := False; {have we found an add/sub which could be
  1406. integrated in the lea?}
  1407. reference_reset(tmpref,2);
  1408. TmpRef.index := taicpu(p).oper[1]^.reg;
  1409. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1410. while TmpBool1 and
  1411. GetNextInstruction(p, hp1) and
  1412. (tai(hp1).typ = ait_instruction) and
  1413. ((((taicpu(hp1).opcode = A_ADD) or
  1414. (taicpu(hp1).opcode = A_SUB)) and
  1415. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1416. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1417. (((taicpu(hp1).opcode = A_INC) or
  1418. (taicpu(hp1).opcode = A_DEC)) and
  1419. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1420. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1421. (not GetNextInstruction(hp1,hp2) or
  1422. not instrReadsFlags(hp2)) Do
  1423. begin
  1424. TmpBool1 := False;
  1425. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1426. begin
  1427. TmpBool1 := True;
  1428. TmpBool2 := True;
  1429. case taicpu(hp1).opcode of
  1430. A_ADD:
  1431. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1432. A_SUB:
  1433. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1434. end;
  1435. asml.remove(hp1);
  1436. hp1.free;
  1437. end
  1438. else
  1439. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1440. (((taicpu(hp1).opcode = A_ADD) and
  1441. (TmpRef.base = NR_NO)) or
  1442. (taicpu(hp1).opcode = A_INC) or
  1443. (taicpu(hp1).opcode = A_DEC)) then
  1444. begin
  1445. TmpBool1 := True;
  1446. TmpBool2 := True;
  1447. case taicpu(hp1).opcode of
  1448. A_ADD:
  1449. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1450. A_INC:
  1451. inc(TmpRef.offset);
  1452. A_DEC:
  1453. dec(TmpRef.offset);
  1454. end;
  1455. asml.remove(hp1);
  1456. hp1.free;
  1457. end;
  1458. end;
  1459. if TmpBool2 or
  1460. ((current_settings.optimizecputype < cpu_Pentium2) and
  1461. (taicpu(p).oper[0]^.val <= 3) and
  1462. not(cs_opt_size in current_settings.optimizerswitches)) then
  1463. begin
  1464. if not(TmpBool2) and
  1465. (taicpu(p).oper[0]^.val = 1) then
  1466. begin
  1467. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1468. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1469. end
  1470. else
  1471. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1472. taicpu(p).oper[1]^.reg);
  1473. InsertLLItem(p.previous, p.next, hp1);
  1474. p.free;
  1475. p := hp1;
  1476. end;
  1477. end
  1478. else
  1479. if (current_settings.optimizecputype < cpu_Pentium2) and
  1480. (taicpu(p).oper[0]^.typ = top_const) and
  1481. (taicpu(p).oper[1]^.typ = top_reg) then
  1482. if (taicpu(p).oper[0]^.val = 1) then
  1483. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1484. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1485. (unlike shl, which is only pairable in the U pipe)}
  1486. begin
  1487. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1488. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1489. InsertLLItem(p.previous, p.next, hp1);
  1490. p.free;
  1491. p := hp1;
  1492. end
  1493. else if (taicpu(p).opsize = S_L) and
  1494. (taicpu(p).oper[0]^.val<= 3) then
  1495. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1496. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1497. begin
  1498. reference_reset(tmpref,2);
  1499. TmpRef.index := taicpu(p).oper[1]^.reg;
  1500. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1501. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1502. InsertLLItem(p.previous, p.next, hp1);
  1503. p.free;
  1504. p := hp1;
  1505. end
  1506. end;
  1507. A_SETcc :
  1508. { changes
  1509. setcc (funcres) setcc reg
  1510. movb (funcres), reg to leave/ret
  1511. leave/ret }
  1512. begin
  1513. if (taicpu(p).oper[0]^.typ = top_ref) and
  1514. GetNextInstruction(p, hp1) and
  1515. GetNextInstruction(hp1, hp2) and
  1516. IsExitCode(hp2) and
  1517. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1518. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1519. not(assigned(current_procinfo.procdef.funcretsym) and
  1520. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1521. (hp1.typ = ait_instruction) and
  1522. (taicpu(hp1).opcode = A_MOV) and
  1523. (taicpu(hp1).opsize = S_B) and
  1524. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1525. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1526. begin
  1527. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1528. asml.remove(hp1);
  1529. hp1.free;
  1530. end
  1531. end;
  1532. A_SUB:
  1533. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1534. { * change "sub/add const1, reg" or "dec reg" followed by
  1535. "sub const2, reg" to one "sub ..., reg" }
  1536. begin
  1537. if (taicpu(p).oper[0]^.typ = top_const) and
  1538. (taicpu(p).oper[1]^.typ = top_reg) then
  1539. if (taicpu(p).oper[0]^.val = 2) and
  1540. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1541. { Don't do the sub/push optimization if the sub }
  1542. { comes from setting up the stack frame (JM) }
  1543. (not getLastInstruction(p,hp1) or
  1544. (hp1.typ <> ait_instruction) or
  1545. (taicpu(hp1).opcode <> A_MOV) or
  1546. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1547. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1548. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1549. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1550. begin
  1551. hp1 := tai(p.next);
  1552. while Assigned(hp1) and
  1553. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1554. not RegReadByInstruction(NR_ESP,hp1) and
  1555. not RegModifiedByInstruction(NR_ESP,hp1) do
  1556. hp1 := tai(hp1.next);
  1557. if Assigned(hp1) and
  1558. (tai(hp1).typ = ait_instruction) and
  1559. (taicpu(hp1).opcode = A_PUSH) and
  1560. (taicpu(hp1).opsize = S_W) then
  1561. begin
  1562. taicpu(hp1).changeopsize(S_L);
  1563. if taicpu(hp1).oper[0]^.typ=top_reg then
  1564. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1565. hp1 := tai(p.next);
  1566. asml.remove(p);
  1567. p.free;
  1568. p := hp1;
  1569. continue
  1570. end;
  1571. if DoSubAddOpt(p) then
  1572. continue;
  1573. end
  1574. else if DoSubAddOpt(p) then
  1575. continue
  1576. end;
  1577. A_VMOVAPS,
  1578. A_VMOVAPD:
  1579. if OptPass1VMOVAP(p) then
  1580. continue;
  1581. A_VDIVSD,
  1582. A_VDIVSS,
  1583. A_VSUBSD,
  1584. A_VSUBSS,
  1585. A_VMULSD,
  1586. A_VMULSS,
  1587. A_VADDSD,
  1588. A_VADDSS:
  1589. if OptPass1VOP(p) then
  1590. continue;
  1591. end;
  1592. end; { if is_jmp }
  1593. end;
  1594. end;
  1595. updateUsedRegs(UsedRegs,p);
  1596. p:=tai(p.next);
  1597. end;
  1598. end;
  1599. procedure TCPUAsmOptimizer.PeepHoleOptPass2;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: wraps the text s in a tai_comment and inserts that
    comment into asml directly in front of p. }
  procedure DebugMsg(const s: string;p : tai);
    var
      note : tai_comment;
    begin
      note:=tai_comment.Create(strpnew(s));
      asml.insertbefore(note,p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug variant: an empty inline stub, so call sites do not need
    to be wrapped in conditional compilation themselves. }
  procedure DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Tells whether p is a MOV that the Jcc handling of this pass may turn
    into a CMOVcc.

    The result is true only when all of the following hold:
      - p is assigned and is an ait_instruction,
      - its opcode is A_MOV,
      - its opsize is S_L or S_W,
      - both operands (source oper[0] and destination oper[1]) are
        registers.

    The checks are done one after another, so the operands are never
    inspected unless p is known to be an instruction. }
  function CanBeCMOV(p : tai) : boolean;
    begin
      CanBeCMOV:=false;

      { nothing to look at, or not an instruction at all }
      if not assigned(p) then
        exit;
      if p.typ<>ait_instruction then
        exit;

      { only plain MOVs of the accepted operand sizes qualify }
      if taicpu(p).opcode<>A_MOV then
        exit;
      if not(taicpu(p).opsize in [S_L,S_W]) then
        exit;

      { The source has to be a register. A memory source
        (top_ref with refaddr = addr_no) is deliberately not accepted:
        we can't use cmov ref,reg because ref could be nil and cmov
        still throws an exception if ref=nil but the mov isn't done (FK) }
      if taicpu(p).oper[0]^.typ<>top_reg then
        exit;

      { finally, the destination must be a register as well }
      CanBeCMOV:=(taicpu(p).oper[1]^.typ=top_reg);
    end;
  1625. var
  1626. p,hp1,hp2,hp3: tai;
  1627. l : longint;
  1628. condition : tasmcond;
  1629. TmpUsedRegs: TAllUsedRegs;
  1630. carryadd_opcode: Tasmop;
  1631. begin
  1632. p := BlockStart;
  1633. ClearUsedRegs;
  1634. while (p <> BlockEnd) Do
  1635. begin
  1636. UpdateUsedRegs(UsedRegs, tai(p.next));
  1637. case p.Typ Of
  1638. Ait_Instruction:
  1639. begin
  1640. if InsContainsSegRef(taicpu(p)) then
  1641. begin
  1642. p := tai(p.next);
  1643. continue;
  1644. end;
  1645. case taicpu(p).opcode Of
  1646. A_Jcc:
  1647. begin
  1648. { jb @@1 cmc
  1649. inc/dec operand --> adc/sbb operand,0
  1650. @@1:
  1651. ... and ...
  1652. jnb @@1
  1653. inc/dec operand --> adc/sbb operand,0
  1654. @@1: }
  1655. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1656. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1657. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1658. begin
  1659. carryadd_opcode:=A_NONE;
  1660. if Taicpu(p).condition in [C_NAE,C_B] then
  1661. begin
  1662. if Taicpu(hp1).opcode=A_INC then
  1663. carryadd_opcode:=A_ADC;
  1664. if Taicpu(hp1).opcode=A_DEC then
  1665. carryadd_opcode:=A_SBB;
  1666. if carryadd_opcode<>A_NONE then
  1667. begin
  1668. Taicpu(p).clearop(0);
  1669. Taicpu(p).ops:=0;
  1670. Taicpu(p).is_jmp:=false;
  1671. Taicpu(p).opcode:=A_CMC;
  1672. Taicpu(p).condition:=C_NONE;
  1673. Taicpu(hp1).ops:=2;
  1674. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1675. Taicpu(hp1).loadconst(0,0);
  1676. Taicpu(hp1).opcode:=carryadd_opcode;
  1677. continue;
  1678. end;
  1679. end;
  1680. if Taicpu(p).condition in [C_AE,C_NB] then
  1681. begin
  1682. if Taicpu(hp1).opcode=A_INC then
  1683. carryadd_opcode:=A_ADC;
  1684. if Taicpu(hp1).opcode=A_DEC then
  1685. carryadd_opcode:=A_SBB;
  1686. if carryadd_opcode<>A_NONE then
  1687. begin
  1688. asml.remove(p);
  1689. p.free;
  1690. Taicpu(hp1).ops:=2;
  1691. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1692. Taicpu(hp1).loadconst(0,0);
  1693. Taicpu(hp1).opcode:=carryadd_opcode;
  1694. p:=hp1;
  1695. continue;
  1696. end;
  1697. end;
  1698. end;
  1699. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1700. begin
  1701. { check for
  1702. jCC xxx
  1703. <several movs>
  1704. xxx:
  1705. }
  1706. l:=0;
  1707. GetNextInstruction(p, hp1);
  1708. while assigned(hp1) and
  1709. CanBeCMOV(hp1) and
  1710. { stop on labels }
  1711. not(hp1.typ=ait_label) do
  1712. begin
  1713. inc(l);
  1714. GetNextInstruction(hp1,hp1);
  1715. end;
  1716. if assigned(hp1) then
  1717. begin
  1718. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1719. begin
  1720. if (l<=4) and (l>0) then
  1721. begin
  1722. condition:=inverse_cond(taicpu(p).condition);
  1723. hp2:=p;
  1724. GetNextInstruction(p,hp1);
  1725. p:=hp1;
  1726. repeat
  1727. taicpu(hp1).opcode:=A_CMOVcc;
  1728. taicpu(hp1).condition:=condition;
  1729. GetNextInstruction(hp1,hp1);
  1730. until not(assigned(hp1)) or
  1731. not(CanBeCMOV(hp1));
  1732. { wait with removing else GetNextInstruction could
  1733. ignore the label if it was the only usage in the
  1734. jump moved away }
  1735. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1736. asml.remove(hp2);
  1737. hp2.free;
  1738. continue;
  1739. end;
  1740. end
  1741. else
  1742. begin
  1743. { check further for
  1744. jCC xxx
  1745. <several movs 1>
  1746. jmp yyy
  1747. xxx:
  1748. <several movs 2>
  1749. yyy:
  1750. }
  1751. { hp2 points to jmp yyy }
  1752. hp2:=hp1;
  1753. { skip hp1 to xxx }
  1754. GetNextInstruction(hp1, hp1);
  1755. if assigned(hp2) and
  1756. assigned(hp1) and
  1757. (l<=3) and
  1758. (hp2.typ=ait_instruction) and
  1759. (taicpu(hp2).is_jmp) and
  1760. (taicpu(hp2).condition=C_None) and
  1761. { real label and jump, no further references to the
  1762. label are allowed }
  1763. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  1764. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1765. begin
  1766. l:=0;
  1767. { skip hp1 to <several moves 2> }
  1768. GetNextInstruction(hp1, hp1);
  1769. while assigned(hp1) and
  1770. CanBeCMOV(hp1) do
  1771. begin
  1772. inc(l);
  1773. GetNextInstruction(hp1, hp1);
  1774. end;
  1775. { hp1 points to yyy: }
  1776. if assigned(hp1) and
  1777. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1778. begin
  1779. condition:=inverse_cond(taicpu(p).condition);
  1780. GetNextInstruction(p,hp1);
  1781. hp3:=p;
  1782. p:=hp1;
  1783. repeat
  1784. taicpu(hp1).opcode:=A_CMOVcc;
  1785. taicpu(hp1).condition:=condition;
  1786. GetNextInstruction(hp1,hp1);
  1787. until not(assigned(hp1)) or
  1788. not(CanBeCMOV(hp1));
  1789. { hp2 is still at jmp yyy }
  1790. GetNextInstruction(hp2,hp1);
  1791. { hp1 is now at xxx: (hp2 still points to jmp yyy) }
  1792. condition:=inverse_cond(condition);
  1793. GetNextInstruction(hp1,hp1);
  1794. { hp1 is now at <several movs 2> }
  1795. repeat
  1796. taicpu(hp1).opcode:=A_CMOVcc;
  1797. taicpu(hp1).condition:=condition;
  1798. GetNextInstruction(hp1,hp1);
  1799. until not(assigned(hp1)) or
  1800. not(CanBeCMOV(hp1));
  1801. {
  1802. asml.remove(hp1.next)
  1803. hp1.next.free;
  1804. asml.remove(hp1);
  1805. hp1.free;
  1806. }
  1807. { remove jCC }
  1808. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1809. asml.remove(hp3);
  1810. hp3.free;
  1811. { remove jmp }
  1812. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1813. asml.remove(hp2);
  1814. hp2.free;
  1815. continue;
  1816. end;
  1817. end;
  1818. end;
  1819. end;
  1820. end;
  1821. end;
  1822. A_FSTP,A_FISTP:
  1823. if DoFpuLoadStoreOpt(p) then
  1824. continue;
  1825. A_IMUL:
  1826. begin
  1827. if (taicpu(p).ops >= 2) and
  1828. ((taicpu(p).oper[0]^.typ = top_const) or
  1829. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1830. (taicpu(p).oper[1]^.typ = top_reg) and
  1831. ((taicpu(p).ops = 2) or
  1832. ((taicpu(p).oper[2]^.typ = top_reg) and
  1833. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1834. getLastInstruction(p,hp1) and
  1835. (hp1.typ = ait_instruction) and
  1836. (taicpu(hp1).opcode = A_MOV) and
  1837. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1838. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1839. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1840. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1841. begin
  1842. taicpu(p).ops := 3;
  1843. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1844. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1845. asml.remove(hp1);
  1846. hp1.free;
  1847. end;
  1848. end;
  1849. A_JMP:
  1850. {
  1851. change
  1852. jmp .L1
  1853. ...
  1854. .L1:
  1855. ret
  1856. into
  1857. ret
  1858. }
  1859. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
  1860. begin
  1861. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1862. if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
  1863. begin
  1864. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1865. taicpu(p).opcode:=A_RET;
  1866. taicpu(p).is_jmp:=false;
  1867. taicpu(p).ops:=taicpu(hp1).ops;
  1868. case taicpu(hp1).ops of
  1869. 0:
  1870. taicpu(p).clearop(0);
  1871. 1:
  1872. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1873. else
  1874. internalerror(2016041301);
  1875. end;
  1876. continue;
  1877. end;
  1878. end;
  1879. A_MOV:
  1880. begin
  1881. if (taicpu(p).oper[0]^.typ = top_reg) and
  1882. (taicpu(p).oper[1]^.typ = top_reg) and
  1883. GetNextInstruction(p, hp1) and
  1884. (hp1.typ = ait_Instruction) and
  1885. ((taicpu(hp1).opcode = A_MOV) or
  1886. (taicpu(hp1).opcode = A_MOVZX) or
  1887. (taicpu(hp1).opcode = A_MOVSX)) and
  1888. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1889. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1890. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
  1891. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
  1892. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1893. {mov reg1, reg2
  1894. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1895. begin
  1896. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1897. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1898. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1899. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1900. asml.remove(p);
  1901. p.free;
  1902. p := hp1;
  1903. continue;
  1904. end
  1905. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1906. GetNextInstruction(p,hp1) and
  1907. (hp1.typ = ait_instruction) and
  1908. (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
  1909. ((taicpu(hp1).opcode=A_LEA) and
  1910. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
  1911. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1912. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
  1913. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  1914. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
  1915. )
  1916. )
  1917. ) and
  1918. GetNextInstruction(hp1,hp2) and
  1919. MatchInstruction(hp2,A_MOV,[]) and
  1920. MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1921. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1922. begin
  1923. CopyUsedRegs(TmpUsedRegs);
  1924. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1925. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1926. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
  1927. hp2, TmpUsedRegs))) then
  1928. { change mov (ref), reg }
  1929. { add/sub/or/... reg2/$const, reg }
  1930. { mov reg, (ref) }
  1931. { # release reg }
  1932. { to add/sub/or/... reg2/$const, (ref) }
  1933. begin
  1934. case taicpu(hp1).opcode of
  1935. A_INC,A_DEC,A_NOT,A_NEG:
  1936. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1937. A_LEA:
  1938. begin
  1939. taicpu(hp1).opcode:=A_ADD;
  1940. if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
  1941. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1942. else
  1943. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
  1944. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1945. DebugMsg('Peephole FoldLea done',hp1);
  1946. end
  1947. else
  1948. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1949. end;
  1950. asml.remove(p);
  1951. asml.remove(hp2);
  1952. p.free;
  1953. hp2.free;
  1954. p := hp1
  1955. end;
  1956. ReleaseUsedRegs(TmpUsedRegs);
  1957. end
  1958. end;
  1959. end;
  1960. end;
  1961. end;
  1962. p := tai(p.next)
  1963. end;
  1964. end;
  { Final peephole pass over the list items from BlockStart up to, but not
    including, BlockEnd.  Only ait_instruction items are examined, and
    instructions for which InsContainsSegRef returns true are skipped.
    Rewrites performed, by opcode of the current instruction p:
      A_CALL       - "call x; jmp y" becomes "push y; jmp x" when optimizing
                     for a CPU below cpu_Pentium2 and PIC is off; otherwise,
                     with cs_opt_level4 and PIC off, "call x; ret" (ret without
                     operand) becomes "jmp x"
      A_CMP        - "cmp $0,%reg" becomes "test %reg,%reg"
      A_MOV        - delegated to PostPeepholeOptMov
      A_MOVZX      - "movzbl src,%reg32" becomes "xorl %reg32,%reg32; movb src,%reg8"
                     for cpu_Pentium when neither cs_opt_regvar nor cs_opt_size
                     is set
      A_TEST, A_OR - removes a "test/or %y,%y" or "test $-1,%y" that follows an
                     arithmetic instruction on the same operand and precedes a
                     SETcc/Jcc/CMOVcc; a remaining "test $-1,%reg" is turned
                     into "test %reg,%reg"
    Locals: p is the item being examined; hp1 and hp2 are scratch pointers to
    neighbouring or newly created items; IsTestConstX records whether p is
    "test $-1,x". }
  1965. procedure TCPUAsmOptimizer.PostPeepHoleOpts;
  1966. var
  1967. p,hp1,hp2: tai;
  1968. IsTestConstX: boolean;
  1969. begin
  1970. p := BlockStart;
  1971. ClearUsedRegs;
  1972. while (p <> BlockEnd) Do
  1973. begin
  1974. UpdateUsedRegs(UsedRegs, tai(p.next));
  1975. case p.Typ Of
  1976. Ait_Instruction:
  1977. begin
  { instructions flagged by InsContainsSegRef are left untouched }
  1978. if InsContainsSegRef(taicpu(p)) then
  1979. begin
  1980. p := tai(p.next);
  1981. continue;
  1982. end;
  1983. case taicpu(p).opcode Of
  1984. A_CALL:
  1985. begin
  1986. { don't do this on modern CPUs, this really hurts them due to
  1987. broken call/ret pairing }
  1988. if (current_settings.optimizecputype < cpu_Pentium2) and
  1989. not(cs_create_pic in current_settings.moduleswitches) and
  1990. GetNextInstruction(p, hp1) and
  1991. (hp1.typ = ait_instruction) and
  1992. (taicpu(hp1).opcode = A_JMP) and
  1993. ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
  1994. begin
  { build "push <symbol of the jmp target>" and hand it to InsertLLItem
    together with p.previous and p; then turn the call itself into a jmp and
    drop the now redundant jmp (hp1) }
  1995. hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
  1996. InsertLLItem(p.previous, p, hp2);
  1997. taicpu(p).opcode := A_JMP;
  1998. taicpu(p).is_jmp := true;
  1999. asml.remove(hp1);
  2000. hp1.free;
  2001. end
  2002. { replace
  2003. call procname
  2004. ret
  2005. by
  2006. jmp procname
  2007. this should never hurt except when pic is used, not sure
  2008. how to handle it then
  2009. but do it only on level 4 because it destroys stack back traces
  2010. }
  2011. else if (cs_opt_level4 in current_settings.optimizerswitches) and
  2012. not(cs_create_pic in current_settings.moduleswitches) and
  2013. GetNextInstruction(p, hp1) and
  2014. (hp1.typ = ait_instruction) and
  2015. (taicpu(hp1).opcode = A_RET) and
  2016. (taicpu(hp1).ops=0) then
  2017. begin
  2018. taicpu(p).opcode := A_JMP;
  2019. taicpu(p).is_jmp := true;
  2020. asml.remove(hp1);
  2021. hp1.free;
  2022. end;
  2023. end;
  2024. A_CMP:
  2025. begin
  2026. if (taicpu(p).oper[0]^.typ = top_const) and
  2027. (taicpu(p).oper[0]^.val = 0) and
  2028. (taicpu(p).oper[1]^.typ = top_reg) then
  2029. {change "cmp $0, %reg" to "test %reg, %reg"}
  2030. begin
  2031. taicpu(p).opcode := A_TEST;
  2032. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  { continue without advancing p: the same item, now an A_TEST, is examined
    again by the A_TEST branch on the next loop iteration }
  2033. continue;
  2034. end;
  2035. end;
  2036. A_MOV:
  2037. PostPeepholeOptMov(p);
  2038. A_MOVZX:
  2039. { if register vars are on, it's possible there is code like }
  2040. { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx" }
  2041. { so we can't safely replace the movzx then with xor/mov, }
  2042. { since that would change the flags (JM) }
  2043. if not(cs_opt_regvar in current_settings.optimizerswitches) then
  2044. begin
  2045. if (taicpu(p).oper[1]^.typ = top_reg) then
  2046. if (taicpu(p).oper[0]^.typ = top_reg)
  2047. then
  2048. case taicpu(p).opsize of
  2049. S_BL:
  2050. begin
  2051. if IsGP32Reg(taicpu(p).oper[1]^.reg) and
  2052. not(cs_opt_size in current_settings.optimizerswitches) and
  2053. (current_settings.optimizecputype = cpu_Pentium) then
  2054. {Change "movzbl %reg1, %reg2" to
  2055. "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
  2056. PentiumMMX}
  2057. begin
  2058. hp1 := taicpu.op_reg_reg(A_XOR, S_L,
  2059. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  2060. InsertLLItem(p.previous, p, hp1);
  2061. taicpu(p).opcode := A_MOV;
  2062. taicpu(p).changeopsize(S_B);
  2063. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  2064. end;
  2065. end;
  2066. end
  { NOTE: this else pairs with the inner "oper[0] is top_reg" test, so the
    branch below is still only reached when oper[1] is a register }
  2067. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2068. (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  2069. (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
  2070. not(cs_opt_size in current_settings.optimizerswitches) and
  2071. IsGP32Reg(taicpu(p).oper[1]^.reg) and
  2072. (current_settings.optimizecputype = cpu_Pentium) and
  2073. (taicpu(p).opsize = S_BL) then
  2074. {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
  2075. Pentium and PentiumMMX}
  2076. begin
  2077. hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
  2078. taicpu(p).oper[1]^.reg);
  2079. taicpu(p).opcode := A_MOV;
  2080. taicpu(p).changeopsize(S_B);
  2081. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  2082. InsertLLItem(p.previous, p, hp1);
  2083. end;
  2084. end;
  2085. A_TEST, A_OR:
  2086. {removes the line marked with (x) from the sequence
  2087. and/or/xor/add/sub/... $x, %y
  2088. test/or %y, %y | test $-1, %y (x)
  2089. j(n)z _Label
  2090. as the first instruction already adjusts the ZF
  2091. %y operand may also be a reference }
  2092. begin
  2093. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  2094. MatchOperand(taicpu(p).oper[0]^,-1);
  { hp1 receives the instruction preceding p (GetLastInstruction), hp2 the
    one following it (GetNextInstruction) }
  2095. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  2096. GetLastInstruction(p, hp1) and
  2097. (tai(hp1).typ = ait_instruction) and
  2098. GetNextInstruction(p,hp2) and
  2099. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  2100. case taicpu(hp1).opcode Of
  2101. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  2102. begin
  2103. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2104. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2105. { and in case of carry for A(E)/B(E)/C/NC }
  2106. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  2107. ((taicpu(hp1).opcode <> A_ADD) and
  2108. (taicpu(hp1).opcode <> A_SUB))) then
  2109. begin
  { hp1 is reused as the successor of p; p is unlinked and freed and the
    loop resumes at that successor without the usual advance }
  2110. hp1 := tai(p.next);
  2111. asml.remove(p);
  2112. p.free;
  2113. p := tai(hp1);
  2114. continue
  2115. end;
  2116. end;
  2117. A_SHL, A_SAL, A_SHR, A_SAR:
  2118. begin
  2119. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2120. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  2121. { therefore, it's only safe to do this optimization for }
  2122. { shifts by a (nonzero) constant }
  2123. (taicpu(hp1).oper[0]^.typ = top_const) and
  2124. (taicpu(hp1).oper[0]^.val <> 0) and
  2125. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2126. { and in case of carry for A(E)/B(E)/C/NC }
  2127. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2128. begin
  2129. hp1 := tai(p.next);
  2130. asml.remove(p);
  2131. p.free;
  2132. p := tai(hp1);
  2133. continue
  2134. end;
  2135. end;
  2136. A_DEC, A_INC, A_NEG:
  2137. begin
  2138. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  2139. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2140. { and in case of carry for A(E)/B(E)/C/NC }
  2141. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2142. begin
  2143. case taicpu(hp1).opcode Of
  2144. A_DEC, A_INC:
  2145. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  2146. begin
  2147. case taicpu(hp1).opcode Of
  2148. A_DEC: taicpu(hp1).opcode := A_SUB;
  2149. A_INC: taicpu(hp1).opcode := A_ADD;
  2150. end;
  { the former single operand becomes operand 1 and the constant 1 is
    loaded as operand 0 }
  2151. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  2152. taicpu(hp1).loadConst(0,1);
  2153. taicpu(hp1).ops:=2;
  2154. end
  2155. end;
  2156. hp1 := tai(p.next);
  2157. asml.remove(p);
  2158. p.free;
  2159. p := tai(hp1);
  2160. continue
  2161. end;
  2162. end
  { else-branch of the case on the preceding instruction's opcode: it is
    none of the opcodes handled above }
  2163. else
  2164. { change "test $-1,%reg" into "test %reg,%reg" }
  2165. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2166. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2167. end { case }
  { else-branch of the outer if: the surrounding instruction pattern did not
    match }
  2168. else
  2169. { change "test $-1,%reg" into "test %reg,%reg" }
  2170. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2171. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2172. end;
  2173. end;
  2174. end;
  2175. end;
  2176. p := tai(p.next)
  2177. end;
  2178. end;
  { Runs the peephole optimizer over the whole assembler list asml.

    Each iteration of the outer loop restarts at asml.first and processes the
    list block by block; stretches enclosed by mark_AsmBlockStart /
    mark_AsmBlockEnd markers are stepped over without being optimized.

    Without cs_opt_level3 exactly one iteration is made.  With it, iterations
    continue until the counter exceeds 2 while "changed" is false; "changed"
    is never set to true inside this procedure.  Iteration 4 is a hard upper
    bound.  PrePeepHoleOpts and a second PeepHoleOptPass1 run only in
    iteration 0, PostPeepHoleOpts only in the final iteration.  dfa is freed
    on exit. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      nextInstr: tai;
      passNr: longint;
      thorough, changed, finalPass: boolean;
    begin
      thorough := cs_opt_level3 in current_settings.optimizerswitches;
      passNr := 0;
      changed := false;
      repeat
        finalPass :=
          not(thorough) or
          ((passNr > 2) and not(changed)) or
          { prevent endless loops }
          (passNr = 4);
        changed := false;
        { Setup labeltable, always necessary }
        blockstart := tai(asml.first);
        pass_1;
        { blockend is now either an ait_marker of kind mark_AsmBlockStart or nil }
        while assigned(blockstart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNr = 0 then
                  begin
                    PrePeepHoleOpts;
                    { the first pass is performed twice in the first iteration only }
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { more peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            { continue where we left off: blockend is either the start of an
              assembler block or nil }
            blockstart := blockend;
            while assigned(blockstart) and
                  (blockstart.typ = ait_marker) and
                  (tai_marker(blockstart).kind = mark_AsmBlockStart) do
              begin
                { we stopped at an assembler block: walk forward until its
                  mark_AsmBlockEnd marker is reached }
                blockstart := tai(blockstart.next);
                while not((blockstart.typ = ait_marker) and
                          (tai_marker(blockstart).kind = mark_AsmBlockEnd)) do
                  blockstart := tai(blockstart.next);
                { blockstart is now the mark_AsmBlockEnd marker }
                if GetNextInstruction(blockstart, nextInstr) and
                   not((nextInstr.typ = ait_marker) and
                       (tai_marker(nextInstr).kind = mark_AsmBlockStart)) then
                  { no further assembler block follows immediately, so set up
                    the next block of "normal" instructions }
                  pass_1
                else
                  { otherwise go on with the next assembler block (or stop
                    when nothing follows) }
                  blockstart := nextInstr;
              end;
          end;
        inc(passNr);
      until finalPass;
      dfa.free;
    end;
  { Final begin..end. block of the file: assigns the class TCpuAsmOptimizer to
    casmoptimizer. }
  2247. begin
  2248. casmoptimizer:=TCpuAsmOptimizer;
  2249. end.