aoptcpu.pas 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { Peephole optimizer for i386; extends the shared x86 optimizer
    (TX86AsmOptimizer). }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { overridden optimizer entry points / passes }
    procedure Optimize; override;
    procedure PrePeepHoleOpts; override;
    procedure PeepHoleOptPass1; override;
    procedure PeepHoleOptPass2; override;
    procedure PostPeepHoleOpts; override;

    { removes a redundant fstp/fld (or fistp/fild) pair in front of the
      exit code; a result of true means p was replaced and the caller
      should "continue" with the new p }
    function DoFpuLoadStoreOpt(var p : tai) : boolean;

    { true if the instruction hp reads the register reg }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;

    { forwards to RegReadByInstruction }
    function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Removes a redundant FPU store/reload pair that sits directly in front
    of the exit code of the current procedure.

    The pairs recognised (p being the first instruction) are
        fstp  mem            fistp mem
        fld   mem     and    fild  mem
    where both instructions have the same operand size and refer to the
    same memory location. The pair is only deleted when all of these hold:
      - the operand size is S_FX,
      - the instruction after the reload is exit code (IsExitCode),
      - the location is addressed through the frame pointer and uses no
        index register,
      - if the procedure has a function result symbol, the location's
        offset is not below the offset of that symbol.

    On success both instructions are removed and freed, p is set to the
    exit code instruction and the result is true, which tells the caller
    to "continue" with the new p. Otherwise nothing is modified and the
    result is false.

    The weaker rewrite "fstp f; fld f" -> "fst f" for the other operand
    sizes is deliberately not performed: the store rounds the value, so
    the reload is not redundant there. }
    var
      reload, exitcode: tai;
    begin
      Result := false;

      { the store has to write to memory and be followed by an instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;

      { only fstp/fld and fistp/fild pairs are of interest }
      if not (((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
              ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD))) then
        exit;

      { the reload must read exactly what was just stored }
      if taicpu(reload).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(reload).opsize <> taicpu(p).opsize then
        exit;
      if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { dropping the pair is only valid for extended because of rounding }
      if taicpu(p).opsize <> S_FX then
        exit;

      { the pair must be immediately followed by the exit code }
      if not getNextInstruction(reload, exitcode) then
        exit;
      if exitcode.typ <> ait_instruction then
        exit;
      if not IsExitCode(exitcode) then
        exit;

      { the location must be frame pointer relative ... }
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      { ... must not lie below the function result (if there is one) ... }
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset <
          tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      { ... and must not use an index register }
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { everything matches: throw both instructions away }
      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := exitcode;
      removeLastDeallocForFuncRes(p);
      Result := true;
    end;
  { Maps a register-related tinschange value (one of the Ch_R*, Ch_W*,
    Ch_RW* or Ch_M* register entries) to the super register it refers to.
    The four groups are laid out one after another in tinschange in the
    same register order, so every group is shifted back onto the
    CH_REAX..CH_REDI range before the table lookup.
    Values above CH_MEDI trigger InternalError(2016041901). }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      supregs: array[CH_REAX..CH_REDI] of tsuperregister =
        (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      groupshift: longint;
    begin
      { determine how far ch has to be moved to land in the "read" group }
      if (ch <= CH_REDI) then
        groupshift := 0
      else if (ch <= CH_WEDI) then
        groupshift := ord(CH_REDI)
      else if (ch <= CH_RWEDI) then
        groupshift := ord(CH_WEDI)
      else if (ch <= CH_MEDI) then
        groupshift := ord(CH_RWEDI)
      else
        begin
          InternalError(2016041901);
          exit;
        end;
      tch2reg := supregs[tinschange(ord(ch) - groupshift)];
    end;
  { Returns true if reg is an integer register whose super register lies
    in the range RS_EAX..RS_EBX.
    NOTE(review): the original description calls this a check for a
    "32 bit general purpose register"; the sub register size is not
    examined here - confirm that callers do not rely on it. }
  function isgp32reg(reg: TRegister): boolean;
    var
      supreg: tsuperregister;
    begin
      Result := false;
      if getregtype(reg) <> R_INTREGISTER then
        exit;
      supreg := getsupreg(reg);
      { the range comparison may trigger a compiler warning, silence it locally }
  {$push}{$warnings off}
      Result := (supreg >= RS_EAX) and (supreg <= RS_EBX);
  {$pop}
    end;
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  { Override that answers "does hp load from reg?" by delegating to
    RegReadByInstruction. }
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
    end;
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  { Returns true if the instruction hp reads the register reg.

    - anything that is not an instruction never reads a register
    - A_CALL is treated as reading every register
    - IMUL, IDIV, DIV and MUL are handled explicitly because of their
      implicit accumulator operands
    - every other opcode is answered from its memory operands and from the
      instruction property table (insprop[].Ch)

    Change compared to the previous version: the one-operand IMUL form
    reads the accumulator implicitly (AL, AX or EAX depending on the
    operand size). The old test "reg = NR_EAX" only matched the full 32 bit
    register, so a query for AL/AX was answered with "not read". The test
    now compares the super register, which errs on the safe side for an
    optimizer (a sub register such as AH is also reported as read). }
    var
      p: taicpu;
      opcount: longint;
    begin
      Result := false;
      if hp.typ <> ait_instruction then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_CALL:
          Result := true;
        A_IMUL:
          case p.ops of
            1:
              { implicit accumulator source }
              Result :=
                RegInOp(reg,p.oper[0]^) or
                ((getregtype(reg) = R_INTREGISTER) and (getsupreg(reg) = RS_EAX));
            2,3:
              Result :=
                RegInOp(reg,p.oper[0]^) or
                RegInOp(reg,p.oper[1]^);
          end;
        A_IDIV,A_DIV,A_MUL:
          Result :=
            RegInOp(reg,p.oper[0]^) or (getsupreg(reg) in [RS_EAX,RS_EDX]);
        else
          begin
            { a register used to form an address is always read }
            for opcount := 0 to p.ops-1 do
              if (p.oper[opcount]^.typ = top_ref) and
                 RegInRef(reg,p.oper[opcount]^.ref^) then
                begin
                  Result := true;
                  exit
                end;
            with insprop[p.opcode] do
              begin
                { implicit register reads recorded in the property table }
                case getsupreg(reg) of
                  RS_EAX: Result := [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[];
                  RS_ECX: Result := [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[];
                  RS_EDX: Result := [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[];
                  RS_EBX: Result := [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[];
                  RS_ESP: Result := [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[];
                  RS_EBP: Result := [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[];
                  RS_ESI: Result := [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[];
                  RS_EDI: Result := [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[];
                end;
                if Result then
                  exit;

                { flag reads }
                if ([Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                     Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                     Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[]) and (reg=NR_DEFAULTFLAGS) then
                  begin
                    Result := true;
                    exit
                  end;

                { instructions flagged Ch_NoReadIfEqualRegs do not count as
                  reading their operands when both operands are the same
                  register }
                if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                   (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                   (p.oper[0]^.reg=p.oper[1]^.reg) then
                  exit;

                { explicit operands that the table marks as read/modified;
                  short-circuit evaluation keeps the operand accesses in the
                  order operand 1, 2, 3 and stops at the first hit }
                Result :=
                  (([Ch_RWOP1,Ch_ROP1,Ch_MOP1]*Ch<>[]) and RegInOp(reg,p.oper[0]^)) or
                  (([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and RegInOp(reg,p.oper[1]^)) or
                  (([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and RegInOp(reg,p.oper[2]^));
              end;
          end;
      end;
    end;
  { Tells whether any operand of p is a memory reference that has a
    segment register set. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      idx: longint;
    begin
      InsContainsSegRef := false;
      for idx := 0 to p.opercnt-1 do
        if (p.oper[idx]^.typ = top_ref) and
           (p.oper[idx]^.ref^.segment <> NR_NO) then
          begin
            { one hit is enough }
            InsContainsSegRef := true;
            exit;
          end;
    end;
  function InstrReadsFlags(p: tai): boolean;
  { Returns true if p has to be treated as reading the flags:
      - an instruction whose property set contains a flag read or
        read/write entry, Ch_RFLAGScc or Ch_All
      - any label (presumably because the flags may be consumed by code
        reached through it - TODO confirm with the callers)
    Every other kind of tai yields false.

    The unused local variable of the previous version has been removed. }
    begin
      case p.typ of
        ait_instruction:
          Result :=
            InsProp[taicpu(p).opcode].Ch*
              [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
               Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
               Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
        ait_label:
          Result := true;
        else
          Result := false;
      end;
    end;
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  { Pre-pass of the peephole optimizer. Walks the instructions from
    BlockStart up to (not including) BlockEnd and applies three rewrites;
    instructions with a segment override are skipped:

      1. "imul const, reg" / "imul const, reg1, reg2" (32 bit only):
           - const = 1: removed, resp. replaced by "mov reg1, reg2"
           - const = 3, 5, 9: replaced by a single lea
           - const = 6, 10, 12: replaced by a two instruction lea/add
             sequence, but only when optimizing for cpu_386 or lower
         The lea rewrites are skipped when optimizing for size and when the
         next instruction is a jo/jno, since lea does not set the overflow
         flag.
      2. "shr/sar const1, x; shl const2, x" is turned into a shift plus an
         "and" (or a single "and" when both counts are equal).
      3. "xor reg, reg" is temporarily turned into "mov $0, reg"; according
         to the original note this is undone in pass 2.

    Fixes compared to the previous version:
      - "imul $6, reg1, reg2": the second lea wrote its result to reg1
        instead of reg2, clobbering the source and leaving 2*reg1 in reg2.
      - the three-operand sequences for 6 and 12 first overwrite reg2 and
        then read reg1 again, which is wrong when reg1 = reg2; that case
        now uses the in-place (two-operand) sequence. }
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
      srcReg, dstReg: TRegister;
      distinctDst: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) do
        begin
          case p.Typ of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_IMUL:
                    { changes certain "imul const, %reg"'s to lea sequences }
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        begin
                          if (taicpu(p).oper[0]^.val = 1) then
                            begin
                              if (taicpu(p).ops = 2) then
                                { remove "imul $1, reg" }
                                begin
                                  hp1 := tai(p.Next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue;
                                end
                              else
                                { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                                begin
                                  hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                                  InsertLLItem(p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                            end
                          else if ((taicpu(p).ops <= 2) or
                                   (taicpu(p).oper[2]^.typ = Top_Reg)) and
                                  (taicpu(p).oper[0]^.val <= 12) and
                                  not(cs_opt_size in current_settings.optimizerswitches) and
                                  (not(GetNextInstruction(p, hp1)) or
                                   not((tai(hp1).typ = ait_instruction) and
                                       ((taicpu(hp1).opcode=A_Jcc) and
                                        (taicpu(hp1).condition in [C_O,C_NO])))) then
                            begin
                              { source and destination of the multiplication;
                                the two-operand form works in place }
                              srcReg := taicpu(p).oper[1]^.reg;
                              if (taicpu(p).ops = 3) then
                                dstReg := taicpu(p).oper[2]^.reg
                              else
                                dstReg := srcReg;
                              { the three-operand sequences for 6 and 12 are only
                                correct when the destination is a different
                                register }
                              distinctDst := dstReg <> srcReg;
                              reference_reset(tmpref,1,[]);
                              case taicpu(p).oper[0]^.val of
                                3, 5, 9:
                                  begin
                                    { imul N, reg1, reg2 to
                                        lea (reg1,reg1,N-1), reg2
                                      imul N, reg1 to
                                        lea (reg1,reg1,N-1), reg1 }
                                    TmpRef.base := srcReg;
                                    TmpRef.index := srcReg;
                                    TmpRef.ScaleFactor := taicpu(p).oper[0]^.val - 1;
                                    hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                    InsertLLItem(p.previous, p.next, hp1);
                                    p.free;
                                    p := hp1;
                                  end;
                                6:
                                  { imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1 }
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { the second instruction is created first and
                                        inserted behind p }
                                      if distinctDst then
                                        begin
                                          TmpRef.base := dstReg;
                                          TmpRef.index := srcReg;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                        end
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L, dstReg, dstReg);
                                      InsertLLItem(p, p.next, hp1);
                                      { now the first instruction, which replaces p }
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := srcReg;
                                      TmpRef.ScaleFactor := 2;
                                      if distinctDst then
                                        TmpRef.base := NR_NO
                                      else
                                        TmpRef.base := srcReg;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      { continue behind the two new instructions }
                                      p := tai(hp1.next);
                                    end;
                                10:
                                  { imul 10, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                      add reg2, reg2
                                    imul 10, reg1 to
                                      lea (reg1,reg1,4), reg1
                                      add reg1, reg1 }
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L, dstReg, dstReg);
                                      InsertLLItem(p, p.next, hp1);
                                      TmpRef.base := srcReg;
                                      TmpRef.index := srcReg;
                                      TmpRef.ScaleFactor := 4;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end;
                                12:
                                  { imul 12, reg1, reg2 to
                                      lea (,reg1,4), reg2
                                      lea (reg2,reg1,8), reg2
                                    imul 12, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      lea (,reg1,4), reg1 }
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { second instruction, inserted behind p }
                                      TmpRef.index := srcReg;
                                      if distinctDst then
                                        begin
                                          TmpRef.base := dstReg;
                                          TmpRef.ScaleFactor := 8;
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                        end;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                      InsertLLItem(p, p.next, hp1);
                                      { first instruction, replaces p }
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := srcReg;
                                      if distinctDst then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                        end
                                      else
                                        begin
                                          TmpRef.base := srcReg;
                                          TmpRef.ScaleFactor := 2;
                                        end;
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, dstReg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end;
                              end;
                            end;
                        end;
                    end;
                  A_SAR, A_SHR:
                    { changes the code sequence
                        shr/sar const1, x
                        shl const2, x
                      to either "sar/and", "shl/and" or just "and" depending on
                      const1 and const2.
                      In all three variants l has the low <count> bits set; the
                      xor with the all-ones value of the operand size turns it
                      into a mask that clears exactly those bits. }
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        begin
                          if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                             not(cs_opt_size in current_settings.optimizerswitches) then
                            { shr/sar const1, %reg
                              shl const2, %reg
                              with const1 > const2:
                              shift right by the difference, then mask }
                            begin
                              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                              taicpu(hp1).opcode := A_AND;
                              l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                              case taicpu(p).opsize of
                                S_L: taicpu(hp1).loadConst(0,l xor aint($ffffffff));
                                S_B: taicpu(hp1).loadConst(0,l xor $ff);
                                S_W: taicpu(hp1).loadConst(0,l xor $ffff);
                              end;
                            end
                          else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                  not(cs_opt_size in current_settings.optimizerswitches) then
                            { shr/sar const1, %reg
                              shl const2, %reg
                              with const1 < const2:
                              mask first, then shift left by the difference }
                            begin
                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize of
                                S_L: taicpu(p).loadConst(0,l xor aint($ffffffff));
                                S_B: taicpu(p).loadConst(0,l xor $ff);
                                S_W: taicpu(p).loadConst(0,l xor $ffff);
                              end;
                            end
                          else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            { shr/sar const1, %reg
                              shl const2, %reg
                              with const1 = const2:
                              a single "and" is enough, the shl is dropped }
                            begin
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize of
                                S_B: taicpu(p).loadConst(0,l xor $ff);
                                S_W: taicpu(p).loadConst(0,l xor $ffff);
                                S_L: taicpu(p).loadConst(0,l xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                        end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2 }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Starting behind hp, steps over every entry that is a label, an
    alignment or one of the SkipInstr kinds.
    If an entry of another kind follows, it is returned in hp2 and the
    result is true. If the list ends first, hp2 receives the last entry
    that was reached and the result is false. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      cur: tai;
    begin
      cur := hp;
      while assigned(cur.next) and
            (tai(cur.next).typ in SkipInstr + [ait_label,ait_align]) do
        cur := tai(cur.next);
      SkipLabels := assigned(cur.next);
      if assigned(cur.next) then
        hp2 := tai(cur.next)
      else
        hp2 := cur;
    end;
  584. { First pass of peephole optimizations }
  585. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  function WriteOk : Boolean;
  { Debugging aid: prints 'Ok' on standard output and always yields True,
    so it can be placed inside a boolean expression. }
    begin
      WriteOk := True;
      writeln('Ok');
    end;
  591. var
  592. l : longint;
  593. p,hp1,hp2 : tai;
  594. hp3,hp4: tai;
  595. v:aint;
  596. TmpRef: TReference;
  597. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  { Follows chains of successive jumps and retargets the jump hp to the
    place where it finally ends up, e.g.

        je l1                je l3
        <code>               <code>
      l1:         becomes  l1:
        je l2                je l3
        <code>               <code>
      l2:                  l2:
        jmp l3               jmp l3

    level is the number of jumps followed so far; once it exceeds 20 the
    search is abandoned, which avoids endless loops with constructs such
    as "l5: ; jmp l5".

    The result is false when the search was abandoned (depth limit, a jump
    to itself, or a failing recursive call) and true otherwise - also when
    nothing had to be changed. }
    var
      target, afterTarget: tai;
      lbl: tasmlabel;
      follow: boolean;

    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    { Skips SkipInstr entries and alignments behind hp; if the entry that
      follows is a label, its symbol is returned in l and the result is
      true. Otherwise l is left untouched and the result is false. }
      var
        cur: tai;
      begin
        cur := hp;
        while assigned(cur.next) and
              (tai(cur.next).typ in (SkipInstr+[ait_align])) do
          cur := tai(cur.next);
        FindAnyLabel := false;
        if assigned(cur.next) and
           (tai(cur.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(cur.next).labsym;
          end
      end;

    begin
      GetFinalDestination := false;
      if level > 20 then
        exit;
      target := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(target) then
        begin
          { move from the label to the first real instruction behind it }
          SkipLabels(target,target);
          if (tai(target).typ = ait_instruction) and
             (taicpu(target).is_jmp) then
            begin
              { the instruction behind the label is an unconditional jump or
                one with the same condition as hp: it will be taken }
              follow := taicpu(target).condition in [C_None,hp.condition];
              { otherwise: it has the opposite condition (so it is never
                taken when coming from hp), but it is followed by a jump that
                will be taken. The final SkipLabels call moves target to that
                second jump; this relies on short circuit evaluation, target
                must only be changed when all preceding tests succeeded }
              if (not follow) and
                 (taicpu(target).condition = inverse_cond(hp.condition)) then
                follow :=
                  SkipLabels(target,afterTarget) and
                  (afterTarget.typ = ait_instruction) and
                  (taicpu(afterTarget).is_jmp) and
                  (taicpu(afterTarget).condition in [C_None,hp.condition]) and
                  SkipLabels(target,target);

              if follow then
                begin
                  { quick check for loops of the form "l5: ; jmp l5" }
                  if (tasmlabel(taicpu(target).oper[0]^.ref^.symbol).labelnr =
                      tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                    exit;
                  { resolve the rest of the chain first ... }
                  if not GetFinalDestination(asml, taicpu(target),succ(level)) then
                    exit;
                  { ... then let hp jump to where target jumps }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol:=taicpu(target).oper[0]^.ref^.symbol;
                  tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
                end
              else if (taicpu(target).condition = inverse_cond(hp.condition)) then
                begin
                  { the jump behind the label is never taken when coming from
                    hp, so hp can jump directly behind it }
                  if FindAnyLabel(target,lbl) then
                    begin
  {$ifdef finaldestdebug}
                      insertllitem(asml,target,target.next,tai_comment.Create(
                        strpnew('next label reused'))));
  {$endif finaldestdebug}
                      lbl.increfs;
                      hp.oper[0]^.ref^.symbol := lbl;
                      if not GetFinalDestination(asml, hp,succ(level)) then
                        exit;
                    end
                  else
                    begin
  {$ifdef finaldestdebug}
                      insertllitem(asml,target,target.next,tai_comment.Create(
                        strpnew('previous label inserted'))));
  {$endif finaldestdebug}
                      { no label there yet: create one }
                      current_asmdata.getjumplabel(lbl);
                      insertllitem(target,target.next,tai_label.Create(lbl));
                      tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                      hp.oper[0]^.ref^.symbol := lbl;
                      lbl.increfs;
                      { this won't work, since the new label isn't in the labeltable }
                      { so it will fail the rangecheck. Labeltable should become a }
                      { hashtable to support this: }
                      { GetFinalDestination(asml, hp); }
                    end;
                end;
            end;
        end;
      GetFinalDestination := true;
    end;
  function DoSubAddOpt(var p: tai): Boolean;
  { Folds the instruction in front of p into p when it modifies the same
    register by a constant. p's operand 0 is read as a constant and its
    operand 1 as a register without further checks
    (NOTE(review): presumably p is always a "sub const, reg" - confirm
    with the caller):

        dec reg       ; sub c, reg   ->  sub c+1, reg
        sub c2, reg   ; sub c, reg   ->  sub c+c2, reg
        add c2, reg   ; sub c, reg   ->  sub c-c2, reg

    In the add case p itself is removed as well when the resulting
    constant is 0; p is then set to the preceding instruction (or, if
    there is none, to the entry that followed p) and the result is true.
    In all other cases the result is false.

    Uses and modifies hp1 of the enclosing procedure. }
    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB,
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              if taicpu(hp1).opcode = A_SUB then
                begin
                  { two subtractions add up }
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
                  asml.remove(hp1);
                  hp1.free;
                end
              else
                begin
                  { the addition cancels part of the subtraction }
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                  asml.remove(hp1);
                  hp1.free;
                  if (taicpu(p).oper[0]^.val = 0) then
                    begin
                      { nothing left to subtract: drop p too }
                      hp1 := tai(p.next);
                      asml.remove(p);
                      p.free;
                      if not GetLastInstruction(hp1, p) then
                        p := hp1;
                      DoSubAddOpt := True;
                    end
                end;
            end;
      end;
    end;
  735. begin
  736. p := BlockStart;
  737. ClearUsedRegs;
  738. while (p <> BlockEnd) Do
  739. begin
  740. UpDateUsedRegs(UsedRegs, tai(p.next));
  741. case p.Typ Of
  742. ait_instruction:
  743. begin
  744. current_filepos:=taicpu(p).fileinfo;
  745. if InsContainsSegRef(taicpu(p)) then
  746. begin
  747. p := tai(p.next);
  748. continue;
  749. end;
  750. { Handle Jmp Optimizations }
  751. if taicpu(p).is_jmp then
  752. begin
  753. {the following if-block removes all code between a jmp and the next label,
  754. because it can never be executed}
  755. if (taicpu(p).opcode = A_JMP) then
  756. begin
  757. hp2:=p;
  758. while GetNextInstruction(hp2, hp1) and
  759. (hp1.typ <> ait_label) do
  760. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  761. begin
  762. { don't kill start/end of assembler block,
  763. no-line-info-start/end etc }
  764. if hp1.typ<>ait_marker then
  765. begin
  766. asml.remove(hp1);
  767. hp1.free;
  768. end
  769. else
  770. hp2:=hp1;
  771. end
  772. else break;
  773. end;
  774. { remove jumps to a label coming right after them }
  775. if GetNextInstruction(p, hp1) then
  776. begin
  777. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  778. { TODO: FIXME removing the first instruction fails}
  779. (p<>blockstart) then
  780. begin
  781. hp2:=tai(hp1.next);
  782. asml.remove(p);
  783. p.free;
  784. p:=hp2;
  785. continue;
  786. end
  787. else
  788. begin
  789. if hp1.typ = ait_label then
  790. SkipLabels(hp1,hp1);
  791. if (tai(hp1).typ=ait_instruction) and
  792. (taicpu(hp1).opcode=A_JMP) and
  793. GetNextInstruction(hp1, hp2) and
  794. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  795. begin
  796. if taicpu(p).opcode=A_Jcc then
  797. begin
  798. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  799. tai_label(hp2).labsym.decrefs;
  800. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  801. { when free'ing hp1, the ref. isn't decreased, so we don't
  802. increase it (FK)
  803. taicpu(p).oper[0]^.ref^.symbol.increfs;
  804. }
  805. asml.remove(hp1);
  806. hp1.free;
  807. GetFinalDestination(asml, taicpu(p),0);
  808. end
  809. else
  810. begin
  811. GetFinalDestination(asml, taicpu(p),0);
  812. p:=tai(p.next);
  813. continue;
  814. end;
  815. end
  816. else
  817. GetFinalDestination(asml, taicpu(p),0);
  818. end;
  819. end;
  820. end
  821. else
  822. { All other optimizes }
  823. begin
  824. for l := 0 to taicpu(p).ops-1 Do
  825. if (taicpu(p).oper[l]^.typ = top_ref) then
  826. With taicpu(p).oper[l]^.ref^ Do
  827. begin
  828. if (base = NR_NO) and
  829. (index <> NR_NO) and
  830. (scalefactor in [0,1]) then
  831. begin
  832. base := index;
  833. index := NR_NO
  834. end
  835. end;
  836. case taicpu(p).opcode Of
  837. A_AND:
  838. if OptPass1And(p) then
  839. continue;
  840. A_CMP:
  841. begin
  842. { cmp register,$8000 neg register
  843. je target --> jo target
  844. .... only if register is deallocated before jump.}
  845. case Taicpu(p).opsize of
  846. S_B: v:=$80;
  847. S_W: v:=$8000;
  848. S_L: v:=aint($80000000);
  849. else
  850. internalerror(2013112905);
  851. end;
  852. if (taicpu(p).oper[0]^.typ=Top_const) and
  853. (taicpu(p).oper[0]^.val=v) and
  854. (Taicpu(p).oper[1]^.typ=top_reg) and
  855. GetNextInstruction(p, hp1) and
  856. (hp1.typ=ait_instruction) and
  857. (taicpu(hp1).opcode=A_Jcc) and
  858. (Taicpu(hp1).condition in [C_E,C_NE]) and
  859. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  860. begin
  861. Taicpu(p).opcode:=A_NEG;
  862. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  863. Taicpu(p).clearop(1);
  864. Taicpu(p).ops:=1;
  865. if Taicpu(hp1).condition=C_E then
  866. Taicpu(hp1).condition:=C_O
  867. else
  868. Taicpu(hp1).condition:=C_NO;
  869. continue;
  870. end;
  871. {
  872. @@2: @@2:
  873. .... ....
  874. cmp operand1,0
  875. jle/jbe @@1
  876. dec operand1 --> sub operand1,1
  877. jmp @@2 jge/jae @@2
  878. @@1: @@1:
  879. ... ....}
  880. if (taicpu(p).oper[0]^.typ = top_const) and
  881. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  882. (taicpu(p).oper[0]^.val = 0) and
  883. GetNextInstruction(p, hp1) and
  884. (hp1.typ = ait_instruction) and
  885. (taicpu(hp1).is_jmp) and
  886. (taicpu(hp1).opcode=A_Jcc) and
  887. (taicpu(hp1).condition in [C_LE,C_BE]) and
  888. GetNextInstruction(hp1,hp2) and
  889. (hp2.typ = ait_instruction) and
  890. (taicpu(hp2).opcode = A_DEC) and
  891. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  892. GetNextInstruction(hp2, hp3) and
  893. (hp3.typ = ait_instruction) and
  894. (taicpu(hp3).is_jmp) and
  895. (taicpu(hp3).opcode = A_JMP) and
  896. GetNextInstruction(hp3, hp4) and
  897. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  898. begin
  899. taicpu(hp2).Opcode := A_SUB;
  900. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  901. taicpu(hp2).loadConst(0,1);
  902. taicpu(hp2).ops:=2;
  903. taicpu(hp3).Opcode := A_Jcc;
  904. case taicpu(hp1).condition of
  905. C_LE: taicpu(hp3).condition := C_GE;
  906. C_BE: taicpu(hp3).condition := C_AE;
  907. end;
  908. asml.remove(p);
  909. asml.remove(hp1);
  910. p.free;
  911. hp1.free;
  912. p := hp2;
  913. continue;
  914. end
  915. end;
  916. A_FLD:
  917. begin
  918. if (taicpu(p).oper[0]^.typ = top_reg) and
  919. GetNextInstruction(p, hp1) and
  920. (hp1.typ = Ait_Instruction) and
  921. (taicpu(hp1).oper[0]^.typ = top_reg) and
  922. (taicpu(hp1).oper[1]^.typ = top_reg) and
  923. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  924. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  925. { change to
  926. fld reg fxxx reg,st
  927. fxxxp st, st1 (hp1)
  928. Remark: non commutative operations must be reversed!
  929. }
  930. begin
  931. case taicpu(hp1).opcode Of
  932. A_FMULP,A_FADDP,
  933. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  934. begin
  935. case taicpu(hp1).opcode Of
  936. A_FADDP: taicpu(hp1).opcode := A_FADD;
  937. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  938. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  939. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  940. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  941. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  942. end;
  943. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  944. taicpu(hp1).oper[1]^.reg := NR_ST;
  945. asml.remove(p);
  946. p.free;
  947. p := hp1;
  948. continue;
  949. end;
  950. end;
  951. end
  952. else
  953. if (taicpu(p).oper[0]^.typ = top_ref) and
  954. GetNextInstruction(p, hp2) and
  955. (hp2.typ = Ait_Instruction) and
  956. (taicpu(hp2).ops = 2) and
  957. (taicpu(hp2).oper[0]^.typ = top_reg) and
  958. (taicpu(hp2).oper[1]^.typ = top_reg) and
  959. (taicpu(p).opsize in [S_FS, S_FL]) and
  960. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  961. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  962. if GetLastInstruction(p, hp1) and
  963. (hp1.typ = Ait_Instruction) and
  964. ((taicpu(hp1).opcode = A_FLD) or
  965. (taicpu(hp1).opcode = A_FST)) and
  966. (taicpu(hp1).opsize = taicpu(p).opsize) and
  967. (taicpu(hp1).oper[0]^.typ = top_ref) and
  968. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  969. if ((taicpu(hp2).opcode = A_FMULP) or
  970. (taicpu(hp2).opcode = A_FADDP)) then
  971. { change to
  972. fld/fst mem1 (hp1) fld/fst mem1
  973. fld mem1 (p) fadd/
  974. faddp/ fmul st, st
  975. fmulp st, st1 (hp2) }
  976. begin
  977. asml.remove(p);
  978. p.free;
  979. p := hp1;
  980. if (taicpu(hp2).opcode = A_FADDP) then
  981. taicpu(hp2).opcode := A_FADD
  982. else
  983. taicpu(hp2).opcode := A_FMUL;
  984. taicpu(hp2).oper[1]^.reg := NR_ST;
  985. end
  986. else
  987. { change to
  988. fld/fst mem1 (hp1) fld/fst mem1
  989. fld mem1 (p) fld st}
  990. begin
  991. taicpu(p).changeopsize(S_FL);
  992. taicpu(p).loadreg(0,NR_ST);
  993. end
  994. else
  995. begin
  996. case taicpu(hp2).opcode Of
  997. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  998. { change to
  999. fld/fst mem1 (hp1) fld/fst mem1
  1000. fld mem2 (p) fxxx mem2
  1001. fxxxp st, st1 (hp2) }
  1002. begin
  1003. case taicpu(hp2).opcode Of
  1004. A_FADDP: taicpu(p).opcode := A_FADD;
  1005. A_FMULP: taicpu(p).opcode := A_FMUL;
  1006. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1007. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1008. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1009. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1010. end;
  1011. asml.remove(hp2);
  1012. hp2.free;
  1013. end
  1014. end
  1015. end
  1016. end;
  1017. A_FSTP,A_FISTP:
  1018. if doFpuLoadStoreOpt(p) then
  1019. continue;
  1020. A_LEA:
  1021. begin
  1022. {removes seg register prefixes from LEA operations, as they
  1023. don't do anything}
  1024. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1025. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1026. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1027. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1028. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1029. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1030. begin
  1031. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1032. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1033. begin
  1034. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1035. taicpu(p).oper[1]^.reg);
  1036. InsertLLItem(p.previous,p.next, hp1);
  1037. p.free;
  1038. p := hp1;
  1039. continue;
  1040. end
  1041. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1042. begin
  1043. hp1 := tai(p.Next);
  1044. asml.remove(p);
  1045. p.free;
  1046. p := hp1;
  1047. continue;
  1048. end
  1049. { continue to use lea to adjust the stack pointer,
  1050. it is the recommended way, but only if not optimizing for size }
  1051. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1052. (cs_opt_size in current_settings.optimizerswitches) then
  1053. with taicpu(p).oper[0]^.ref^ do
  1054. if (base = taicpu(p).oper[1]^.reg) then
  1055. begin
  1056. l := offset;
  1057. if (l=1) and UseIncDec then
  1058. begin
  1059. taicpu(p).opcode := A_INC;
  1060. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1061. taicpu(p).ops := 1
  1062. end
  1063. else if (l=-1) and UseIncDec then
  1064. begin
  1065. taicpu(p).opcode := A_DEC;
  1066. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1067. taicpu(p).ops := 1;
  1068. end
  1069. else
  1070. begin
  1071. if (l<0) and (l<>-2147483648) then
  1072. begin
  1073. taicpu(p).opcode := A_SUB;
  1074. taicpu(p).loadConst(0,-l);
  1075. end
  1076. else
  1077. begin
  1078. taicpu(p).opcode := A_ADD;
  1079. taicpu(p).loadConst(0,l);
  1080. end;
  1081. end;
  1082. end;
  1083. end
  1084. (*
  1085. This is unsafe, lea doesn't modify the flags but "add"
  1086. does. This breaks webtbs/tw15694.pp. The above
  1087. transformations are also unsafe, but they don't seem to
  1088. be triggered by code that FPC generates (or that at
  1089. least does not occur in the tests...). This needs to be
  1090. fixed by checking for the liveness of the flags register.
  1091. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1092. begin
  1093. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1094. taicpu(p).oper[0]^.ref^.base);
  1095. InsertLLItem(asml,p.previous,p.next, hp1);
  1096. DebugMsg('Peephole Lea2AddBase done',hp1);
  1097. p.free;
  1098. p:=hp1;
  1099. continue;
  1100. end
  1101. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1102. begin
  1103. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1104. taicpu(p).oper[0]^.ref^.index);
  1105. InsertLLItem(asml,p.previous,p.next,hp1);
  1106. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1107. p.free;
  1108. p:=hp1;
  1109. continue;
  1110. end
  1111. *)
  1112. end;
  1113. A_MOV:
  1114. begin
  1115. If OptPass1MOV(p) then
  1116. Continue;
  1117. end;
  1118. A_MOVSX,
  1119. A_MOVZX :
  1120. begin
  1121. if (taicpu(p).oper[1]^.typ = top_reg) and
  1122. GetNextInstruction(p,hp1) and
  1123. (hp1.typ = ait_instruction) and
  1124. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1125. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1126. GetNextInstruction(hp1,hp2) and
  1127. MatchInstruction(hp2,A_MOV,[]) and
  1128. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1129. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1130. (((taicpu(hp1).ops=2) and
  1131. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1132. ((taicpu(hp1).ops=1) and
  1133. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1134. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1135. { change movsX/movzX reg/ref, reg2 }
  1136. { add/sub/or/... reg3/$const, reg2 }
  1137. { mov reg2 reg/ref }
  1138. { to add/sub/or/... reg3/$const, reg/ref }
  1139. begin
  1140. { by example:
  1141. movswl %si,%eax movswl %si,%eax p
  1142. decl %eax addl %edx,%eax hp1
  1143. movw %ax,%si movw %ax,%si hp2
  1144. ->
  1145. movswl %si,%eax movswl %si,%eax p
  1146. decw %eax addw %edx,%eax hp1
  1147. movw %ax,%si movw %ax,%si hp2
  1148. }
  1149. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1150. {
  1151. ->
  1152. movswl %si,%eax movswl %si,%eax p
  1153. decw %si addw %dx,%si hp1
  1154. movw %ax,%si movw %ax,%si hp2
  1155. }
  1156. case taicpu(hp1).ops of
  1157. 1:
  1158. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1159. 2:
  1160. begin
  1161. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1162. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1163. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1164. end;
  1165. else
  1166. internalerror(2008042701);
  1167. end;
  1168. {
  1169. ->
  1170. decw %si addw %dx,%si p
  1171. }
  1172. asml.remove(p);
  1173. asml.remove(hp2);
  1174. p.free;
  1175. hp2.free;
  1176. p := hp1
  1177. end
  1178. { removes superfluous And's after movzx's }
  1179. else if taicpu(p).opcode=A_MOVZX then
  1180. begin
  1181. if (taicpu(p).oper[1]^.typ = top_reg) and
  1182. GetNextInstruction(p, hp1) and
  1183. (tai(hp1).typ = ait_instruction) and
  1184. (taicpu(hp1).opcode = A_AND) and
  1185. (taicpu(hp1).oper[0]^.typ = top_const) and
  1186. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1187. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1188. case taicpu(p).opsize Of
  1189. S_BL, S_BW:
  1190. if (taicpu(hp1).oper[0]^.val = $ff) then
  1191. begin
  1192. asml.remove(hp1);
  1193. hp1.free;
  1194. end;
  1195. S_WL:
  1196. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1197. begin
  1198. asml.remove(hp1);
  1199. hp1.free;
  1200. end;
  1201. end;
  1202. {changes some movzx constructs to faster synonyms (all examples
  1203. are given with eax/ax, but are also valid for other registers)}
  1204. if (taicpu(p).oper[1]^.typ = top_reg) then
  1205. if (taicpu(p).oper[0]^.typ = top_reg) then
  1206. case taicpu(p).opsize of
  1207. S_BW:
  1208. begin
  1209. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1210. not(cs_opt_size in current_settings.optimizerswitches) then
  1211. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1212. begin
  1213. taicpu(p).opcode := A_AND;
  1214. taicpu(p).changeopsize(S_W);
  1215. taicpu(p).loadConst(0,$ff);
  1216. end
  1217. else if GetNextInstruction(p, hp1) and
  1218. (tai(hp1).typ = ait_instruction) and
  1219. (taicpu(hp1).opcode = A_AND) and
  1220. (taicpu(hp1).oper[0]^.typ = top_const) and
  1221. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1222. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1223. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1224. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1225. begin
  1226. taicpu(p).opcode := A_MOV;
  1227. taicpu(p).changeopsize(S_W);
  1228. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1229. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1230. end;
  1231. end;
  1232. S_BL:
  1233. begin
  1234. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1235. not(cs_opt_size in current_settings.optimizerswitches) then
  1236. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1237. begin
  1238. taicpu(p).opcode := A_AND;
  1239. taicpu(p).changeopsize(S_L);
  1240. taicpu(p).loadConst(0,$ff)
  1241. end
  1242. else if GetNextInstruction(p, hp1) and
  1243. (tai(hp1).typ = ait_instruction) and
  1244. (taicpu(hp1).opcode = A_AND) and
  1245. (taicpu(hp1).oper[0]^.typ = top_const) and
  1246. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1247. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1248. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1249. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1250. begin
  1251. taicpu(p).opcode := A_MOV;
  1252. taicpu(p).changeopsize(S_L);
  1253. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1254. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1255. end
  1256. end;
  1257. S_WL:
  1258. begin
  1259. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1260. not(cs_opt_size in current_settings.optimizerswitches) then
  1261. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1262. begin
  1263. taicpu(p).opcode := A_AND;
  1264. taicpu(p).changeopsize(S_L);
  1265. taicpu(p).loadConst(0,$ffff);
  1266. end
  1267. else if GetNextInstruction(p, hp1) and
  1268. (tai(hp1).typ = ait_instruction) and
  1269. (taicpu(hp1).opcode = A_AND) and
  1270. (taicpu(hp1).oper[0]^.typ = top_const) and
  1271. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1272. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1273. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1274. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1275. begin
  1276. taicpu(p).opcode := A_MOV;
  1277. taicpu(p).changeopsize(S_L);
  1278. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1279. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1280. end;
  1281. end;
  1282. end
  1283. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1284. begin
  1285. if GetNextInstruction(p, hp1) and
  1286. (tai(hp1).typ = ait_instruction) and
  1287. (taicpu(hp1).opcode = A_AND) and
  1288. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1289. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1290. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1291. begin
  1292. taicpu(p).opcode := A_MOV;
  1293. case taicpu(p).opsize Of
  1294. S_BL:
  1295. begin
  1296. taicpu(p).changeopsize(S_L);
  1297. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1298. end;
  1299. S_WL:
  1300. begin
  1301. taicpu(p).changeopsize(S_L);
  1302. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1303. end;
  1304. S_BW:
  1305. begin
  1306. taicpu(p).changeopsize(S_W);
  1307. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1308. end;
  1309. end;
  1310. end;
  1311. end;
  1312. end;
  1313. end;
  1314. (* should not be generated anymore by the current code generator
  1315. A_POP:
  1316. begin
  1317. if target_info.system=system_i386_go32v2 then
  1318. begin
  1319. { Transform a series of pop/pop/pop/push/push/push to }
  1320. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1321. { because I'm not sure whether they can cope with }
  1322. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1323. { such a problem when using esp as frame pointer (JM) }
  1324. if (taicpu(p).oper[0]^.typ = top_reg) then
  1325. begin
  1326. hp1 := p;
  1327. hp2 := p;
  1328. l := 0;
  1329. while getNextInstruction(hp1,hp1) and
  1330. (hp1.typ = ait_instruction) and
  1331. (taicpu(hp1).opcode = A_POP) and
  1332. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1333. begin
  1334. hp2 := hp1;
  1335. inc(l,4);
  1336. end;
  1337. getLastInstruction(p,hp3);
  1338. l1 := 0;
  1339. while (hp2 <> hp3) and
  1340. assigned(hp1) and
  1341. (hp1.typ = ait_instruction) and
  1342. (taicpu(hp1).opcode = A_PUSH) and
  1343. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1344. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1345. begin
  1346. { change it to a two op operation }
  1347. taicpu(hp2).oper[1]^.typ:=top_none;
  1348. taicpu(hp2).ops:=2;
  1349. taicpu(hp2).opcode := A_MOV;
  1350. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1351. reference_reset(tmpref);
  1352. tmpRef.base.enum:=R_INTREGISTER;
  1353. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1354. convert_register_to_enum(tmpref.base);
  1355. tmpRef.offset := l;
  1356. taicpu(hp2).loadRef(0,tmpRef);
  1357. hp4 := hp1;
  1358. getNextInstruction(hp1,hp1);
  1359. asml.remove(hp4);
  1360. hp4.free;
  1361. getLastInstruction(hp2,hp2);
  1362. dec(l,4);
  1363. inc(l1);
  1364. end;
  1365. if l <> -4 then
  1366. begin
  1367. inc(l,4);
  1368. for l1 := l1 downto 1 do
  1369. begin
  1370. getNextInstruction(hp2,hp2);
  1371. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1372. end
  1373. end
  1374. end
  1375. end
  1376. else
  1377. begin
  1378. if (taicpu(p).oper[0]^.typ = top_reg) and
  1379. GetNextInstruction(p, hp1) and
  1380. (tai(hp1).typ=ait_instruction) and
  1381. (taicpu(hp1).opcode=A_PUSH) and
  1382. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1383. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1384. begin
  1385. { change it to a two op operation }
  1386. taicpu(p).oper[1]^.typ:=top_none;
  1387. taicpu(p).ops:=2;
  1388. taicpu(p).opcode := A_MOV;
  1389. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1390. reference_reset(tmpref);
  1391. TmpRef.base.enum := R_ESP;
  1392. taicpu(p).loadRef(0,TmpRef);
  1393. asml.remove(hp1);
  1394. hp1.free;
  1395. end;
  1396. end;
  1397. end;
  1398. *)
  1399. A_PUSH:
  1400. begin
  1401. if (taicpu(p).opsize = S_W) and
  1402. (taicpu(p).oper[0]^.typ = Top_Const) and
  1403. GetNextInstruction(p, hp1) and
  1404. (tai(hp1).typ = ait_instruction) and
  1405. (taicpu(hp1).opcode = A_PUSH) and
  1406. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1407. (taicpu(hp1).opsize = S_W) then
  1408. begin
  1409. taicpu(p).changeopsize(S_L);
  1410. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1411. asml.remove(hp1);
  1412. hp1.free;
  1413. end;
  1414. end;
  1415. A_SHL, A_SAL:
  1416. begin
  1417. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1418. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1419. (taicpu(p).opsize = S_L) and
  1420. (taicpu(p).oper[0]^.val <= 3) then
  1421. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1422. begin
  1423. TmpBool1 := True; {should we check the next instruction?}
  1424. TmpBool2 := False; {have we found an add/sub which could be
  1425. integrated in the lea?}
  1426. reference_reset(tmpref,2,[]);
  1427. TmpRef.index := taicpu(p).oper[1]^.reg;
  1428. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1429. while TmpBool1 and
  1430. GetNextInstruction(p, hp1) and
  1431. (tai(hp1).typ = ait_instruction) and
  1432. ((((taicpu(hp1).opcode = A_ADD) or
  1433. (taicpu(hp1).opcode = A_SUB)) and
  1434. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1435. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1436. (((taicpu(hp1).opcode = A_INC) or
  1437. (taicpu(hp1).opcode = A_DEC)) and
  1438. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1439. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1440. (not GetNextInstruction(hp1,hp2) or
  1441. not instrReadsFlags(hp2)) Do
  1442. begin
  1443. TmpBool1 := False;
  1444. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1445. begin
  1446. TmpBool1 := True;
  1447. TmpBool2 := True;
  1448. case taicpu(hp1).opcode of
  1449. A_ADD:
  1450. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1451. A_SUB:
  1452. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1453. end;
  1454. asml.remove(hp1);
  1455. hp1.free;
  1456. end
  1457. else
  1458. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1459. (((taicpu(hp1).opcode = A_ADD) and
  1460. (TmpRef.base = NR_NO)) or
  1461. (taicpu(hp1).opcode = A_INC) or
  1462. (taicpu(hp1).opcode = A_DEC)) then
  1463. begin
  1464. TmpBool1 := True;
  1465. TmpBool2 := True;
  1466. case taicpu(hp1).opcode of
  1467. A_ADD:
  1468. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1469. A_INC:
  1470. inc(TmpRef.offset);
  1471. A_DEC:
  1472. dec(TmpRef.offset);
  1473. end;
  1474. asml.remove(hp1);
  1475. hp1.free;
  1476. end;
  1477. end;
  1478. if TmpBool2 or
  1479. ((current_settings.optimizecputype < cpu_Pentium2) and
  1480. (taicpu(p).oper[0]^.val <= 3) and
  1481. not(cs_opt_size in current_settings.optimizerswitches)) then
  1482. begin
  1483. if not(TmpBool2) and
  1484. (taicpu(p).oper[0]^.val = 1) then
  1485. begin
  1486. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1487. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1488. end
  1489. else
  1490. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1491. taicpu(p).oper[1]^.reg);
  1492. InsertLLItem(p.previous, p.next, hp1);
  1493. p.free;
  1494. p := hp1;
  1495. end;
  1496. end
  1497. else
  1498. if (current_settings.optimizecputype < cpu_Pentium2) and
  1499. (taicpu(p).oper[0]^.typ = top_const) and
  1500. (taicpu(p).oper[1]^.typ = top_reg) then
  1501. if (taicpu(p).oper[0]^.val = 1) then
  1502. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1503. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1504. (unlike shl, which is only pairable in the U pipe)}
  1505. begin
  1506. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1507. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1508. InsertLLItem(p.previous, p.next, hp1);
  1509. p.free;
  1510. p := hp1;
  1511. end
  1512. else if (taicpu(p).opsize = S_L) and
  1513. (taicpu(p).oper[0]^.val<= 3) then
  1514. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1515. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1516. begin
  1517. reference_reset(tmpref,2,[]);
  1518. TmpRef.index := taicpu(p).oper[1]^.reg;
  1519. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1520. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1521. InsertLLItem(p.previous, p.next, hp1);
  1522. p.free;
  1523. p := hp1;
  1524. end
  1525. end;
  1526. A_SETcc :
  1527. { changes
  1528. setcc (funcres) setcc reg
  1529. movb (funcres), reg to leave/ret
  1530. leave/ret }
  1531. begin
  1532. if (taicpu(p).oper[0]^.typ = top_ref) and
  1533. GetNextInstruction(p, hp1) and
  1534. GetNextInstruction(hp1, hp2) and
  1535. IsExitCode(hp2) and
  1536. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1537. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1538. not(assigned(current_procinfo.procdef.funcretsym) and
  1539. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1540. (hp1.typ = ait_instruction) and
  1541. (taicpu(hp1).opcode = A_MOV) and
  1542. (taicpu(hp1).opsize = S_B) and
  1543. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1544. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1545. begin
  1546. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1547. asml.remove(hp1);
  1548. hp1.free;
  1549. end
  1550. end;
  1551. A_SUB:
  1552. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1553. { * change "sub/add const1, reg" or "dec reg" followed by
  1554. "sub const2, reg" to one "sub ..., reg" }
  1555. begin
  1556. if (taicpu(p).oper[0]^.typ = top_const) and
  1557. (taicpu(p).oper[1]^.typ = top_reg) then
  1558. if (taicpu(p).oper[0]^.val = 2) and
  1559. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1560. { Don't do the sub/push optimization if the sub }
  1561. { comes from setting up the stack frame (JM) }
  1562. (not getLastInstruction(p,hp1) or
  1563. (hp1.typ <> ait_instruction) or
  1564. (taicpu(hp1).opcode <> A_MOV) or
  1565. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1566. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1567. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1568. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1569. begin
  1570. hp1 := tai(p.next);
  1571. while Assigned(hp1) and
  1572. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1573. not RegReadByInstruction(NR_ESP,hp1) and
  1574. not RegModifiedByInstruction(NR_ESP,hp1) do
  1575. hp1 := tai(hp1.next);
  1576. if Assigned(hp1) and
  1577. (tai(hp1).typ = ait_instruction) and
  1578. (taicpu(hp1).opcode = A_PUSH) and
  1579. (taicpu(hp1).opsize = S_W) then
  1580. begin
  1581. taicpu(hp1).changeopsize(S_L);
  1582. if taicpu(hp1).oper[0]^.typ=top_reg then
  1583. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1584. hp1 := tai(p.next);
  1585. asml.remove(p);
  1586. p.free;
  1587. p := hp1;
  1588. continue
  1589. end;
  1590. if DoSubAddOpt(p) then
  1591. continue;
  1592. end
  1593. else if DoSubAddOpt(p) then
  1594. continue
  1595. end;
  1596. A_VMOVAPS,
  1597. A_VMOVAPD:
  1598. if OptPass1VMOVAP(p) then
  1599. continue;
  1600. A_VDIVSD,
  1601. A_VDIVSS,
  1602. A_VSUBSD,
  1603. A_VSUBSS,
  1604. A_VMULSD,
  1605. A_VMULSS,
  1606. A_VADDSD,
  1607. A_VADDSS:
  1608. if OptPass1VOP(p) then
  1609. continue;
  1610. end;
  1611. end; { if is_jmp }
  1612. end;
  1613. end;
  1614. updateUsedRegs(UsedRegs,p);
  1615. p:=tai(p.next);
  1616. end;
  1617. end;
  1618. procedure TCPUAsmOptimizer.PeepHoleOptPass2;
  {$ifdef DEBUG_AOPTCPU}
  procedure DebugMsg(const s: string;p : tai);
    { Debug build: inserts an assembler comment holding s into asml,
      directly before p. }
    var
      note: tai_comment;
    begin
      note := tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  procedure DebugMsg(const s: string;p : tai);inline;
    { Non-debug build: intentionally empty, so calls have no effect. }
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  function CanBeCMOV(p : tai) : boolean;
    { Returns true only when p is assigned, is an instruction, has opcode
      A_MOV with operand size S_L or S_W, and both its operands are
      registers. Any other input, including nil, yields false. }
    begin
      CanBeCMOV := false;

      if not assigned(p) then
        exit;
      if p.typ <> ait_instruction then
        exit;
      if taicpu(p).opcode <> A_MOV then
        exit;
      if not (taicpu(p).opsize in [S_L,S_W]) then
        exit;

      { we can't use cmov ref,reg because
        ref could be nil and cmov still throws an exception
        if ref=nil but the mov isn't done (FK)
        so a memory source is rejected on purpose; the disabled test was:
          or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
      }
      CanBeCMOV := (taicpu(p).oper[0]^.typ = top_reg) and
                   (taicpu(p).oper[1]^.typ = top_reg);
    end;
  1644. var
  1645. p,hp1,hp2,hp3: tai;
  1646. l : longint;
  1647. condition : tasmcond;
  1648. carryadd_opcode: Tasmop;
  1649. begin
  1650. p := BlockStart;
  1651. ClearUsedRegs;
  1652. while (p <> BlockEnd) Do
  1653. begin
  1654. UpdateUsedRegs(UsedRegs, tai(p.next));
  1655. case p.Typ Of
  1656. Ait_Instruction:
  1657. begin
  1658. if InsContainsSegRef(taicpu(p)) then
  1659. begin
  1660. p := tai(p.next);
  1661. continue;
  1662. end;
  1663. case taicpu(p).opcode Of
  1664. A_Jcc:
  1665. begin
  1666. { jb @@1 cmc
  1667. inc/dec operand --> adc/sbb operand,0
  1668. @@1:
  1669. ... and ...
  1670. jnb @@1
  1671. inc/dec operand --> adc/sbb operand,0
  1672. @@1: }
  1673. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1674. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1675. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1676. begin
  1677. carryadd_opcode:=A_NONE;
  1678. if Taicpu(p).condition in [C_NAE,C_B] then
  1679. begin
  1680. if Taicpu(hp1).opcode=A_INC then
  1681. carryadd_opcode:=A_ADC;
  1682. if Taicpu(hp1).opcode=A_DEC then
  1683. carryadd_opcode:=A_SBB;
  1684. if carryadd_opcode<>A_NONE then
  1685. begin
  1686. Taicpu(p).clearop(0);
  1687. Taicpu(p).ops:=0;
  1688. Taicpu(p).is_jmp:=false;
  1689. Taicpu(p).opcode:=A_CMC;
  1690. Taicpu(p).condition:=C_NONE;
  1691. Taicpu(hp1).ops:=2;
  1692. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1693. Taicpu(hp1).loadconst(0,0);
  1694. Taicpu(hp1).opcode:=carryadd_opcode;
  1695. continue;
  1696. end;
  1697. end;
  1698. if Taicpu(p).condition in [C_AE,C_NB] then
  1699. begin
  1700. if Taicpu(hp1).opcode=A_INC then
  1701. carryadd_opcode:=A_ADC;
  1702. if Taicpu(hp1).opcode=A_DEC then
  1703. carryadd_opcode:=A_SBB;
  1704. if carryadd_opcode<>A_NONE then
  1705. begin
  1706. asml.remove(p);
  1707. p.free;
  1708. Taicpu(hp1).ops:=2;
  1709. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1710. Taicpu(hp1).loadconst(0,0);
  1711. Taicpu(hp1).opcode:=carryadd_opcode;
  1712. p:=hp1;
  1713. continue;
  1714. end;
  1715. end;
  1716. end;
  1717. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1718. begin
  1719. { check for
  1720. jCC xxx
  1721. <several movs>
  1722. xxx:
  1723. }
  1724. l:=0;
  1725. GetNextInstruction(p, hp1);
  1726. while assigned(hp1) and
  1727. CanBeCMOV(hp1) and
  1728. { stop on labels }
  1729. not(hp1.typ=ait_label) do
  1730. begin
  1731. inc(l);
  1732. GetNextInstruction(hp1,hp1);
  1733. end;
  1734. if assigned(hp1) then
  1735. begin
  1736. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1737. begin
  1738. if (l<=4) and (l>0) then
  1739. begin
  1740. condition:=inverse_cond(taicpu(p).condition);
  1741. hp2:=p;
  1742. GetNextInstruction(p,hp1);
  1743. p:=hp1;
  1744. repeat
  1745. taicpu(hp1).opcode:=A_CMOVcc;
  1746. taicpu(hp1).condition:=condition;
  1747. GetNextInstruction(hp1,hp1);
  1748. until not(assigned(hp1)) or
  1749. not(CanBeCMOV(hp1));
  1750. { wait with removing else GetNextInstruction could
  1751. ignore the label if it was the only usage in the
  1752. jump moved away }
  1753. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1754. asml.remove(hp2);
  1755. hp2.free;
  1756. continue;
  1757. end;
  1758. end
  1759. else
  1760. begin
  1761. { check further for
  1762. jCC xxx
  1763. <several movs 1>
  1764. jmp yyy
  1765. xxx:
  1766. <several movs 2>
  1767. yyy:
  1768. }
  1769. { hp2 points to jmp yyy }
  1770. hp2:=hp1;
  1771. { skip hp1 to xxx }
  1772. GetNextInstruction(hp1, hp1);
  1773. if assigned(hp2) and
  1774. assigned(hp1) and
  1775. (l<=3) and
  1776. (hp2.typ=ait_instruction) and
  1777. (taicpu(hp2).is_jmp) and
  1778. (taicpu(hp2).condition=C_None) and
  1779. { real label and jump, no further references to the
  1780. label are allowed }
  1781. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  1782. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1783. begin
  1784. l:=0;
  1785. { skip hp1 to <several moves 2> }
  1786. GetNextInstruction(hp1, hp1);
  1787. while assigned(hp1) and
  1788. CanBeCMOV(hp1) do
  1789. begin
  1790. inc(l);
  1791. GetNextInstruction(hp1, hp1);
  1792. end;
  1793. { hp1 points to yyy: }
  1794. if assigned(hp1) and
  1795. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1796. begin
  1797. condition:=inverse_cond(taicpu(p).condition);
  1798. GetNextInstruction(p,hp1);
  1799. hp3:=p;
  1800. p:=hp1;
  1801. repeat
  1802. taicpu(hp1).opcode:=A_CMOVcc;
  1803. taicpu(hp1).condition:=condition;
  1804. GetNextInstruction(hp1,hp1);
  1805. until not(assigned(hp1)) or
  1806. not(CanBeCMOV(hp1));
  1807. { hp2 is still at jmp yyy }
  1808. GetNextInstruction(hp2,hp1);
  1809. { hp2 is now at xxx: }
  1810. condition:=inverse_cond(condition);
  1811. GetNextInstruction(hp1,hp1);
  1812. { hp1 is now at <several movs 2> }
  1813. repeat
  1814. taicpu(hp1).opcode:=A_CMOVcc;
  1815. taicpu(hp1).condition:=condition;
  1816. GetNextInstruction(hp1,hp1);
  1817. until not(assigned(hp1)) or
  1818. not(CanBeCMOV(hp1));
  1819. {
  1820. asml.remove(hp1.next)
  1821. hp1.next.free;
  1822. asml.remove(hp1);
  1823. hp1.free;
  1824. }
  1825. { remove jCC }
  1826. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1827. asml.remove(hp3);
  1828. hp3.free;
  1829. { remove jmp }
  1830. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1831. asml.remove(hp2);
  1832. hp2.free;
  1833. continue;
  1834. end;
  1835. end;
  1836. end;
  1837. end;
  1838. end;
  1839. end;
  1840. A_FSTP,A_FISTP:
  1841. if DoFpuLoadStoreOpt(p) then
  1842. continue;
  1843. A_IMUL:
  1844. if OptPass2Imul(p) then
  1845. continue;
  1846. A_JMP:
  1847. {
  1848. change
  1849. jmp .L1
  1850. ...
  1851. .L1:
  1852. ret
  1853. into
  1854. ret
  1855. }
  1856. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
  1857. begin
  1858. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1859. if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
  1860. begin
  1861. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1862. taicpu(p).opcode:=A_RET;
  1863. taicpu(p).is_jmp:=false;
  1864. taicpu(p).ops:=taicpu(hp1).ops;
  1865. case taicpu(hp1).ops of
  1866. 0:
  1867. taicpu(p).clearop(0);
  1868. 1:
  1869. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1870. else
  1871. internalerror(2016041301);
  1872. end;
  1873. continue;
  1874. end;
  1875. end;
  1876. A_MOV:
  1877. if OptPass2MOV(p) then
  1878. continue;
  1879. end;
  1880. end;
  1881. end;
  1882. p := tai(p.next)
  1883. end;
  1884. end;
  { Last peephole pass.  Walks all items from BlockStart up to (not including)
    BlockEnd and applies the following rewrites to instructions:

      call x; jmp y        -> push y; jmp x       (CPUs before Pentium2, no PIC)
      call x; ret          -> jmp x               (optimization level 4, no PIC)
      cmp $0,%reg          -> test %reg,%reg
      mov ...              -> handed to PostPeepholeOptMov
      movzbl src,%reg32    -> xorl %reg32,%reg32; movb src,%reg8
                              (Pentium only, not with regvars or size opt)
      test/or %y,%y or test $-1,%y directly after an arithmetic instruction
      on %y and directly before a SETcc/Jcc/CMOVcc is removed when the
      arithmetic instruction already delivers the flags that are needed.

    Instructions for which InsContainsSegRef returns true are skipped. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    begin
                      { change
                          call procname
                          jmp  label
                        into
                          push label
                          jmp  procname
                        don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call   procname
                          ret
                        by
                          jmp    procname

                        this should never hurt except when pic is used, not sure
                        how to handle it then

                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is looked at again, now as a TEST instruction }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                       { the xor inserted below clears the whole
                                         destination; if the source is a part of
                                         that same register (e.g. movzbl %al,%eax)
                                         its value would be lost before the movb
                                         reads it, so leave such a movzx alone }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              { the address must not depend on the register
                                that is cleared by the xor }
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                     and/or/xor/add/sub/... $x, %y
                     test/or %y, %y | test $-1, %y    (x)
                     j(n)z _Label
                        as the first instruction already adjusts the ZF
                        %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      { hp1 = instruction before p, hp2 = flag consumer after p }
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                { therefore, it's only safe to do this optimization for     }
                                { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              { these take a single operand, so compare oper[0] }
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Driver of the assembler optimizer.
    Repeatedly runs pass_1 and the peephole passes over every block of
    compiler generated code in asml, stepping over the regions enclosed by
    mark_AsmBlockStart / mark_AsmBlockEnd markers.  Without cs_opt_level3 a
    single round is done; with it the rounds continue until the stop
    condition below holds.  dfa is freed at the end. }
  Procedure TCpuAsmOptimizer.Optimize;

    { true if t (which must be assigned) is the marker opening an assembler block }
    function IsAsmBlockStart(t: tai): boolean;
      begin
        IsAsmBlockStart := (t.typ = ait_Marker) and
                           (Tai_Marker(t).Kind = mark_AsmBlockStart);
      end;

    Var
      afterAsm: Tai;
      passNo: longint;
      thorough, modified, finalRound: boolean;
    Begin
      thorough := (cs_opt_level3 in current_settings.optimizerswitches);
      passNo := 0;
      modified := false;
      repeat
        { decide up front whether this is the last round; note that nothing
          in this procedure ever sets modified to true }
        finalRound := true;
        if thorough then
          finalRound :=
            (not modified and (passNo > 2)) or
            { prevent endless loops }
            (passNo = 4);
        modified := false;

        { Setup labeltable, always necessary }
        blockstart := tai(asml.first);
        pass_1;
        { Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
        { or nil                                                                     }
        While Assigned(BlockStart) Do
          Begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNo = 0 then
                  begin
                    PrePeepHoleOpts;
                    { the first peephole pass is run twice in the first round }
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalRound then
                  PostPeepHoleOpts;
              end;

            { Continue where we left off, BlockEnd is either the start of an }
            { assembler block or nil                                         }
            BlockStart := BlockEnd;
            While Assigned(BlockStart) And IsAsmBlockStart(BlockStart) Do
              Begin
                { We stopped at an assembler block, so skip it }
                Repeat
                  BlockStart := Tai(BlockStart.Next);
                Until (BlockStart.Typ = Ait_Marker) And
                      (Tai_Marker(BlockStart).Kind = mark_AsmBlockEnd);
                { Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
                If GetNextInstruction(BlockStart, afterAsm) And
                   not IsAsmBlockStart(afterAsm) Then
                  { There is no assembler block anymore after the current one, so }
                  { optimize the next block of "normal" instructions              }
                  pass_1
                else
                  { Otherwise, skip the next assembler block }
                  BlockStart := afterAsm;
              End;
          End;
        inc(passNo);
      until finalRound;
      dfa.free;
    End;
  { unit initialization: install TCpuAsmOptimizer as the assembler
    optimizer class referenced by casmoptimizer }
begin
  casmoptimizer:=TCpuAsmOptimizer;
end.