aoptcpu.pas 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { Assembler optimizer for the i386 target. All optimizer passes are
    overrides of virtual methods inherited from TX86AsmOptimizer. }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { optimizer entry point }
    procedure Optimize; override;

    { peephole passes }
    procedure PrePeepHoleOpts; override;
    procedure PeepHoleOptPass1; override;
    procedure PeepHoleOptPass2; override;
    procedure PostPeepHoleOpts; override;

    { Tries to drop an x87 store that is directly followed by a reload of
      the same memory location; returns true when the calling loop should
      "continue" with the updated p. }
    function DoFpuLoadStoreOpt(var p : tai) : boolean;
  end;
  34. Var
  35. AsmOptimizer : TCpuAsmOptimizer;
  36. Implementation
  37. uses
  38. verbose,globtype,globals,
  39. cpuinfo,
  40. aasmcpu,
  41. aoptutils,
  42. procinfo,
  43. cgutils,
  44. { units we should get rid of: }
  45. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    { Looks for the pairs
          fstp  mem          fistp mem
          fld   mem    or    fild  mem
      (same operand size, same reference) and removes both instructions when
      all of the following hold:
        - the operand size is S_FX (extended),
        - the instruction after the pair is exit code,
        - the reference is based on the frame pointer and has no index,
        - the offset is not below the offset of the function result symbol
          (if the current procedure has one).
      On success p is advanced to the instruction that followed the pair.

      Returns true if a "continue" should be done by the caller after this
      optimization, false if nothing was changed. }
    var
      reload, follower: tai;
      isFloatPair, isIntPair: boolean;
    begin
      result := false;

      { the store has to write to memory and be followed by an instruction }
      if (taicpu(p).oper[0]^.typ <> top_ref) or
         not getNextInstruction(p, reload) or
         (reload.typ <> ait_instruction) then
        exit;

      { only fstp/fld and fistp/fild combinations qualify }
      isFloatPair := (taicpu(reload).opcode = A_FLD) and
                     (taicpu(p).opcode = A_FSTP);
      isIntPair := (taicpu(p).opcode = A_FISTP) and
                   (taicpu(reload).opcode = A_FILD);
      if not (isFloatPair or isIntPair) then
        exit;

      { the reload must read exactly what was just stored }
      if (taicpu(reload).oper[0]^.typ <> top_ref) or
         (taicpu(reload).opsize <> taicpu(p).opsize) or
         not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because
        of rounding }
      if (taicpu(p).opsize <> S_FX) or
         not getNextInstruction(reload, follower) or
         (follower.typ <> ait_instruction) or
         not IsExitCode(follower) or
         (taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer) then
        exit;

      { leave stores located below the function result symbol alone }
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset <
          tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;

      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { drop both the store and the reload and resume at the exit code }
      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := follower;
      removeLastDeallocForFuncRes(p);
      result := true;

      (* Disabled alternative for the non-extended sizes; it can't be done
         because the store operation rounds:

           { fst can't store an extended value! }
           if (taicpu(p).opsize <> S_FX) and
              (taicpu(p).opsize <> S_IQ) then
             begin
               if (taicpu(p).opcode = A_FSTP) then
                 taicpu(p).opcode := A_FST
               else taicpu(p).opcode := A_FIST;
               asml.remove(hp1);
               hp1.free;
             end
      *)
    end;
  { Converts a TInsChange register flag into the super register it names.

    The flags are handled as four consecutive groups ending in CH_REDI,
    CH_WEDI, CH_RWEDI and CH_MEDI. A flag from one of the later groups is
    folded back onto the first group by subtracting the ordinal of the
    previous group's last member; the result is then looked up in a table
    indexed CH_REAX..CH_REDI.
    NOTE(review): the folding assumes that subtracting ord() of the previous
    group's last member yields the ordinal of the matching CH_Rxxx flag -
    confirm against the declaration of tinschange.

    Anything above CH_MEDI raises internal error 2016041901. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister =
        (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      prevGroupEnd: tinschange;
    begin
      { first group: direct table lookup }
      if ch <= CH_REDI then
        begin
          tch2reg := ch2reg[ch];
          exit;
        end;

      { later groups: find the last member of the preceding group }
      if ch <= CH_WEDI then
        prevGroupEnd := CH_REDI
      else if ch <= CH_RWEDI then
        prevGroupEnd := CH_WEDI
      else if ch <= CH_MEDI then
        prevGroupEnd := CH_RWEDI
      else
        begin
          InternalError(2016041901);
          exit;
        end;

      tch2reg := ch2reg[tinschange(ord(ch) - ord(prevGroupEnd))];
    end;
  { Checks whether reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX. Only the register type and the super register
    are inspected here; the sub-register (size) part of reg is not looked at. }
  function isgp32reg(reg: TRegister): boolean;
    begin
{$push}{$warnings off}
      result := false;
      if getregtype(reg) = R_INTREGISTER then
        result := (getsupreg(reg) >= RS_EAX) and
                  (getsupreg(reg) <= RS_EBX);
{$pop}
    end;
  { Returns true if at least one operand of p is a memory reference whose
    segment register is set (i.e. differs from NR_NO), false otherwise. }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      result := false;
      for opIdx := 0 to p.opercnt-1 do
        with p.oper[opIdx]^ do
          if (typ = top_ref) and
             (ref^.segment <> NR_NO) then
            begin
              { one segment override is enough, stop scanning }
              result := true;
              exit;
            end;
    end;
  { Tells whether the list item p has to be treated as a reader of the CPU
    flags.

      - instruction: true if its InsProp change set contains any of the
        "read" or "read/write" flag markers, Ch_RFlags, Ch_RWFlags,
        Ch_RFLAGScc or Ch_All
      - label: always true
        (presumably a conservative answer, because the code reached through
        the label is not analysed here)
      - anything else: false }
  function InstrReadsFlags(p: tai): boolean;
    const
      { every change marker that implies the flags are read }
      FlagReadMarkers =
        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
         Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All];
    begin
      case p.typ of
        ait_instruction:
          InstrReadsFlags :=
            (InsProp[taicpu(p).opcode].Ch * FlagReadMarkers) <> [];
        ait_label:
          InstrReadsFlags := true;
        else
          InstrReadsFlags := false;
      end;
    end;
  { Pre-pass of the peephole optimizer. Walks all items from BlockStart up to
    (not including) BlockEnd and, for instructions that do not contain a
    segment-prefixed memory reference, applies:

      A_IMUL     32 bit "imul $const, reg1[, reg2]" with a small constant is
                 removed (const = 1, two operands), turned into a mov
                 (const = 1, three operands) or replaced by lea/add sequences
                 (const = 3, 5, 9 always; 6, 10, 12 only when optimizing for
                 cpu_386 or lower). The lea/add rewrite is skipped when
                 optimizing for size or when the next instruction is jo/jno
                 (presumably because lea does not reproduce imul's overflow
                 flag).
      A_SAR/SHR  delegated to PrePeepholeOptSxx.
      A_XOR      "xor reg,reg" is temporarily turned into "mov $0,reg".

    NOTE(review): the three-operand multi-instruction rewrites (6 and 12)
    write to reg2 before reading reg1 again, so they assume reg1 <> reg2 -
    TODO confirm that the code generator never emits such an imul. }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      p,hp1: tai;
      tmpRef: treference;

    { Replaces the imul at p by the single instruction
        lea (reg1,reg1,scale), dest
      where dest is reg1 for the two-operand form and reg2 for the
      three-operand form. Afterwards p points to the new lea. }
    procedure ReplaceImulByLea(scale: byte);
      var
        dest: tregister;
      begin
        if (taicpu(p).ops = 2) then
          dest := taicpu(p).oper[1]^.reg
        else
          dest := taicpu(p).oper[2]^.reg;
        tmpRef.base := taicpu(p).oper[1]^.reg;
        tmpRef.index := taicpu(p).oper[1]^.reg;
        tmpRef.ScaleFactor := scale;
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, dest);
        InsertLLItem(p.previous, p.next, hp1);
        p.free;
        p := hp1;
      end;

    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                { never touch instructions using a segment override }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpRef,1,[]);
                            case taicpu(p).oper[0]^.val Of
                              3:
                                {imul 3, reg1, reg2 to
                                   lea (reg1,reg1,2), reg2
                                 imul 3, reg1 to
                                   lea (reg1,reg1,2), reg1}
                                ReplaceImulByLea(2);
                              5:
                                {imul 5, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                 imul 5, reg1 to
                                   lea (reg1,reg1,4), reg1}
                                ReplaceImulByLea(4);
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { the SECOND instruction of the sequence is
                                         built first and linked in behind p }
                                       tmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           tmpRef.base := taicpu(p).oper[2]^.reg;
                                           tmpRef.ScaleFactor := 4;
                                           { the destination has to be reg2 (see the
                                             sequence above); it used to be reg1,
                                             which clobbered the source and left
                                             2*reg1 in reg2 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p, p.next, hp1);
                                       { now the FIRST instruction, which takes the
                                         place of the imul itself }
                                       reference_reset(tmpRef,2,[]);
                                       tmpRef.index := taicpu(p).oper[1]^.reg;
                                       tmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           tmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           tmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(p.previous, p.next, hp1);
                                       p.free;
                                       { continue behind the two new instructions }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9:
                                {imul 9, reg1, reg2 to
                                   lea (reg1,reg1,8), reg2
                                 imul 9, reg1 to
                                   lea (reg1,reg1,8), reg1}
                                ReplaceImulByLea(8);
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction (add) first }
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p, p.next, hp1);
                                        { then the lea that replaces the imul }
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        tmpRef.index := taicpu(p).oper[1]^.reg;
                                        tmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    if (current_settings.optimizecputype <= cpu_386)
                                      then
                                        begin
                                          { second instruction first }
                                          tmpRef.index := taicpu(p).oper[1]^.reg;
                                          if (taicpu(p).ops = 3) then
                                            begin
                                              tmpRef.base := taicpu(p).oper[2]^.reg;
                                              tmpRef.ScaleFactor := 8;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                            end
                                          else
                                            begin
                                              tmpRef.base := NR_NO;
                                              tmpRef.ScaleFactor := 4;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                            end;
                                          InsertLLItem(p, p.next, hp1);
                                          { then the instruction replacing the imul }
                                          reference_reset(tmpRef,2,[]);
                                          tmpRef.index := taicpu(p).oper[1]^.reg;
                                          if (taicpu(p).ops = 3) then
                                            begin
                                              tmpRef.base := NR_NO;
                                              tmpRef.ScaleFactor := 4;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                            end
                                          else
                                            begin
                                              tmpRef.base := taicpu(p).oper[1]^.reg;
                                              tmpRef.ScaleFactor := 2;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                            end;
                                          InsertLLItem(p.previous, p.next, hp1);
                                          p.free;
                                          p := tai(hp1.next);
                                        end
                                  end
                            end;
                          end;
                    end;
                  A_SAR,A_SHR:
                    if PrePeepholeOptSxx(p) then
                      continue;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  384. { First pass of peephole optimizations }
  385. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging aid: prints 'Ok' on standard output and always yields True, so
    it can be spliced into a boolean "and" chain. }
  function WriteOk : Boolean;
    begin
      WriteOk := True;
      writeln('Ok');
    end;
  391. var
  392. l : longint;
  393. p,hp1,hp2 : tai;
  394. hp3,hp4: tai;
  395. v:aint;
  396. TmpRef: TReference;
  397. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.

         je l1                je l3
         <code>               <code>
       l1:         becomes  l1:
         je l2                je l3
         <code>               <code>
       l2:                  l2:
         jmp l3               jmp l3

     the level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5"

     hp is the jump whose target operand gets rewritten. The reference
     counts of the old and the new target label are kept in step whenever
     the target is replaced.

     Returns false when the trace was abandoned (more than 20 levels deep, a
     jump to itself was found, or a nested trace failed), true otherwise. }
    var p1, p2: tai;
        l: tasmlabel;

    { Skips the items of type SkipInstr/ait_align that follow hp; if the
      item after them is a label, returns true and stores its symbol in l. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetFinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                { resolve the jump we landed on first, then adopt its target }
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              { the jump at the destination is never taken when arriving from }
              { hp, so hp can go directly to whatever follows that jump       }
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
{$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
{$endif finaldestdebug}
                    { no label behind p1 yet: create one and jump there }
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
{$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
{$endif finaldestdebug}
                    { take the reference on the new label before releasing the }
                    { old one, as the other retargeting branches do; without   }
                    { the decrefs the old label keeps a stale reference        }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds the instruction preceding p into p. p's operand 0 is used as a
    constant and operand 1 as a register (the caller is expected to pass a
    "sub $const, reg" - it is outside this view).

      dec reg          ; sub $c, reg   ->  sub $c+1, reg
      sub $c1, reg     ; sub $c, reg   ->  sub $c+c1, reg
      add $c1, reg     ; sub $c, reg   ->  sub $c-c1, reg

    The preceding instruction must have the same operand size and work on
    the same register. In the add case, a resulting constant of 0 makes the
    sub itself redundant: it is removed as well, p is moved to the previous
    instruction (or, if there is none, to the item that followed the sub)
    and True is returned. In every other case the result is False.

    Note: works on (and overwrites) hp1 of the enclosing procedure. }
  function DoSubAddOpt(var p: tai): Boolean;
    var
      prevWasAdd: boolean;
    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) or
         (hp1.typ <> ait_instruction) or
         (taicpu(hp1).opsize <> taicpu(p).opsize) then
        exit;

      case taicpu(hp1).opcode Of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB,
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              { remember the opcode, hp1 is released below }
              prevWasAdd := (taicpu(hp1).opcode = A_ADD);
              if prevWasAdd then
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val)
              else
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
              { add and sub cancelled each other out completely }
              if prevWasAdd and
                 (taicpu(p).oper[0]^.val = 0) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end;
            end;
      end;
    end;
  535. begin
  536. p := BlockStart;
  537. ClearUsedRegs;
  538. while (p <> BlockEnd) Do
  539. begin
  540. UpDateUsedRegs(UsedRegs, tai(p.next));
  541. case p.Typ Of
  542. ait_instruction:
  543. begin
  544. current_filepos:=taicpu(p).fileinfo;
  545. if InsContainsSegRef(taicpu(p)) then
  546. begin
  547. p := tai(p.next);
  548. continue;
  549. end;
  550. { Handle Jmp Optimizations }
  551. if taicpu(p).is_jmp then
  552. begin
  553. {the following if-block removes all code between a jmp and the next label,
  554. because it can never be executed}
  555. if (taicpu(p).opcode = A_JMP) then
  556. begin
  557. hp2:=p;
  558. while GetNextInstruction(hp2, hp1) and
  559. (hp1.typ <> ait_label) do
  560. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  561. begin
  562. { don't kill start/end of assembler block,
  563. no-line-info-start/end etc }
  564. if hp1.typ<>ait_marker then
  565. begin
  566. asml.remove(hp1);
  567. hp1.free;
  568. end
  569. else
  570. hp2:=hp1;
  571. end
  572. else break;
  573. end;
  574. { remove jumps to a label coming right after them }
  575. if GetNextInstruction(p, hp1) then
  576. begin
  577. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  578. { TODO: FIXME removing the first instruction fails}
  579. (p<>blockstart) then
  580. begin
  581. hp2:=tai(hp1.next);
  582. asml.remove(p);
  583. p.free;
  584. p:=hp2;
  585. continue;
  586. end
  587. else
  588. begin
  589. if hp1.typ = ait_label then
  590. SkipLabels(hp1,hp1);
  591. if (tai(hp1).typ=ait_instruction) and
  592. (taicpu(hp1).opcode=A_JMP) and
  593. GetNextInstruction(hp1, hp2) and
  594. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  595. begin
  596. if taicpu(p).opcode=A_Jcc then
  597. begin
  598. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  599. tai_label(hp2).labsym.decrefs;
  600. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  601. { when free'ing hp1, the ref. isn't decresed, so we don't
  602. increase it (FK)
  603. taicpu(p).oper[0]^.ref^.symbol.increfs;
  604. }
  605. asml.remove(hp1);
  606. hp1.free;
  607. GetFinalDestination(asml, taicpu(p),0);
  608. end
  609. else
  610. begin
  611. GetFinalDestination(asml, taicpu(p),0);
  612. p:=tai(p.next);
  613. continue;
  614. end;
  615. end
  616. else
  617. GetFinalDestination(asml, taicpu(p),0);
  618. end;
  619. end;
  620. end
  621. else
  622. { All other optimizes }
  623. begin
  624. for l := 0 to taicpu(p).ops-1 Do
  625. if (taicpu(p).oper[l]^.typ = top_ref) then
  626. With taicpu(p).oper[l]^.ref^ Do
  627. begin
  628. if (base = NR_NO) and
  629. (index <> NR_NO) and
  630. (scalefactor in [0,1]) then
  631. begin
  632. base := index;
  633. index := NR_NO
  634. end
  635. end;
  636. case taicpu(p).opcode Of
  637. A_AND:
  638. if OptPass1And(p) then
  639. continue;
  640. A_CMP:
  641. begin
  642. { cmp register,$8000 neg register
  643. je target --> jo target
  644. .... only if register is deallocated before jump.}
  645. case Taicpu(p).opsize of
  646. S_B: v:=$80;
  647. S_W: v:=$8000;
  648. S_L: v:=aint($80000000);
  649. else
  650. internalerror(2013112905);
  651. end;
  652. if (taicpu(p).oper[0]^.typ=Top_const) and
  653. (taicpu(p).oper[0]^.val=v) and
  654. (Taicpu(p).oper[1]^.typ=top_reg) and
  655. GetNextInstruction(p, hp1) and
  656. (hp1.typ=ait_instruction) and
  657. (taicpu(hp1).opcode=A_Jcc) and
  658. (Taicpu(hp1).condition in [C_E,C_NE]) and
  659. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  660. begin
  661. Taicpu(p).opcode:=A_NEG;
  662. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  663. Taicpu(p).clearop(1);
  664. Taicpu(p).ops:=1;
  665. if Taicpu(hp1).condition=C_E then
  666. Taicpu(hp1).condition:=C_O
  667. else
  668. Taicpu(hp1).condition:=C_NO;
  669. continue;
  670. end;
  671. {
  672. @@2: @@2:
  673. .... ....
  674. cmp operand1,0
  675. jle/jbe @@1
  676. dec operand1 --> sub operand1,1
  677. jmp @@2 jge/jae @@2
  678. @@1: @@1:
  679. ... ....}
  680. if (taicpu(p).oper[0]^.typ = top_const) and
  681. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  682. (taicpu(p).oper[0]^.val = 0) and
  683. GetNextInstruction(p, hp1) and
  684. (hp1.typ = ait_instruction) and
  685. (taicpu(hp1).is_jmp) and
  686. (taicpu(hp1).opcode=A_Jcc) and
  687. (taicpu(hp1).condition in [C_LE,C_BE]) and
  688. GetNextInstruction(hp1,hp2) and
  689. (hp2.typ = ait_instruction) and
  690. (taicpu(hp2).opcode = A_DEC) and
  691. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  692. GetNextInstruction(hp2, hp3) and
  693. (hp3.typ = ait_instruction) and
  694. (taicpu(hp3).is_jmp) and
  695. (taicpu(hp3).opcode = A_JMP) and
  696. GetNextInstruction(hp3, hp4) and
  697. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  698. begin
  699. taicpu(hp2).Opcode := A_SUB;
  700. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  701. taicpu(hp2).loadConst(0,1);
  702. taicpu(hp2).ops:=2;
  703. taicpu(hp3).Opcode := A_Jcc;
  704. case taicpu(hp1).condition of
  705. C_LE: taicpu(hp3).condition := C_GE;
  706. C_BE: taicpu(hp3).condition := C_AE;
  707. end;
  708. asml.remove(p);
  709. asml.remove(hp1);
  710. p.free;
  711. hp1.free;
  712. p := hp2;
  713. continue;
  714. end
  715. end;
  716. A_FLD:
  717. begin
  718. if (taicpu(p).oper[0]^.typ = top_reg) and
  719. GetNextInstruction(p, hp1) and
  720. (hp1.typ = Ait_Instruction) and
  721. (taicpu(hp1).oper[0]^.typ = top_reg) and
  722. (taicpu(hp1).oper[1]^.typ = top_reg) and
  723. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  724. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  725. { change to
  726. fld reg fxxx reg,st
  727. fxxxp st, st1 (hp1)
  728. Remark: non commutative operations must be reversed!
  729. }
  730. begin
  731. case taicpu(hp1).opcode Of
  732. A_FMULP,A_FADDP,
  733. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  734. begin
  735. case taicpu(hp1).opcode Of
  736. A_FADDP: taicpu(hp1).opcode := A_FADD;
  737. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  738. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  739. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  740. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  741. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  742. end;
  743. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  744. taicpu(hp1).oper[1]^.reg := NR_ST;
  745. asml.remove(p);
  746. p.free;
  747. p := hp1;
  748. continue;
  749. end;
  750. end;
  751. end
  752. else
  753. if (taicpu(p).oper[0]^.typ = top_ref) and
  754. GetNextInstruction(p, hp2) and
  755. (hp2.typ = Ait_Instruction) and
  756. (taicpu(hp2).ops = 2) and
  757. (taicpu(hp2).oper[0]^.typ = top_reg) and
  758. (taicpu(hp2).oper[1]^.typ = top_reg) and
  759. (taicpu(p).opsize in [S_FS, S_FL]) and
  760. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  761. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  762. if GetLastInstruction(p, hp1) and
  763. (hp1.typ = Ait_Instruction) and
  764. ((taicpu(hp1).opcode = A_FLD) or
  765. (taicpu(hp1).opcode = A_FST)) and
  766. (taicpu(hp1).opsize = taicpu(p).opsize) and
  767. (taicpu(hp1).oper[0]^.typ = top_ref) and
  768. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  769. if ((taicpu(hp2).opcode = A_FMULP) or
  770. (taicpu(hp2).opcode = A_FADDP)) then
  771. { change to
  772. fld/fst mem1 (hp1) fld/fst mem1
  773. fld mem1 (p) fadd/
  774. faddp/ fmul st, st
  775. fmulp st, st1 (hp2) }
  776. begin
  777. asml.remove(p);
  778. p.free;
  779. p := hp1;
  780. if (taicpu(hp2).opcode = A_FADDP) then
  781. taicpu(hp2).opcode := A_FADD
  782. else
  783. taicpu(hp2).opcode := A_FMUL;
  784. taicpu(hp2).oper[1]^.reg := NR_ST;
  785. end
  786. else
  787. { change to
  788. fld/fst mem1 (hp1) fld/fst mem1
  789. fld mem1 (p) fld st}
  790. begin
  791. taicpu(p).changeopsize(S_FL);
  792. taicpu(p).loadreg(0,NR_ST);
  793. end
  794. else
  795. begin
  796. case taicpu(hp2).opcode Of
  797. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  798. { change to
  799. fld/fst mem1 (hp1) fld/fst mem1
  800. fld mem2 (p) fxxx mem2
  801. fxxxp st, st1 (hp2) }
  802. begin
  803. case taicpu(hp2).opcode Of
  804. A_FADDP: taicpu(p).opcode := A_FADD;
  805. A_FMULP: taicpu(p).opcode := A_FMUL;
  806. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  807. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  808. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  809. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  810. end;
  811. asml.remove(hp2);
  812. hp2.free;
  813. end
  814. end
  815. end
  816. end;
  817. A_FSTP,A_FISTP:
  818. if doFpuLoadStoreOpt(p) then
  819. continue;
  820. A_LEA:
  821. begin
  822. if OptPass1LEA(p) then
  823. continue;
  824. end;
  825. A_MOV:
  826. begin
  827. If OptPass1MOV(p) then
  828. Continue;
  829. end;
  830. A_MOVSX,
  831. A_MOVZX :
  832. begin
  833. If OptPass1Movx(p) then
  834. Continue
  835. end;
  836. (* should not be generated anymore by the current code generator
  837. A_POP:
  838. begin
  839. if target_info.system=system_i386_go32v2 then
  840. begin
  841. { Transform a series of pop/pop/pop/push/push/push to }
  842. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  843. { because I'm not sure whether they can cope with }
  844. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  845. { such a problem when using esp as frame pointer (JM) }
  846. if (taicpu(p).oper[0]^.typ = top_reg) then
  847. begin
  848. hp1 := p;
  849. hp2 := p;
  850. l := 0;
  851. while getNextInstruction(hp1,hp1) and
  852. (hp1.typ = ait_instruction) and
  853. (taicpu(hp1).opcode = A_POP) and
  854. (taicpu(hp1).oper[0]^.typ = top_reg) do
  855. begin
  856. hp2 := hp1;
  857. inc(l,4);
  858. end;
  859. getLastInstruction(p,hp3);
  860. l1 := 0;
  861. while (hp2 <> hp3) and
  862. assigned(hp1) and
  863. (hp1.typ = ait_instruction) and
  864. (taicpu(hp1).opcode = A_PUSH) and
  865. (taicpu(hp1).oper[0]^.typ = top_reg) and
  866. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  867. begin
  868. { change it to a two op operation }
  869. taicpu(hp2).oper[1]^.typ:=top_none;
  870. taicpu(hp2).ops:=2;
  871. taicpu(hp2).opcode := A_MOV;
  872. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  873. reference_reset(tmpref);
  874. tmpRef.base.enum:=R_INTREGISTER;
  875. tmpRef.base.number:=NR_STACK_POINTER_REG;
  876. convert_register_to_enum(tmpref.base);
  877. tmpRef.offset := l;
  878. taicpu(hp2).loadRef(0,tmpRef);
  879. hp4 := hp1;
  880. getNextInstruction(hp1,hp1);
  881. asml.remove(hp4);
  882. hp4.free;
  883. getLastInstruction(hp2,hp2);
  884. dec(l,4);
  885. inc(l1);
  886. end;
  887. if l <> -4 then
  888. begin
  889. inc(l,4);
  890. for l1 := l1 downto 1 do
  891. begin
  892. getNextInstruction(hp2,hp2);
  893. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  894. end
  895. end
  896. end
  897. end
  898. else
  899. begin
  900. if (taicpu(p).oper[0]^.typ = top_reg) and
  901. GetNextInstruction(p, hp1) and
  902. (tai(hp1).typ=ait_instruction) and
  903. (taicpu(hp1).opcode=A_PUSH) and
  904. (taicpu(hp1).oper[0]^.typ = top_reg) and
  905. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  906. begin
  907. { change it to a two op operation }
  908. taicpu(p).oper[1]^.typ:=top_none;
  909. taicpu(p).ops:=2;
  910. taicpu(p).opcode := A_MOV;
  911. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  912. reference_reset(tmpref);
  913. TmpRef.base.enum := R_ESP;
  914. taicpu(p).loadRef(0,TmpRef);
  915. asml.remove(hp1);
  916. hp1.free;
  917. end;
  918. end;
  919. end;
  920. *)
  921. A_PUSH:
  922. begin
  923. if (taicpu(p).opsize = S_W) and
  924. (taicpu(p).oper[0]^.typ = Top_Const) and
  925. GetNextInstruction(p, hp1) and
  926. (tai(hp1).typ = ait_instruction) and
  927. (taicpu(hp1).opcode = A_PUSH) and
  928. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  929. (taicpu(hp1).opsize = S_W) then
  930. begin
  931. taicpu(p).changeopsize(S_L);
  932. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  933. asml.remove(hp1);
  934. hp1.free;
  935. end;
  936. end;
  937. A_SHL, A_SAL:
  938. begin
  939. if (taicpu(p).oper[0]^.typ = Top_Const) and
  940. (taicpu(p).oper[1]^.typ = Top_Reg) and
  941. (taicpu(p).opsize = S_L) and
  942. (taicpu(p).oper[0]^.val <= 3) then
  943. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  944. begin
  945. TmpBool1 := True; {should we check the next instruction?}
  946. TmpBool2 := False; {have we found an add/sub which could be
  947. integrated in the lea?}
  948. reference_reset(tmpref,2,[]);
  949. TmpRef.index := taicpu(p).oper[1]^.reg;
  950. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  951. while TmpBool1 and
  952. GetNextInstruction(p, hp1) and
  953. (tai(hp1).typ = ait_instruction) and
  954. ((((taicpu(hp1).opcode = A_ADD) or
  955. (taicpu(hp1).opcode = A_SUB)) and
  956. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  957. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  958. (((taicpu(hp1).opcode = A_INC) or
  959. (taicpu(hp1).opcode = A_DEC)) and
  960. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  961. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  962. (not GetNextInstruction(hp1,hp2) or
  963. not instrReadsFlags(hp2)) Do
  964. begin
  965. TmpBool1 := False;
  966. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  967. begin
  968. TmpBool1 := True;
  969. TmpBool2 := True;
  970. case taicpu(hp1).opcode of
  971. A_ADD:
  972. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  973. A_SUB:
  974. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  975. end;
  976. asml.remove(hp1);
  977. hp1.free;
  978. end
  979. else
  980. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  981. (((taicpu(hp1).opcode = A_ADD) and
  982. (TmpRef.base = NR_NO)) or
  983. (taicpu(hp1).opcode = A_INC) or
  984. (taicpu(hp1).opcode = A_DEC)) then
  985. begin
  986. TmpBool1 := True;
  987. TmpBool2 := True;
  988. case taicpu(hp1).opcode of
  989. A_ADD:
  990. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  991. A_INC:
  992. inc(TmpRef.offset);
  993. A_DEC:
  994. dec(TmpRef.offset);
  995. end;
  996. asml.remove(hp1);
  997. hp1.free;
  998. end;
  999. end;
  1000. if TmpBool2 or
  1001. ((current_settings.optimizecputype < cpu_Pentium2) and
  1002. (taicpu(p).oper[0]^.val <= 3) and
  1003. not(cs_opt_size in current_settings.optimizerswitches)) then
  1004. begin
  1005. if not(TmpBool2) and
  1006. (taicpu(p).oper[0]^.val = 1) then
  1007. begin
  1008. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1009. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1010. end
  1011. else
  1012. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1013. taicpu(p).oper[1]^.reg);
  1014. InsertLLItem(p.previous, p.next, hp1);
  1015. p.free;
  1016. p := hp1;
  1017. end;
  1018. end
  1019. else
  1020. if (current_settings.optimizecputype < cpu_Pentium2) and
  1021. (taicpu(p).oper[0]^.typ = top_const) and
  1022. (taicpu(p).oper[1]^.typ = top_reg) then
  1023. if (taicpu(p).oper[0]^.val = 1) then
  1024. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1025. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1026. (unlike shl, which is only pairable in the U pipe)}
  1027. begin
  1028. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1029. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1030. InsertLLItem(p.previous, p.next, hp1);
  1031. p.free;
  1032. p := hp1;
  1033. end
  1034. else if (taicpu(p).opsize = S_L) and
  1035. (taicpu(p).oper[0]^.val<= 3) then
  1036. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1037. "shl $3, %reg" to "lea (,%reg,8), %reg"}
  1038. begin
  1039. reference_reset(tmpref,2,[]);
  1040. TmpRef.index := taicpu(p).oper[1]^.reg;
  1041. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1042. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1043. InsertLLItem(p.previous, p.next, hp1);
  1044. p.free;
  1045. p := hp1;
  1046. end
  1047. end;
  1048. A_SETcc :
  1049. { changes
  1050. setcc (funcres) setcc reg
  1051. movb (funcres), reg to leave/ret
  1052. leave/ret }
  1053. begin
  1054. if (taicpu(p).oper[0]^.typ = top_ref) and
  1055. GetNextInstruction(p, hp1) and
  1056. GetNextInstruction(hp1, hp2) and
  1057. IsExitCode(hp2) and
  1058. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1059. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1060. not(assigned(current_procinfo.procdef.funcretsym) and
  1061. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1062. (hp1.typ = ait_instruction) and
  1063. (taicpu(hp1).opcode = A_MOV) and
  1064. (taicpu(hp1).opsize = S_B) and
  1065. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1066. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1067. begin
  1068. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1069. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1070. asml.remove(hp1);
  1071. hp1.free;
  1072. end
  1073. end;
  1074. A_SUB:
  1075. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1076. { * change "sub/add const1, reg" or "dec reg" followed by
  1077. "sub const2, reg" to one "sub ..., reg" }
  1078. begin
  1079. if (taicpu(p).oper[0]^.typ = top_const) and
  1080. (taicpu(p).oper[1]^.typ = top_reg) then
  1081. if (taicpu(p).oper[0]^.val = 2) and
  1082. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1083. { Don't do the sub/push optimization if the sub }
  1084. { comes from setting up the stack frame (JM) }
  1085. (not getLastInstruction(p,hp1) or
  1086. (hp1.typ <> ait_instruction) or
  1087. (taicpu(hp1).opcode <> A_MOV) or
  1088. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1089. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1090. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1091. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1092. begin
  1093. hp1 := tai(p.next);
  1094. while Assigned(hp1) and
  1095. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1096. not RegReadByInstruction(NR_ESP,hp1) and
  1097. not RegModifiedByInstruction(NR_ESP,hp1) do
  1098. hp1 := tai(hp1.next);
  1099. if Assigned(hp1) and
  1100. (tai(hp1).typ = ait_instruction) and
  1101. (taicpu(hp1).opcode = A_PUSH) and
  1102. (taicpu(hp1).opsize = S_W) then
  1103. begin
  1104. taicpu(hp1).changeopsize(S_L);
  1105. if taicpu(hp1).oper[0]^.typ=top_reg then
  1106. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1107. hp1 := tai(p.next);
  1108. asml.remove(p);
  1109. p.free;
  1110. p := hp1;
  1111. continue
  1112. end;
  1113. if DoSubAddOpt(p) then
  1114. continue;
  1115. end
  1116. else if DoSubAddOpt(p) then
  1117. continue
  1118. end;
  1119. A_VMOVAPS,
  1120. A_VMOVAPD:
  1121. if OptPass1VMOVAP(p) then
  1122. continue;
  1123. A_VDIVSD,
  1124. A_VDIVSS,
  1125. A_VSUBSD,
  1126. A_VSUBSS,
  1127. A_VMULSD,
  1128. A_VMULSS,
  1129. A_VADDSD,
  1130. A_VADDSS:
  1131. if OptPass1VOP(p) then
  1132. continue;
  1133. A_MULSD,
  1134. A_MULSS,
  1135. A_ADDSD,
  1136. A_ADDSS:
  1137. if OptPass1OP(p) then
  1138. continue;
  1139. A_MOVAPD,
  1140. A_MOVAPS:
  1141. if OptPass1MOVAP(p) then
  1142. continue;
  1143. A_VMOVSD,
  1144. A_VMOVSS,
  1145. A_MOVSD,
  1146. A_MOVSS:
  1147. if OptPass1MOVXX(p) then
  1148. continue;
  1149. end;
  1150. end; { if is_jmp }
  1151. end;
  1152. end;
  1153. updateUsedRegs(UsedRegs,p);
  1154. p:=tai(p.next);
  1155. end;
  1156. end;
  { Second peephole pass over the current block.

    Walks the list from BlockStart up to (not including) BlockEnd.  Every
    instruction that does not contain a segment reference is dispatched on
    its opcode to the matching pass-2 helper.  When a helper returns true
    the loop restarts on the (possibly replaced) current entry without
    advancing; otherwise the walk moves on to the next list entry. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      curr : tai;
      { true when a helper reported a change and the current entry has to
        be looked at again }
      revisit : boolean;
    begin
      curr := BlockStart;
      ClearUsedRegs;
      while curr <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(curr.next));
          if curr.typ = ait_instruction then
            begin
              { instructions containing a segment reference are left alone }
              if InsContainsSegRef(taicpu(curr)) then
                begin
                  curr := tai(curr.next);
                  continue;
                end;
              revisit := false;
              case taicpu(curr).opcode of
                A_Jcc:
                  revisit := OptPass2Jcc(curr);
                A_FSTP,A_FISTP:
                  revisit := DoFpuLoadStoreOpt(curr);
                A_IMUL:
                  revisit := OptPass2Imul(curr);
                A_JMP:
                  revisit := OptPass2Jmp(curr);
                A_MOV:
                  revisit := OptPass2MOV(curr);
              end;
              if revisit then
                continue;
            end;
          curr := tai(curr.next);
        end;
    end;
  { Final peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd).

    Instructions containing a segment reference are skipped.  The remaining
    ones are dispatched on their opcode:
      A_CALL        - call/jmp and call/ret sequences are turned into jumps
      A_CMP         - "cmp $0, %reg" becomes "test %reg, %reg"
      A_MOV         - delegated to PostPeepholeOptMov
      A_MOVZX       - Pentium only: movzbl is split into xorl + movb
      A_TEST, A_OR  - a test made redundant by the preceding instruction is
                      removed; "test $-1,%reg" becomes "test %reg,%reg" }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;

    { Removes the current instruction from the list, frees it and makes its
      successor the current instruction.  hp1 ends up equal to the new p. }
    procedure DropCurrentInstruction;
      begin
        hp1 := tai(p.next);
        asml.remove(p);
        p.free;
        p := tai(hp1);
      end;

    { change "test $-1,%reg" into "test %reg,%reg" }
    procedure TestAllOnesToTestReg;
      begin
        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end;

    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          if p.typ = ait_instruction then
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode of
                A_CALL:
                  begin
                    { replace
                        call procname
                        jmp  label
                      by
                        push label
                        jmp  procname
                      don't do this on modern CPUs, this really hurts them due to
                      broken call/ret pairing }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and
                        (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end
                    { replace
                        call procname
                        ret
                      by
                        jmp procname

                      this should never hurt except when pic is used, not sure
                      how to handle it then

                      but do it only on level 4 because it destroys stack back traces }
                    else if (cs_opt_level4 in current_settings.optimizerswitches) and
                            not(cs_create_pic in current_settings.moduleswitches) and
                            GetNextInstruction(p, hp1) and
                            (hp1.typ = ait_instruction) and
                            (taicpu(hp1).opcode = A_RET) and
                            (taicpu(hp1).ops=0) then
                      begin
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  end;
                A_CMP:
                  begin
                    { change "cmp $0, %reg" to "test %reg, %reg"; the loop is
                      restarted on the same instruction, which is now a TEST }
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = top_reg) then
                      begin
                        taicpu(p).opcode := A_TEST;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        continue;
                      end;
                  end;
                A_MOV:
                  PostPeepholeOptMov(p);
                A_MOVZX:
                  { if register vars are on, it's possible there is code like }
                  { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"             }
                  { so we can't safely replace the movzx then with xor/mov,   }
                  { since that would change the flags (JM)                    }
                  if not(cs_opt_regvar in current_settings.optimizerswitches) and
                     (taicpu(p).oper[1]^.typ = top_reg) then
                    begin
                      if taicpu(p).oper[0]^.typ = top_reg then
                        begin
                          { Change "movzbl %reg1, %reg2" to
                            "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                            PentiumMMX }
                          if (taicpu(p).opsize = S_BL) and
                             IsGP32Reg(taicpu(p).oper[1]^.reg) and
                             not(cs_opt_size in current_settings.optimizerswitches) and
                             (current_settings.optimizecputype = cpu_Pentium) then
                            begin
                              { the xor is built before the destination is
                                narrowed to its 8 bit subregister below }
                              hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                              InsertLLItem(p.previous, p, hp1);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                            end;
                        end
                      { changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                        Pentium and PentiumMMX; not possible when the destination
                        is also the base or index of the memory operand }
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                        begin
                          hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                            taicpu(p).oper[1]^.reg);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_B);
                          setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                          InsertLLItem(p.previous, p, hp1);
                        end;
                    end;
                A_TEST, A_OR:
                  { removes the line marked with (x) from the sequence
                      and/or/xor/add/sub/... $x, %y
                      test/or %y, %y  |  test $-1, %y    (x)
                      j(n)z _Label
                    as the first instruction already adjusts the ZF
                    %y operand may also be a reference }
                  begin
                    IsTestConstX := (taicpu(p).opcode=A_TEST) and
                                    MatchOperand(taicpu(p).oper[0]^,-1);
                    { hp1 = previous instruction, hp2 = flag consumer after p }
                    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                       GetLastInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       GetNextInstruction(p,hp2) and
                       MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                      case taicpu(hp1).opcode of
                        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                          begin
                            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                               { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                               { and in case of carry for A(E)/B(E)/C/NC                  }
                               ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                ((taicpu(hp1).opcode <> A_ADD) and
                                 (taicpu(hp1).opcode <> A_SUB))) then
                              begin
                                DropCurrentInstruction;
                                continue
                              end;
                          end;
                        A_SHL, A_SAL, A_SHR, A_SAR:
                          begin
                            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                               { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                               { therefore, it's only safe to do this optimization for     }
                               { shifts by a (nonzero) constant                            }
                               (taicpu(hp1).oper[0]^.typ = top_const) and
                               (taicpu(hp1).oper[0]^.val <> 0) and
                               { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                               { and in case of carry for A(E)/B(E)/C/NC                  }
                               (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                              begin
                                DropCurrentInstruction;
                                continue
                              end;
                          end;
                        A_DEC, A_INC, A_NEG:
                          begin
                            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                               { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                               { and in case of carry for A(E)/B(E)/C/NC                  }
                               (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                              begin
                                { replace inc/dec with add/sub 1, because inc/dec
                                  doesn't set the carry flag; neg is left as it is }
                                if (taicpu(hp1).opcode = A_DEC) or
                                   (taicpu(hp1).opcode = A_INC) then
                                  begin
                                    if taicpu(hp1).opcode = A_DEC then
                                      taicpu(hp1).opcode := A_SUB
                                    else
                                      taicpu(hp1).opcode := A_ADD;
                                    taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                    taicpu(hp1).loadConst(0,1);
                                    taicpu(hp1).ops:=2;
                                  end;
                                DropCurrentInstruction;
                                continue
                              end;
                          end
                        else
                          TestAllOnesToTestReg;
                      end { case }
                    else
                      TestAllOnesToTestReg;
                  end;
              end;
            end;
          p := tai(p.next)
        end;
    end;
  { Driver of the assembler optimizer.

    Repeatedly walks the whole assembler list (asml), one block of "normal"
    instructions at a time, and runs the peephole passes on every block when
    cs_opt_peephole is enabled.  Inline assembler blocks, delimited by
    mark_AsmBlockStart / mark_AsmBlockEnd markers, are skipped.

    Without cs_opt_level3 exactly one iteration is done.  With it the loop
    is repeated; note that within this procedure "changed" is only ever
    assigned false, so the "not changed and (passNo > 2)" term is what ends
    the loop. }
  Procedure TCpuAsmOptimizer.Optimize;
    Var
      nextInstr: Tai;
      passNo: longint;
      multiPass, changed, finalPass: boolean;
    Begin
      multiPass := (cs_opt_level3 in current_settings.optimizerswitches);
      passNo := 0;
      changed := false;
      repeat
        finalPass :=
          not(multiPass) or
          ((not changed) and (passNo > 2)) or
          { prevent endless loops }
          (passNo = 4);
        changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
        { or nil                                                                     }
        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                { Peephole optimizations; the very first iteration also runs
                  the pre-pass and performs pass 1 twice }
                if passNo = 0 then
                  begin
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of an }
            { assembler block or nil                                         }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_Marker) and
                  (Tai_Marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart := Tai(BlockStart.Next);
                until (BlockStart.typ = ait_Marker) and
                      (Tai_Marker(BlockStart).Kind = mark_AsmBlockEnd);
                { Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
                if GetNextInstruction(BlockStart, nextInstr) and
                   not((nextInstr.typ = ait_Marker) and
                       (Tai_Marker(nextInstr).Kind = mark_AsmBlockStart)) then
                  { No assembler block follows directly after the current one, }
                  { so optimize the next block of "normal" instructions        }
                  pass_1
                else
                  { Otherwise, skip the next assembler block }
                  BlockStart := nextInstr;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    End;
  { Unit initialization: store the TCpuAsmOptimizer class in casmoptimizer.
    NOTE(review): presumably this is how the target independent code picks
    the optimizer class to instantiate - confirm against the unit that
    declares casmoptimizer. }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.