aoptcpu.pas 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { Assembler peephole optimizer for i386 (see the unit header); extends the
    shared x86 optimizer class TX86AsmOptimizer. }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { Overridden entry point; implementation not shown in this part of the file. }
    procedure Optimize; override;
    { Pre-pass: rewrites certain "imul const,reg" instructions into lea/add
      sequences, delegates sar/shr to PrePeepholeOptSxx and temporarily turns
      "xor reg,reg" into "mov $0,reg". }
    procedure PrePeepHoleOpts; override;
    { First peephole pass (jump, cmp, FPU, lea/mov/movx optimizations). }
    procedure PeepHoleOptPass1; override;
    { Second peephole pass; implementation not shown in this part of the file. }
    procedure PeepHoleOptPass2; override;
    { Post pass; implementation not shown in this part of the file. }
    procedure PostPeepHoleOpts; override;
    { Tries to remove an fstp/fld (or fistp/fild) pair on the same memory
      location; returns true if the caller should "continue" its loop. }
    function DoFpuLoadStoreOpt(var p : tai) : boolean;
  end;
  34. Var
  35. AsmOptimizer : TCpuAsmOptimizer;
  36. Implementation
  37. uses
  38. verbose,globtype,globals,
  39. cpuinfo,
  40. aasmcpu,
  41. aoptutils,
  42. procinfo,
  43. cgutils,
  44. { units we should get rid of: }
  45. symsym,symconst;
  { Looks for a store/reload pair on the FPU stack:

        fstp  mem            fistp mem
        fld   mem     or     fild  mem

    where both instructions use the same operand size and the same memory
    reference. If, in addition, the size is S_FX, the instruction following
    the pair is exit code, and the reference is addressed relative to the
    frame pointer without an index register (and does not lie below the
    function result's local location, if there is a function result symbol),
    both instructions are removed.

    p       on entry the store instruction; on success it is moved to the
            instruction that followed the removed pair
    result  true if a "continue" should be done by the caller after this
            optimization, false if nothing was changed }
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  var
    reloadIns, followIns: tai;
    floatPair, intPair: boolean;
  begin
    DoFpuLoadStoreOpt := false;

    { --- step 1: is this a store to memory followed by an instruction? --- }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reloadIns) then
      exit;
    if reloadIns.typ <> ait_instruction then
      exit;

    { --- step 2: fstp/fld or fistp/fild on the very same location --- }
    floatPair := (taicpu(p).opcode = A_FSTP) and (taicpu(reloadIns).opcode = A_FLD);
    intPair := (taicpu(p).opcode = A_FISTP) and (taicpu(reloadIns).opcode = A_FILD);
    if not (floatPair or intPair) then
      exit;
    if taicpu(reloadIns).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(reloadIns).opsize <> taicpu(p).opsize then
      exit;
    if not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reloadIns).oper[0]^.ref^) then
      exit;

    { --- step 3: conditions under which the pair can be dropped entirely --- }
    { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
    if taicpu(p).opsize <> S_FX then
      exit;
    if not getNextInstruction(reloadIns, followIns) then
      exit;
    if followIns.typ <> ait_instruction then
      exit;
    if not IsExitCode(followIns) then
      exit;
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    { --- step 4: drop both instructions and resume at the exit code --- }
    asml.remove(p);
    asml.remove(reloadIns);
    p.free;
    reloadIns.free;
    p := followIns;
    removeLastDeallocForFuncRes(p);
    DoFpuLoadStoreOpt := true;

    (* Disabled alternative for the non-exit case; it can't be done because
       the store operation rounds:

         { fst can't store an extended value! }
         if (taicpu(p).opsize <> S_FX) and
            (taicpu(p).opsize <> S_IQ) then
           begin
             if (taicpu(p).opcode = A_FSTP) then
               taicpu(p).opcode := A_FST
             else taicpu(p).opcode := A_FIST;
             asml.remove(hp1);
             hp1.free;
           end
    *)
  end;
  { Converts a tinschange value that names one of the eight general purpose
    registers into the matching tsuperregister.

    Values up to CH_REDI index the table directly. Values in the following
    groups (up to CH_WEDI, CH_RWEDI and CH_MEDI) are first mapped back by
    subtracting the ordinal of the last member of the preceding group.
    NOTE(review): this relies on the groups being declared contiguously and in
    the same register order in tinschange - confirm against its declaration.

    Anything above CH_MEDI triggers InternalError(2016041901). }
  function tch2reg(ch: tinschange): tsuperregister;
  const
    regForChange: array[CH_REAX..CH_REDI] of tsuperregister = (
      RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
  var
    prevGroupLast: tinschange;
  begin
    { first group: usable as table index as is }
    if (ch <= CH_REDI) then
      begin
        tch2reg := regForChange[ch];
        exit;
      end;

    { later groups: find the last member of the group right before ch's group }
    if (ch <= CH_WEDI) then
      prevGroupLast := CH_REDI
    else if (ch <= CH_RWEDI) then
      prevGroupLast := CH_WEDI
    else if (ch <= CH_MEDI) then
      prevGroupLast := CH_RWEDI
    else
      begin
        InternalError(2016041901);
        exit;
      end;

    tch2reg := regForChange[tinschange(ord(ch) - ord(prevGroupLast))];
  end;
  { Returns true if reg is an integer register (R_INTREGISTER) whose super
    register lies in the range RS_EAX..RS_EBX; false otherwise.
    Note that only register type and super register are inspected here. }
  function isgp32reg(reg: TRegister): boolean;
  var
    sup: tsuperregister;
  begin
    isgp32reg := false;
    if getregtype(reg) <> R_INTREGISTER then
      exit;
  {$push}{$warnings off}
    sup := getsupreg(reg);
    isgp32reg := (sup >= RS_EAX) and (sup <= RS_EBX);
  {$pop}
  end;
  { Returns true if at least one operand of instruction p is a memory
    reference whose segment register is set (i.e. differs from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    opIdx: longint;
  begin
    result := false;
    for opIdx := 0 to p.opercnt-1 do
      with p.oper[opIdx]^ do
        if (typ = top_ref) and (ref^.segment <> NR_NO) then
          begin
            { one hit is enough, no need to look at the remaining operands }
            result := true;
            exit;
          end;
  end;
  { Tells whether the list item p has to be treated as reading the CPU flags.

    - instructions: true if the opcode's InsProp change set contains any of
      the "read" or "read/write" flag entries listed below (or Ch_All)
    - labels:       always true
      (NOTE(review): presumably a conservative answer because code reached
      through the label is not inspected here - confirm)
    - anything else: false }
  function InstrReadsFlags(p: tai): boolean;
  begin
    case p.typ of
      ait_instruction:
        InstrReadsFlags :=
          InsProp[taicpu(p).opcode].Ch*
            [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
             Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
             Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[];
      ait_label:
        InstrReadsFlags := true;
      else
        InstrReadsFlags := false;
    end;
  end;
  { Pre-peephole pass over all items from BlockStart up to (not including)
    BlockEnd. Instructions containing a segment-prefixed memory operand are
    skipped. For the remaining instructions:

      imul  - 32 bit "imul $const,reg1[,reg2]" with a small constant is
              replaced:
                $1            -> removed (2 operands) / "mov reg1,reg2" (3)
                $3,$5,$9      -> one lea
                $6,$10,$12    -> lea/add or lea/lea pair, only when
                                 optimizecputype <= cpu_386
              The constant > 1 rewrites are not done when optimizing for size
              or when the next instruction is a jo/jno (NOTE(review):
              presumably because the replacement does not produce the overflow
              flag that jump tests).
      sar,shr - handed to PrePeepholeOptSxx
      xor reg,reg - temporarily turned into "mov $0,reg" to make it easier
              for the CSE; will be changed back in pass 2 }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  var
    p,hp1: tai;
    tmpRef: treference;

    { Replaces the imul at p by "lea (reg1,reg1,scale), dest", where reg1 is
      operand 1 of the imul and dest is reg1 (2 operand form) or operand 2
      (3 operand form). Expects tmpRef to be freshly reset. On return p is the
      new lea and the old imul has been freed. }
    procedure ReplaceImulBySingleLea(scale: byte);
    begin
      tmpRef.base := taicpu(p).oper[1]^.reg;
      tmpRef.index := taicpu(p).oper[1]^.reg;
      tmpRef.ScaleFactor := scale;
      if (taicpu(p).ops = 2) then
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg)
      else
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
      { linking hp1 between p's neighbours takes p out of the list }
      InsertLLItem(p.previous, p.next, hp1);
      p.free;
      p := hp1;
    end;

  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          {remove "imul $1, reg"}
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                        ((taicpu(p).ops <= 2) or
                         (taicpu(p).oper[2]^.typ = Top_Reg)) and
                        (taicpu(p).oper[0]^.val <= 12) and
                        not(cs_opt_size in current_settings.optimizerswitches) and
                        (not(GetNextInstruction(p, hp1)) or
                         {GetNextInstruction(p, hp1) and}
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpRef,1,[]);
                          case taicpu(p).oper[0]^.val Of
                            3:
                              {imul 3, reg1, reg2 to
                                 lea (reg1,reg1,2), reg2
                               imul 3, reg1 to
                                 lea (reg1,reg1,2), reg1}
                              ReplaceImulBySingleLea(2);
                            5:
                              {imul 5, reg1, reg2 to
                                 lea (reg1,reg1,4), reg2
                               imul 5, reg1 to
                                 lea (reg1,reg1,4), reg1}
                              ReplaceImulBySingleLea(4);
                            6:
                              begin
                                {imul 6, reg1, reg2 to
                                   lea (,reg1,2), reg2
                                   lea (reg2,reg1,4), reg2
                                 imul 6, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   add reg1, reg1}
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { build the SECOND instruction first and
                                      put it right behind the imul }
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := taicpu(p).oper[2]^.reg;
                                        tmpRef.ScaleFactor := 4;
                                        { the result has to end up in reg2
                                          (operand 2), not in the source
                                          register reg1 }
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(p, p.next, hp1);
                                    { now the FIRST instruction, which takes
                                      the place of the imul }
                                    reference_reset(tmpRef,2,[]);
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    tmpRef.ScaleFactor := 2;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := NR_NO;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(p.previous, p.next, hp1);
                                    p.free;
                                    { continue behind the two new instructions }
                                    p := tai(hp1.next);
                                  end
                              end;
                            9:
                              {imul 9, reg1, reg2 to
                                 lea (reg1,reg1,8), reg2
                               imul 9, reg1 to
                                 lea (reg1,reg1,8), reg1}
                              ReplaceImulBySingleLea(8);
                            10:
                              begin
                                {imul 10, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                   add reg2, reg2
                                 imul 10, reg1 to
                                   lea (reg1,reg1,4), reg1
                                   add reg1, reg1}
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { the trailing add goes right behind the imul }
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                    InsertLLItem(p, p.next, hp1);
                                    { the imul itself becomes the lea }
                                    ReplaceImulBySingleLea(4);
                                    { step onto the add; the loop end moves past it }
                                    p := tai(p.next);
                                  end
                              end;
                            12:
                              begin
                                {imul 12, reg1, reg2 to
                                   lea (,reg1,4), reg2
                                   lea (reg2,reg1,8), reg2
                                 imul 12, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   lea (,reg1,4), reg1}
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { second lea, inserted behind the imul }
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := taicpu(p).oper[2]^.reg;
                                        tmpRef.ScaleFactor := 8;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := NR_NO;
                                        tmpRef.ScaleFactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(p, p.next, hp1);
                                    { first lea, replaces the imul }
                                    reference_reset(tmpRef,2,[]);
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := NR_NO;
                                        tmpRef.ScaleFactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        tmpRef.ScaleFactor := 2;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end
                          end;
                        end;
                  end;
                A_SAR,A_SHR:
                  if PrePeepholeOptSxx(p) then
                    continue;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2              }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  384. { First pass of peephole optimizations }
  385. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging aid: prints "Ok" on standard output and always yields True, so
    it can be dropped into a boolean expression. }
  function WriteOk : Boolean;
  begin
    WriteOk := True;
    writeln('Ok');
  end;
  391. var
  392. l : longint;
  393. p,hp1,hp2 : tai;
  394. hp3,hp4: tai;
  395. v:aint;
  396. TmpRef: TReference;
  397. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   asml   the assembler list being optimized (only handed on to the
          recursive calls)
   hp     the jump whose target symbol is followed and possibly replaced
   level  denotes how deep we have already followed the jump, to avoid
          endless loops with constructs such as "l5: ; jmp l5"

   Returns false if the trace was given up (level > 20, a jump to its own
   target, or a failed recursive trace), true otherwise - including the case
   where nothing had to be changed. }
  var p1, p2: tai;
      l: tasmlabel;

    { Skips the items in SkipInstr and alignment items that follow hp; if the
      next item after them is a label, returns true and its symbol in l. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).labsym;
        end
    end;

  begin
    GetfinalDestination := false;
    if level > 20 then
      exit;
    p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((taicpu(p1).condition = inverse_cond(hp.condition)) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5" }
              if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                  tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                exit;
              { first resolve the jump we landed on, then take over its target }
              if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                exit;
              tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
              hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
              tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
            end
          else
            { the jump at the destination is the exact opposite of hp, so it }
            { is never taken when reached via hp: retarget hp to just behind }
            { it                                                             }
            if (taicpu(p1).condition = inverse_cond(hp.condition)) then
              if not FindAnyLabel(p1,l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(p1,p1.next,tai_comment.Create(
                    strpnew('previous label inserted')));
  {$endif finaldestdebug}
                  current_asmdata.getjumplabel(l);
                  insertllitem(p1,p1.next,tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp);                               }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(p1,p1.next,tai_comment.Create(
                    strpnew('next label reused')));
  {$endif finaldestdebug}
                  { NOTE(review): unlike the branch above, the previous target }
                  { of hp is not decref'd here before being replaced - confirm }
                  { whether that is intended                                   }
                  l.increfs;
                  hp.oper[0]^.ref^.symbol := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  { Folds the instruction preceding p into p, where p is expected to be of the
    form "<op> $const, reg" (operand 0 constant, operand 1 register) and the
    preceding instruction has the same operand size and works on the same
    register:

        dec reg          -> p's constant is increased by 1
        sub $c, reg      -> p's constant is increased by c
        add $c, reg      -> p's constant is decreased by c

    In each case the preceding instruction is removed. If, after folding an
    add, p's constant has become 0, p itself is removed as well, p is set to
    the previous instruction (or, if there is none, to the next item) and the
    result is True. In all other cases the result is False.

    Uses the hp1 variable of the enclosing procedure as scratch. }
  function DoSubAddOpt(var p: tai): Boolean;

    { true if hp1 is "<op> $const, reg" with the same reg as p's operand 1 }
    function PrevIsConstOnSameReg: Boolean;
    begin
      PrevIsConstOnSameReg :=
        (taicpu(hp1).oper[0]^.typ = top_const) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg);
    end;

    { takes hp1 out of the list and disposes of it }
    procedure DropPrev;
    begin
      asml.remove(hp1);
      hp1.free;
    end;

  begin
    DoSubAddOpt := False;
    if not GetLastInstruction(p, hp1) then
      exit;
    if hp1.typ <> ait_instruction then
      exit;
    if taicpu(hp1).opsize <> taicpu(p).opsize then
      exit;

    case taicpu(hp1).opcode Of
      A_DEC:
        if (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
            DropPrev;
          end;
      A_SUB:
        if PrevIsConstOnSameReg then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
            DropPrev;
          end;
      A_ADD:
        if PrevIsConstOnSameReg then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            DropPrev;
            if (taicpu(p).oper[0]^.val = 0) then
              begin
                { the two instructions cancel each other out: drop p too }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                if not GetLastInstruction(hp1, p) then
                  p := hp1;
                DoSubAddOpt := True;
              end
          end;
    end;
  end;
  535. begin
  536. p := BlockStart;
  537. ClearUsedRegs;
  538. while (p <> BlockEnd) Do
  539. begin
  540. UpDateUsedRegs(UsedRegs, tai(p.next));
  541. case p.Typ Of
  542. ait_instruction:
  543. begin
  544. current_filepos:=taicpu(p).fileinfo;
  545. if InsContainsSegRef(taicpu(p)) then
  546. begin
  547. p := tai(p.next);
  548. continue;
  549. end;
  550. { Handle Jmp Optimizations }
  551. if taicpu(p).is_jmp then
  552. begin
  553. {the following if-block removes all code between a jmp and the next label,
  554. because it can never be executed}
  555. if (taicpu(p).opcode = A_JMP) then
  556. begin
  557. hp2:=p;
  558. while GetNextInstruction(hp2, hp1) and
  559. (hp1.typ <> ait_label) do
  560. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  561. begin
  562. { don't kill start/end of assembler block,
  563. no-line-info-start/end etc }
  564. if hp1.typ<>ait_marker then
  565. begin
  566. asml.remove(hp1);
  567. hp1.free;
  568. end
  569. else
  570. hp2:=hp1;
  571. end
  572. else break;
  573. end;
  574. { remove jumps to a label coming right after them }
  575. if GetNextInstruction(p, hp1) then
  576. begin
  577. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  578. { TODO: FIXME removing the first instruction fails}
  579. (p<>blockstart) then
  580. begin
  581. hp2:=tai(hp1.next);
  582. asml.remove(p);
  583. p.free;
  584. p:=hp2;
  585. continue;
  586. end
  587. else
  588. begin
  589. if hp1.typ = ait_label then
  590. SkipLabels(hp1,hp1);
  591. if (tai(hp1).typ=ait_instruction) and
  592. (taicpu(hp1).opcode=A_JMP) and
  593. GetNextInstruction(hp1, hp2) and
  594. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  595. begin
  596. if taicpu(p).opcode=A_Jcc then
  597. begin
  598. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  599. tai_label(hp2).labsym.decrefs;
  600. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  601. { when free'ing hp1, the ref. isn't decresed, so we don't
  602. increase it (FK)
  603. taicpu(p).oper[0]^.ref^.symbol.increfs;
  604. }
  605. asml.remove(hp1);
  606. hp1.free;
  607. GetFinalDestination(asml, taicpu(p),0);
  608. end
  609. else
  610. begin
  611. GetFinalDestination(asml, taicpu(p),0);
  612. p:=tai(p.next);
  613. continue;
  614. end;
  615. end
  616. else
  617. GetFinalDestination(asml, taicpu(p),0);
  618. end;
  619. end;
  620. end
  621. else
  622. { All other optimizes }
  623. begin
  624. for l := 0 to taicpu(p).ops-1 Do
  625. if (taicpu(p).oper[l]^.typ = top_ref) then
  626. With taicpu(p).oper[l]^.ref^ Do
  627. begin
  628. if (base = NR_NO) and
  629. (index <> NR_NO) and
  630. (scalefactor in [0,1]) then
  631. begin
  632. base := index;
  633. index := NR_NO
  634. end
  635. end;
  636. case taicpu(p).opcode Of
  637. A_AND:
  638. if OptPass1And(p) then
  639. continue;
  640. A_CMP:
  641. begin
  642. { cmp register,$8000 neg register
  643. je target --> jo target
  644. .... only if register is deallocated before jump.}
  645. case Taicpu(p).opsize of
  646. S_B: v:=$80;
  647. S_W: v:=$8000;
  648. S_L: v:=aint($80000000);
  649. else
  650. internalerror(2013112905);
  651. end;
  652. if (taicpu(p).oper[0]^.typ=Top_const) and
  653. (taicpu(p).oper[0]^.val=v) and
  654. (Taicpu(p).oper[1]^.typ=top_reg) and
  655. GetNextInstruction(p, hp1) and
  656. (hp1.typ=ait_instruction) and
  657. (taicpu(hp1).opcode=A_Jcc) and
  658. (Taicpu(hp1).condition in [C_E,C_NE]) and
  659. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  660. begin
  661. Taicpu(p).opcode:=A_NEG;
  662. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  663. Taicpu(p).clearop(1);
  664. Taicpu(p).ops:=1;
  665. if Taicpu(hp1).condition=C_E then
  666. Taicpu(hp1).condition:=C_O
  667. else
  668. Taicpu(hp1).condition:=C_NO;
  669. continue;
  670. end;
  671. {
  672. @@2: @@2:
  673. .... ....
  674. cmp operand1,0
  675. jle/jbe @@1
  676. dec operand1 --> sub operand1,1
  677. jmp @@2 jge/jae @@2
  678. @@1: @@1:
  679. ... ....}
  680. if (taicpu(p).oper[0]^.typ = top_const) and
  681. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  682. (taicpu(p).oper[0]^.val = 0) and
  683. GetNextInstruction(p, hp1) and
  684. (hp1.typ = ait_instruction) and
  685. (taicpu(hp1).is_jmp) and
  686. (taicpu(hp1).opcode=A_Jcc) and
  687. (taicpu(hp1).condition in [C_LE,C_BE]) and
  688. GetNextInstruction(hp1,hp2) and
  689. (hp2.typ = ait_instruction) and
  690. (taicpu(hp2).opcode = A_DEC) and
  691. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  692. GetNextInstruction(hp2, hp3) and
  693. (hp3.typ = ait_instruction) and
  694. (taicpu(hp3).is_jmp) and
  695. (taicpu(hp3).opcode = A_JMP) and
  696. GetNextInstruction(hp3, hp4) and
  697. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  698. begin
  699. taicpu(hp2).Opcode := A_SUB;
  700. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  701. taicpu(hp2).loadConst(0,1);
  702. taicpu(hp2).ops:=2;
  703. taicpu(hp3).Opcode := A_Jcc;
  704. case taicpu(hp1).condition of
  705. C_LE: taicpu(hp3).condition := C_GE;
  706. C_BE: taicpu(hp3).condition := C_AE;
  707. end;
  708. asml.remove(p);
  709. asml.remove(hp1);
  710. p.free;
  711. hp1.free;
  712. p := hp2;
  713. continue;
  714. end
  715. end;
  716. A_FLD:
  717. begin
  718. if (taicpu(p).oper[0]^.typ = top_reg) and
  719. GetNextInstruction(p, hp1) and
  720. (hp1.typ = Ait_Instruction) and
  721. (taicpu(hp1).oper[0]^.typ = top_reg) and
  722. (taicpu(hp1).oper[1]^.typ = top_reg) and
  723. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  724. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  725. { change to
  726. fld reg fxxx reg,st
  727. fxxxp st, st1 (hp1)
  728. Remark: non commutative operations must be reversed!
  729. }
  730. begin
  731. case taicpu(hp1).opcode Of
  732. A_FMULP,A_FADDP,
  733. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  734. begin
  735. case taicpu(hp1).opcode Of
  736. A_FADDP: taicpu(hp1).opcode := A_FADD;
  737. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  738. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  739. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  740. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  741. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  742. end;
  743. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  744. taicpu(hp1).oper[1]^.reg := NR_ST;
  745. asml.remove(p);
  746. p.free;
  747. p := hp1;
  748. continue;
  749. end;
  750. end;
  751. end
  752. else
  753. if (taicpu(p).oper[0]^.typ = top_ref) and
  754. GetNextInstruction(p, hp2) and
  755. (hp2.typ = Ait_Instruction) and
  756. (taicpu(hp2).ops = 2) and
  757. (taicpu(hp2).oper[0]^.typ = top_reg) and
  758. (taicpu(hp2).oper[1]^.typ = top_reg) and
  759. (taicpu(p).opsize in [S_FS, S_FL]) and
  760. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  761. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  762. if GetLastInstruction(p, hp1) and
  763. (hp1.typ = Ait_Instruction) and
  764. ((taicpu(hp1).opcode = A_FLD) or
  765. (taicpu(hp1).opcode = A_FST)) and
  766. (taicpu(hp1).opsize = taicpu(p).opsize) and
  767. (taicpu(hp1).oper[0]^.typ = top_ref) and
  768. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  769. if ((taicpu(hp2).opcode = A_FMULP) or
  770. (taicpu(hp2).opcode = A_FADDP)) then
  771. { change to
  772. fld/fst mem1 (hp1) fld/fst mem1
  773. fld mem1 (p) fadd/
  774. faddp/ fmul st, st
  775. fmulp st, st1 (hp2) }
  776. begin
  777. asml.remove(p);
  778. p.free;
  779. p := hp1;
  780. if (taicpu(hp2).opcode = A_FADDP) then
  781. taicpu(hp2).opcode := A_FADD
  782. else
  783. taicpu(hp2).opcode := A_FMUL;
  784. taicpu(hp2).oper[1]^.reg := NR_ST;
  785. end
  786. else
  787. { change to
  788. fld/fst mem1 (hp1) fld/fst mem1
  789. fld mem1 (p) fld st}
  790. begin
  791. taicpu(p).changeopsize(S_FL);
  792. taicpu(p).loadreg(0,NR_ST);
  793. end
  794. else
  795. begin
  796. case taicpu(hp2).opcode Of
  797. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  798. { change to
  799. fld/fst mem1 (hp1) fld/fst mem1
  800. fld mem2 (p) fxxx mem2
  801. fxxxp st, st1 (hp2) }
  802. begin
  803. case taicpu(hp2).opcode Of
  804. A_FADDP: taicpu(p).opcode := A_FADD;
  805. A_FMULP: taicpu(p).opcode := A_FMUL;
  806. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  807. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  808. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  809. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  810. end;
  811. asml.remove(hp2);
  812. hp2.free;
  813. end
  814. end
  815. end
  816. end;
  817. A_FSTP,A_FISTP:
  818. if doFpuLoadStoreOpt(p) then
  819. continue;
  820. A_LEA:
  821. begin
  822. if OptPass1LEA(p) then
  823. continue;
  824. end;
  825. A_MOV:
  826. begin
  827. If OptPass1MOV(p) then
  828. Continue;
  829. end;
  830. A_MOVSX,
  831. A_MOVZX :
  832. begin
  833. If OptPass1Movx(p) then
  834. Continue
  835. end;
  836. (* should not be generated anymore by the current code generator
  837. A_POP:
  838. begin
  839. if target_info.system=system_i386_go32v2 then
  840. begin
  841. { Transform a series of pop/pop/pop/push/push/push to }
  842. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  843. { because I'm not sure whether they can cope with }
  844. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  845. { such a problem when using esp as frame pointer (JM) }
  846. if (taicpu(p).oper[0]^.typ = top_reg) then
  847. begin
  848. hp1 := p;
  849. hp2 := p;
  850. l := 0;
  851. while getNextInstruction(hp1,hp1) and
  852. (hp1.typ = ait_instruction) and
  853. (taicpu(hp1).opcode = A_POP) and
  854. (taicpu(hp1).oper[0]^.typ = top_reg) do
  855. begin
  856. hp2 := hp1;
  857. inc(l,4);
  858. end;
  859. getLastInstruction(p,hp3);
  860. l1 := 0;
  861. while (hp2 <> hp3) and
  862. assigned(hp1) and
  863. (hp1.typ = ait_instruction) and
  864. (taicpu(hp1).opcode = A_PUSH) and
  865. (taicpu(hp1).oper[0]^.typ = top_reg) and
  866. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  867. begin
  868. { change it to a two op operation }
  869. taicpu(hp2).oper[1]^.typ:=top_none;
  870. taicpu(hp2).ops:=2;
  871. taicpu(hp2).opcode := A_MOV;
  872. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  873. reference_reset(tmpref);
  874. tmpRef.base.enum:=R_INTREGISTER;
  875. tmpRef.base.number:=NR_STACK_POINTER_REG;
  876. convert_register_to_enum(tmpref.base);
  877. tmpRef.offset := l;
  878. taicpu(hp2).loadRef(0,tmpRef);
  879. hp4 := hp1;
  880. getNextInstruction(hp1,hp1);
  881. asml.remove(hp4);
  882. hp4.free;
  883. getLastInstruction(hp2,hp2);
  884. dec(l,4);
  885. inc(l1);
  886. end;
  887. if l <> -4 then
  888. begin
  889. inc(l,4);
  890. for l1 := l1 downto 1 do
  891. begin
  892. getNextInstruction(hp2,hp2);
  893. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  894. end
  895. end
  896. end
  897. end
  898. else
  899. begin
  900. if (taicpu(p).oper[0]^.typ = top_reg) and
  901. GetNextInstruction(p, hp1) and
  902. (tai(hp1).typ=ait_instruction) and
  903. (taicpu(hp1).opcode=A_PUSH) and
  904. (taicpu(hp1).oper[0]^.typ = top_reg) and
  905. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  906. begin
  907. { change it to a two op operation }
  908. taicpu(p).oper[1]^.typ:=top_none;
  909. taicpu(p).ops:=2;
  910. taicpu(p).opcode := A_MOV;
  911. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  912. reference_reset(tmpref);
  913. TmpRef.base.enum := R_ESP;
  914. taicpu(p).loadRef(0,TmpRef);
  915. asml.remove(hp1);
  916. hp1.free;
  917. end;
  918. end;
  919. end;
  920. *)
  921. A_PUSH:
  922. begin
  923. if (taicpu(p).opsize = S_W) and
  924. (taicpu(p).oper[0]^.typ = Top_Const) and
  925. GetNextInstruction(p, hp1) and
  926. (tai(hp1).typ = ait_instruction) and
  927. (taicpu(hp1).opcode = A_PUSH) and
  928. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  929. (taicpu(hp1).opsize = S_W) then
  930. begin
  931. taicpu(p).changeopsize(S_L);
  932. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  933. asml.remove(hp1);
  934. hp1.free;
  935. end;
  936. end;
  937. A_SHL, A_SAL:
  938. begin
  939. if (taicpu(p).oper[0]^.typ = Top_Const) and
  940. (taicpu(p).oper[1]^.typ = Top_Reg) and
  941. (taicpu(p).opsize = S_L) and
  942. (taicpu(p).oper[0]^.val <= 3) then
  943. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  944. begin
  945. TmpBool1 := True; {should we check the next instruction?}
  946. TmpBool2 := False; {have we found an add/sub which could be
  947. integrated in the lea?}
  948. reference_reset(tmpref,2,[]);
  949. TmpRef.index := taicpu(p).oper[1]^.reg;
  950. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  951. while TmpBool1 and
  952. GetNextInstruction(p, hp1) and
  953. (tai(hp1).typ = ait_instruction) and
  954. ((((taicpu(hp1).opcode = A_ADD) or
  955. (taicpu(hp1).opcode = A_SUB)) and
  956. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  957. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  958. (((taicpu(hp1).opcode = A_INC) or
  959. (taicpu(hp1).opcode = A_DEC)) and
  960. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  961. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  962. (not GetNextInstruction(hp1,hp2) or
  963. not instrReadsFlags(hp2)) Do
  964. begin
  965. TmpBool1 := False;
  966. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  967. begin
  968. TmpBool1 := True;
  969. TmpBool2 := True;
  970. case taicpu(hp1).opcode of
  971. A_ADD:
  972. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  973. A_SUB:
  974. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  975. end;
  976. asml.remove(hp1);
  977. hp1.free;
  978. end
  979. else
  980. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  981. (((taicpu(hp1).opcode = A_ADD) and
  982. (TmpRef.base = NR_NO)) or
  983. (taicpu(hp1).opcode = A_INC) or
  984. (taicpu(hp1).opcode = A_DEC)) then
  985. begin
  986. TmpBool1 := True;
  987. TmpBool2 := True;
  988. case taicpu(hp1).opcode of
  989. A_ADD:
  990. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  991. A_INC:
  992. inc(TmpRef.offset);
  993. A_DEC:
  994. dec(TmpRef.offset);
  995. end;
  996. asml.remove(hp1);
  997. hp1.free;
  998. end;
  999. end;
  1000. if TmpBool2 or
  1001. ((current_settings.optimizecputype < cpu_Pentium2) and
  1002. (taicpu(p).oper[0]^.val <= 3) and
  1003. not(cs_opt_size in current_settings.optimizerswitches)) then
  1004. begin
  1005. if not(TmpBool2) and
  1006. (taicpu(p).oper[0]^.val = 1) then
  1007. begin
  1008. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1009. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1010. end
  1011. else
  1012. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1013. taicpu(p).oper[1]^.reg);
  1014. InsertLLItem(p.previous, p.next, hp1);
  1015. p.free;
  1016. p := hp1;
  1017. end;
  1018. end
  1019. else
  1020. if (current_settings.optimizecputype < cpu_Pentium2) and
  1021. (taicpu(p).oper[0]^.typ = top_const) and
  1022. (taicpu(p).oper[1]^.typ = top_reg) then
  1023. if (taicpu(p).oper[0]^.val = 1) then
  1024. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1025. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1026. (unlike shl, which is only pairable in the U pipe)}
  1027. begin
  1028. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1029. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1030. InsertLLItem(p.previous, p.next, hp1);
  1031. p.free;
  1032. p := hp1;
  1033. end
  1034. else if (taicpu(p).opsize = S_L) and
  1035. (taicpu(p).oper[0]^.val<= 3) then
  1036. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1037. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1038. begin
  1039. reference_reset(tmpref,2,[]);
  1040. TmpRef.index := taicpu(p).oper[1]^.reg;
  1041. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1042. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1043. InsertLLItem(p.previous, p.next, hp1);
  1044. p.free;
  1045. p := hp1;
  1046. end
  1047. end;
  1048. A_SETcc :
  1049. { changes
  1050. setcc (funcres) setcc reg
  1051. movb (funcres), reg to leave/ret
  1052. leave/ret }
  1053. begin
  1054. if (taicpu(p).oper[0]^.typ = top_ref) and
  1055. GetNextInstruction(p, hp1) and
  1056. GetNextInstruction(hp1, hp2) and
  1057. IsExitCode(hp2) and
  1058. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1059. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1060. not(assigned(current_procinfo.procdef.funcretsym) and
  1061. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1062. (hp1.typ = ait_instruction) and
  1063. (taicpu(hp1).opcode = A_MOV) and
  1064. (taicpu(hp1).opsize = S_B) and
  1065. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1066. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1067. begin
  1068. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1069. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1070. asml.remove(hp1);
  1071. hp1.free;
  1072. end
  1073. end;
  1074. A_SUB:
  1075. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1076. { * change "sub/add const1, reg" or "dec reg" followed by
  1077. "sub const2, reg" to one "sub ..., reg" }
  1078. begin
  1079. if (taicpu(p).oper[0]^.typ = top_const) and
  1080. (taicpu(p).oper[1]^.typ = top_reg) then
  1081. if (taicpu(p).oper[0]^.val = 2) and
  1082. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1083. { Don't do the sub/push optimization if the sub }
  1084. { comes from setting up the stack frame (JM) }
  1085. (not getLastInstruction(p,hp1) or
  1086. (hp1.typ <> ait_instruction) or
  1087. (taicpu(hp1).opcode <> A_MOV) or
  1088. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1089. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1090. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1091. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1092. begin
  1093. hp1 := tai(p.next);
  1094. while Assigned(hp1) and
  1095. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1096. not RegReadByInstruction(NR_ESP,hp1) and
  1097. not RegModifiedByInstruction(NR_ESP,hp1) do
  1098. hp1 := tai(hp1.next);
  1099. if Assigned(hp1) and
  1100. (tai(hp1).typ = ait_instruction) and
  1101. (taicpu(hp1).opcode = A_PUSH) and
  1102. (taicpu(hp1).opsize = S_W) then
  1103. begin
  1104. taicpu(hp1).changeopsize(S_L);
  1105. if taicpu(hp1).oper[0]^.typ=top_reg then
  1106. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1107. hp1 := tai(p.next);
  1108. asml.remove(p);
  1109. p.free;
  1110. p := hp1;
  1111. continue
  1112. end;
  1113. if DoSubAddOpt(p) then
  1114. continue;
  1115. end
  1116. else if DoSubAddOpt(p) then
  1117. continue
  1118. end;
  1119. A_VMOVAPS,
  1120. A_VMOVAPD:
  1121. if OptPass1VMOVAP(p) then
  1122. continue;
  1123. A_VDIVSD,
  1124. A_VDIVSS,
  1125. A_VSUBSD,
  1126. A_VSUBSS,
  1127. A_VMULSD,
  1128. A_VMULSS,
  1129. A_VADDSD,
  1130. A_VADDSS,
  1131. A_VANDPD,
  1132. A_VANDPS,
  1133. A_VORPD,
  1134. A_VORPS,
  1135. A_VXORPD,
  1136. A_VXORPS:
  1137. if OptPass1VOP(p) then
  1138. continue;
  1139. A_MULSD,
  1140. A_MULSS,
  1141. A_ADDSD,
  1142. A_ADDSS:
  1143. if OptPass1OP(p) then
  1144. continue;
  1145. A_MOVAPD,
  1146. A_MOVAPS:
  1147. if OptPass1MOVAP(p) then
  1148. continue;
  1149. A_VMOVSD,
  1150. A_VMOVSS,
  1151. A_MOVSD,
  1152. A_MOVSS:
  1153. if OptPass1MOVXX(p) then
  1154. continue;
  1155. end;
  1156. end; { if is_jmp }
  1157. end;
  1158. end;
  1159. updateUsedRegs(UsedRegs,p);
  1160. p:=tai(p.next);
  1161. end;
  1162. end;
  { Second peephole pass over the items from BlockStart up to (but not
    including) BlockEnd.

    For every instruction that does not contain a segment reference, the
    opcode selects one of the pass-2 helpers (OptPass2Jcc, DoFpuLoadStoreOpt,
    OptPass2Imul, OptPass2Jmp, OptPass2MOV).  When a helper returns True the
    walk does NOT advance: the loop runs again on whatever p is at that point
    (NOTE(review): the helpers presumably take p by reference and may replace
    it - confirm against their declarations).  In every other case the walk
    moves on to p.next. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      p : tai;
      revisit : boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while p <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          { stays False unless one of the helpers below reports a change }
          revisit := false;
          if p.typ = ait_instruction then
            { instructions with a segment reference are left untouched }
            if not InsContainsSegRef(taicpu(p)) then
              case taicpu(p).opcode of
                A_Jcc:
                  revisit := OptPass2Jcc(p);
                A_FSTP,
                A_FISTP:
                  revisit := DoFpuLoadStoreOpt(p);
                A_IMUL:
                  revisit := OptPass2Imul(p);
                A_JMP:
                  revisit := OptPass2Jmp(p);
                A_MOV:
                  revisit := OptPass2MOV(p);
              end;
          if not revisit then
            p := tai(p.next);
        end;
    end;
  { Last peephole stage: walks the items from BlockStart up to (but not
    including) BlockEnd and applies the rewrites below to A_CALL, A_CMP,
    A_MOV, A_MOVZX and A_TEST/A_OR instructions, then calls OptReferences
    once.  In TCpuAsmOptimizer.Optimize this routine is only invoked on the
    final iteration, after PeepHoleOptPass2.

    Locals:
      p            - item currently being examined
      hp1, hp2     - scratch pointers to neighbouring / newly created items
      IsTestConstX - True when p is "test $-1, x" (see the A_TEST/A_OR case) }
  1202. procedure TCPUAsmOptimizer.PostPeepHoleOpts;
  1203. var
  1204. p,hp1,hp2: tai;
  1205. IsTestConstX: boolean;
  1206. begin
  1207. p := BlockStart;
  1208. ClearUsedRegs;
  1209. while (p <> BlockEnd) Do
  1210. begin
  { Runs at the top of every iteration, including iterations that re-examine
    the same p after a "continue" that did not advance it. }
  1211. UpdateUsedRegs(UsedRegs, tai(p.next));
  1212. case p.Typ Of
  1213. Ait_Instruction:
  1214. begin
  { Instructions for which InsContainsSegRef is True are skipped unchanged. }
  1215. if InsContainsSegRef(taicpu(p)) then
  1216. begin
  1217. p := tai(p.next);
  1218. continue;
  1219. end;
  1220. case taicpu(p).opcode Of
  1221. A_CALL:
  1222. begin
  1223. { don't do this on modern CPUs, this really hurts them due to
  1224. broken call/ret pairing }
  { First form: "call X; jmp Y" (Y given as a top_ref with refaddr=addr_full)
    becomes "push Y; jmp X".  The push is inserted in front of the call, the
    call's opcode is changed to A_JMP and the original jmp is removed.
    Only done without PIC and when optimizecputype < cpu_Pentium2. }
  1225. if (current_settings.optimizecputype < cpu_Pentium2) and
  1226. not(cs_create_pic in current_settings.moduleswitches) and
  1227. GetNextInstruction(p, hp1) and
  1228. (hp1.typ = ait_instruction) and
  1229. (taicpu(hp1).opcode = A_JMP) and
  1230. ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
  1231. begin
  1232. hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
  1233. InsertLLItem(p.previous, p, hp2);
  1234. taicpu(p).opcode := A_JMP;
  1235. taicpu(p).is_jmp := true;
  1236. asml.remove(hp1);
  1237. hp1.free;
  1238. end
  1239. { replace
  1240. call procname
  1241. ret
  1242. by
  1243. jmp procname
  1244. this should never hurt except when pic is used, not sure
  1245. how to handle it then
  1246. but do it only on level 4 because it destroys stack back traces
  1247. }
  { The ret must have no operands (ops=0) for this second form to apply. }
  1248. else if (cs_opt_level4 in current_settings.optimizerswitches) and
  1249. not(cs_create_pic in current_settings.moduleswitches) and
  1250. GetNextInstruction(p, hp1) and
  1251. (hp1.typ = ait_instruction) and
  1252. (taicpu(hp1).opcode = A_RET) and
  1253. (taicpu(hp1).ops=0) then
  1254. begin
  1255. taicpu(p).opcode := A_JMP;
  1256. taicpu(p).is_jmp := true;
  1257. asml.remove(hp1);
  1258. hp1.free;
  1259. end;
  1260. end;
  1261. A_CMP:
  1262. begin
  1263. if (taicpu(p).oper[0]^.typ = top_const) and
  1264. (taicpu(p).oper[0]^.val = 0) and
  1265. (taicpu(p).oper[1]^.typ = top_reg) then
  1266. {change "cmp $0, %reg" to "test %reg, %reg"}
  1267. begin
  1268. taicpu(p).opcode := A_TEST;
  1269. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  { p is deliberately not advanced here: the next loop iteration sees the same
    instruction again, now with opcode A_TEST, so the A_TEST/A_OR case below
    gets a chance to process it. }
  1270. continue;
  1271. end;
  1272. end;
  1273. A_MOV:
  { Delegated to PostPeepholeOptMov; it is called as a plain statement, so
    nothing it might return is used here and p is advanced normally. }
  1274. PostPeepholeOptMov(p);
  1275. A_MOVZX:
  1276. { if register vars are on, it's possible there is code like }
  1277. { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx" }
  1278. { so we can't safely replace the movzx then with xor/mov, }
  1279. { since that would change the flags (JM) }
  1280. if not(cs_opt_regvar in current_settings.optimizerswitches) then
  1281. begin
  { Note on the if/else nesting: the "else if ... top_ref" branch further down
    pairs with the inner "if (oper[0]^.typ = top_reg)", so both the register
    and the memory source variants require the destination to be a register. }
  1282. if (taicpu(p).oper[1]^.typ = top_reg) then
  1283. if (taicpu(p).oper[0]^.typ = top_reg)
  1284. then
  1285. case taicpu(p).opsize of
  1286. S_BL:
  1287. begin
  1288. if IsGP32Reg(taicpu(p).oper[1]^.reg) and
  1289. not(cs_opt_size in current_settings.optimizerswitches) and
  1290. (current_settings.optimizecputype = cpu_Pentium) then
  1291. {Change "movzbl %reg1, %reg2" to
  1292. "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
  1293. PentiumMMX}
  1294. begin
  { The xor is built from the still 32-bit destination register and inserted
    in front of p; afterwards p itself is turned into a byte mov whose
    destination is narrowed with setsubreg(..., R_SUBL). }
  1295. hp1 := taicpu.op_reg_reg(A_XOR, S_L,
  1296. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1297. InsertLLItem(p.previous, p, hp1);
  1298. taicpu(p).opcode := A_MOV;
  1299. taicpu(p).changeopsize(S_B);
  1300. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  1301. end;
  1302. end;
  1303. end
  { Memory source variant.  The destination register must not be the base or
    index of the reference, because the xor inserted in front of the mov
    clears that register before the mov reads its address. }
  1304. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1305. (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1306. (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
  1307. not(cs_opt_size in current_settings.optimizerswitches) and
  1308. IsGP32Reg(taicpu(p).oper[1]^.reg) and
  1309. (current_settings.optimizecputype = cpu_Pentium) and
  1310. (taicpu(p).opsize = S_BL) then
  1311. {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
  1312. Pentium and PentiumMMX}
  1313. begin
  { hp1 is created before the destination register is narrowed, so the xor
    still operates on the full 32-bit register. }
  1314. hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
  1315. taicpu(p).oper[1]^.reg);
  1316. taicpu(p).opcode := A_MOV;
  1317. taicpu(p).changeopsize(S_B);
  1318. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  1319. InsertLLItem(p.previous, p, hp1);
  1320. end;
  1321. end;
  1322. A_TEST, A_OR:
  1323. {removes the line marked with (x) from the sequence
  1324. and/or/xor/add/sub/... $x, %y
  1325. test/or %y, %y | test $-1, %y (x)
  1326. j(n)z _Label
  1327. as the first instruction already adjusts the ZF
  1328. %y operand may also be a reference }
  1329. begin
  { IsTestConstX: p is a test whose first operand matches the constant -1. }
  1330. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  1331. MatchOperand(taicpu(p).oper[0]^,-1);
  { Preconditions: p is "op x, x" or "test $-1, x"; hp1 = previous instruction;
    hp2 = next instruction, which must be a SETcc, Jcc or CMOVcc. }
  1332. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  1333. GetLastInstruction(p, hp1) and
  1334. (tai(hp1).typ = ait_instruction) and
  1335. GetNextInstruction(p,hp2) and
  1336. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  1337. case taicpu(hp1).opcode Of
  1338. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  1339. begin
  1340. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  1341. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  1342. { and in case of carry for A(E)/B(E)/C/NC }
  1343. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  1344. ((taicpu(hp1).opcode <> A_ADD) and
  1345. (taicpu(hp1).opcode <> A_SUB))) then
  1346. begin
  { Drop p and continue with the item that followed it, without the normal
    advance at the bottom of the loop. }
  1347. hp1 := tai(p.next);
  1348. asml.remove(p);
  1349. p.free;
  1350. p := tai(hp1);
  1351. continue
  1352. end;
  1353. end;
  1354. A_SHL, A_SAL, A_SHR, A_SAR:
  1355. begin
  1356. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  1357. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  1358. { therefore, it's only safe to do this optimization for }
  1359. { shifts by a (nonzero) constant }
  1360. (taicpu(hp1).oper[0]^.typ = top_const) and
  1361. (taicpu(hp1).oper[0]^.val <> 0) and
  1362. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  1363. { and in case of carry for A(E)/B(E)/C/NC }
  1364. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  1365. begin
  1366. hp1 := tai(p.next);
  1367. asml.remove(p);
  1368. p.free;
  1369. p := tai(hp1);
  1370. continue
  1371. end;
  1372. end;
  1373. A_DEC, A_INC, A_NEG:
  1374. begin
  { These are single-operand instructions, so the operand compared with the
    test's second operand is oper[0] of hp1. }
  1375. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  1376. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  1377. { and in case of carry for A(E)/B(E)/C/NC }
  1378. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  1379. begin
  1380. case taicpu(hp1).opcode Of
  1381. A_DEC, A_INC:
  1382. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  1383. begin
  1384. case taicpu(hp1).opcode Of
  1385. A_DEC: taicpu(hp1).opcode := A_SUB;
  1386. A_INC: taicpu(hp1).opcode := A_ADD;
  1387. end;
  { The old single operand is copied to slot 1 first, then slot 0 is
    overwritten with the constant 1, giving "add/sub $1, x". }
  1388. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  1389. taicpu(hp1).loadConst(0,1);
  1390. taicpu(hp1).ops:=2;
  1391. end
  1392. end;
  { A_NEG is left as it is; in all three cases the test/or itself is removed. }
  1393. hp1 := tai(p.next);
  1394. asml.remove(p);
  1395. p.free;
  1396. p := tai(hp1);
  1397. continue
  1398. end;
  1399. end
  1400. else
  1401. { change "test $-1,%reg" into "test %reg,%reg" }
  1402. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  1403. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  1404. end { case }
  1405. else
  1406. { change "test $-1,%reg" into "test %reg,%reg" }
  1407. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  1408. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  1409. end;
  1410. end;
  1411. end;
  1412. end;
  1413. p := tai(p.next)
  1414. end;
  { Called once, after the whole block has been walked. }
  1415. OptReferences;
  1416. end;
  { Top-level driver of the CPU-specific assembler optimizer.

    The instruction list (asml) is processed repeatedly.  Each iteration
    starts at asml.first, calls pass_1 and then handles one block of
    compiler-generated code after another, stepping over the regions enclosed
    by mark_AsmBlockStart / mark_AsmBlockEnd markers.

    Per block, and only when cs_opt_peephole is enabled:
      - first iteration : PrePeepHoleOpts, then PeepHoleOptPass1 twice
      - later iterations: PeepHoleOptPass1 once
      - always          : PeepHoleOptPass2
      - final iteration : PostPeepHoleOpts in addition

    Without cs_opt_level3 the first iteration is already the final one.
    dfa is freed at the very end. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      following : tai;
      iteration : longint;
      multiPass, changed, finalIteration : boolean;
    begin
      multiPass := cs_opt_level3 in current_settings.optimizerswitches;
      iteration := 0;
      { "changed" is never set to True anywhere in this procedure, so the
        termination test below effectively only depends on the iteration
        counter. }
      changed := false;
      repeat
        { This is the final iteration unless multi-pass optimization is on,
          another round is still wanted (a change was recorded or at most
          three iterations have run so far) and the hard limit that prevents
          endless loops has not been reached. }
        finalIteration :=
          not (multiPass and
               (changed or (iteration <= 2)) and
               (iteration <> 4));
        changed := false;

        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }
        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if iteration = 0 then
                  begin
                    PrePeepHoleOpts;
                    { pass 1 is performed twice on the first iteration only }
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalIteration then
                  PostPeepHoleOpts;
              end;

            { Continue where we left off, BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block: advance to its
                  mark_AsmBlockEnd marker }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);

                if (not GetNextInstruction(BlockStart, following)) or
                   ((following.typ = ait_marker) and
                    (tai_marker(following).Kind = mark_AsmBlockStart)) then
                  { nothing follows, or another assembler block follows
                    directly: move on to that item }
                  BlockStart := following
                else
                  { a block of "normal" instructions follows: prepare it }
                  pass_1;
              end;
          end;

        inc(iteration);
      until finalIteration;
      dfa.free;
    end;
  { Unit initialization section: stores the TCpuAsmOptimizer class in
    casmoptimizer.
    NOTE(review): presumably this is how the generic optimizer code picks up
    the CPU-specific implementation - confirm against the declaration of
    casmoptimizer. }
  1485. begin
  1486. casmoptimizer:=TCpuAsmOptimizer;
  1487. end.