Browse Source

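+ add optimization setting level4 (cs_opt_level4)
+ i386 peephole optimizer: at level 4 and without PIC, change tail calls into jmp, i.e. a call directly followed by a ret without a constant operand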

git-svn-id: trunk@29653 -

florian 10 years ago
parent
commit
06fd4223e9
2 changed files with 44 additions and 19 deletions
  1. 3 3
      compiler/globtype.pas
  2. 41 16
      compiler/i386/popt386.pas

+ 3 - 3
compiler/globtype.pas

@@ -267,7 +267,7 @@ interface
      type
        { optimizer }
        toptimizerswitch = (cs_opt_none,
-         cs_opt_level1,cs_opt_level2,cs_opt_level3,
+         cs_opt_level1,cs_opt_level2,cs_opt_level3,cs_opt_level4,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
          cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,cs_userbp,
@@ -313,7 +313,7 @@ interface
 
     const
        OptimizerSwitchStr : array[toptimizerswitch] of string[17] = ('',
-         'LEVEL1','LEVEL2','LEVEL3',
+         'LEVEL1','LEVEL2','LEVEL3','LEVEL4',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
          'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP','USERBP',
@@ -345,7 +345,7 @@ interface
        genericlevel1optimizerswitches = [cs_opt_level1,cs_opt_peephole];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
        genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa];
-       genericlevel4optimizerswitches = [cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
+       genericlevel4optimizerswitches = [cs_opt_level4,cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
 
        { whole program optimizations whose information generation requires
          information from all loaded units

+ 41 - 16
compiler/i386/popt386.pas

@@ -2331,22 +2331,47 @@ begin
               end;
             case taicpu(p).opcode Of
               A_CALL:
-                { don't do this on modern CPUs, this really hurts them due to
-                  broken call/ret pairing }
-                if (current_settings.optimizecputype < cpu_Pentium2) and
-                   not(cs_create_pic in current_settings.moduleswitches) and
-                   GetNextInstruction(p, hp1) and
-                   (hp1.typ = ait_instruction) and
-                   (taicpu(hp1).opcode = A_JMP) and
-                   ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
-                  begin
-                    hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
-                    InsertLLItem(asml, p.previous, p, hp2);
-                    taicpu(p).opcode := A_JMP;
-                    taicpu(p).is_jmp := true;
-                    asml.remove(hp1);
-                    hp1.free;
-                  end;
+                begin
+                  { don't do this on modern CPUs, this really hurts them due to
+                    broken call/ret pairing }
+                  if (current_settings.optimizecputype < cpu_Pentium2) and
+                     not(cs_create_pic in current_settings.moduleswitches) and
+                     GetNextInstruction(p, hp1) and
+                     (hp1.typ = ait_instruction) and
+                     (taicpu(hp1).opcode = A_JMP) and
+                     ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
+                    begin
+                      hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
+                      InsertLLItem(asml, p.previous, p, hp2);
+                      taicpu(p).opcode := A_JMP;
+                      taicpu(p).is_jmp := true;
+                      asml.remove(hp1);
+                      hp1.free;
+                    end
+                  { replace
+                      call   procname
+                      ret
+                    by
+                      jmp    procname
+
+                    this should never hurt except when pic is used, not sure
+                    how to handle it then
+
+                    but do it only on level 4 because it destroys stack back traces
+                  }  
+                  else if (cs_opt_level4 in current_settings.optimizerswitches) and
+                     not(cs_create_pic in current_settings.moduleswitches) and
+                     GetNextInstruction(p, hp1) and
+                     (hp1.typ = ait_instruction) and
+                     (taicpu(hp1).opcode = A_RET) and
+                     (taicpu(hp1).ops=0) then
+                    begin
+                      taicpu(p).opcode := A_JMP;
+                      taicpu(p).is_jmp := true;
+                      asml.remove(hp1);
+                      hp1.free;
+                    end;
+                end;
               A_CMP:
                 begin
                   if (taicpu(p).oper[0]^.typ = top_const) and