Procházet zdrojové kódy

+ first version of ldr instruction scheduler on arm

git-svn-id: trunk@20512 -
florian před 13 roky
rodič
revize
0fe22a358b

+ 12 - 0
compiler/aopt.pas

@@ -51,8 +51,10 @@ Unit aopt;
 
     var
       casmoptimizer : TAsmOptimizerClass;
+      cpreregallocscheduler : TAsmOptimizerClass;
 
     procedure Optimize(AsmL:TAsmList);
+    procedure PreRegallocSchedule(AsmL:TAsmList);
 
   Implementation
 
@@ -274,4 +276,14 @@ Unit aopt;
       end;
 
 
+    { Runs the scheduler registered in cpreregallocscheduler over AsmL.
+
+      An instance of the registered class is created for AsmL, its Optimize
+      method is called once and the instance is freed again.
+
+      cpreregallocscheduler is a plain global class reference which is only
+      assigned by cpu units that provide a scheduler. When no unit assigned
+      it, the reference is nil and this procedure returns without touching
+      AsmL instead of calling a constructor through a nil class reference. }
+    procedure PreRegallocSchedule(AsmL:TAsmList);
+      var
+        p : TAsmOptimizer;
+      begin
+        { no scheduler registered for this target: nothing to do }
+        if not(assigned(cpreregallocscheduler)) then
+          exit;
+        p:=cpreregallocscheduler.Create(AsmL);
+        p.Optimize;
+        p.free
+      end;
+
+
 end.

+ 83 - 1
compiler/arm/aoptcpu.pas

@@ -37,6 +37,9 @@ Type
     procedure PeepHoleOptPass2;override;
   End;
 
+  TCpuPreRegallocScheduler = class(TAsmOptimizer) { load scheduler; registered as cpreregallocscheduler at the end of this unit }
+    function PeepHoleOptPass1Cpu(var p: tai): boolean;override; { moves loads one instruction earlier where this is provably safe }
+  end;
 
   TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
     { uses the same constructor as TAopObj }
@@ -46,8 +49,9 @@ Type
 Implementation
 
   uses
+    cutils,
     verbose,
-    cgutils,
+    cgbase,cgutils,
     aasmbase,aasmcpu;
 
   function CanBeCond(p : tai) : boolean;
@@ -450,6 +454,83 @@ Implementation
         end;
     end;
 
+  const
+    { set of opcodes which might or do write to memory; besides the store
+      instructions it also contains the branch/call opcodes and BKPT.
+      TCpuPreRegallocScheduler never moves a load in front of one of these
+      unless one of its store-specific checks succeeds }
+    { TODO : extend armins.dat to contain r/w info }
+    opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
+                              A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
+
+  function TCpuPreRegallocScheduler.PeepHoleOptPass1Cpu(var p: tai): boolean;
+  {
+    Walks the list from BlockStart to BlockEnd. For every instruction p that
+    is followed by a load hp1 (LDR/LDRB/LDRH/LDRSB/LDRSH) whose destination
+    register occurs in the instruction hp2 after it, p and hp1 are swapped so
+    that the load is issued one instruction earlier.
+
+    The swap is only done when one of the following holds
+      - p is not in opcode_could_mem_write and does not modify PC
+      - p is STM/STRB/STRH/STR and hp1 uses PC as base, or has symboldata
+        assigned together with offset 0
+      - p is STRB/STRH/STR, p and hp1 use the same base register, neither has
+        an index register or an opcode postfix, and the two offsets differ by
+        at least the larger of the two register sizes
+    and, in addition, hp1's destination register does not occur in p, p and
+    hp1 carry the same condition, and p modifies neither the base nor the
+    index register of hp1.
+
+    p is a var parameter that is used as the loop cursor: the value passed in
+    is overwritten with BlockStart and p equals BlockEnd when the loop ends.
+    The result is always true.
+
+    NOTE(review): oper[1] of hp1 (and of p in the overlap test) is accessed
+    as a reference without testing the operand type first - confirm this
+    always holds for the opcodes tested here.
+    NOTE(review): nothing here looks at whether hp1 writes back to its base
+    register while p reads that register - confirm such loads cannot reach
+    this point or add a check.
+    NOTE(review): only p and hp1 are relinked; if GetNextInstruction skips
+    other list entries between them, those entries stay where they are -
+    presumably harmless, verify.
+
+    TODO:
+      - move in right pass
+      - changes of R15
+  }
+    var
+      hp1,hp2 : tai;
+    begin
+      result:=true;
+      p := BlockStart; { p doubles as the loop cursor }
+      { UsedRegs := []; }
+      while (p <> BlockEnd) Do
+        begin
+          if (p.typ=ait_instruction) and
+            GetNextInstruction(p,hp1) and
+            (hp1.typ=ait_instruction) and
+            { for now we don't reschedule if the previous instruction potentially changes a memory location }
+            ( (not(taicpu(p).opcode in opcode_could_mem_write) and
+               not(RegModifiedByInstruction(NR_PC,p)) and
+               (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH])
+              ) or
+              ((taicpu(p).opcode in [A_STM,A_STRB,A_STRH,A_STR]) and
+               (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
+               ((taicpu(hp1).oper[1]^.ref^.base=NR_PC) or
+                (assigned(taicpu(hp1).oper[1]^.ref^.symboldata) and
+                (taicpu(hp1).oper[1]^.ref^.offset=0)
+                )
+               ) or { "and" binds tighter than "or": what follows is a separate alternative, not a further restriction of the store case above }
+               { try to prove that the memory accesses don't overlap }
+               ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
+                (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
+                (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
+                (taicpu(p).oppostfix=PF_None) and
+                (taicpu(hp1).oppostfix=PF_None) and
+                (taicpu(p).oper[1]^.ref^.index=NR_NO) and
+                (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
+                { get operand sizes and check if the offset distance is large enough to ensure no overlap;
+                  the sizes are taken from the registers in operand 0, not from the opcode }
+                (abs(taicpu(p).oper[1]^.ref^.offset-taicpu(hp1).oper[1]^.ref^.offset)>=max(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)],tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]))
+              )
+            )
+            ) and
+            GetNextInstruction(hp1,hp2) and
+            (hp2.typ=ait_instruction) and
+            { loaded register used by next instruction? }
+            (RegInInstruction(taicpu(hp1).oper[0]^.reg,hp2)) and
+            { loaded register not used by previous instruction? }
+            not(RegInInstruction(taicpu(hp1).oper[0]^.reg,p)) and
+            { same condition? }
+            (taicpu(p).condition=taicpu(hp1).condition) and
+            { first instruction must not change the register used as base }
+            ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or
+             not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.base,p))
+            ) and
+            { first instruction must not change the register used as index }
+            ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or
+             not(RegModifiedByInstruction(taicpu(hp1).oper[1]^.ref^.index,p))
+            ) then
+            begin
+              asml.Remove(p); { unlink both instructions, then re-insert them in swapped order in front of hp2 }
+              asml.Remove(hp1);
+{$ifdef DEBUG_PREREGSCHEDULER}
+              asml.InsertBefore(tai_comment.Create(strpnew('Rescheduled')),hp2);
+{$endif DEBUG_PREREGSCHEDULER}
+              asml.InsertBefore(hp1,hp2);
+              asml.InsertBefore(p,hp2); { intended resulting order: hp1, p, hp2 }
+            end;
+          p := tai(p.next)
+        end;
+    end;
+
 
   procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
     begin
@@ -458,4 +539,5 @@ Implementation
 
 begin
   casmoptimizer:=TCpuAsmOptimizer;
+  cpreregallocscheduler:=TCpuPreRegallocScheduler;
 End.

+ 1 - 1
compiler/arm/cpuinfo.pas

@@ -1028,7 +1028,7 @@ Const
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse,cs_opt_scheduler];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
 
 Implementation

+ 3 - 2
compiler/globtype.pas

@@ -213,7 +213,7 @@ interface
          cs_opt_level1,cs_opt_level2,cs_opt_level3,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
-         cs_opt_nodedfa,cs_opt_loopstrength
+         cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler
        );
        toptimizerswitches = set of toptimizerswitch;
 
@@ -236,7 +236,8 @@ interface
        OptimizerSwitchStr : array[toptimizerswitch] of string[10] = ('',
          'LEVEL1','LEVEL2','LEVEL3',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
-         'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE','DFA','STRENGTH'
+         'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
+         'DFA','STRENGTH','SCHEDULE'
        );
        WPOptimizerSwitchStr : array [twpoptimizerswitch] of string[14] = (
          'DEVIRTCALLS','OPTVMTS','SYMBOLLIVENESS'

+ 18 - 0
compiler/psub.pas

@@ -1138,6 +1138,16 @@ implementation
             cg.set_regalloc_live_range_direction(rad_forward);
             }
 
+
+{$ifndef NoOpt}
+{$ifndef i386}
+            if (cs_opt_scheduler in current_settings.optimizerswitches) and
+              { do not optimize pure assembler procedures }
+              not(pi_is_assembler in flags) then
+              preregallocschedule(aktproccode);
+{$endif i386}
+{$endif NoOpt}
+
             { The procedure body is finished, we can now
               allocate the registers }
             cg.do_register_allocation(aktproccode,headertai);
@@ -1206,6 +1216,14 @@ implementation
                    { do not optimize pure assembler procedures }
                    not(pi_is_assembler in flags)  then
                   optimize(aktproccode);
+{$ifndef i386}
+                { schedule after assembler optimization, it could have brought up
+                  new schedule possibilities }
+                if (cs_opt_scheduler in current_settings.optimizerswitches) and
+                  { do not optimize pure assembler procedures }
+                  not(pi_is_assembler in flags)  then
+                  preregallocschedule(aktproccode);
+{$endif i386}
               end;
 {$endif NoOpt}