소스 검색

* made maxregs related information protected instead of private in trgobj,
as well as insert_regalloc_info_all()
+ "register" allocator for the jvm target, which simply replaces every
virtual register with a temp. This is done for all register types in
one pass, so that the temps can be easily reused without worrying
about conflicts. Small optimisation: alloc/store/dealloc/load
sequences for a single reg are removed (many of these are generated
because most cg nodes return their value in a register which is then
immediately consumed by the parent)
* map addressregisters to integer registers with size R_SUBD, because
they require one stackslot (the subregister type is used by rgcpu
to determine the size of the temp it has to allocate)

git-svn-id: branches/jvmbackend@18316 -

Jonas Maebe 14 년 전
부모
커밋
00cb8f5725
4개의 변경된 파일325개의 추가작업 그리고 6개의 파일을 삭제
  1. 1 0
      .gitattributes
  2. 34 2
      compiler/jvm/cgcpu.pas
  3. 285 0
      compiler/jvm/rgcpu.pas
  4. 5 4
      compiler/rgobj.pas

+ 1 - 0
.gitattributes

@@ -218,6 +218,7 @@ compiler/jvm/cputarg.pas svneol=native#text/plain
 compiler/jvm/hlcgcpu.pas svneol=native#text/plain
 compiler/jvm/itcpujas.pas svneol=native#text/plain
 compiler/jvm/jvmreg.dat svneol=native#text/plain
+compiler/jvm/rgcpu.pas svneol=native#text/plain
 compiler/jvm/rjvmcon.inc svneol=native#text/plain
 compiler/jvm/rjvmnor.inc svneol=native#text/plain
 compiler/jvm/rjvmnum.inc svneol=native#text/plain

+ 34 - 2
compiler/jvm/cgcpu.pas

@@ -38,7 +38,10 @@ interface
      public
         procedure init_register_allocators;override;
         procedure done_register_allocators;override;
-        function getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
+        function  getintregister(list:TAsmList;size:Tcgsize):Tregister;override;
+        function  getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
+        function  getaddressregister(list:TAsmList):Tregister;override;
+        procedure do_register_allocation(list:TAsmList;headertai:tai);override;
       end;
 
     procedure create_codegen;
@@ -66,7 +69,7 @@ implementation
         rg[R_INTREGISTER]:=Trgcpu.create(R_INTREGISTER,R_SUBQ,
           [RS_R0],first_int_imreg,[]);
 {$endif not cpu64bitaddr}
-        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBFD,
+        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBFS,
           [RS_R0],first_fpu_imreg,[]);
         rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
           [RS_R0],first_mm_imreg,[]);
@@ -82,6 +85,15 @@ implementation
       end;
 
 
+    function tcgjvm.getintregister(list:TAsmList;size:Tcgsize):Tregister;
+      var
+        subreg: tsubregister;
+      begin
+        { 64 bit integer sizes get an R_SUBQ register, every other size an
+          R_SUBD one }
+        if size in [OS_64,OS_S64] then
+          subreg:=R_SUBQ
+        else
+          subreg:=R_SUBD;
+        result:=rg[R_INTREGISTER].getregister(list,subreg);
+      end;
+
+
     function tcgjvm.getfpuregister(list:TAsmList;size:Tcgsize):Tregister;
       begin
         if size=OS_F64 then
@@ -91,6 +103,26 @@ implementation
       end;
 
 
+    function tcgjvm.getaddressregister(list:TAsmList):Tregister;
+      begin
+        { The jvm backend currently does not have to differentiate between
+          integer and address registers, so address registers are handed out
+          by the integer allocator as R_SUBD registers. This avoids problems
+          in places of the compiler where both register kinds are mixed. }
+        result:=rg[R_INTREGISTER].getregister(list,R_SUBD);
+      end;
+
+
+    procedure tcgjvm.do_register_allocation(list:TAsmList;headertai:tai);
+      begin
+        { The "register allocation" is run only once, via an arbitrary
+          allocator (the integer one): do_all_register_allocation performs the
+          register->temp mapping for all register types in a single pass.
+          This allows us to easily reuse temps.
+
+          list      : assembler list to process
+          headertai : first tai of the list that has to be processed }
+        trgcpu(rg[R_INTREGISTER]).do_all_register_allocation(list,headertai);
+      end;
+
+
     procedure create_codegen;
       begin
         cg:=tcgjvm.Create;

+ 285 - 0
compiler/jvm/rgcpu.pas

@@ -0,0 +1,285 @@
+{
+    Copyright (c) 2010 by Jonas Maebe
+
+    This unit implements the JVM specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      rgobj;
+
+    type
+      { per register type, a pointer to the list holding the temp reference
+        of every virtual register; the list is indexed by superregister }
+      tspilltemps = array[tregistertype] of ^Tspill_temp_list;
+
+      { trgcpu }
+
+      { JVM "register allocator": instead of assigning real registers, every
+        virtual register is replaced by a temp }
+      trgcpu=class(trgobj)
+       protected
+        { replaces every register operand of instr by the reference stored
+          for that register in spilltemps }
+        class function  do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps):boolean;
+        { removes alloc/store/dealloc/load sequences for a single register
+          from list, starting the search at headertai }
+        class procedure remove_dummy_load_stores(list: TAsmList; headertai: tai);
+       public
+        { performs the register allocation for *all* register types }
+        class procedure do_all_register_allocation(list: TAsmList; headertai: tai);
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      globtype,globals,
+      cgobj,
+      tgobj;
+
+    { trgcpu }
+
+    { Replaces every register operand of instr by the temp reference stored
+      for that register in spilltemps.
+
+      list       : not used by this routine
+      instr      : instruction whose operands are rewritten in place
+      spilltemps : per register type, the superregister -> temp reference
+                   mapping
+
+      Returns true if at least one operand was replaced, false otherwise. }
+    class function trgcpu.do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps):boolean;
+      var
+        l: longint;
+        reg: tregister;
+      begin
+        { make sure the function result is always defined, also when the
+          instruction has no register operands at all }
+        result:=false;
+        { jvm instructions never have more than one memory (virtual register)
+          operand, so there is no danger of superregister conflicts }
+        for l:=0 to instr.ops-1 do
+          if instr.oper[l]^.typ=top_reg then
+            begin
+              { fetch the register before loadref overwrites the operand }
+              reg:=instr.oper[l]^.reg;
+              instr.loadref(l,spilltemps[getregtype(reg)]^[getsupreg(reg)]);
+              result:=true;
+            end;
+      end;
+
+
+    { Scans list from headertai onwards and removes every
+        alloc regx / store regx / dealloc regx / load regx
+      sequence. The scan is restarted until a complete pass removes nothing
+      anymore. }
+    class procedure trgcpu.remove_dummy_load_stores(list: TAsmList; headertai: tai);
+
+      { true if p is an instruction with one of the opcodes
+        astore/fstore/istore (or dstore/lstore when doubleprecisionok is set)
+        and either reg is NR_NO or the first operand is the register reg }
+      function issimpleregstore(p: tai; reg: tregister; doubleprecisionok: boolean): boolean;
+        const
+          simplestoressp = [a_astore,a_fstore,a_istore];
+          simplestoresdp = [a_dstore,a_lstore];
+        begin
+          result:=false;
+          if not assigned(p) or
+             (p.typ<>ait_instruction) then
+            exit;
+          if not(taicpu(p).opcode in simplestoressp) and
+             not(doubleprecisionok and
+                 (taicpu(p).opcode in simplestoresdp)) then
+            exit;
+          result:=
+            (reg=NR_NO) or
+            ((taicpu(p).oper[0]^.typ=top_reg) and
+             (taicpu(p).oper[0]^.reg=reg));
+        end;
+
+      { true if p is an instruction with one of the opcodes
+        aload/fload/iload (or dload/lload when doubleprecisionok is set)
+        and either reg is NR_NO or the first operand is the register reg }
+      function issimpleregload(p: tai; reg: tregister; doubleprecisionok: boolean): boolean;
+        const
+          simpleloadssp = [a_aload,a_fload,a_iload];
+          simpleloadsdp = [a_dload,a_lload];
+        begin
+          result:=false;
+          if not assigned(p) or
+             (p.typ<>ait_instruction) then
+            exit;
+          if not(taicpu(p).opcode in simpleloadssp) and
+             not(doubleprecisionok and
+                 (taicpu(p).opcode in simpleloadsdp)) then
+            exit;
+          result:=
+            (reg=NR_NO) or
+            ((taicpu(p).oper[0]^.typ=top_reg) and
+             (taicpu(p).oper[0]^.reg=reg));
+        end;
+
+
+      { p must point to the allocation of reg. If it is followed by a store
+        of reg, the deallocation of reg and a load of reg, all four entries
+        are removed from list and freed, p is advanced to the entry following
+        the load and true is returned. Otherwise nothing is changed and the
+        result is false. }
+      function try_remove_alloc_store_dealloc_load(var p: tai; reg: tregister): boolean;
+        var
+          following: tai;
+          i: longint;
+        begin
+          result:=false;
+          { check for:
+              alloc regx
+              store regx
+              dealloc regx
+              load regx
+            We don't have to check that the load/store types match, because
+            they have to for this to be valid JVM code }
+          if not(issimpleregstore(tai(p.next),reg,true) and
+                 assigned(p.next.next) and
+                 (tai(p.next.next).typ=ait_regalloc) and
+                 (tai_regalloc(p.next.next).ratype=ra_dealloc) and
+                 (tai_regalloc(p.next.next).reg=reg) and
+                 issimpleregload(tai(p.next.next.next),reg,true)) then
+            exit;
+          { remove the whole sequence, in order: the allocation, the store,
+            the dealloc and the load }
+          for i:=1 to 4 do
+            begin
+              following:=tai(p.next);
+              list.remove(p);
+              p.free;
+              p:=following;
+            end;
+          result:=true;
+        end;
+
+
+      var
+        p: tai;
+        reg: tregister;
+        removedsomething: boolean;
+      begin
+        repeat
+          removedsomething:=false;
+          p:=headertai;
+          while assigned(p) do
+            begin
+              if (p.typ=ait_regalloc) and
+                 (tai_regalloc(p).ratype=ra_alloc) then
+                begin
+                  reg:=tai_regalloc(p).reg;
+                  if try_remove_alloc_store_dealloc_load(p,reg) then
+                    begin
+                      { p already points past the removed sequence }
+                      removedsomething:=true;
+                      continue;
+                    end;
+                  { todo in peephole optimizer:
+                      alloc regx // not double precision
+                      store regx // not double precision
+                      load  regy or memy
+                      dealloc regx
+                      load regx
+                    -> change into
+                      load regy or memy
+                      swap       // can only handle single precision
+
+                    and then
+                      swap
+                      <commutative op>
+                     -> remove swap
+                  }
+                end;
+              p:=tai(p.next);
+            end;
+        until not removedsomething;
+      end;
+
+
+    { Performs the "register allocation" for all register types at once:
+      every virtual register is replaced by a temp.
+
+      list      : assembler list to process
+      headertai : tai at which the processing of list starts
+
+      Nothing besides inserting the register allocation information is done
+      when cs_no_regalloc is set in the global switches. }
+    class procedure trgcpu.do_all_register_allocation(list: TAsmList; headertai: tai);
+      var
+        spill_temps : tspilltemps;
+        templist : TAsmList;
+        intrg,
+        fprg     : trgcpu;
+        p,q      : tai;
+        size     : longint;
+      begin
+        { Since there are no actual registers, we simply spill everything. We
+          use tt_regallocator temps, which are not used by the temp allocator
+          during code generation, so that we cannot accidentally overwrite
+          any temporary values }
+
+        { get references to all register allocators }
+        intrg:=trgcpu(cg.rg[R_INTREGISTER]);
+        fprg:=trgcpu(cg.rg[R_FPUREGISTER]);
+        { determine the live ranges of all registers }
+        intrg.insert_regalloc_info_all(list);
+        fprg.insert_regalloc_info_all(list);
+        { Don't do the actual allocation when -sr is passed }
+        if (cs_no_regalloc in current_settings.globalswitches) then
+          exit;
+        { remove some simple useless store/load sequences }
+        remove_dummy_load_stores(list,headertai);
+        { allocate room to store the virtual register -> temp mapping }
+        spill_temps[R_INTREGISTER]:=allocmem(sizeof(treference)*intrg.maxreg);
+        spill_temps[R_FPUREGISTER]:=allocmem(sizeof(treference)*fprg.maxreg);
+        { List to insert temp allocations into }
+        templist:=TAsmList.create;
+        { allocate/replace all registers }
+        p:=headertai;
+        while assigned(p) do
+          begin
+            case p.typ of
+              ait_regalloc:
+                with Tai_regalloc(p) do
+                  begin
+                    { size of the temp that has to hold the register }
+                    case getregtype(reg) of
+                      R_INTREGISTER:
+                        if getsubreg(reg)=R_SUBD then
+                          size:=4
+                        else
+                          size:=8;
+                      { NOTE(review): no mapping is allocated in spill_temps
+                        for R_ADDRESSREGISTER; presumably this branch is
+                        never reached because address registers are handed
+                        out as integer registers -- confirm }
+                      R_ADDRESSREGISTER:
+                        size:=4;
+                      R_FPUREGISTER:
+                        if getsubreg(reg)=R_SUBFS then
+                          size:=4
+                        else
+                          size:=8;
+                      else
+                        internalerror(2010122912);
+                    end;
+                    case ratype of
+                      ra_alloc :
+                        tg.gettemp(templist,
+                                   size,1,
+                                   tt_regallocator,spill_temps[getregtype(reg)]^[getsupreg(reg)]);
+                      ra_dealloc :
+                        begin
+                          tg.ungettemp(templist,spill_temps[getregtype(reg)]^[getsupreg(reg)]);
+                          { don't invalidate the temp reference, may still be used one instruction
+                            later }
+                        end;
+                    end;
+                    { insert the tempallocation/free at the right place }
+                    list.insertlistbefore(p,templist);
+                    { remove the register allocation info for the register
+                      (p.previous is valid because we just inserted the temp
+                       allocation/free before p) }
+                    q:=Tai(p.previous);
+                    list.remove(p);
+                    p.free;
+                    p:=q;
+                  end;
+              ait_instruction:
+                do_spill_replace_all(list,taicpu(p),spill_temps);
+            end;
+            p:=Tai(p.next);
+          end;
+        freemem(spill_temps[R_INTREGISTER]);
+        freemem(spill_temps[R_FPUREGISTER]);
+        { the helper list itself was never released: free it to avoid
+          leaking one TAsmList per call }
+        templist.free;
+      end;
+
+end.

+ 5 - 4
compiler/rgobj.pas

@@ -160,6 +160,10 @@ unit rgobj;
         { translates a single given imaginary register to its real register }
         procedure translate_register(var reg : tregister);
       protected
+        maxreginfo,
+        maxreginfoinc,
+        maxreg            : Tsuperregister;
+
         regtype           : Tregistertype;
         { default subregister used }
         defaultsub        : tsubregister;
@@ -179,15 +183,13 @@ unit rgobj;
                                       instr:taicpu;
                                       const r:Tsuperregisterset;
                                       const spilltemplist:Tspill_temp_list): boolean;virtual;
+        procedure insert_regalloc_info_all(list:TAsmList);
       private
         int_live_range_direction: TRADirection;
         {# First imaginary register.}
         first_imaginary   : Tsuperregister;
         {# Highest register allocated until now.}
         reginfo           : PReginfo;
-        maxreginfo,
-        maxreginfoinc,
-        maxreg            : Tsuperregister;
         usable_registers_cnt : word;
         usable_registers  : array[0..maxcpuregister-1] of tsuperregister;
         ibitmap           : Tinterferencebitmap;
@@ -217,7 +219,6 @@ unit rgobj;
         {# Colour the registers; that is do the register allocation.}
         procedure colour_registers;
         procedure insert_regalloc_info(list:TAsmList;u:tsuperregister);
-        procedure insert_regalloc_info_all(list:TAsmList);
         procedure generate_interference_graph(list:TAsmList;headertai:tai);
         { translates the registers in the given assembler list }
         procedure translate_registers(list:TAsmList);