Forráskód Böngészése

Added some peephole optimizations, and fixed generic unconditional jump optimizations, for AVR.
Fixed multiplication code generation for AVR controllers without mul instructions.
Added handling of the old interrupt procedure directive, so that procedures declared with it return with RETI instead of RET.

git-svn-id: trunk@31030 -

Jeppe Johansen 10 éve
szülő
commit
03880c2f74
4 módosított fájl, 270 hozzáadás és 58 törlés
  1. 4 0
      compiler/aoptobj.pas
  2. 141 6
      compiler/avr/aoptcpu.pas
  3. 1 1
      compiler/avr/aoptcpub.pas
  4. 124 51
      compiler/avr/cgcpu.pas

+ 4 - 0
compiler/aoptobj.pas

@@ -1178,7 +1178,11 @@ Unit AoptObj;
 
 
     function IsJumpToLabel(hp: taicpu): boolean;
     function IsJumpToLabel(hp: taicpu): boolean;
       begin
       begin
+{$if defined(avr)}
+        result:=(hp.opcode in aopt_uncondjmp) and
+{$else avr}
         result:=(hp.opcode=aopt_uncondjmp) and
         result:=(hp.opcode=aopt_uncondjmp) and
+{$endif avr}
 {$if defined(arm) or defined(aarch64)}
 {$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
           (hp.condition=c_None) and
 {$endif arm or aarch64}
 {$endif arm or aarch64}

+ 141 - 6
compiler/avr/aoptcpu.pas

@@ -45,7 +45,7 @@ Implementation
   uses
   uses
     cutils,
     cutils,
     cpuinfo,
     cpuinfo,
-    aasmbase,aasmcpu,
+    aasmbase,aasmcpu,aasmdata,
     globals,globtype,
     globals,globtype,
     cgutils;
     cgutils;
 
 
@@ -132,9 +132,10 @@ Implementation
 
 
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
     var
-      hp1,hp2,hp3: tai;
+      hp1,hp2,hp3,hp4,hp5: tai;
       alloc, dealloc: tai_regalloc;
       alloc, dealloc: tai_regalloc;
       i: integer;
       i: integer;
+      l: TAsmLabel;
     begin
     begin
       result := false;
       result := false;
       case p.typ of
       case p.typ of
@@ -265,7 +266,8 @@ Implementation
                           into
                           into
                           sbi rX,lg(n)
                           sbi rX,lg(n)
                         }
                         }
-                        if MatchInstruction(hp1,A_ORI) and
+                        if (taicpu(p).oper[1]^.val<=31) and
+                          MatchInstruction(hp1,A_ORI) and
                           (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                           (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                           (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
                           (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
                           GetNextInstruction(hp1,hp2) and
                           GetNextInstruction(hp1,hp2) and
@@ -275,7 +277,7 @@ Implementation
                           begin
                           begin
                             taicpu(p).opcode:=A_SBI;
                             taicpu(p).opcode:=A_SBI;
                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
-                            taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val)-1);
+                            taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
                             asml.Remove(hp1);
                             asml.Remove(hp1);
                             hp1.Free;
                             hp1.Free;
                             asml.Remove(hp2);
                             asml.Remove(hp2);
@@ -290,7 +292,8 @@ Implementation
                           into
                           into
                           cbi rX,lg(n)
                           cbi rX,lg(n)
                         }
                         }
-                        else if MatchInstruction(hp1,A_ANDI) and
+                        else if (taicpu(p).oper[1]^.val<=31) and
+                           MatchInstruction(hp1,A_ANDI) and
                            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                            (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
                            (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
                            GetNextInstruction(hp1,hp2) and
                            GetNextInstruction(hp1,hp2) and
@@ -300,11 +303,51 @@ Implementation
                           begin
                           begin
                             taicpu(p).opcode:=A_CBI;
                             taicpu(p).opcode:=A_CBI;
                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
-                            taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val))-1);
+                            taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
                             asml.Remove(hp1);
                             asml.Remove(hp1);
                             hp1.Free;
                             hp1.Free;
                             asml.Remove(hp2);
                             asml.Remove(hp2);
                             hp2.Free;
                             hp2.Free;
+                            result:=true;
+                          end
+                         {
+                              in rX,Y
+                              andi rX,n
+                              breq/brne L1
+
+                          into
+                              sbis/sbic Y,lg(n)
+                              jmp L1
+                            .Ltemp:
+                        }
+                        else if (taicpu(p).oper[1]^.val<=31) and
+                           MatchInstruction(hp1,A_ANDI) and
+                           (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
+                           (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
+                           GetNextInstruction(hp1,hp2) and
+                           MatchInstruction(hp2,A_BRxx) and
+                           (taicpu(hp2).condition in [C_EQ,C_NE]) then
+                          begin
+                            if taicpu(hp2).condition=C_EQ then
+                              taicpu(p).opcode:=A_SBIS
+                            else
+                              taicpu(p).opcode:=A_SBIC;
+
+                            taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
+                            taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
+                            asml.Remove(hp1);
+                            hp1.Free;
+
+                            taicpu(hp2).condition:=C_None;
+                            if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
+                              taicpu(hp2).opcode:=A_JMP
+                            else
+                              taicpu(hp2).opcode:=A_RJMP;
+
+                            current_asmdata.getjumplabel(l);
+                            l.increfs;
+                            asml.InsertAfter(tai_label.create(l), hp2);
+
                             result:=true;
                             result:=true;
                           end;
                           end;
                       end;
                       end;
@@ -528,6 +571,98 @@ Implementation
                             break;
                             break;
                         end;
                         end;
                   end;
                   end;
+                A_SBIC,
+                A_SBIS:
+                  begin
+                    {
+                      Turn
+                          sbic/sbis X, y
+                          jmp .L1
+                          op
+                        .L1:
+
+                      into
+                          sbis/sbic X,y
+                          op
+                        .L1:
+                    }
+                    if GetNextInstruction(p, hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
+                       (taicpu(hp1).ops>0) and
+                       (taicpu(hp1).oper[0]^.typ = top_ref) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+                       GetNextInstruction(hp1, hp2) and
+                       (hp2.typ=ait_instruction) and
+                       (not taicpu(hp2).is_jmp) and
+                       GetNextInstruction(hp2, hp3) and
+                       (hp3.typ=ait_label) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
+                      begin
+                        if taicpu(p).opcode=A_SBIC then
+                          taicpu(p).opcode:=A_SBIS
+                        else
+                          taicpu(p).opcode:=A_SBIC;
+
+                        tai_label(hp3).labsym.decrefs;
+
+                        AsmL.remove(hp1);
+                        taicpu(hp1).Free;
+
+                        result:=true;
+                      end
+                    {
+                      Turn
+                          sbiX X, y
+                          jmp .L1
+                          jmp .L2
+                        .L1:
+                          op
+                        .L2:
+
+                      into
+                          sbiX X,y
+                        .L1:
+                          op
+                        .L2:
+                    }
+                    else if GetNextInstruction(p, hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
+                       (taicpu(hp1).ops>0) and
+                       (taicpu(hp1).oper[0]^.typ = top_ref) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+
+                       GetNextInstruction(hp1, hp2) and
+                       (hp2.typ=ait_instruction) and
+                       (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
+                       (taicpu(hp2).ops>0) and
+                       (taicpu(hp2).oper[0]^.typ = top_ref) and
+                       (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
+
+                       GetNextInstruction(hp2, hp3) and
+                       (hp3.typ=ait_label) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
+
+                       GetNextInstruction(hp3, hp4) and
+                       (hp4.typ=ait_instruction) and
+
+                       GetNextInstruction(hp4, hp5) and
+                       (hp3.typ=ait_label) and
+                       (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
+                      begin
+                        tai_label(hp3).labsym.decrefs;
+                        tai_label(hp5).labsym.decrefs;
+
+                        AsmL.remove(hp1);
+                        taicpu(hp1).Free;
+
+                        AsmL.remove(hp2);
+                        taicpu(hp2).Free;
+
+                        result:=true;
+                      end;
+                  end;
               end;
               end;
           end;
           end;
       end;
       end;

+ 1 - 1
compiler/avr/aoptcpub.pas

@@ -99,7 +99,7 @@ Const
 
 
   StoreDst = 0;
   StoreDst = 0;
 
 
-  aopt_uncondjmp = A_JMP;
+  aopt_uncondjmp = [A_RJMP,A_JMP];
   aopt_condjmp = A_BRxx;
   aopt_condjmp = A_BRxx;
 
 
 Implementation
 Implementation

+ 124 - 51
compiler/avr/cgcpu.pas

@@ -431,7 +431,8 @@ unit cgcpu;
 
 
      procedure tcgavr.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
      procedure tcgavr.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
        begin
        begin
-         if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) then
+         if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) and
+            (CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) then
            begin
            begin
              getcpuregister(list,NR_R0);
              getcpuregister(list,NR_R0);
              getcpuregister(list,NR_R1);
              getcpuregister(list,NR_R1);
@@ -577,55 +578,64 @@ unit cgcpu;
              begin
              begin
                if size in [OS_8,OS_S8] then
                if size in [OS_8,OS_S8] then
                  begin
                  begin
-                   cg.a_reg_alloc(list,NR_R0);
-                   cg.a_reg_alloc(list,NR_R1);
-                   list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
-                   list.concat(taicpu.op_reg(A_CLR,NR_R1));
-                   cg.a_reg_dealloc(list,NR_R1);
-                   list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
-                   cg.a_reg_dealloc(list,NR_R0);
+                   if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
+                     begin
+                       cg.a_reg_alloc(list,NR_R0);
+                       cg.a_reg_alloc(list,NR_R1);
+                       list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
+                       list.concat(taicpu.op_reg(A_CLR,NR_R1));
+                       cg.a_reg_dealloc(list,NR_R1);
+                       list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
+                       cg.a_reg_dealloc(list,NR_R0);
+                     end
+                   else
+                     internalerror(2015061001);
                  end
                  end
                else if size=OS_16 then
                else if size=OS_16 then
                  begin
                  begin
-                   tmpreg:=getintregister(list,OS_16);
-                   emit_mov(list,tmpreg,dst);
-                   emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
-                   list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
-                   emit_mov(list,dst,NR_R0);
-                   emit_mov(list,GetNextReg(dst),NR_R1);
-                   list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
-                   list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
-                   list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
-                   list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
-                   list.concat(taicpu.op_reg(A_CLR,NR_R1));
-
-                   { keep code for muls with overflow checking
-                   pd:=search_system_proc('fpc_mul_word');
-                   paraloc1.init;
-                   paraloc2.init;
-                   paraloc3.init;
-                   paramanager.getintparaloc(list,pd,1,paraloc1);
-                   paramanager.getintparaloc(list,pd,2,paraloc2);
-                   paramanager.getintparaloc(list,pd,3,paraloc3);
-                   a_load_const_cgpara(list,OS_8,0,paraloc3);
-                   a_load_reg_cgpara(list,OS_16,src,paraloc2);
-                   a_load_reg_cgpara(list,OS_16,dst,paraloc1);
-                   paramanager.freecgpara(list,paraloc3);
-                   paramanager.freecgpara(list,paraloc2);
-                   paramanager.freecgpara(list,paraloc1);
-                   alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
-                   a_call_name(list,'FPC_MUL_WORD',false);
-                   dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
-                   cg.a_reg_alloc(list,NR_R24);
-                   cg.a_reg_alloc(list,NR_R25);
-                   cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
-                   cg.a_reg_dealloc(list,NR_R24);
-                   cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
-                   cg.a_reg_dealloc(list,NR_R25);
-                   paraloc3.done;
-                   paraloc2.done;
-                   paraloc1.done;
-                   }
+                   if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
+                     begin
+                       tmpreg:=getintregister(list,OS_16);
+                       emit_mov(list,tmpreg,dst);
+                       emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
+                       list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
+                       emit_mov(list,dst,NR_R0);
+                       emit_mov(list,GetNextReg(dst),NR_R1);
+                       list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
+                       list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
+                       list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
+                       list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
+                       list.concat(taicpu.op_reg(A_CLR,NR_R1));
+                     end
+                   else
+                     begin
+                       { no MUL instruction available: call the fpc_mul_word system helper instead }
+                       pd:=search_system_proc('fpc_mul_word');
+                       paraloc1.init;
+                       paraloc2.init;
+                       paraloc3.init;
+                       paramanager.getintparaloc(list,pd,1,paraloc1);
+                       paramanager.getintparaloc(list,pd,2,paraloc2);
+                       paramanager.getintparaloc(list,pd,3,paraloc3);
+                       a_load_const_cgpara(list,OS_8,0,paraloc3);
+                       a_load_reg_cgpara(list,OS_16,src,paraloc2);
+                       a_load_reg_cgpara(list,OS_16,dst,paraloc1);
+                       paramanager.freecgpara(list,paraloc3);
+                       paramanager.freecgpara(list,paraloc2);
+                       paramanager.freecgpara(list,paraloc1);
+                       alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+                       a_call_name(list,'FPC_MUL_WORD',false);
+                       dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+                       cg.a_reg_alloc(list,NR_R24);
+                       cg.a_reg_alloc(list,NR_R25);
+                       cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
+                       cg.a_reg_dealloc(list,NR_R24);
+                       cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
+                       cg.a_reg_dealloc(list,NR_R25);
+                       paraloc3.done;
+                       paraloc2.done;
+                       paraloc1.done;
+                     end;
                  end
                  end
                else
                else
                  internalerror(2011022002);
                  internalerror(2011022002);
@@ -1691,7 +1701,46 @@ unit cgcpu;
          regs : tcpuregisterset;
          regs : tcpuregisterset;
          reg : tsuperregister;
          reg : tsuperregister;
       begin
       begin
-        if not(nostackframe) then
+        if po_interrupt in current_procinfo.procdef.procoptions then
+          begin
+            { check if the framepointer is actually used, this is done here because
+              we have to know the size of the locals (must be 0), avr does not know
+              an sp based stack }
+
+            if not(current_procinfo.procdef.stack_tainting_parameter(calleeside)) and
+              (localsize=0) then
+              current_procinfo.framepointer:=NR_NO;
+
+            { save int registers,
+              but only if the procedure returns }
+            if not(po_noreturn in current_procinfo.procdef.procoptions) then
+              regs:=rg[R_INTREGISTER].used_in_proc
+            else
+              regs:=[];
+            { if the framepointer is potentially used, save it always because we need a proper stack frame,
+              even if the procedure never returns, the procedure could be e.g. a nested one accessing
+              an outer stackframe }
+            if current_procinfo.framepointer<>NR_NO then
+              regs:=regs+[RS_R28,RS_R29];
+
+            regs:=regs+[RS_R0];
+
+            for reg:=RS_R31 downto RS_R0 do
+              if reg in regs then
+                list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+
+            { Save SREG }
+            list.concat(taicpu.op_reg_const(A_IN, NR_R0, $3F));
+            list.concat(taicpu.op_reg(A_PUSH, NR_R0));
+
+            if current_procinfo.framepointer<>NR_NO then
+              begin
+                list.concat(taicpu.op_reg_const(A_IN,NR_R28,NIO_SP_LO));
+                list.concat(taicpu.op_reg_const(A_IN,NR_R29,NIO_SP_HI));
+                a_adjust_sp(list,-localsize);
+              end;
+          end
+        else if not(nostackframe) then
           begin
           begin
             { check if the framepointer is actually used, this is done here because
             { check if the framepointer is actually used, this is done here because
               we have to know the size of the locals (must be 0), avr does not know
               we have to know the size of the locals (must be 0), avr does not know
@@ -1738,7 +1787,29 @@ unit cgcpu;
         }
         }
         if po_noreturn in current_procinfo.procdef.procoptions then
         if po_noreturn in current_procinfo.procdef.procoptions then
           exit;
           exit;
-        if not(nostackframe) then
+        if po_interrupt in current_procinfo.procdef.procoptions then
+          begin
+            regs:=rg[R_INTREGISTER].used_in_proc;
+            if current_procinfo.framepointer<>NR_NO then
+              begin
+                regs:=regs+[RS_R28,RS_R29];
+                LocalSize:=current_procinfo.calc_stackframe_size;
+                a_adjust_sp(list,LocalSize);
+              end;
+
+            { Reload SREG }
+            regs:=regs+[RS_R0];
+
+            list.concat(taicpu.op_reg(A_POP, NR_R0));
+            list.concat(taicpu.op_const_reg(A_OUT, $3F, NR_R0));
+
+            for reg:=RS_R0 to RS_R31 do
+              if reg in regs then
+                list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+
+            list.concat(taicpu.op_none(A_RETI));
+          end
+        else if not(nostackframe) then
           begin
           begin
             regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
             regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
             if current_procinfo.framepointer<>NR_NO then
             if current_procinfo.framepointer<>NR_NO then
@@ -1750,8 +1821,10 @@ unit cgcpu;
             for reg:=RS_R0 to RS_R31 do
             for reg:=RS_R0 to RS_R31 do
               if reg in regs then
               if reg in regs then
                 list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
                 list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
-          end;
-        list.concat(taicpu.op_none(A_RET));
+            list.concat(taicpu.op_none(A_RET));
+          end
+        else
+          list.concat(taicpu.op_none(A_RET));
       end;
       end;