Browse Source

Added some peephole optimizations, and fixed generic unconditional jump optimizations, for AVR.
Fixed multiplication code generation for AVR controllers without mul instructions.
Added handling of the old "interrupt" procedure directive, so that procedures declared with it return with RETI instead of RET.

git-svn-id: trunk@31030 -

Jeppe Johansen 10 years ago
parent
commit
03880c2f74
4 changed files with 270 additions and 58 deletions
  1. 4 0
      compiler/aoptobj.pas
  2. 141 6
      compiler/avr/aoptcpu.pas
  3. 1 1
      compiler/avr/aoptcpub.pas
  4. 124 51
      compiler/avr/cgcpu.pas

+ 4 - 0
compiler/aoptobj.pas

@@ -1178,7 +1178,11 @@ Unit AoptObj;
 
     function IsJumpToLabel(hp: taicpu): boolean;
       begin
+{$if defined(avr)}
+        result:=(hp.opcode in aopt_uncondjmp) and
+{$else avr}
         result:=(hp.opcode=aopt_uncondjmp) and
+{$endif avr}
 {$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
 {$endif arm or aarch64}

+ 141 - 6
compiler/avr/aoptcpu.pas

@@ -45,7 +45,7 @@ Implementation
   uses
     cutils,
     cpuinfo,
-    aasmbase,aasmcpu,
+    aasmbase,aasmcpu,aasmdata,
     globals,globtype,
     cgutils;
 
@@ -132,9 +132,10 @@ Implementation
 
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
-      hp1,hp2,hp3: tai;
+      hp1,hp2,hp3,hp4,hp5: tai;
       alloc, dealloc: tai_regalloc;
       i: integer;
+      l: TAsmLabel;
     begin
       result := false;
       case p.typ of
@@ -265,7 +266,8 @@ Implementation
                           into
                           sbi rX,lg(n)
                         }
-                        if MatchInstruction(hp1,A_ORI) and
+                        if (taicpu(p).oper[1]^.val<=31) and
+                          MatchInstruction(hp1,A_ORI) and
                           (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                           (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
                           GetNextInstruction(hp1,hp2) and
@@ -275,7 +277,7 @@ Implementation
                           begin
                             taicpu(p).opcode:=A_SBI;
                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
-                            taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val)-1);
+                            taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
                             asml.Remove(hp1);
                             hp1.Free;
                             asml.Remove(hp2);
@@ -290,7 +292,8 @@ Implementation
                           into
                           cbi rX,lg(n)
                         }
-                        else if MatchInstruction(hp1,A_ANDI) and
+                        else if (taicpu(p).oper[1]^.val<=31) and
+                           MatchInstruction(hp1,A_ANDI) and
                            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
                            (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
                            GetNextInstruction(hp1,hp2) and
@@ -300,11 +303,51 @@ Implementation
                           begin
                             taicpu(p).opcode:=A_CBI;
                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
-                            taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val))-1);
+                            taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
                             asml.Remove(hp1);
                             hp1.Free;
                             asml.Remove(hp2);
                             hp2.Free;
+                            result:=true;
+                          end
+                         {
+                              in rX,Y
+                              andi rX,n
+                              breq/brne L1
+
+                          into
+                              sbis/sbic Y,lg(n)
+                              jmp L1
+                            .Ltemp:
+                        }
+                        else if (taicpu(p).oper[1]^.val<=31) and
+                           MatchInstruction(hp1,A_ANDI) and
+                           (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
+                           (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
+                           GetNextInstruction(hp1,hp2) and
+                           MatchInstruction(hp2,A_BRxx) and
+                           (taicpu(hp2).condition in [C_EQ,C_NE]) then
+                          begin
+                            if taicpu(hp2).condition=C_EQ then
+                              taicpu(p).opcode:=A_SBIS
+                            else
+                              taicpu(p).opcode:=A_SBIC;
+
+                            taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
+                            taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
+                            asml.Remove(hp1);
+                            hp1.Free;
+
+                            taicpu(hp2).condition:=C_None;
+                            if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
+                              taicpu(hp2).opcode:=A_JMP
+                            else
+                              taicpu(hp2).opcode:=A_RJMP;
+
+                            current_asmdata.getjumplabel(l);
+                            l.increfs;
+                            asml.InsertAfter(tai_label.create(l), hp2);
+
                             result:=true;
                           end;
                       end;
@@ -528,6 +571,98 @@ Implementation
                             break;
                         end;
                   end;
+                A_SBIC,
+                A_SBIS:
+                  begin
+                    {
+                      Turn
+                          sbic/sbis X, y
+                          jmp .L1
+                          op
+                        .L1:
+
+                      into
+                          sbis/sbic X,y
+                          op
+                        .L1:
+                    }
+                    if GetNextInstruction(p, hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
+                       (taicpu(hp1).ops>0) and
+                       (taicpu(hp1).oper[0]^.typ = top_ref) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+                       GetNextInstruction(hp1, hp2) and
+                       (hp2.typ=ait_instruction) and
+                       (not taicpu(hp2).is_jmp) and
+                       GetNextInstruction(hp2, hp3) and
+                       (hp3.typ=ait_label) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
+                      begin
+                        if taicpu(p).opcode=A_SBIC then
+                          taicpu(p).opcode:=A_SBIS
+                        else
+                          taicpu(p).opcode:=A_SBIC;
+
+                        tai_label(hp3).labsym.decrefs;
+
+                        AsmL.remove(hp1);
+                        taicpu(hp1).Free;
+
+                        result:=true;
+                      end
+                    {
+                      Turn
+                          sbiX X, y
+                          jmp .L1
+                          jmp .L2
+                        .L1:
+                          op
+                        .L2:
+
+                      into
+                          sbiX X,y
+                        .L1:
+                          op
+                        .L2:
+                    }
+                    else if GetNextInstruction(p, hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
+                       (taicpu(hp1).ops>0) and
+                       (taicpu(hp1).oper[0]^.typ = top_ref) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+
+                       GetNextInstruction(hp1, hp2) and
+                       (hp2.typ=ait_instruction) and
+                       (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
+                       (taicpu(hp2).ops>0) and
+                       (taicpu(hp2).oper[0]^.typ = top_ref) and
+                       (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
+
+                       GetNextInstruction(hp2, hp3) and
+                       (hp3.typ=ait_label) and
+                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
+
+                       GetNextInstruction(hp3, hp4) and
+                       (hp4.typ=ait_instruction) and
+
+                       GetNextInstruction(hp4, hp5) and
+                       (hp3.typ=ait_label) and
+                       (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
+                      begin
+                        tai_label(hp3).labsym.decrefs;
+                        tai_label(hp5).labsym.decrefs;
+
+                        AsmL.remove(hp1);
+                        taicpu(hp1).Free;
+
+                        AsmL.remove(hp2);
+                        taicpu(hp2).Free;
+
+                        result:=true;
+                      end;
+                  end;
               end;
           end;
       end;

+ 1 - 1
compiler/avr/aoptcpub.pas

@@ -99,7 +99,7 @@ Const
 
   StoreDst = 0;
 
-  aopt_uncondjmp = A_JMP;
+  aopt_uncondjmp = [A_RJMP,A_JMP];
   aopt_condjmp = A_BRxx;
 
 Implementation

+ 124 - 51
compiler/avr/cgcpu.pas

@@ -431,7 +431,8 @@ unit cgcpu;
 
      procedure tcgavr.a_op_reg_reg_reg(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
        begin
-         if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) then
+         if (op in [OP_MUL,OP_IMUL]) and (size in [OS_16,OS_S16]) and
+            (CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype]) then
            begin
              getcpuregister(list,NR_R0);
              getcpuregister(list,NR_R1);
@@ -577,55 +578,64 @@ unit cgcpu;
              begin
                if size in [OS_8,OS_S8] then
                  begin
-                   cg.a_reg_alloc(list,NR_R0);
-                   cg.a_reg_alloc(list,NR_R1);
-                   list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
-                   list.concat(taicpu.op_reg(A_CLR,NR_R1));
-                   cg.a_reg_dealloc(list,NR_R1);
-                   list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
-                   cg.a_reg_dealloc(list,NR_R0);
+                   if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
+                     begin
+                       cg.a_reg_alloc(list,NR_R0);
+                       cg.a_reg_alloc(list,NR_R1);
+                       list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
+                       list.concat(taicpu.op_reg(A_CLR,NR_R1));
+                       cg.a_reg_dealloc(list,NR_R1);
+                       list.concat(taicpu.op_reg_reg(A_MOV,dst,NR_R0));
+                       cg.a_reg_dealloc(list,NR_R0);
+                     end
+                   else
+                     internalerror(2015061001);
                  end
                else if size=OS_16 then
                  begin
-                   tmpreg:=getintregister(list,OS_16);
-                   emit_mov(list,tmpreg,dst);
-                   emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
-                   list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
-                   emit_mov(list,dst,NR_R0);
-                   emit_mov(list,GetNextReg(dst),NR_R1);
-                   list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
-                   list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
-                   list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
-                   list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
-                   list.concat(taicpu.op_reg(A_CLR,NR_R1));
-
-                   { keep code for muls with overflow checking
-                   pd:=search_system_proc('fpc_mul_word');
-                   paraloc1.init;
-                   paraloc2.init;
-                   paraloc3.init;
-                   paramanager.getintparaloc(list,pd,1,paraloc1);
-                   paramanager.getintparaloc(list,pd,2,paraloc2);
-                   paramanager.getintparaloc(list,pd,3,paraloc3);
-                   a_load_const_cgpara(list,OS_8,0,paraloc3);
-                   a_load_reg_cgpara(list,OS_16,src,paraloc2);
-                   a_load_reg_cgpara(list,OS_16,dst,paraloc1);
-                   paramanager.freecgpara(list,paraloc3);
-                   paramanager.freecgpara(list,paraloc2);
-                   paramanager.freecgpara(list,paraloc1);
-                   alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
-                   a_call_name(list,'FPC_MUL_WORD',false);
-                   dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
-                   cg.a_reg_alloc(list,NR_R24);
-                   cg.a_reg_alloc(list,NR_R25);
-                   cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
-                   cg.a_reg_dealloc(list,NR_R24);
-                   cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
-                   cg.a_reg_dealloc(list,NR_R25);
-                   paraloc3.done;
-                   paraloc2.done;
-                   paraloc1.done;
-                   }
+                   if CPUAVR_HAS_MUL in cpu_capabilities[current_settings.cputype] then
+                     begin
+                       tmpreg:=getintregister(list,OS_16);
+                       emit_mov(list,tmpreg,dst);
+                       emit_mov(list,GetNextReg(tmpreg),GetNextReg(dst));
+                       list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,src));
+                       emit_mov(list,dst,NR_R0);
+                       emit_mov(list,GetNextReg(dst),NR_R1);
+                       list.concat(taicpu.op_reg_reg(A_MUL,GetNextReg(tmpreg),src));
+                       list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
+                       list.concat(taicpu.op_reg_reg(A_MUL,tmpreg,GetNextReg(src)));
+                       list.concat(taicpu.op_reg_reg(A_ADD,GetNextReg(dst),NR_R0));
+                       list.concat(taicpu.op_reg(A_CLR,NR_R1));
+                     end
+                   else
+                     begin
+                       { no MUL instruction available: call the fpc_mul_word helper instead }
+                       pd:=search_system_proc('fpc_mul_word');
+                       paraloc1.init;
+                       paraloc2.init;
+                       paraloc3.init;
+                       paramanager.getintparaloc(list,pd,1,paraloc1);
+                       paramanager.getintparaloc(list,pd,2,paraloc2);
+                       paramanager.getintparaloc(list,pd,3,paraloc3);
+                       a_load_const_cgpara(list,OS_8,0,paraloc3);
+                       a_load_reg_cgpara(list,OS_16,src,paraloc2);
+                       a_load_reg_cgpara(list,OS_16,dst,paraloc1);
+                       paramanager.freecgpara(list,paraloc3);
+                       paramanager.freecgpara(list,paraloc2);
+                       paramanager.freecgpara(list,paraloc1);
+                       alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+                       a_call_name(list,'FPC_MUL_WORD',false);
+                       dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+                       cg.a_reg_alloc(list,NR_R24);
+                       cg.a_reg_alloc(list,NR_R25);
+                       cg.a_load_reg_reg(list,OS_8,OS_8,NR_R24,dst);
+                       cg.a_reg_dealloc(list,NR_R24);
+                       cg.a_load_reg_reg(list,OS_8,OS_8,NR_R25,GetNextReg(dst));
+                       cg.a_reg_dealloc(list,NR_R25);
+                       paraloc3.done;
+                       paraloc2.done;
+                       paraloc1.done;
+                     end;
                  end
                else
                  internalerror(2011022002);
@@ -1691,7 +1701,46 @@ unit cgcpu;
          regs : tcpuregisterset;
          reg : tsuperregister;
       begin
-        if not(nostackframe) then
+        if po_interrupt in current_procinfo.procdef.procoptions then
+          begin
+            { check if the framepointer is actually used, this is done here because
+              we have to know the size of the locals (must be 0), avr does not know
+              an sp based stack }
+
+            if not(current_procinfo.procdef.stack_tainting_parameter(calleeside)) and
+              (localsize=0) then
+              current_procinfo.framepointer:=NR_NO;
+
+            { save int registers,
+              but only if the procedure returns }
+            if not(po_noreturn in current_procinfo.procdef.procoptions) then
+              regs:=rg[R_INTREGISTER].used_in_proc
+            else
+              regs:=[];
+            { if the framepointer is potentially used, save it always because we need a proper stack frame,
+              even if the procedure never returns, the procedure could be e.g. a nested one accessing
+              an outer stackframe }
+            if current_procinfo.framepointer<>NR_NO then
+              regs:=regs+[RS_R28,RS_R29];
+
+            regs:=regs+[RS_R0];
+
+            for reg:=RS_R31 downto RS_R0 do
+              if reg in regs then
+                list.concat(taicpu.op_reg(A_PUSH,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+
+            { Save SREG }
+            list.concat(taicpu.op_reg_const(A_IN, NR_R0, $3F));
+            list.concat(taicpu.op_reg(A_PUSH, NR_R0));
+
+            if current_procinfo.framepointer<>NR_NO then
+              begin
+                list.concat(taicpu.op_reg_const(A_IN,NR_R28,NIO_SP_LO));
+                list.concat(taicpu.op_reg_const(A_IN,NR_R29,NIO_SP_HI));
+                a_adjust_sp(list,-localsize);
+              end;
+          end
+        else if not(nostackframe) then
           begin
             { check if the framepointer is actually used, this is done here because
               we have to know the size of the locals (must be 0), avr does not know
@@ -1738,7 +1787,29 @@ unit cgcpu;
         }
         if po_noreturn in current_procinfo.procdef.procoptions then
           exit;
-        if not(nostackframe) then
+        if po_interrupt in current_procinfo.procdef.procoptions then
+          begin
+            regs:=rg[R_INTREGISTER].used_in_proc;
+            if current_procinfo.framepointer<>NR_NO then
+              begin
+                regs:=regs+[RS_R28,RS_R29];
+                LocalSize:=current_procinfo.calc_stackframe_size;
+                a_adjust_sp(list,LocalSize);
+              end;
+
+            { Reload SREG }
+            regs:=regs+[RS_R0];
+
+            list.concat(taicpu.op_reg(A_POP, NR_R0));
+            list.concat(taicpu.op_const_reg(A_OUT, $3F, NR_R0));
+
+            for reg:=RS_R0 to RS_R31 do
+              if reg in regs then
+                list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
+
+            list.concat(taicpu.op_none(A_RETI));
+          end
+        else if not(nostackframe) then
           begin
             regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
             if current_procinfo.framepointer<>NR_NO then
@@ -1750,8 +1821,10 @@ unit cgcpu;
             for reg:=RS_R0 to RS_R31 do
               if reg in regs then
                 list.concat(taicpu.op_reg(A_POP,newreg(R_INTREGISTER,reg,R_SUBWHOLE)));
-          end;
-        list.concat(taicpu.op_none(A_RET));
+            list.concat(taicpu.op_none(A_RET));
+          end
+        else
+          list.concat(taicpu.op_none(A_RET));
       end;