Browse Source

Fixed many issues...

git-svn-id: branches/laksen/avr32new@22839 -
Jeppe Johansen 12 years ago
parent
commit
95b0d8bb7d

+ 33 - 17
compiler/avr32/aasmcpu.pas

@@ -178,6 +178,7 @@ uses
          constructor op_reg(op : tasmop;_op1 : tregister);
          constructor op_ref(op : tasmop;const _op1 : treference);
          constructor op_const(op : tasmop;_op1 : longint);
+         constructor op_regset(op:tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset);
 
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_ref_reg(op : tasmop;const _op1 : treference;_op2 : tregister);
@@ -367,6 +368,13 @@ implementation
          loadconst(0,aint(_op1));
       end;
 
+    constructor taicpu.op_regset(op: tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset); { Creates instruction "op" whose single operand is the register set _op1 (e.g. PUSHM/POPM register lists). }
+      begin
+         inherited create(op);
+         ops:=1; { exactly one operand: the register set }
+         loadregset(0,regtype,subreg,_op1); { operand slot 0 receives the set together with its register type and subregister }
+      end;
+
 
     constructor taicpu.op_reg_reg(op : tasmop;_op1,_op2 : tregister);
       begin
@@ -575,7 +583,7 @@ implementation
       begin
         case getregtype(r) of
           R_INTREGISTER :
-            result:=setoppostfix(taicpu.op_reg_ref(A_LD,r,ref),PF_W);
+            result:=setoppostfix(taicpu.op_reg_ref(A_LDDSP,r,ref),PF_W);
           else
             internalerror(200401041);
         end;
@@ -588,7 +596,7 @@ implementation
       begin
         case getregtype(r) of
           R_INTREGISTER :
-            result:=setoppostfix(taicpu.op_ref_reg(A_ST,ref,r),PF_W);
+            result:=setoppostfix(taicpu.op_ref_reg(A_STDSP,ref,r),PF_W);
           else
             internalerror(200401041);
         end;
@@ -611,8 +619,9 @@ implementation
           A_EORH,A_EORL,
           A_ORH,A_ORL,
           A_TST,
+          A_MAC,A_MACS,A_MACU,
           A_CASTS,A_CASTU,
-          A_CBR,A_SWAP,
+          A_SBR,A_CBR,A_SWAP,
           A_ROL,A_ROR:
             if opnr = 0 then
               result:=operand_readwrite;
@@ -622,36 +631,41 @@ implementation
           A_BR,
           A_RJMP,A_ACALL,A_ICALL,A_MCALL,A_RCALL,A_SCALL,
           A_RET,A_RETD,A_RETE,A_RETS,
-          A_MEMC,A_MEMS,A_MEMT:;
+          A_MEMC,A_MEMS,A_MEMT,A_POPM,
+          A_PUSHM:;
 
-          A_ADC,A_ADD,A_ADDABS,
-          A_MAX,A_MIN,
-          A_RSUB,A_SBC,A_SUB,
+          A_ADD,
+          A_RSUB,A_SUB,
+          A_MUL,
+          A_AND,A_ANDN,
+          A_EOR,A_OR,
+          A_ASR,
+          A_LSL,
+          A_LSR:
+            if (ops=2) and (opnr=0) then
+              result:=operand_readwrite
+            else if opnr=0 then
+              result:=operand_write;
+
+          A_ADC,A_ADDABS,
+          A_MAX,A_MIN,A_SBC,
           A_DIVS,A_DIVU,
           A_ADDHH,
-          A_MAC,A_MACS,A_MACU,
-          A_MUL,A_MULS,A_MULU,
+          A_MULS,A_MULU,
           A_MULHH,A_MULWH,A_MULNHH,A_MULNWH,
           A_SATADD,A_SATSUB,
           A_SUBHH,
           A_MULSATHH,A_MULSATRNDHH,A_MULSATRNDWH,A_MULSATWH,
-          A_AND,A_ANDN,
-          A_EOR,A_OR,
           A_BFEXTS,A_BFEXTU,
           A_BFINS,
           A_BREV,
           A_CLZ,
-          A_SBR,
-          A_ASR,
-          A_LSL,
-          A_LSR,
           A_MOV,
           A_MOVH,
           A_LD,A_LDINS,A_LDSWP,A_LDDPC,A_LDDSP,
           A_ST,A_STCOND,A_STDSP,A_STHH,A_STSWP,
           A_XCHG,
-          A_LDM,A_LDMTS,A_POPM,
-          A_PUSHM,A_STM,A_STMTS,
+          A_LDM,A_LDMTS,A_STM,A_STMTS,
           A_BREAKPOINT,
           A_CACHE,
           A_CSRF,
@@ -956,12 +970,14 @@ implementation
                 current_asmdata.getjumplabel(l);
                 curdata.insert(taicpu.op_sym(A_BR,l));
                 curdata.concat(tai_label.create(l));
+                curdata.Insert(tai_align.Create_zeros(4));
                 list.insertlistafter(curtai,curdata);
                 curtai:=hp;
               end
             else
               curtai:=tai(curtai.next);
           end;
+        list.concat(tai_align.Create_zeros(4));
         list.concatlist(curdata);
         curdata.free;
       end;

+ 131 - 106
compiler/avr32/aoptcpu.pas

@@ -56,135 +56,160 @@ Implementation
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
       v: LongInt;
+      hp1: tai;
     begin
       result := false;
       case p.typ of
         ait_instruction:
           begin
             { Collapse ADD r0,r0,r1 -> ADD r0,r1 }
-            if (taicpu(p).opcode in [A_ADD,A_SUB,A_AND,A_EOR,A_OR]) and
+            if (taicpu(p).opcode in [A_ADD,A_SUB,A_AND,A_EOR,A_OR,A_MUL]) and
                (taicpu(p).ops = 3) and
                (taicpu(p).oper[0]^.typ = top_reg) and
                (taicpu(p).oper[1]^.typ = top_reg) and
                (taicpu(p).oper[2]^.typ = top_reg) and
                (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
               begin
-                taicpu(p).ops:=2;
-                taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
+                hp1:=taicpu.op_reg_reg(taicpu(p).opcode,taicpu(p).oper[0]^.reg,taicpu(p).oper[2]^.reg);
+                AsmL.InsertBefore(hp1,p);
+                //InsertLLItem(p.Previous,p.Next,hp1);
+                AsmL.Remove(p);
+                p.Free;
+                p:=hp1;
                 result:=true;
-                exit;
-              end;
-
+              end
             { Collapse ADD r0,r1,r0 -> ADD r0,r1
               SUB needs RSUB }
-            if (taicpu(p).opcode in [A_ADD,A_AND,A_EOR,A_OR]) and
+            else if (taicpu(p).opcode in [A_ADD,A_AND,A_EOR,A_OR,A_MUL]) and
                (taicpu(p).ops = 3) and
                (taicpu(p).oper[0]^.typ = top_reg) and
                (taicpu(p).oper[1]^.typ = top_reg) and
                (taicpu(p).oper[2]^.typ = top_reg) and
                (taicpu(p).oper[0]^.reg = taicpu(p).oper[2]^.reg) then
               begin
-                taicpu(p).ops:=2;
+                hp1:=taicpu.op_reg_reg(taicpu(p).opcode,taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg);
+                AsmL.InsertBefore(hp1,p);
+                //InsertLLItem(p.Previous,p.Next,hp1);
+                AsmL.Remove(p);
+                p.Free;
+                p:=hp1;
                 result:=true;
-                exit;
+              end
+            else
+              case taicpu(p).opcode of
+                { Collapse instructions into a compact format }
+                A_SUB:
+                  begin
+                    { SUB r0,r0,imm8 -> SUB r0,imm8 }
+                    if (taicpu(p).ops=3) and
+                       (taicpu(p).oper[0]^.typ = top_reg) and
+                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       (taicpu(p).oper[2]^.typ = top_const) and
+                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
+                       in_signed_bits(taicpu(p).oper[2]^.val, 8) then
+                      begin
+                        hp1:=taicpu.op_reg_const(taicpu(p).opcode,taicpu(p).oper[0]^.reg,taicpu(p).oper[2]^.val);
+                        AsmL.InsertBefore(hp1,p);
+                        //InsertLLItem(p.Previous,p.Next,hp1);
+                        AsmL.Remove(p);
+                        p.Free;
+                        p:=hp1;
+                        result:=true;
+                      end
+                    { SUB r0,r1,r0 -> RSUB r0,r1 }
+                    else if (taicpu(p).ops=3) and
+                            (taicpu(p).oper[0]^.typ = top_reg) and
+                            (taicpu(p).oper[1]^.typ = top_reg) and
+                            (taicpu(p).oper[2]^.typ = top_reg) and
+                            (taicpu(p).oper[0]^.reg = taicpu(p).oper[2]^.reg) then
+                      begin
+                        hp1:=taicpu.op_reg_reg(A_RSUB,taicpu(p).oper[0]^.reg,taicpu(p).oper[1]^.reg);
+                        AsmL.InsertBefore(hp1,p);
+                        //InsertLLItem(p.Previous,p.Next,hp1);
+                        AsmL.Remove(p);
+                        p.Free;
+                        p:=hp1;
+                        result:=true;
+                      end;
+                  end;
+                A_LSL,
+                A_LSR,
+                A_ASR:
+                  begin
+                    { LSL r0,r0,imm8 -> LSL r0,imm8 }
+                    if (taicpu(p).ops=3) and
+                       (taicpu(p).oper[0]^.typ = top_reg) and
+                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       (taicpu(p).oper[2]^.typ = top_const) and
+                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
+                      begin
+                        hp1:=taicpu.op_reg_const(taicpu(p).opcode,taicpu(p).oper[0]^.reg,taicpu(p).oper[2]^.val);
+                        AsmL.InsertBefore(hp1,p);
+                        //InsertLLItem(p.Previous,p.Next,hp1);
+                        AsmL.Remove(p);
+                        p.Free;
+                        p:=hp1;
+                        result:=true;
+                      end
+                  end;
+                A_STM:
+                  begin
+                    { Try to compress STM --sp, LIST down to PUSHM LIST }
+                    if (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
+                       (taicpu(p).oper[0]^.ref^.addressmode = AM_PREINDEXED) and
+                       (taicpu(p).oper[1]^.regset^ <> []) then
+                      begin
+                        if test_set(taicpu(p).oper[1]^.regset^, [0..3]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [4..7]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [8..9]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [10]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [11]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [12]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [14]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [15]) and
+                           ((taicpu(p).oper[1]^.regset^ * [13]) = []) then
+                          begin
+                            hp1:=taicpu.op_regset(A_PUSHM,taicpu(p).oper[1]^.regtyp,taicpu(p).oper[1]^.subreg,taicpu(p).oper[1]^.regset^);
+                            AsmL.InsertBefore(hp1,p);
+                            //InsertLLItem(p.Previous,p.Next,hp1);
+                            AsmL.Remove(p);
+                            p.Free;
+                            p:=hp1;
+                            result:=true;
+                            exit;
+                          end;
+                      end;
+                  end;
+                A_LDM:
+                  begin
+                    { Try to compress LDM sp++, LIST down to POPM LIST }
+                    if (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
+                       ((taicpu(p).oper[0]^.ref^.addressmode = AM_POSTINDEXED) or
+                        (RS_STACK_POINTER_REG in taicpu(p).oper[1]^.regset^)) and
+                       (taicpu(p).oper[1]^.regset^ <> []) then
+                      begin
+                        if test_set(taicpu(p).oper[1]^.regset^, [0..3]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [4..7]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [8..9]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [10]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [11]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [12]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [14]) and
+                           test_set(taicpu(p).oper[1]^.regset^, [15]) and
+                           ((taicpu(p).oper[1]^.regset^ * [13]) = []) then
+                          begin
+                            hp1:=taicpu.op_regset(A_POPM,taicpu(p).oper[1]^.regtyp,taicpu(p).oper[1]^.subreg,taicpu(p).oper[1]^.regset^);
+                            AsmL.InsertBefore(hp1,p);
+                            //InsertLLItem(p.Previous,p.Next,hp1);
+                            AsmL.Remove(p);
+                            p.Free;
+                            p:=hp1;
+                            result:=true;
+                            exit;
+                          end;
+                      end;
+                  end;
               end;
-
-            case taicpu(p).opcode of
-              { Collapse instructions into a compact format }
-              A_SUB:
-                begin
-                  { SUB r0,r0,imm8 -> SUB r0,imm8 }
-                  if (taicpu(p).ops=3) and
-                     (taicpu(p).oper[0]^.typ = top_reg) and
-                     (taicpu(p).oper[1]^.typ = top_reg) and
-                     (taicpu(p).oper[2]^.typ = top_const) and
-                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
-                     in_signed_bits(taicpu(p).oper[2]^.val, 8) then
-                    begin
-                      taicpu(p).ops:=2;
-                      taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
-                      result:=true;
-                    end
-                  { SUB r0,r1,r0 -> RSUB r0,r1 }
-                  else if (taicpu(p).ops=3) and
-                          (taicpu(p).oper[0]^.typ = top_reg) and
-                          (taicpu(p).oper[1]^.typ = top_reg) and
-                          (taicpu(p).oper[2]^.typ = top_reg) and
-                          (taicpu(p).oper[0]^.reg = taicpu(p).oper[2]^.reg) then
-                    begin
-                      taicpu(p).opcode:=A_RSUB;
-                      taicpu(p).ops:=2;
-                      result:=true;
-                    end;
-                end;
-              A_LSL,
-              A_LSR,
-              A_ASR:
-                begin
-                  { LSL r0,r0,imm8 -> LSL r0,imm8 }
-                  if (taicpu(p).ops=3) and
-                     (taicpu(p).oper[0]^.typ = top_reg) and
-                     (taicpu(p).oper[1]^.typ = top_reg) and
-                     (taicpu(p).oper[2]^.typ = top_const) and
-                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
-                    begin
-                      taicpu(p).ops:=2;
-                      taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
-                      result:=true;
-                    end
-                end;
-              A_STM:
-                begin
-                  { Try to compress STM --sp, LIST down to PUSHM LIST }
-                  if (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
-                     (taicpu(p).oper[0]^.ref^.addressmode = AM_PREINDEXED) and
-                     (taicpu(p).oper[1]^.regset^ <> []) then
-                    begin
-                      if test_set(taicpu(p).oper[1]^.regset^, [0..3]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [4..7]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [8..9]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [10]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [11]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [12]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [14]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [15]) and
-                         ((taicpu(p).oper[1]^.regset^ * [13]) = []) then
-                        begin
-                          taicpu(p).loadregset(0,taicpu(p).oper[1]^.regtyp,taicpu(p).oper[1]^.subreg,taicpu(p).oper[1]^.regset^);
-                          taicpu(p).ops:=1;
-                          taicpu(p).opcode:=A_PUSHM;
-                          result:=true;
-                          exit;
-                        end;
-                    end;
-                end;
-              A_LDM:
-                begin
-                  { Try to compress LDM sp++, LIST down to POPM LIST }
-                  if (taicpu(p).oper[0]^.ref^.base = NR_STACK_POINTER_REG) and
-                     ((taicpu(p).oper[0]^.ref^.addressmode = AM_POSTINDEXED) or
-                      (RS_STACK_POINTER_REG in taicpu(p).oper[1]^.regset^)) and
-                     (taicpu(p).oper[1]^.regset^ <> []) then
-                    begin
-                      if test_set(taicpu(p).oper[1]^.regset^, [0..3]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [4..7]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [8..9]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [10]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [11]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [12]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [14]) and
-                         test_set(taicpu(p).oper[1]^.regset^, [15]) and
-                         ((taicpu(p).oper[1]^.regset^ * [13]) = []) then
-                        begin
-                          taicpu(p).loadregset(0,taicpu(p).oper[1]^.regtyp,taicpu(p).oper[1]^.subreg,taicpu(p).oper[1]^.regset^);
-                          taicpu(p).ops:=1;
-                          taicpu(p).opcode:=A_POPM;
-                          result:=true;
-                          exit;
-                        end;
-                    end;
-                end;
-            end;
           end;
       end;
     end;

+ 145 - 58
compiler/avr32/cgcpu.pas

@@ -32,7 +32,7 @@ unit cgcpu;
        cgbase,cgutils,cgobj,
        aasmbase,aasmcpu,aasmtai,aasmdata,
        parabase,
-       cpubase,cpuinfo,node,cg64f32,rgcpu;
+       cpubase,cpuinfo,node,cg64f32,rgcpu,sysutils;
 
 
     type
@@ -63,6 +63,8 @@ unit cgcpu;
         procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
         procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
 
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+
         { move instructions }
         procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
         procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
@@ -240,11 +242,17 @@ unit cgcpu;
           imm_shift : byte;
           l : tasmlabel;
           hr : treference;
+          i : longint;
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
             internalerror(2002090902);
           if in_signed_bits(a,21) then
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
+          else if ispowerof2(a, i) then
+            begin
+              list.concat(taicpu.op_reg_reg(A_EOR, reg, reg));
+              list.concat(taicpu.op_reg_const(A_SBR, reg, i));
+            end
           else
             begin
                reference_reset(hr,4);
@@ -255,7 +263,7 @@ unit cgcpu;
                current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
 
                hr.symbol:=l;
-               list.concat(setoppostfix(taicpu.op_reg_ref(A_LD,reg,hr),size2postfix(size)));
+               list.concat(taicpu.op_reg_ref(A_LDDPC,reg,hr));
             end;
        end;
 
@@ -538,17 +546,11 @@ unit cgcpu;
        begin
          case op of
            OP_NEG:
-             list.concat(taicpu.op_reg_reg_const(A_RSUB,dst,dst,0));
+             list.concat(taicpu.op_reg_reg_const(A_RSUB,dst,src,0));
            OP_NOT:
              begin
                a_load_reg_reg(list,size,size,src,dst);
                list.concat(taicpu.op_reg(A_COM,dst));
-               case size of
-                 OS_8 :
-                   list.concat(taicpu.op_reg_const_coh(A_ANDL,dst,$FF));
-                 OS_16 :
-                   list.concat(taicpu.op_reg_const_coh(A_ANDL,dst,$FFFF));
-               end;
              end
            else
              a_op_reg_reg_reg(list,op,OS_32,src,dst,dst);
@@ -558,7 +560,7 @@ unit cgcpu;
 
     const
       op_reg_reg_opcg2asmop: array[TOpCG] of tasmop =
-        (A_NONE,A_MOV,A_ADD,A_AND,A_NONE,A_NONE,A_MUL,A_MUL,A_NONE,A_NONE,A_OR,
+        (A_NONE,A_MOV,A_ADD,A_AND,A_DIVU,A_DIVS,A_MUL,A_MUL,A_NONE,A_NONE,A_OR,
          A_NONE,A_NONE,A_NONE,A_SUB,A_EOR,A_NONE,A_NONE);
 
 
@@ -584,7 +586,7 @@ unit cgcpu;
         shift : byte;
         tmpreg : tregister;
         so : tshifterop;
-        l1 : longint;
+        i, l1 : longint;
       begin
         ovloc.loc:=LOC_VOID;
 
@@ -594,10 +596,10 @@ unit cgcpu;
             a := -a;
           end;
 
-        if in_signed_bits(a,16) and (op = OP_SUB) then
-          begin
-            list.concat(taicpu.op_reg_reg_const(A_SUB,dst,src,a));
-          end
+        if (op in [OP_SUB,OP_OR]) and (a = 0) then
+          a_load_reg_reg(list,size,size,src,dst)
+        else if in_signed_bits(a,16) and (op = OP_SUB) then
+          list.concat(taicpu.op_reg_reg_const(A_SUB,dst,src,a))
         else if in_signed_bits(a,21) and (op = OP_SUB) then
           begin
             a_load_reg_reg(list,size,size,src,dst);
@@ -608,6 +610,53 @@ unit cgcpu;
             a_load_reg_reg(list,size,size,src,dst);
             list.concat(taicpu.op_reg_const(A_SUB,dst,a));
           end
+        else if (op = OP_OR) and ispowerof2(a, i) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const(A_SBR,dst,i));
+          end
+        else if (op = OP_AND) and ispowerof2(not a, i) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const(A_CBR,dst,i));
+          end
+        else if (op = OP_AND) and ((a and $FFFF) = a) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            if a = $FFFF then
+              list.concat(setoppostfix(taicpu.op_reg(A_CASTU, dst), PF_H))
+            else if a = $FF then
+              list.concat(setoppostfix(taicpu.op_reg(A_CASTU, dst), PF_B))
+            else if a = 0 then
+              list.concat(taicpu.op_reg_reg(A_EOR, dst, dst))
+            else
+              list.concat(taicpu.op_reg_const_coh(A_ANDL, dst, a));
+          end
+        else if (op = OP_AND) and ((a and $FFFF0000) = a) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const_coh(A_ANDH, dst, a shr 16));
+          end
+        else if (op = OP_OR) and ((a and $FFFF) = a) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const(A_ORL, dst, a));
+          end
+        else if (op = OP_OR) and ((a and $FFFF0000) = a) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const(A_ORH, dst, a shr 16));
+          end
+        else if (op = OP_XOR) and ((a and $FFFF) = a) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const(A_EORL, dst, a));
+          end
+        else if (op = OP_XOR) and ((a and $FFFF0000) = a) then
+          begin
+            a_load_reg_reg(list,size,size,src,dst);
+            list.concat(taicpu.op_reg_const(A_EORH, dst, a shr 16));
+          end
         { there could be added some more sophisticated optimizations }
         else if (op in [OP_MUL,OP_IMUL]) and (a=1) then
           a_load_reg_reg(list,size,size,src,dst)
@@ -671,7 +720,6 @@ unit cgcpu;
         ovloc.loc:=LOC_VOID;
         case op of
           OP_NEG,OP_NOT,
-          OP_DIV,OP_IDIV,
           OP_ROL,OP_ROR:
             internalerror(200308281);
           OP_SHL:
@@ -683,31 +731,15 @@ unit cgcpu;
           OP_IMUL,
           OP_MUL:
             begin
-              if op=OP_IMUL then
-                asmop:=A_MULS
-              else
-                asmop:=A_MULU;
-
-              list.concat(setoppostfix(taicpu.op_reg_reg_reg(asmop,dst,src2,src1), PF_D));
-              {overflowreg:=getintregister(list,OS_64);
-              if op=OP_IMUL then
-                asmop:=A_MULS
-              else
-                asmop:=A_MULU;
-
-              list.concat(setoppostfix(taicpu.op_reg_reg_reg(asmop,overflowreg,src2,src1), PF_D));
-              a_load_reg_reg(list,size,size,overflowreg,dst);
-
               if op=OP_IMUL then
                 begin
-                  a_op_const_reg(list,OP_SAR,OS_SINT,31,overflowreg);
-                  list.concat(taicpu.op_reg_reg(A_CP,tregister(longint(overflowreg)+1),overflowreg));
+                  alloccpuregisters(list,R_INTREGISTER,[RS_R10,RS_R11]);
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MULS,NR_R10,src1,src2), PF_D));
+                  dealloccpuregisters(list,R_INTREGISTER,[RS_R10,RS_R11]);
+                  a_load_reg_reg(list,OS_INT,OS_INT,NR_R10,dst);
                 end
               else
-                list.concat(taicpu.op_reg_const(A_CP,overflowreg,0));
-
-               ovloc.loc:=LOC_FLAGS;
-               ovloc.resflags:=F_NE;}
+                list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
             end;
           else
             list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1));
@@ -715,6 +747,24 @@ unit cgcpu;
         maybeadjustresult(list,op,size,dst);
       end;
 
+    procedure tcgavr32.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); { Appends a bit-scan instruction sequence to list: src is the scanned register, dst receives the result; reverse selects which of the two sequences is emitted. }
+      begin
+        if reverse then
+          begin
+            list.concat(taicpu.op_reg_reg(A_CLZ, dst, src)); { dst := leading-zero count of src }
+            list.concat(taicpu.op_reg_reg_const(A_RSUB, dst, dst, 31)); { presumably dst := 31 - dst, i.e. index of the highest set bit - TODO confirm RSUB operand order }
+            list.concat(setoppostfix(taicpu.op_reg(A_CASTU, dst), PF_B)); { CASTU with byte postfix; presumably turns the -1 produced for src=0 into 255 - verify }
+          end
+        else
+          begin
+            a_load_reg_reg(list, size, size, src, dst); { work on a copy in dst because BREV below operates in place }
+            list.concat(taicpu.op_reg(A_BREV, dst)); { bit-reverse dst so the lowest set bit of src becomes the highest }
+            list.concat(taicpu.op_reg_reg(A_CLZ, dst, dst)); { leading zeros of the reversed value = trailing zeros of src }
+            list.concat(taicpu.op_reg_reg_const(A_RSUB, dst, dst, 31)); { NOTE(review): the CLZ result above already looks like the index of the lowest set bit; subtracting it from 31 appears to yield 31-index instead - confirm intended forward-scan result }
+            list.concat(setoppostfix(taicpu.op_reg(A_CASTU, dst), PF_B)); { same byte cast as in the reverse branch }
+          end
+      end;
+
 
     function tcgavr32.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
       var
@@ -766,8 +816,8 @@ unit cgcpu;
 
                 { load consts entry }
                 tmpref.symbol:=l;
-                tmpref.base:=NR_R15;
-                list.concat(setoppostfix(taicpu.op_reg_ref(A_LD,tmpreg,tmpref), PF_W));
+                tmpref.base:=NR_PC;
+                list.concat(taicpu.op_reg_ref(A_LDDPC,tmpreg,tmpref));
 
                 { in case of LDF/STF, we got rid of the NR_R15 }
                 if is_pc(ref.base) then
@@ -809,7 +859,10 @@ unit cgcpu;
             ref.offset:=0;
           end;
 
-        if op in [A_LD,A_LDINS,A_LDSWP,A_LDDPC,A_LDDSP] then
+        if is_pc(ref.base) and
+           (op = A_LD) then
+          list.concat(taicpu.op_reg_ref(A_LDDPC,reg,ref))
+        else if op in [A_LD,A_LDINS,A_LDSWP,A_LDDPC,A_LDDSP] then
           list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix))
         else
           list.concat(setoppostfix(taicpu.op_ref_reg(op,ref,reg),oppostfix));
@@ -866,12 +919,16 @@ unit cgcpu;
                         inc(usedtmpref.offset,3);
                       usedtmpref:=a_internal_load_reg_ref(list,OS_8,OS_8,reg,usedtmpref);
                       list.concat(taicpu.op_reg_reg_const(A_LSR,tmpreg,reg,8));
-                      inc(usedtmpref.offset,dir);
+                      {inc(usedtmpref.offset,dir);
                       a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                       list.concat(taicpu.op_reg_reg_const(A_LSR,tmpreg,tmpreg,8));
                       inc(usedtmpref.offset,dir);
                       a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
-                      list.concat(taicpu.op_reg_reg_const(A_LSR,tmpreg,tmpreg,8));
+                      list.concat(taicpu.op_reg_reg_const(A_LSR,tmpreg,tmpreg,8));}
+                      inc(usedtmpref.offset,2*dir);
+                      a_internal_load_reg_ref(list,OS_16,OS_16,tmpreg,usedtmpref);
+                      list.concat(taicpu.op_reg_reg_const(A_LSR,tmpreg,tmpreg,16));
+
                       inc(usedtmpref.offset,dir);
                       a_internal_load_reg_ref(list,OS_8,OS_8,tmpreg,usedtmpref);
                     end;
@@ -917,7 +974,7 @@ unit cgcpu;
           internalerror(2002090901);
 
         conv_done:=false;
-        if tosize<>fromsize then
+        {if tosize<>fromsize then
           begin
             conv_done:=true;
             if tcgsize2size[tosize]<=tcgsize2size[fromsize] then
@@ -946,7 +1003,21 @@ unit cgcpu;
               else
                 conv_done:=false;
             end;
+          end;}
+        if tcgsize2size[tosize]>tcgsize2size[fromsize] then
+          begin
+            conv_done := true;
+            a_load_reg_reg(list, fromsize,fromsize, reg1, reg2);
+            case fromsize of
+              OS_8: list.concat(setoppostfix(taicpu.op_reg(A_CASTU, reg2), PF_B));
+              OS_S8: list.concat(setoppostfix(taicpu.op_reg(A_CASTS, reg2), PF_B));
+              OS_16: list.concat(setoppostfix(taicpu.op_reg(A_CASTU, reg2), PF_H));
+              OS_S16: list.concat(setoppostfix(taicpu.op_reg(A_CASTS, reg2), PF_H));
+            else
+              conv_done := false;
+            end;
           end;
+
         if not conv_done and (reg1<>reg2) then
           begin
             { same size, only a register mov required }
@@ -1029,9 +1100,11 @@ unit cgcpu;
          regs : tcpuregisterset;
          stackcount : pint;
       begin
+        localsize := align(localsize,4);
         if not(nostackframe) then
           begin
-            a_reg_alloc(list,NR_STACK_POINTER_REG);
+            //a_reg_alloc(list,NR_STACK_POINTER_REG);
+            list.concat(tai_comment.Create(strpnew('Entry '+BoolToStr(current_procinfo.framepointer=NR_STACK_POINTER_REG,'t','f')+inttostr(localsize))));
 
             { save int registers }
             reference_reset(ref,4);
@@ -1057,10 +1130,13 @@ unit cgcpu;
                   if (r in regs) then
                     inc(stackcount,4);
 
-                dec(stackcount,4); // Point at LR
-                if stackcount > 0 then
-                  list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_STACK_POINTER_REG,-stackcount));
-              end;
+                list.concat(taicpu.op_reg_reg_const(A_SUB,current_procinfo.framepointer,NR_STACK_POINTER_REG,-stackcount));
+
+                if localsize > 0 then
+                  list.concat(taicpu.op_reg_const(A_SUB,NR_STACK_POINTER_REG,localsize));
+              end
+            else if localsize <> 0 then
+              list.concat(taicpu.op_reg_const(A_SUB,NR_STACK_POINTER_REG,localsize));
           end
         else
           begin
@@ -1074,6 +1150,9 @@ unit cgcpu;
 
             if regs <> [] then
               list.concat(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs));
+
+            if localsize<>0 then
+              list.concat(taicpu.op_reg_const(A_SUB,NR_STACK_POINTER_REG,localsize));
           end;
       end;
 
@@ -1087,11 +1166,19 @@ unit cgcpu;
          regs : tcpuregisterset;
          stackmisalignment: pint;
       begin
+        localsize:=Align(current_procinfo.calc_stackframe_size,4);
+
         if not(nostackframe) then
           begin
             stackmisalignment:=0;
 
-            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall)        ;
+            list.concat(tai_comment.Create(strpnew('Exit '+inttostr(localsize))));
+
+            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) and
+               (localsize<>0) then
+              list.concat(taicpu.op_reg_const(A_SUB,NR_STACK_POINTER_REG,-localsize));
+
+            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
             if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
               begin
                 exclude(regs,RS_R14);
@@ -1100,11 +1187,10 @@ unit cgcpu;
             { restore saved stack pointer to SP (R13) and saved lr to PC (R15).
               The saved PC came after that but is discarded, since we restore
               the stack pointer }
+
             if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
               regs:=regs+[RS_FRAME_POINTER_REG,RS_R15];
 
-            Exclude(regs,RS_STACK_POINTER_REG);
-
             if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
               begin
                 if regs=[] then
@@ -1119,6 +1205,8 @@ unit cgcpu;
               end
             else
               begin
+                list.concat(taicpu.op_reg_reg(A_MOV,NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG));
+
                 { restore int registers and return }
                 reference_reset(ref,4);
                 ref.base:=NR_STACK_POINTER_REG;
@@ -1128,6 +1216,9 @@ unit cgcpu;
           end
         else
           begin
+            if localsize<>0 then
+              list.concat(taicpu.op_reg_const(A_SUB,NR_STACK_POINTER_REG,-localsize));
+
             reference_reset(ref,4);
             ref.base:=NR_STACK_POINTER_REG;
             ref.addressmode:=AM_POSTINDEXED;
@@ -1223,7 +1314,7 @@ unit cgcpu;
         tmpreg:=getintregister(list,OS_INT);
         tmpref.symbol:=l;
         tmpref.base:=NR_PC;
-        list.concat(setoppostfix(taicpu.op_reg_ref(A_LD,tmpreg,tmpref),size2postfix(OS_INT)));
+        list.concat(taicpu.op_reg_ref(A_LDDPC,tmpreg,tmpref));
 
         if (ref.base<>NR_NO) then
           begin
@@ -1763,9 +1854,7 @@ unit cgcpu;
               OP_ADD:
                 begin
                     begin
-                      tmpreg:=cg.getintregister(list,OS_32);
-                      cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
-                      list.concat(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg));
+                      cg.a_op_const_reg_reg(list,OP_ADD,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                     end;
 
                     begin
@@ -1777,9 +1866,7 @@ unit cgcpu;
               OP_SUB:
                 begin
                     begin
-                      tmpreg:=cg.getintregister(list,OS_32);
-                      cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
-                      list.concat(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg));
+                      cg.a_op_const_reg_reg(list,OP_SUB,OS_32,aint(lo(value)),regsrc.reglo,regdst.reglo);
                     end;
 
                     begin

+ 6 - 17
compiler/avr32/cpubase.pas

@@ -90,7 +90,7 @@ unit cpubase;
       first_mm_imreg     = $0;
 
 { TODO: Calculate bsstart}
-      regnumber_count_bsstart = 64;
+      regnumber_count_bsstart = 8;
 
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ravr32num.inc}
@@ -214,7 +214,7 @@ unit cpubase;
       maxintregs = 15;
       { to determine how many registers to use for regvars }
       maxintscratchregs = 3;
-      usableregsint = [RS_R0..RS_R10,RS_R12];
+      usableregsint = [RS_R0..RS_R12];
       c_countusableregsint = 12;
 
       maxfpuregs = 0;
@@ -383,21 +383,10 @@ unit cpubase;
 
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
       begin
-        case regtype of
-          R_MMREGISTER:
-            begin
-              case s of
-                OS_F32:
-                  cgsize2subreg:=R_SUBFS;
-                OS_F64:
-                  cgsize2subreg:=R_SUBFD;
-                else
-                  internalerror(2009112701);
-              end;
-            end;
-          else
-            cgsize2subreg:=R_SUBWHOLE;
-        end;
+        if s in [OS_64,OS_S64] then
+          cgsize2subreg:=R_SUBQ
+        else
+          cgsize2subreg:=R_SUBWHOLE;
       end;
 
 

+ 4 - 6
compiler/avr32/cpupara.pas

@@ -80,14 +80,14 @@ unit cpupara;
             if nr<=4 then
               begin
                 loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,RS_R0+nr-1,R_SUBWHOLE);
+                register:=newreg(R_INTREGISTER,RS_R12-nr+1,R_SUBWHOLE);
               end
             else
               begin
                 { the other parameters are passed on the stack }
                 loc:=LOC_REFERENCE;
                 reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=(nr-5)*4;
+                reference.offset:=(nr-6)*4;
               end;
           end;
       end;
@@ -221,7 +221,7 @@ unit cpupara;
              begin
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
+               dec(nextintreg);
              end
            else
              begin
@@ -325,8 +325,6 @@ unit cpupara;
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
                             dec(nextintreg);
-                            if nextintreg=RS_R11 then
-                              dec(nextintreg);
                           end
                         else
                           begin
@@ -383,7 +381,7 @@ unit cpupara;
                      if paraloc^.loc=LOC_REFERENCE then
                        begin
                          paraloc^.reference.index:=NR_FRAME_POINTER_REG;
-                         inc(paraloc^.reference.offset,4);
+                         //inc(paraloc^.reference.offset,4);
                        end;
                    end;
                  dec(paralen,tcgsize2size[paraloc^.size]);

+ 18 - 3
compiler/avr32/cpupi.pas

@@ -32,10 +32,11 @@ unit cpupi;
        procinfo,cpuinfo,psub;
 
     type
-       tarmprocinfo = class(tcgprocinfo)
+       tavr32procinfo = class(tcgprocinfo)
+         procedure set_first_temp_offset; override;
+         function calc_stackframe_size: longint; override;
        end;
 
-
   implementation
 
     uses
@@ -48,6 +49,20 @@ unit cpupi;
        cgobj;
 
 
+    { Tells the temp generator (tg) where the first temporary may be placed.
+      When tg.direction is anything other than -1 the temps start at
+      maxpushedparasize; when it is -1 they start at a fixed negative offset. }
+    procedure tavr32procinfo.set_first_temp_offset;
+      begin
+        if tg.direction <> -1 then
+          tg.setfirsttemp(maxpushedparasize)
+        else
+          { NOTE(review): -28-16 (= -44) presumably skips an area reserved for
+            saved registers - confirm against the prologue in cgcpu.pas }
+          tg.setfirsttemp(-28-16);
+      end;
+
+    { Returns the stack frame size: the temp area (tg.direction*tg.lasttemp)
+      rounded up to the frame alignment, plus the pushed-parameter area.
+      Side effect: maxpushedparasize itself is rounded up to the same
+      alignment. The alignment is localalignmin from the current settings,
+      but never less than 4. }
+    function tavr32procinfo.calc_stackframe_size: longint;
+      var
+        framealign : longint;
+      begin
+        framealign:=max(current_settings.alignment.localalignmin,4);
+
+        maxpushedparasize:=align(maxpushedparasize,framealign);
+        result:=Align(tg.direction*tg.lasttemp,framealign)+maxpushedparasize;
+      end;
+
 begin
-   cprocinfo:=tarmprocinfo;
+   cprocinfo:=tavr32procinfo;
 end.

+ 98 - 93
compiler/avr32/navr32mat.pas

@@ -54,7 +54,8 @@ implementation
       pass_2,procinfo,
       ncon,
       cpubase,cpuinfo,
-      ncgutil,cgcpu;
+      ncgutil,cgcpu,
+      nadd,pass_1,symdef;
 
 {*****************************************************************************
                              Tavr32MODDIVNODE
@@ -72,6 +73,19 @@ implementation
           ) and
           not(is_64bitint(resultdef)) then
           result:=nil
+        else if (nodetype=divn) and
+          not(is_64bitint(resultdef)) then
+          result:=nil
+        else if (nodetype=modn) and
+          not(is_64bitint(resultdef)) then
+          begin
+            if (right.nodetype=ordconstn) and
+              ispowerof2(tordconstnode(right).value,power) and
+              (tordconstnode(right).value>0) then
+              result:=caddnode.create(andn,left,cordconstnode.create(tordconstnode(right).value-1,sinttype,false))
+            else
+              result:=nil;
+          end
         else
           result:=inherited first_moddivint;
       end;
@@ -80,112 +94,103 @@ implementation
     procedure tavr32moddivnode.pass_generate_code;
       var
         power  : longint;
+        helper1,helper2,
         numerator,
         resultreg  : tregister;
         size       : Tcgsize;
-        so : tshifterop;
-
-       procedure genOrdConstNodeDiv;
-         begin
-           if tordconstnode(right).value=0 then
-             internalerror(2005061701)
-           else if tordconstnode(right).value=1 then
-             cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
-           else if (tordconstnode(right).value = int64(-1)) then
-             begin
-               // note: only in the signed case possible..., may overflow
-               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,resultreg,numerator));
-               current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NEG,resultreg));
-             end
-           else if ispowerof2(tordconstnode(right).value,power) then
-             begin
-               if (is_signed(right.resultdef)) then
-                 begin
-                   cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,resultreg);
-                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LSR,resultreg,32-power));
-                   current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_ADD,resultreg,numerator));
-                   current_asmdata.CurrAsmList.Concat(taicpu.op_reg_const(A_ASR,resultreg,power));
-                  end
-               else
-                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
-             end;
-         end;
 
-{
-       procedure genOrdConstNodeMod;
-         var
-             modreg, maskreg, tempreg : tregister;
-         begin
-             if (tordconstnode(right).value = 0) then begin
-                 internalerror(2005061702);
-             end
-             else if (abs(tordconstnode(right).value.svalue) = 1) then
-             begin
-                // x mod +/-1 is always zero
-                cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, 0, resultreg);
-             end
-             else if (ispowerof2(tordconstnode(right).value, power)) then
-             begin
-                 if (is_signed(right.resultdef)) then begin
-
-                     tempreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
-                     maskreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
-                     modreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
-
-                     cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, abs(tordconstnode(right).value.svalue)-1, modreg);
-                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, maskreg);
-                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, numerator, modreg, tempreg);
-
-                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ANDC, maskreg, maskreg, modreg));
-                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_SUBFIC, modreg, tempreg, 0));
-                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUBFE, modreg, modreg, modreg));
-                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, modreg, maskreg, maskreg);
-                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_INT, maskreg, tempreg, resultreg);
-                 end else begin
-                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).value.svalue-1, numerator, resultreg);
-                 end;
-             end else begin
-                 genOrdConstNodeDiv();
-                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_MUL, OS_INT, tordconstnode(right).value.svalue, resultreg, resultreg);
-                 cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, resultreg, numerator, resultreg);
-             end;
-         end;
-}
+      { Emits code for a division whose divisor (right) is an ordinal constant.
+        Reads numerator and writes resultreg, both locals of the enclosing
+        routine; power, helper1 and helper2 are also locals of that routine.
+        Only the divisors 0, 1, -1 and powers of two are handled here; for any
+        other constant no code is emitted, so the caller must filter first. }
+      procedure genOrdConstNodeDiv;
+        begin
+          { division by a constant zero is rejected as an internal error }
+          if tordconstnode(right).value=0 then
+            internalerror(2005061701)
+          { x div 1: plain register copy }
+          else if tordconstnode(right).value=1 then
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
+          { x div -1: negate the numerator into the result register }
+          else if (tordconstnode(right).value = int64(-1)) then
+            begin
+              cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NEG,OS_INT,numerator,resultreg);
+            end
+          { ispowerof2 stores the exponent in power }
+          else if ispowerof2(tordconstnode(right).value,power) then
+            begin
+              if (is_signed(right.resultdef)) then
+                begin
+                   { Signed case: add a bias before the arithmetic shift.
+                       helper1 := numerator asr 31       (sign replicated to all bits)
+                       helper1 := helper1 lsr (32-power) (keeps the low "power" bits)
+                       helper2 := numerator + helper1
+                       helper2 := helper2 asr power
+                     The result is then copied to resultreg; numerator itself
+                     is left unmodified. }
+                   helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                   helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_ASR,helper1,numerator,31));
+                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LSR,helper1,32-power));
+                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,helper2,numerator,helper1));
+                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_ASR,helper2,power));
+                   cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,helper2,resultreg);
+                 end
+              else
+                { unsigned case: a single logical shift right by power }
+                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
+            end;
+        end;
 
       begin
         secondpass(left);
         secondpass(right);
-        location_copy(location,left.location);
-
-        { put numerator in register }
-        size:=def_cgsize(left.resultdef);
-        location_force_reg(current_asmdata.CurrAsmList,left.location,
-          size,true);
-        location_copy(location,left.location);
-        numerator:=location.register;
-        resultreg:=location.register;
-        if location.loc=LOC_CREGISTER then
+
+        location_force_reg(current_asmdata.CurrAsmList,left.location,OS_INT,true);
+        location_force_reg(current_asmdata.CurrAsmList,right.location,OS_INT,true);
+
+        if (nodetype=divn) and
+           not(is_64bitint(resultdef)) then
           begin
-            location.loc := LOC_REGISTER;
-            location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
-            resultreg:=location.register;
+            size:=def_cgsize(left.resultdef);
+
+            if (right.nodetype=ordconstn) and
+               ((tordconstnode(right).value=1) or
+                (tordconstnode(right).value=int64(-1)) or
+                (tordconstnode(right).value=0) or
+                ispowerof2(tordconstnode(right).value,power)) then
+              begin
+                location_copy(location,left.location);
+                location.loc := LOC_REGISTER;
+                location.register := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                resultreg:=location.register;
+
+                numerator:=left.location.register;
+
+                genOrdConstNodeDiv;
+              end
+            else
+              begin
+                location_copy(location,left.location);
+                location.loc := LOC_REGISTER;
+                location.register := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+                cg.alloccpuregisters(current_asmdata.CurrAsmList, R_INTREGISTER, [RS_R10,RS_R11]);
+
+                if is_signed(left.resultdef) or
+                   is_signed(right.resultdef) then
+                  cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_IDIV,OS_INT,right.location.register,left.location.register,NR_R10)
+                else
+                  cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_DIV,OS_INT,right.location.register,left.location.register,NR_R10);
+
+                cg.dealloccpuregisters(current_asmdata.CurrAsmList, R_INTREGISTER, [RS_R10,RS_R11]);
+
+                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_R10,location.register);
+              end;
           end
-        else if (nodetype=modn) or (right.nodetype=ordconstn) then
+        else if (nodetype=modn) and
+           not(is_64bitint(resultdef)) then
           begin
-            // for a modulus op, and for const nodes we need the result register
-            // to be an extra register
-            resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
-          end;
+            location_copy(location,left.location);
+            location.loc := LOC_REGISTER;
+            location.register := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+            cg.alloccpuregisters(current_asmdata.CurrAsmList, R_INTREGISTER, [RS_R10,RS_R11]);
 
-        if right.nodetype=ordconstn then
-          begin
-            if nodetype=divn then
-              genOrdConstNodeDiv
+            if is_signed(left.resultdef) or
+               is_signed(right.resultdef) then
+              cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_IDIV,OS_INT,right.location.register,left.location.register,NR_R10)
             else
-//              genOrdConstNodeMod;
-          end;
+              cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_DIV,OS_INT,right.location.register,left.location.register,NR_R10);
 
-        location.register:=resultreg;
+            cg.dealloccpuregisters(current_asmdata.CurrAsmList, R_INTREGISTER, [RS_R10,RS_R11]);
+
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_R11,location.register);
+          end
+        else
+          internalerror(2012090701);
 
         { unsigned division/module can only overflow in case of division by zero }
         { (but checking this overflow flag is more convoluted than performing a  }

+ 63 - 14
compiler/avr32/rgcpu.pas

@@ -34,7 +34,12 @@ unit rgcpu;
        rgobj;
 
      type
+
+       { trgcpu }
+
        trgcpu = class(trgobj)
+         procedure add_constraints(reg: Tregister); override;
+         procedure add_cpu_interferences(p: tai); override;
          procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
          procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
        end;
@@ -49,6 +54,64 @@ unit rgcpu;
       cgobj,
       procinfo;
 
+    { Register allocator constraint hook. A register whose subregister is
+      R_SUBQ (64bit int) is made to conflict with all odd int regs, i.e. every
+      second superregister from RS_R1 up to RS_R15. Registers with any other
+      subregister get no additional edges. }
+    procedure trgcpu.add_constraints(reg: Tregister);
+      var
+        target,oddreg : Tsuperregister;
+      begin
+        if getsubreg(reg)<>R_SUBQ then
+          exit;
+
+        target:=getsupreg(reg);
+        oddreg:=RS_R1;
+        repeat
+          add_edge(target,oddreg);
+          inc(oddreg,2);
+        until oddreg>RS_R15;
+      end;
+
+    { Adds CPU-specific interference edges for one list entry. Only
+      instructions are examined:
+        - DIVS/DIVU: the register in operand 0 conflicts with every second
+          superregister from RS_R1 up to RS_R15;
+        - the listed MAC*/MUL* opcodes: the same, but only when the instruction
+          carries the PF_D postfix.
+      All other entries are left untouched. }
+    procedure trgcpu.add_cpu_interferences(p: tai);
+      var
+        insn : taicpu;
+
+      { Adds an edge between dest and RS_R1, RS_R1+2, ... up to RS_R15. }
+      procedure conflict_with_odd_regs(dest: Tsuperregister);
+        var
+          oddreg : Tsuperregister;
+        begin
+          oddreg:=RS_R1;
+          while oddreg<=RS_R15 do
+            begin
+              add_edge(dest,oddreg);
+              inc(oddreg,2);
+            end;
+        end;
+
+      begin
+        if p.typ<>ait_instruction then
+          exit;
+
+        insn:=taicpu(p);
+        case insn.opcode of
+          A_DIVS,
+          A_DIVU:
+            conflict_with_odd_regs(getsupreg(insn.oper[0]^.reg));
+          A_MACHH,
+          A_MACS,
+          A_MACU,
+          A_MACWH,
+          A_MULNWH,
+          A_MULS,
+          A_MULU,
+          A_MULWH:
+            if insn.oppostfix=PF_D then
+              conflict_with_odd_regs(getsupreg(insn.oper[0]^.reg));
+        end;
+      end;
 
     procedure trgcpu.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
       var
@@ -57,20 +120,6 @@ unit rgcpu;
         l : tasmlabel;
         hreg : tregister;
       begin
-        { don't load spilled register between
-          mov lr,pc
-          mov pc,r4
-          but befure the mov lr,pc
-        }
-        if assigned(pos.previous) and
-          (pos.typ=ait_instruction) and
-          (taicpu(pos).opcode=A_MOV) and
-          (taicpu(pos).oper[0]^.typ=top_reg) and
-          (taicpu(pos).oper[0]^.reg=NR_R14) and
-          (taicpu(pos).oper[1]^.typ=top_reg) and
-          (taicpu(pos).oper[1]^.reg=NR_PC) then
-          pos:=tai(pos.previous);
-
         if in_signed_bits(spilltemp.offset,16) then
           begin
             {helplist:=TAsmList.create;

+ 1 - 1
compiler/fpcdefs.inc

@@ -138,7 +138,7 @@
   {$define cpu32bitalu}
   {$define cpuflags}
   {$define cpufpemu}
-  { $define cputargethasfixedstack}
+  {$define cputargethasfixedstack}
 {$endif avr32}
 
 {$ifdef mipsel}

+ 1 - 1
compiler/systems/i_embed.pas

@@ -265,7 +265,7 @@ unit i_embed;
                 constalignmax   : 4;
                 varalignmin     : 0;
                 varalignmax     : 4;
-                localalignmin   : 2;
+                localalignmin   : 4;
                 localalignmax   : 8;
                 recordalignmin  : 0;
                 recordalignmax  : 4;

+ 4 - 2
compiler/systems/t_embed.pas

@@ -675,17 +675,19 @@ begin
       Add('    *(.rodata)');
       Add('    *(.rodata.*)');
       Add('    *(.comment)');
+      Add('    . = ALIGN(4);');
       Add('    _etext = .;');
       Add('    } >flash =0xd703d703');
-      Add('    .data :');
+      Add('    .data : ALIGN(4) ');
       Add('    {');
       Add('    _data = .;');
       Add('    *(.data)');
       Add('    *(.data.*)');
       Add('    KEEP (*(.fpc .fpc.n_version .fpc.n_links))');
+      Add('    . = ALIGN(4);');
       Add('    _edata = .;');
       Add('    } >ram AT >flash');
-      Add('    .bss :');
+      Add('    .bss : ');
       Add('    {');
       Add('    _bss_start = .;');
       Add('    *(.bss)');

+ 1 - 1
rtl/Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2011/06/25]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2012/09/14]
 #
 default: all
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-solaris x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded mipsel-linux avr32-embedded

+ 2 - 2
rtl/embedded/Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2011/06/25]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2012/09/14]
 #
 default: all
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-solaris x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded mipsel-linux avr32-embedded
@@ -306,7 +306,7 @@ ifeq ($(ARCH),arm)
 CPU_UNITS=lpc21x4 at91sam7x256 stellaris stm32f103
 endif
 ifeq ($(ARCH),avr32)
-CPU_UNITS=at32uc3b1256
+CPU_UNITS=at32uc3b1256 at32uc3l064
 endif
 ifeq ($(ARCH),i386)
 CPU_UNITS=multiboot

+ 1 - 1
rtl/embedded/Makefile.fpc

@@ -52,7 +52,7 @@ CPU_UNITS=lpc21x4 at91sam7x256 stellaris stm32f103
 endif
 
 ifeq ($(ARCH),avr32)
-CPU_UNITS=at32uc3b1256
+CPU_UNITS=at32uc3b1256 at32uc3l064
 endif
 
 ifeq ($(ARCH),i386)

+ 7 - 11
rtl/embedded/avr32/at32uc3b1256.pas

@@ -242,10 +242,6 @@ asm
 	rjmp .Lhalt
 end;
 
-procedure DefaultHandler; assembler; nostackframe; public name 'DefaultHandler';
-asm
-end;
-
 procedure StartCode; nostackframe; assembler; [public, alias: '_START'];// interrupt 0;
 asm
    .init
@@ -254,16 +250,16 @@ asm
    .text
 .Lstart:
    // Update stack
-   ld.w sp, .L_stack_top
+   lddpc sp, .L_stack_top
    
    // Set EVBA
-   ld.w r0, .L_evba_base
+   lddpc r0, .L_evba_base
    mtsr 4, r0 // EVBA
    
    // copy initialized data from flash to ram
-   ld.w r1,.L_etext
-   ld.w r2,.L_data
-   ld.w r3,.L_edata
+   lddpc r1,.L_etext
+   lddpc r2,.L_data
+   lddpc r3,.L_edata
 .Lcopyloop:
    cp.w r2,r3
    brhi .Lecopyloop
@@ -273,8 +269,8 @@ asm
 .Lecopyloop:
 
    // clear onboard ram
-   ld.w r1,.L_bss_start
-   ld.w r2,.L_bss_end
+   lddpc r1,.L_bss_start
+   lddpc r2,.L_bss_end
    mov r0, 0
 .Lzeroloop:
    cp.w r1,r2