Browse Source

o merging r22801 of Jeppe Johansen

git-svn-id: trunk@22812 -
florian 12 years ago
parent
commit
970405c0f3

+ 1 - 1
compiler/arm/aasmcpu.pas

@@ -735,7 +735,7 @@ implementation
               { check for pre/post indexed }
               { check for pre/post indexed }
               result := operand_read;
               result := operand_read;
           //Thumb2
           //Thumb2
-          A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV,A_MOVT:
+          A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS:
             if opnr in [0] then
             if opnr in [0] then
               result:=operand_write
               result:=operand_write
             else
             else

+ 161 - 4
compiler/arm/aoptcpu.pas

@@ -342,7 +342,8 @@ Implementation
          {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
          {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
          not (
          not (
            (taicpu(p).opcode in [A_MLA, A_MUL]) and
            (taicpu(p).opcode in [A_MLA, A_MUL]) and
-           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg)
+           (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
+           (current_settings.cputype < cpu_armv6)
          ) and
          ) and
          { Take care to only do this for instructions which REALLY load to the first register.
          { Take care to only do this for instructions which REALLY load to the first register.
            Otherwise
            Otherwise
@@ -1170,7 +1171,10 @@ Implementation
                       add reg2, ...
                       add reg2, ...
                     }
                     }
                     if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
                     if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
-                      RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
+                      begin
+                        if (taicpu(p).ops=3) then
+                          RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
+                      end;
                   end;
                   end;
                 A_MVN:
                 A_MVN:
                   begin
                   begin
@@ -1260,6 +1264,52 @@ Implementation
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
                         p:=hp1;
                         p:=hp1;
+                      end
+                    {
+                      change
+                      uxtb reg2,reg1
+                      uxtb reg3,reg2
+                      dealloc reg2
+                      to
+                      uxtb reg3,reg1
+                    }
+                    else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
+                      (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
+                       (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
+                      { reg1 must not be modified in between }
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                      begin
+                        DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
+                        taicpu(hp1).opcode:=A_UXTB;
+                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                      end
+                    {
+                      change
+                      uxth reg2,reg1
+                      uxth reg3,reg2
+                      dealloc reg2
+                      to
+                      uxth reg3,reg1
+                    }
+                    else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+                      (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
+                       (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
+                      { reg1 must not be modified in between }
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                      begin
+                        DebugMsg('Peephole UxthUxth2Uxth done', p);
+                        taicpu(hp1).opcode:=A_UXTH;
+                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
                       end;
                       end;
                   end;
                   end;
                 A_UXTH:
                 A_UXTH:
@@ -1858,7 +1908,17 @@ Implementation
           result:=true;
           result:=true;
         end
         end
       else if (p.typ=ait_instruction) and
       else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None,PF_S]) and
+        MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
+        (taicpu(p).ops = 3) and
+        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+        (taicpu(p).oper[2]^.typ=top_reg) then
+        begin
+          taicpu(p).ops := 2;
+          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
         (taicpu(p).ops = 3) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
         (taicpu(p).oper[2]^.typ=top_reg) and
         (taicpu(p).oper[2]^.typ=top_reg) and
@@ -1873,7 +1933,7 @@ Implementation
           result:=true;
           result:=true;
         end
         end
       else if (p.typ=ait_instruction) and
       else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and
+        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
         (taicpu(p).ops = 3) and
         (taicpu(p).ops = 3) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
         MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
@@ -1885,6 +1945,33 @@ Implementation
           taicpu(p).ops := 2;
           taicpu(p).ops := 2;
           result:=true;
           result:=true;
         end
         end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
+        (taicpu(p).ops=3) and
+        (taicpu(p).oper[2]^.typ=top_shifterop) and
+        (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
+        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+        begin
+          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+          taicpu(p).oppostfix:=PF_S;
+          taicpu(p).ops := 2;
+
+          if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
+            taicpu(p).loadreg(1, taicpu(p).oper[2]^.shifterop^.rs)
+          else
+            taicpu(p).loadconst(1, taicpu(p).oper[2]^.shifterop^.shiftimm);
+
+          case taicpu(p).oper[2]^.shifterop^.shiftmode of
+            SM_LSL: taicpu(p).opcode:=A_LSL;
+            SM_LSR: taicpu(p).opcode:=A_LSR;
+            SM_ASR: taicpu(p).opcode:=A_ASR;
+            SM_ROR: taicpu(p).opcode:=A_ROR;
+          end;
+          result:=true;
+        end
       else if (p.typ=ait_instruction) and
       else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_AND], [], [PF_None]) and
         MatchInstruction(p, [A_AND], [], [PF_None]) and
         (taicpu(p).ops = 2) and
         (taicpu(p).ops = 2) and
@@ -1917,6 +2004,76 @@ Implementation
 
 
           result := true;
           result := true;
         end
         end
+      {
+       Turn
+       mul reg0, z,w
+       sub/add x, y, reg0
+       dealloc reg0
+
+       into
+
+       mls/mla x,y,z,w
+       }
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
+        (taicpu(p).ops=3) and
+        (taicpu(p).oper[0]^.typ = top_reg) and
+        (taicpu(p).oper[1]^.typ = top_reg) and
+        (taicpu(p).oper[2]^.typ = top_reg) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
+        (((taicpu(hp1).ops=3) and
+          (taicpu(hp1).oper[2]^.typ=top_reg) and
+          (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
+           (MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            (taicpu(hp1).opcode=A_ADD)))) or
+         ((taicpu(hp1).ops=2) and
+          (taicpu(hp1).oper[1]^.typ=top_reg) and
+          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
+        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+        not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then
+        begin
+          if taicpu(hp1).opcode=A_ADD then
+            begin
+              taicpu(hp1).opcode:=A_MLA;
+
+              if taicpu(hp1).ops=3 then
+                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
+                  taicpu(hp1).loadreg(1,taicpu(hp1).oper[2]^.reg);
+
+              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
+              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
+
+              DebugMsg('MulAdd2MLA done', p);
+
+              taicpu(hp1).ops:=4;
+
+              asml.remove(p);
+              p.free;
+              p:=hp1;
+            end
+          else
+            begin
+              taicpu(hp1).opcode:=A_MLS;
+
+              if taicpu(hp1).ops=2 then
+                taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
+
+              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
+              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
+
+              DebugMsg('MulSub2MLS done', p);
+
+              taicpu(hp1).ops:=4;
+
+              asml.remove(p);
+              p.free;
+              p:=hp1;
+            end;
+
+          result:=true;
+        end
       {else if (p.typ=ait_instruction) and
       {else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
         MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_const) and
         (taicpu(p).oper[1]^.typ=top_const) and

+ 53 - 17
compiler/arm/cgcpu.pas

@@ -160,6 +160,8 @@ unit cgcpu;
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
 
 
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+
         function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
         function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
 
 
         procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
         procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
@@ -3170,24 +3172,12 @@ unit cgcpu;
        begin
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
             internalerror(2002090902);
             internalerror(2002090902);
-          if is_shifter_const(a,imm_shift) then
+          if is_thumb_imm(a) then
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
-          { loading of constants with mov and orr }
-          else if (is_shifter_const(a-byte(a),imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a)));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a)));
-            end
-          else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a)));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a)));
-            end
-          else if (is_shifter_const(a-(dword(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((dword(a) shl 8) shr 8,imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-(dword(a) shl 8) shr 8));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(dword(a) shl 8) shr 8));
-            end
+          else if is_thumb_imm(not(a)) then
+            list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
+          else if (a and $FFFF)=a then
+            list.concat(taicpu.op_reg_const(A_MOVW,reg,a))
           else
           else
             begin
             begin
                reference_reset(hr,4);
                reference_reset(hr,4);
@@ -3198,6 +3188,7 @@ unit cgcpu;
                current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
                current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
 
 
                hr.symbol:=l;
                hr.symbol:=l;
+               hr.base:=NR_PC;
                list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
                list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
             end;
             end;
        end;
        end;
@@ -3478,6 +3469,35 @@ unit cgcpu;
                 so.shiftimm:=l1;
                 so.shiftimm:=l1;
                 list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
                 list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
               end
               end
+            { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
+            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
+              begin
+                if l1>32 then{does this ever happen?}
+                  internalerror(201205181);
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=l1;
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
+              end
+            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
+              begin
+                { nothing to do on success }
+              end
+            { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
+              Just using mov x, #0 might allow some easier optimizations down the line. }
+            else if (op = OP_AND) and (dword(a)=0) then
+              list.concat(taicpu.op_reg_const(A_MOV,dst,0))
+            { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
+            else if (op = OP_AND) and (not(dword(a))=0) then
+              list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
+            { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
+              broader range of shifterconstants.}
+            {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
+              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
+            else if (op = OP_AND) and is_thumb_imm(a) then
+              list.concat(taicpu.op_reg_reg_const(A_MOV,dst,src,dword(a)))
+            else if (op = OP_AND) and is_thumb_imm(not(dword(a))) then
+              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
             else
             else
               begin
               begin
                 tmpreg:=getintregister(list,size);
                 tmpreg:=getintregister(list,size);
@@ -3810,6 +3830,22 @@ unit cgcpu;
           list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
           list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
       end;
       end;
 
 
+    { Emits code which stores the index of a set bit of src in dst:
+        reverse=true  -> index of the most significant set bit  (bsr)
+        reverse=false -> index of the least significant set bit (bsf)
+      In both cases dst becomes 255 if src is zero. No flag setting
+      instructions are emitted and no temporary register is needed.
+      The size parameter is not evaluated. }
+    procedure Tthumb2cgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+      var
+        so : tshifterop;
+      begin
+        if reverse then
+          begin
+            { dst:=31-clz(src); clz(0)=32 results in -1 which uxtb turns into 255 }
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
+            list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
+            list.Concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
+          end
+        else
+          begin
+            { clz(rbit(src)) is the number of trailing zero bits of src, i.e. it is
+              already the wanted index (0..31), or 32 if src is zero }
+            list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
+            { rsb #31 followed by eor #31 leaves 0..31 unchanged (31-x = x xor 31 in
+              this range) but maps 32 to a negative value ... }
+            list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));
+            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,31));
+            { ... whose sign bit is spread over the whole register, so the final
+              uxtb yields 255 for src=0 and the plain index otherwise }
+            shifterop_reset(so);
+            so.shiftmode:=SM_ASR;
+            so.shiftimm:=31;
+            list.Concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,dst,dst,dst,so));
+            list.Concat(taicpu.op_reg_reg(A_UXTB,dst,dst));
+          end
+      end;
 
 
    function Tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
    function Tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
       var
       var

+ 38 - 0
compiler/arm/cpubase.pas

@@ -365,6 +365,7 @@ unit cpubase;
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
 
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
+    function is_thumb_imm(d : aint) : boolean; { Doesn't handle ROR_C detection }
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
     function dwarf_reg(r:tregister):shortint;
     function dwarf_reg(r:tregister):shortint;
 
 
@@ -550,6 +551,43 @@ unit cpubase;
         result:=false;
         result:=false;
       end;
       end;
 
 
+    { Returns true if d can be used as immediate operand by the Thumb-2 code
+      generator. Accepted are
+        - plain 8 bit values                 $000000XY
+        - the replicated byte patterns       $00XY00XY, $XY00XY00 and $XYXYXYXY
+        - every constant accepted by is_shifter_const
+      Doesn't handle ROR_C detection.
+      NOTE(review): is_shifter_const is defined elsewhere; whether the set of
+      constants it accepts is fully encodable in Thumb-2 should be confirmed. }
+    function is_thumb_imm(d: aint): boolean;
+      var
+        imm : byte;
+      begin
+        { $000000XY }
+        if (d and $FF) = d then
+          result:=true
+        { upper and lower halfword are equal, so d is one of the replicated
+          patterns if either the odd bytes are zero, the even bytes are zero,
+          or both bytes of the halfword are equal }
+        else if ((d shr 16)=(d and $FFFF)) and
+                (
+                  { $00XY00XY }
+                  ((d and $FF00FF00) = 0) or
+                  { $XY00XY00 }
+                  ((d and $00FF00FF) = 0) or
+                  { $XYXYXYXY: only byte 1 may be compared with byte 0, without
+                    the mask the upper bytes made this test fail for every d<>0 }
+                  (((d shr 8) and $FF)=(d and $FF))
+                ) then
+          result:=true
+        else
+          { imm only receives the shift amount, which is of no interest here }
+          result:=is_shifter_const(d,imm);
+      end;
+
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword) : boolean;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword) : boolean;
       var
       var
         d, i, i2: Dword;
         d, i, i2: Dword;

+ 3 - 2
compiler/arm/cpuinfo.pas

@@ -1066,6 +1066,7 @@ Const
        CPUARM_HAS_CLZ,        { CPU supports the CLZ instruction                          }
        CPUARM_HAS_CLZ,        { CPU supports the CLZ instruction                          }
        CPUARM_HAS_EDSP,       { CPU supports the PLD,STRD,LDRD,MCRR and MRRC instructions }
        CPUARM_HAS_EDSP,       { CPU supports the PLD,STRD,LDRD,MCRR and MRRC instructions }
        CPUARM_HAS_REV,        { CPU supports the REV instruction                          }
        CPUARM_HAS_REV,        { CPU supports the REV instruction                          }
+       CPUARM_HAS_RBIT,       { CPU supports the RBIT instruction                         }
        CPUARM_HAS_LDREX,
        CPUARM_HAS_LDREX,
        CPUARM_HAS_IDIV
        CPUARM_HAS_IDIV
       );
       );
@@ -1088,8 +1089,8 @@ Const
        { cpu_armv7    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
        { cpu_armv7    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
        { cpu_armv7a   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
        { cpu_armv7a   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
        { cpu_armv7r   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
        { cpu_armv7r   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
-       { cpu_armv7m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV],
-       { cpu_armv7em  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV]
+       { cpu_armv7m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV],
+       { cpu_armv7em  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV]
      );
      );
 
 
 Implementation
 Implementation

+ 5 - 1
compiler/options.pas

@@ -3268,7 +3268,11 @@ if (target_info.abi = abi_eabihf) then
     this is not perfect but the current implementation bsf/bsr does not allow another
     this is not perfect but the current implementation bsf/bsr does not allow another
     solution }
     solution }
   if CPUARM_HAS_CLZ in cpu_capabilities[init_settings.cputype] then
   if CPUARM_HAS_CLZ in cpu_capabilities[init_settings.cputype] then
-    def_system_macro('FPC_HAS_INTERNAL_BSR');
+    begin
+      def_system_macro('FPC_HAS_INTERNAL_BSR');
+      if CPUARM_HAS_RBIT in cpu_capabilities[init_settings.cputype] then
+        def_system_macro('FPC_HAS_INTERNAL_BSF');
+    end;
 {$endif}
 {$endif}
 
 
 
 

+ 30 - 103
rtl/arm/thumb2.inc

@@ -505,140 +505,67 @@ asm
 end;
 end;
 {$endif}
 {$endif}
 
 
-
-var
-  fpc_system_lock: longint; export name 'fpc_system_lock';
-
 function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
 function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
 asm
 asm
-// lock
-  ldr r3, .Lfpc_system_lock
-  mov r1, #1
 .Lloop:
 .Lloop:
-  ldrex r2, [r3]
-  cmp r2, #0
-  itt eq
-  strexeq r2, r1, [r3]
-  cmpeq r2, #0
+  ldrex ip, [r0]
+  sub ip, #1
+  strex r3, ip, [r0]
+  cmp r3, #0
   bne .Lloop
   bne .Lloop
-// do the job
-  ldr r1, [r0]
-  sub r1, r1, #1
-  str r1, [r0]
-  mov r0, r1
-// unlock and return
-  str r2, [r3]
-  mov pc, lr
   
   
-.Lfpc_system_lock:
-  .long fpc_system_lock
+  mov r0, ip
 end;
 end;
 
 
 
 
 function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
 function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
 asm
 asm
-// lock
-  ldr r3, .Lfpc_system_lock
-  mov r1, #1
 .Lloop:
 .Lloop:
-  ldrex r2, [r3]
-  cmp r2, #0
-  itt eq
-  strexeq r2, r1, [r3]
-  cmpeq r2, #0
+  ldrex ip, [r0]
+  add ip, #1
+  strex r3, ip, [r0]
+  cmp r3, #0
   bne .Lloop
   bne .Lloop
-// do the job
-  ldr r1, [r0]
-  add r1, r1, #1
-  str r1, [r0]
-  mov r0, r1
-// unlock and return
-  str r2, [r3]
-  mov pc, lr
-
-.Lfpc_system_lock:
-  .long fpc_system_lock
+  
+  mov r0, ip
 end;
 end;
 
 
 
 
 function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
 function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
 asm
 asm
-
-// lock
-  ldr r3, .Lfpc_system_lock
-  mov r2, #1
 .Lloop:
 .Lloop:
-  ldrex r2, [r3]
-  cmp r2, #0
-  itt eq
-  strexeq r2, r12, [r3]
-  cmpeq r2, #0
+  ldrex ip, [r0]
+  strex r3, r1, [r0]
+  cmp r3, #0
   bne .Lloop
   bne .Lloop
-// do the job
-  ldr r2, [r0]
-  str r1, [r0]
-  mov r0, r2
-// unlock and return
-  mov r2, #0
-  str r2, [r3]
-  mov pc, lr
-
-.Lfpc_system_lock:
-  .long fpc_system_lock
+  
+  mov r0, ip
 end;
 end;
 
 
 function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
 function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
 asm
 asm
-// lock
-  ldr r3, .Lfpc_system_lock
-  mov r2, #1
 .Lloop:
 .Lloop:
-  ldrex r2, [r3]
-  cmp r2, #0
-  itt eq
-  strexeq r2, r12, [r3]
-  cmpeq r2, #0
+  ldrex ip, [r0]
+  add r2, ip, r1
+  strex r3, r2, [r0]
+  cmp r3, #0
   bne .Lloop
   bne .Lloop
-// do the job
-  ldr r2, [r0]
-  add r1, r1, r2
-  str r1, [r0]
-  mov r0, r2
-// unlock and return
-  mov r2, #0
-  str r2, [r3]
-  mov pc, lr
-
-.Lfpc_system_lock:
-  .long fpc_system_lock
+  
+  mov r0, ip
 end;
 end;
 
 
-
 function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
 function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
 asm
 asm
-// lock
-  ldr r12, .Lfpc_system_lock
-  mov r3, #1
 .Lloop:
 .Lloop:
-  ldrex r2, [r12]
-  cmp r2, #0
-  itt eq
-  strexeq r2, r1, [r12]
-  cmpeq r2, #0
+  ldrex ip, [r0]
+  cmp ip, r2
+  ite eq
+  strexeq r3, r1, [r0]
+  movne r3, #0
+  cmp r3, #0
   bne .Lloop
   bne .Lloop
-// do the job
-  ldr r3, [r0]
-  cmp r3, r2
-  it eq
-  streq r1, [r0]
-  mov r0, r3
-// unlock and return
-  mov r3, #0
-  str r3, [r12]
-  mov pc, lr
-
-.Lfpc_system_lock:
-  .long fpc_system_lock
+  
+  mov r0, ip
 end;
 end;
 
 
 {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
 {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}