Переглянути джерело

* merged armthum branch
-- Zusammenführen der Unterschiede zwischen Projektarchiv-URLs in ».«:
U rtl/arm/setjump.inc
A rtl/arm/thumb2.inc
U rtl/arm/divide.inc
A rtl/embedded/arm/stm32f103.pp
U rtl/inc/system.inc
U compiler/alpha/cgcpu.pas
U compiler/sparc/cgcpu.pas
U compiler/i386/cgcpu.pas
U compiler/ncgld.pas
U compiler/powerpc/cgcpu.pas
U compiler/avr/cgcpu.pas
U compiler/aggas.pas
U compiler/powerpc64/cgcpu.pas
U compiler/x86_64/cgcpu.pas
U compiler/cgobj.pas
U compiler/psystem.pas
U compiler/aasmtai.pas
U compiler/m68k/cgcpu.pas
U compiler/ncgutil.pas
U compiler/rautils.pas
U compiler/arm/raarmgas.pas
U compiler/arm/armatts.inc
U compiler/arm/cgcpu.pas
U compiler/arm/armins.dat
U compiler/arm/rgcpu.pas
U compiler/arm/cpubase.pas
U compiler/arm/agarmgas.pas
U compiler/arm/cpuinfo.pas
U compiler/arm/armop.inc
U compiler/arm/narmadd.pas
U compiler/arm/aoptcpu.pas
U compiler/arm/armatt.inc
U compiler/arm/aasmcpu.pas
U compiler/systems/t_embed.pas
U compiler/psub.pas
U compiler/options.pas

git-svn-id: trunk@13801 -

florian 16 роки тому
батько
коміт
515774b864

+ 2 - 0
.gitattributes

@@ -5453,6 +5453,7 @@ rtl/arm/setjump.inc svneol=native#text/plain
 rtl/arm/setjumph.inc svneol=native#text/plain
 rtl/arm/strings.inc svneol=native#text/plain
 rtl/arm/stringss.inc svneol=native#text/plain
+rtl/arm/thumb2.inc svneol=native#text/plain
 rtl/atari/os.inc svneol=native#text/plain
 rtl/atari/prt0.as svneol=native#text/plain
 rtl/atari/readme -text
@@ -5559,6 +5560,7 @@ rtl/embedded/Makefile svneol=native#text/plain
 rtl/embedded/Makefile.fpc svneol=native#text/plain
 rtl/embedded/arm/at91sam7x256.pp svneol=native#text/plain
 rtl/embedded/arm/lpc21x4.pp svneol=native#text/plain
+rtl/embedded/arm/stm32f103.pp svneol=native#text/plain
 rtl/embedded/check.inc svneol=native#text/plain
 rtl/embedded/empty.cfg -text
 rtl/embedded/rtl.cfg -text

+ 11 - 0
compiler/aasmtai.pas

@@ -76,6 +76,9 @@ interface
 {$ifdef m68k}
           ait_labeled_instruction,
 {$endif m68k}
+{$ifdef arm}
+          ait_thumb_func,
+{$endif arm}
           { used to split into tiny assembler files }
           ait_cutobject,
           ait_regalloc,
@@ -160,6 +163,9 @@ interface
 {$ifdef m68k}
           'labeled_instr',
 {$endif m68k}
+{$ifdef arm}
+          'thumb_func',
+{$endif arm}
           'cut',
           'regalloc',
           'tempalloc',
@@ -173,6 +179,7 @@ interface
        { ARM only }
        ,top_regset
        ,top_shifterop
+       ,top_conditioncode
 {$endif arm}
 {$ifdef m68k}
        { m68k only }
@@ -208,6 +215,7 @@ interface
       {$ifdef arm}
           top_regset : (regset:^tcpuregisterset);
           top_shifterop : (shifterop : pshifterop);
+          top_conditioncode: (cc: TAsmCond);
       {$endif arm}
       {$ifdef m68k}
           top_regset : (regset:^tcpuregisterset);
@@ -231,6 +239,9 @@ interface
                      ait_stab,ait_function_name,
                      ait_cutobject,ait_marker,ait_align,ait_section,ait_comment,
                      ait_const,
+{$ifdef arm}
+                     ait_thumb_func,
+{$endif arm}
                      ait_real_32bit,ait_real_64bit,ait_real_80bit,ait_comp_64bit,ait_real_128bit,
                      ait_symbol
                     ];

+ 6 - 1
compiler/aggas.pas

@@ -34,7 +34,6 @@ interface
       aasmbase,aasmtai,aasmdata,aasmcpu,
       assemble;
 
-
     type
       TCPUInstrWriter = class;
       {# This is a derived class which is used to write
@@ -1044,6 +1043,12 @@ implementation
                  end;
                AsmWriteLn(tai_symbol(hp).sym.name + ':');
              end;
+{$ifdef arm}
+           ait_thumb_func:
+             begin
+               AsmWriteLn(#9'.thumb_func');
+             end;
+{$endif arm}
 
            ait_symbol_end :
              begin

+ 7 - 0
compiler/alpha/cgcpu.pas

@@ -51,6 +51,8 @@ tcgalpha = class(tcg)
   procedure g_restore_frame_pointer(list : TAsmList);override;
 end;
 
+procedure create_codegen;
+
 implementation
 
 uses
@@ -157,4 +159,9 @@ begin
 end;
 
 
+procedure create_codegen;
+  begin
+    cg:=tcgalpha.create;
+  end;
+
 end.

+ 51 - 0
compiler/arm/aasmcpu.pas

@@ -103,6 +103,8 @@ uses
       { co proc. ld/st operations }
       OT_AM5       = $00080000;
       OT_AMMASK    = $000f0000;
+      { IT instruction }
+      OT_CONDITION = $00100000;
 
       OT_MEMORYAM2 = OT_MEMORY or OT_AM2;
       OT_MEMORYAM3 = OT_MEMORY or OT_AM3;
@@ -159,6 +161,7 @@ uses
          roundingmode : troundingmode;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadregset(opidx:longint;const s:tcpuregisterset);
+         procedure loadconditioncode(opidx:longint;const cond:tasmcond);
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -180,6 +183,9 @@ uses
          { SFM/LFM }
          constructor op_reg_const_ref(op : tasmop;_op1 : tregister;_op2 : aint;_op3 : treference);
 
+         { ITxxx }
+         constructor op_cond(op: tasmop; cond: tasmcond);
+
          { *M*LL }
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
 
@@ -229,6 +235,10 @@ uses
         { nothing to add }
       end;
 
+      tai_thumb_func = class(tai)
+        constructor create;
+      end;
+
     function spilling_create_load(const ref:treference;r:tregister):Taicpu;
     function spilling_create_store(r:tregister; const ref:treference):Taicpu;
 
@@ -290,6 +300,19 @@ implementation
       end;
 
 
+    procedure taicpu.loadconditioncode(opidx:longint;const cond:tasmcond);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+         begin
+           if typ<>top_conditioncode then
+             clearop(opidx);
+           cc:=cond;
+           typ:=top_conditioncode;
+         end;
+      end;
+
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -402,6 +425,14 @@ implementation
       end;
 
 
+    constructor taicpu.op_cond(op: tasmop; cond: tasmcond);
+      begin
+        inherited create(op);
+        ops:=0;
+        condition := cond;
+      end;
+
+
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
        begin
          inherited create(op);
@@ -574,6 +605,20 @@ implementation
             else
               { check for pre/post indexed }
               result := operand_read;
+          //Thumb2
+          A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV,A_MOVT:
+            if opnr in [0] then
+              result:=operand_write
+            else
+              result:=operand_read;
+          A_LDREX:
+            if opnr in [0] then
+              result:=operand_write
+            else
+              result:=operand_read;
+          A_STREX:
+            if opnr in [0,1,2] then
+              result:=operand_write;
           else
             internalerror(200403151);
         end;
@@ -2516,6 +2561,12 @@ static char *CC[] =
 *)
 {$endif dummy}
 
+  constructor tai_thumb_func.create;
+    begin
+      inherited create;
+      typ:=ait_thumb_func;
+    end;
+
 begin
   cai_align:=tai_align;
 end.

+ 24 - 1
compiler/arm/agarmgas.pas

@@ -38,6 +38,7 @@ unit agarmgas;
       TARMGNUAssembler=class(TGNUassembler)
         constructor create(smart: boolean); override;
         function MakeCmdLine: TCmdStr; override;
+        procedure WriteExtraHeader; override;
       end;
 
      TArmInstrWriter=class(TCPUInstrWriter)
@@ -79,6 +80,18 @@ unit agarmgas;
         result:=inherited MakeCmdLine;
         if (current_settings.fputype = fpu_soft) then
           result:='-mfpu=softvfp '+result;
+
+        if current_settings.cputype = cpu_cortexm3 then
+          result:='-mcpu=cortex-m3 -mthumb -mthumb-interwork '+result;
+        if current_settings.cputype = cpu_armv7m then
+          result:='-march=armv7m -mthumb -mthumb-interwork '+result;
+      end;
+
+    procedure TArmGNUAssembler.WriteExtraHeader;
+      begin
+        inherited WriteExtraHeader;
+        if current_settings.cputype in cpu_thumb2 then
+          AsmWriteLn(#9'.syntax unified');
       end;
 
 {****************************************************************************}
@@ -189,6 +202,8 @@ unit agarmgas;
                   end;
               getopstr:=getopstr+'}';
             end;
+			    top_conditioncode:
+			      getopstr:=cond2str[o.cc];
           top_ref:
             if o.ref^.refaddr=addr_full then
               begin
@@ -215,7 +230,15 @@ unit agarmgas;
         sep: string[3];
     begin
       op:=taicpu(hp).opcode;
-      s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix];
+      if current_settings.cputype in cpu_thumb2 then
+        begin
+          if taicpu(hp).ops = 0 then
+            s:=#9+gas_op2str[op]+' '+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
+          else
+            s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+cond2str[taicpu(hp).condition]; // Conditional infixes are deprecated in unified syntax
+        end
+      else
+        s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix];
       if taicpu(hp).ops<>0 then
         begin
           sep:=#9;

+ 12 - 0
compiler/arm/aoptcpu.pas

@@ -36,6 +36,12 @@ Type
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
   End;
+  
+  
+  TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
+    { uses the same constructor as TAopObj }
+    procedure PeepHoleOptPass2;override;
+  End;
 
 Implementation
 
@@ -322,6 +328,12 @@ Implementation
         end;
     end;
 
+
+  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
+    begin
+      { TODO: Add optimizer code }
+    end;
+
 begin
   casmoptimizer:=TCpuAsmOptimizer;
 End.

+ 25 - 1
compiler/arm/armatt.inc

@@ -177,5 +177,29 @@
 'ftouid',
 'ftouis',
 'fuitod',
-'fuitos'
+'fuitos',
+'asr',
+'lsr',
+'lsl',
+'ror',
+'sdiv',
+'udiv',
+'movt',
+'ldrex',
+'strex',
+'it',
+'ite',
+'itt',
+'itee',
+'itte',
+'itet',
+'ittt',
+'iteee',
+'ittee',
+'itete',
+'ittte',
+'iteet',
+'ittet',
+'itett',
+'itttt'
 );

+ 24 - 0
compiler/arm/armatts.inc

@@ -177,5 +177,29 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 48 - 0
compiler/arm/armins.dat

@@ -573,3 +573,51 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 [FUITODcc]
 
 [FUITOScc]
+
+[ASRcc]
+
+[LSRcc]
+
+[LSLcc]
+
+[RORcc]
+
+[SDIVcc]
+
+[UDIVcc]
+
+[MOVTcc]
+
+[LDREXcc]
+
+[STREXcc]
+
+[IT]
+
+[ITE]
+
+[ITT]
+
+[ITEE]
+
+[ITTE]
+
+[ITET]
+
+[ITTT]
+
+[ITEEE]
+
+[ITTEE]
+
+[ITETE]
+
+[ITTTE]
+
+[ITEET]
+
+[ITTET]
+
+[ITETT]
+
+[ITTTT]

+ 25 - 1
compiler/arm/armop.inc

@@ -177,5 +177,29 @@ A_FTOSIS,
 A_FTOUID,
 A_FTOUIS,
 A_FUITOD,
-A_FUITOS
+A_FUITOS,
+A_ASR,
+A_LSR,
+A_LSL,
+A_ROR,
+A_SDIV,
+A_UDIV,
+A_MOVT,
+A_LDREX,
+A_STREX,
+A_IT,
+A_ITE,
+A_ITT,
+A_ITEE,
+A_ITTE,
+A_ITET,
+A_ITTT,
+A_ITEEE,
+A_ITTEE,
+A_ITETE,
+A_ITTTE,
+A_ITEET,
+A_ITTET,
+A_ITETT,
+A_ITTTT
 );

+ 1098 - 183
compiler/arm/cgcpu.pas

@@ -39,8 +39,6 @@ unit cgcpu;
       tcgarm = class(tcg)
         { true, if the next arithmetic operation should modify the flags }
         cgsetflags : boolean;
-        procedure init_register_allocators;override;
-        procedure done_register_allocators;override;
 
         procedure a_param_const(list : TAsmList;size : tcgsize;a : aint;const paraloc : TCGPara);override;
         procedure a_param_ref(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
@@ -61,9 +59,7 @@ unit cgcpu;
         procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
 
         { move instructions }
-        procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : aint;reg : tregister);override;
         procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
-        procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
         procedure a_load_reg_reg(list : TAsmList; fromsize, tosize : tcgsize;reg1,reg2 : tregister);override;
         function a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
         function a_internal_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister):treference;
@@ -103,7 +99,7 @@ unit cgcpu;
 
         procedure a_jmp_cond(list : TAsmList;cond : TOpCmp;l: tasmlabel);
         procedure fixref(list : TAsmList;var ref : treference);
-        function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
+        function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; virtual;
 
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
         procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: aint); override;
@@ -115,6 +111,14 @@ unit cgcpu;
         function get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
       end;
 
+      tarmcgarm = class(tcgarm)
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+
+        procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : aint;reg : tregister);override;
+        procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
+      end;
+
       tcg64farm = class(tcg64f32)
         procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
         procedure a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);override;
@@ -124,6 +128,30 @@ unit cgcpu;
         procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
       end;
 
+      Tthumb2cgarm = class(tcgarm)
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+
+        procedure a_call_reg(list : TAsmList;reg: tregister);override;
+
+        procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : aint;reg : tregister);override;
+        procedure a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override;
+
+        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
+        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);override;
+
+        procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
+
+        procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
+        procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
+
+        function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
+      end;
+
+      tthumb2cg64farm = class(tcg64farm)
+        procedure a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);override;
+      end;
+
     const
       OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
                            C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI);
@@ -131,12 +159,14 @@ unit cgcpu;
       winstackpagesize = 4096;
 
     function get_fpu_postfix(def : tdef) : toppostfix;
+    procedure create_codegen;
 
   implementation
 
 
     uses
        globals,verbose,systems,cutils,
+       aopt,aoptcpu,
        fmodule,
        symconst,symsym,
        tgobj,
@@ -164,7 +194,7 @@ unit cgcpu;
       end;
 
 
-    procedure tcgarm.init_register_allocators;
+    procedure tarmcgarm.init_register_allocators;
       begin
         inherited init_register_allocators;
         { currently, we save R14 always, so we can use it }
@@ -184,7 +214,7 @@ unit cgcpu;
       end;
 
 
-    procedure tcgarm.done_register_allocators;
+    procedure tarmcgarm.done_register_allocators;
       begin
         rg[R_INTREGISTER].free;
         rg[R_FPUREGISTER].free;
@@ -193,6 +223,174 @@ unit cgcpu;
       end;
 
 
+     procedure tarmcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : aint;reg : tregister);
+       var
+          imm_shift : byte;
+          l : tasmlabel;
+          hr : treference;
+       begin
+          if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
+            internalerror(2002090902);
+          if is_shifter_const(a,imm_shift) then
+            list.concat(taicpu.op_reg_const(A_MOV,reg,a))
+          else if is_shifter_const(not(a),imm_shift) then
+            list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
+          { loading of constants with mov and orr }
+          else if (is_shifter_const(a-byte(a),imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a)));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a)));
+            end
+          else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a)));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a)));
+            end
+          else if (is_shifter_const(a-(dword(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((dword(a) shl 8) shr 8,imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-(dword(a) shl 8) shr 8));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(dword(a) shl 8) shr 8));
+            end
+          else
+            begin
+               reference_reset(hr,4);
+
+               current_asmdata.getjumplabel(l);
+               cg.a_label(current_procinfo.aktlocaldata,l);
+               hr.symboldata:=current_procinfo.aktlocaldata.last;
+               current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
+
+               hr.symbol:=l;
+               list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
+            end;
+       end;
+
+
+     procedure tarmcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
+       var
+         oppostfix:toppostfix;
+         usedtmpref: treference;
+         tmpreg,tmpreg2 : tregister;
+         so : tshifterop;
+         dir : integer;
+       begin
+         if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
+           FromSize := ToSize;
+         case FromSize of
+           { signed integer registers }
+           OS_8:
+             oppostfix:=PF_B;
+           OS_S8:
+             oppostfix:=PF_SB;
+           OS_16:
+             oppostfix:=PF_H;
+           OS_S16:
+             oppostfix:=PF_SH;
+           OS_32,
+           OS_S32:
+             oppostfix:=PF_None;
+           else
+             InternalError(200308297);
+         end;
+         if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
+           begin
+             if target_info.endian=endian_big then
+               dir:=-1
+             else
+               dir:=1;
+             case FromSize of
+               OS_16,OS_S16:
+                 begin
+                   { only complicated references need an extra loadaddr }
+                   if assigned(ref.symbol) or
+                     (ref.index<>NR_NO) or
+                     (ref.offset<-4095) or
+                     (ref.offset>4094) or
+                     { sometimes the compiler reused registers }
+                     (reg=ref.index) or
+                     (reg=ref.base) then
+                     begin
+                       tmpreg2:=getintregister(list,OS_INT);
+                       a_loadaddr_ref_reg(list,ref,tmpreg2);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
+                     end
+                   else
+                     usedtmpref:=ref;
+
+                   if target_info.endian=endian_big then
+                     inc(usedtmpref.offset,1);
+                   shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
+                   tmpreg:=getintregister(list,OS_INT);
+                   a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
+                   inc(usedtmpref.offset,dir);
+                   if FromSize=OS_16 then
+                     a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
+                   else
+                     a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
+                   list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                 end;
+               OS_32,OS_S32:
+                 begin
+                   tmpreg:=getintregister(list,OS_INT);
+
+                   { only complicated references need an extra loadaddr }
+                   if assigned(ref.symbol) or
+                     (ref.index<>NR_NO) or
+                     (ref.offset<-4095) or
+                     (ref.offset>4092) or
+                     { sometimes the compiler reused registers }
+                     (reg=ref.index) or
+                     (reg=ref.base) then
+                     begin
+                       tmpreg2:=getintregister(list,OS_INT);
+                       a_loadaddr_ref_reg(list,ref,tmpreg2);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
+                     end
+                   else
+                     usedtmpref:=ref;
+
+                   shifterop_reset(so);so.shiftmode:=SM_LSL;
+                   if ref.alignment=2 then
+                     begin
+                       if target_info.endian=endian_big then
+                         inc(usedtmpref.offset,2);
+                       a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
+                       inc(usedtmpref.offset,dir*2);
+                       a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
+                       so.shiftimm:=16;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                     end
+                   else
+                     begin
+                       if target_info.endian=endian_big then
+                         inc(usedtmpref.offset,3);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
+                       inc(usedtmpref.offset,dir);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                       so.shiftimm:=8;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                       inc(usedtmpref.offset,dir);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                       so.shiftimm:=16;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                       inc(usedtmpref.offset,dir);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                       so.shiftimm:=24;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                     end;
+                 end
+               else
+                 handle_load_store(list,A_LDR,oppostfix,reg,ref);
+             end;
+           end
+         else
+           handle_load_store(list,A_LDR,oppostfix,reg,ref);
+
+         if (fromsize=OS_S8) and (tosize = OS_16) then
+           a_load_reg_reg(list,OS_16,OS_32,reg,reg);
+       end;
+
+
     procedure tcgarm.a_param_const(list : TAsmList;size : tcgsize;a : aint;const paraloc : TCGPara);
       var
         ref: treference;
@@ -479,6 +677,17 @@ unit cgcpu;
                  list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
               end;
             else
+              {if (op in [OP_SUB, OP_ADD]) and
+                 ((a < 0) or
+                  (a > 4095)) then
+                begin
+                  tmpreg:=getintregister(list,size);
+                  list.concat(taicpu.op_reg_const(A_MOVT, tmpreg, (a shr 16) and $FFFF));
+                  list.concat(taicpu.op_reg_const(A_MOV, tmpreg, a and $FFFF));
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
+                   ));
+                end
+              else}
               list.concat(setoppostfix(
                   taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
               ));
@@ -645,49 +854,6 @@ unit cgcpu;
       end;
 
 
-     procedure tcgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : aint;reg : tregister);
-       var
-          imm_shift : byte;
-          l : tasmlabel;
-          hr : treference;
-       begin
-          if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
-            internalerror(2002090902);
-          if is_shifter_const(a,imm_shift) then
-            list.concat(taicpu.op_reg_const(A_MOV,reg,a))
-          else if is_shifter_const(not(a),imm_shift) then
-            list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
-          { loading of constants with mov and orr }
-          else if (is_shifter_const(a-byte(a),imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a)));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a)));
-            end
-          else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a)));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a)));
-            end
-          else if (is_shifter_const(a-(dword(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((dword(a) shl 8) shr 8,imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-(dword(a) shl 8) shr 8));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(dword(a) shl 8) shr 8));
-            end
-          else
-            begin
-               reference_reset(hr,4);
-
-               current_asmdata.getjumplabel(l);
-               cg.a_label(current_procinfo.aktlocaldata,l);
-               hr.symboldata:=current_procinfo.aktlocaldata.last;
-               current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
-
-               hr.symbol:=l;
-               list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
-            end;
-       end;
-
-
     function tcgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
       var
         tmpreg : tregister;
@@ -922,138 +1088,13 @@ unit cgcpu;
        end;
 
 
-     procedure tcgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
+     function tcgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
        var
          oppostfix:toppostfix;
-         usedtmpref: treference;
-         tmpreg,tmpreg2 : tregister;
-         so : tshifterop;
-         dir : integer;
        begin
-         if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
-           FromSize := ToSize;
-         case FromSize of
+         case ToSize of
            { signed integer registers }
-           OS_8:
-             oppostfix:=PF_B;
-           OS_S8:
-             oppostfix:=PF_SB;
-           OS_16:
-             oppostfix:=PF_H;
-           OS_S16:
-             oppostfix:=PF_SH;
-           OS_32,
-           OS_S32:
-             oppostfix:=PF_None;
-           else
-             InternalError(200308297);
-         end;
-         if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
-           begin
-             if target_info.endian=endian_big then
-               dir:=-1
-             else
-               dir:=1;
-             case FromSize of
-               OS_16,OS_S16:
-                 begin
-                   { only complicated references need an extra loadaddr }
-                   if assigned(ref.symbol) or
-                     (ref.index<>NR_NO) or
-                     (ref.offset<-4095) or
-                     (ref.offset>4094) or
-                     { sometimes the compiler reused registers }
-                     (reg=ref.index) or
-                     (reg=ref.base) then
-                     begin
-                       tmpreg2:=getintregister(list,OS_INT);
-                       a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
-                     end
-                   else
-                     usedtmpref:=ref;
-
-                   if target_info.endian=endian_big then
-                     inc(usedtmpref.offset,1);
-                   shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
-                   tmpreg:=getintregister(list,OS_INT);
-                   a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
-                   inc(usedtmpref.offset,dir);
-                   if FromSize=OS_16 then
-                     a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
-                   else
-                     a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
-                   list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
-                 end;
-               OS_32,OS_S32:
-                 begin
-                   tmpreg:=getintregister(list,OS_INT);
-
-                   { only complicated references need an extra loadaddr }
-                   if assigned(ref.symbol) or
-                     (ref.index<>NR_NO) or
-                     (ref.offset<-4095) or
-                     (ref.offset>4092) or
-                     { sometimes the compiler reused registers }
-                     (reg=ref.index) or
-                     (reg=ref.base) then
-                     begin
-                       tmpreg2:=getintregister(list,OS_INT);
-                       a_loadaddr_ref_reg(list,ref,tmpreg2);
-                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
-                     end
-                   else
-                     usedtmpref:=ref;
-
-                   shifterop_reset(so);so.shiftmode:=SM_LSL;
-                   if ref.alignment=2 then
-                     begin
-                       if target_info.endian=endian_big then
-                         inc(usedtmpref.offset,2);
-                       a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
-                       inc(usedtmpref.offset,dir*2);
-                       a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
-                       so.shiftimm:=16;
-                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
-                     end
-                   else
-                     begin
-                       if target_info.endian=endian_big then
-                         inc(usedtmpref.offset,3);
-                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
-                       inc(usedtmpref.offset,dir);
-                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
-                       so.shiftimm:=8;
-                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
-                       inc(usedtmpref.offset,dir);
-                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
-                       so.shiftimm:=16;
-                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
-                       inc(usedtmpref.offset,dir);
-                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
-                       so.shiftimm:=24;
-                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
-                     end;
-                 end
-               else
-                 handle_load_store(list,A_LDR,oppostfix,reg,ref);
-             end;
-           end
-         else
-           handle_load_store(list,A_LDR,oppostfix,reg,ref);
-
-         if (fromsize=OS_S8) and (tosize = OS_16) then
-           a_load_reg_reg(list,OS_16,OS_32,reg,reg);
-       end;
-
-
-     function tcgarm.a_internal_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference):treference;
-       var
-         oppostfix:toppostfix;
-       begin
-         case ToSize of
-           { signed integer registers }
-           OS_8,
+           OS_8,
            OS_S8:
              oppostfix:=PF_B;
            OS_16,
@@ -2108,7 +2149,7 @@ unit cgcpu;
         if weak then
           current_asmdata.weakrefasmsymbol(s);
         current_asmdata.asmlists[al_imports].concat(tai_directive.create(asd_indirect_symbol,s));
-        
+
         if not(cs_create_pic in current_settings.moduleswitches) then
           begin
             l1 := current_asmdata.RefAsmSymbol('L'+s+'$slp');
@@ -2123,7 +2164,7 @@ unit cgcpu;
           end
         else
           internalerror(2008100401);
-        
+
         current_asmdata.asmlists[al_imports].concat(tai_directive.create(asd_lazy_symbol_pointer,''));
         current_asmdata.asmlists[al_imports].concat(Tai_symbol.Create(l1,0));
         current_asmdata.asmlists[al_imports].concat(tai_directive.create(asd_indirect_symbol,s));
@@ -2357,7 +2398,881 @@ unit cgcpu;
       end;
 
 
-begin
-  cg:=tcgarm.create;
-  cg64:=tcg64farm.create;
+    procedure Tthumb2cgarm.init_register_allocators;
+      begin
+        inherited init_register_allocators;
+        { currently, we save R14 always, so we can use it }
+        if (target_info.system<>system_arm_darwin) then
+          rg[R_INTREGISTER]:=trgcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
+              [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
+               RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[])
+        else
+          { r9 is not available on Darwin according to the llvm code generator }
+          rg[R_INTREGISTER]:=trgcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
+              [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
+               RS_R10,RS_R12,RS_R14],first_int_imreg,[]);
+        rg[R_FPUREGISTER]:=trgcputhumb2.create(R_FPUREGISTER,R_SUBNONE,
+            [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
+        rg[R_MMREGISTER]:=trgcputhumb2.create(R_MMREGISTER,R_SUBNONE,
+            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
+      end;
+
+
+    procedure Tthumb2cgarm.done_register_allocators;
+      begin
+        rg[R_INTREGISTER].free;
+        rg[R_FPUREGISTER].free;
+        rg[R_MMREGISTER].free;
+        inherited done_register_allocators;
+      end;
+
+
+    procedure Tthumb2cgarm.a_call_reg(list : TAsmList;reg: tregister);
+      begin
+        list.concat(taicpu.op_reg(A_BLX, reg));
+{
+        the compiler does not properly set this flag anymore in pass 1, and
+        for now we only need it after pass 2 (I hope) (JM)
+          if not(pi_do_call in current_procinfo.flags) then
+            internalerror(2003060703);
+}
+        include(current_procinfo.flags,pi_do_call);
+      end;
+
+
+     procedure Tthumb2cgarm.a_load_const_reg(list : TAsmList; size: tcgsize; a : aint;reg : tregister);
+       var
+          imm_shift : byte;
+          l : tasmlabel;
+          hr : treference;
+       begin
+          if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
+            internalerror(2002090902);
+          if is_shifter_const(a,imm_shift) then
+            list.concat(taicpu.op_reg_const(A_MOV,reg,a))
+          { loading of constants with mov and orr }
+          else if (is_shifter_const(a-byte(a),imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a)));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a)));
+            end
+          else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a)));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a)));
+            end
+          else if (is_shifter_const(a-(dword(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((dword(a) shl 8) shr 8,imm_shift)) then
+            begin
+              list.concat(taicpu.op_reg_const(A_MOV,reg,a-(dword(a) shl 8) shr 8));
+              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(dword(a) shl 8) shr 8));
+            end
+          else
+            begin
+               reference_reset(hr,4);
+
+               current_asmdata.getjumplabel(l);
+               cg.a_label(current_procinfo.aktlocaldata,l);
+               hr.symboldata:=current_procinfo.aktlocaldata.last;
+               current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
+
+               hr.symbol:=l;
+               list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
+            end;
+       end;
+
+
+     procedure Tthumb2cgarm.a_load_ref_reg(list : TAsmList; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);
+       var
+         oppostfix:toppostfix;
+         usedtmpref: treference;
+         tmpreg,tmpreg2 : tregister;
+         so : tshifterop;
+         dir : integer;
+       begin
+         if (TCGSize2Size[FromSize] >= TCGSize2Size[ToSize]) then
+           FromSize := ToSize;
+         case FromSize of
+           { signed integer registers }
+           OS_8:
+             oppostfix:=PF_B;
+           OS_S8:
+             oppostfix:=PF_SB;
+           OS_16:
+             oppostfix:=PF_H;
+           OS_S16:
+             oppostfix:=PF_SH;
+           OS_32,
+           OS_S32:
+             oppostfix:=PF_None;
+           else
+             InternalError(200308297);
+         end;
+         if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
+           begin
+             if target_info.endian=endian_big then
+               dir:=-1
+             else
+               dir:=1;
+             case FromSize of
+               OS_16,OS_S16:
+                 begin
+                   { only complicated references need an extra loadaddr }
+                   if assigned(ref.symbol) or
+                     (ref.index<>NR_NO) or
+                     (ref.offset<-255) or
+                     (ref.offset>4094) or
+                     { sometimes the compiler reused registers }
+                     (reg=ref.index) or
+                     (reg=ref.base) then
+                     begin
+                       tmpreg2:=getintregister(list,OS_INT);
+                       a_loadaddr_ref_reg(list,ref,tmpreg2);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
+                     end
+                   else
+                     usedtmpref:=ref;
+
+                   if target_info.endian=endian_big then
+                     inc(usedtmpref.offset,1);
+                   shifterop_reset(so);so.shiftmode:=SM_LSL;so.shiftimm:=8;
+                   tmpreg:=getintregister(list,OS_INT);
+                   a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
+                   inc(usedtmpref.offset,dir);
+                   if FromSize=OS_16 then
+                     a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg)
+                   else
+                     a_internal_load_ref_reg(list,OS_S8,OS_S8,usedtmpref,tmpreg);
+                   list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                 end;
+               OS_32,OS_S32:
+                 begin
+                   tmpreg:=getintregister(list,OS_INT);
+
+                   { only complicated references need an extra loadaddr }
+                   if assigned(ref.symbol) or
+                     (ref.index<>NR_NO) or
+                     (ref.offset<-255) or
+                     (ref.offset>4092) or
+                     { sometimes the compiler reused registers }
+                     (reg=ref.index) or
+                     (reg=ref.base) then
+                     begin
+                       tmpreg2:=getintregister(list,OS_INT);
+                       a_loadaddr_ref_reg(list,ref,tmpreg2);
+                       reference_reset_base(usedtmpref,tmpreg2,0,ref.alignment);
+                     end
+                   else
+                     usedtmpref:=ref;
+
+                   shifterop_reset(so);so.shiftmode:=SM_LSL;
+                   if ref.alignment=2 then
+                     begin
+                       if target_info.endian=endian_big then
+                         inc(usedtmpref.offset,2);
+                       a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,reg);
+                       inc(usedtmpref.offset,dir*2);
+                       a_internal_load_ref_reg(list,OS_16,OS_16,usedtmpref,tmpreg);
+                       so.shiftimm:=16;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                     end
+                   else
+                     begin
+                       if target_info.endian=endian_big then
+                         inc(usedtmpref.offset,3);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
+                       inc(usedtmpref.offset,dir);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                       so.shiftimm:=8;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                       inc(usedtmpref.offset,dir);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                       so.shiftimm:=16;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                       inc(usedtmpref.offset,dir);
+                       a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
+                       so.shiftimm:=24;
+                       list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
+                     end;
+                 end
+               else
+                 handle_load_store(list,A_LDR,oppostfix,reg,ref);
+             end;
+           end
+         else
+           handle_load_store(list,A_LDR,oppostfix,reg,ref);
+
+         if (fromsize=OS_S8) and (tosize = OS_16) then
+           a_load_reg_reg(list,OS_16,OS_32,reg,reg);
+       end;
+
+
+    procedure Tthumb2cgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
+      var
+        shift : byte;
+        tmpreg : tregister;
+        so : tshifterop;
+        l1 : longint;
+      begin
+        ovloc.loc:=LOC_VOID;
+        if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
+          case op of
+            OP_ADD:
+              begin
+                op:=OP_SUB;
+                a:=aint(dword(-a));
+              end;
+            OP_SUB:
+              begin
+                op:=OP_ADD;
+                a:=aint(dword(-a));
+              end
+          end;
+
+        if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
+          case op of
+            OP_NEG,OP_NOT,
+            OP_DIV,OP_IDIV:
+              internalerror(200308281);
+            OP_SHL:
+              begin
+                if a>32 then
+                  internalerror(200308294);
+                if a<>0 then
+                  begin
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_LSL;
+                    so.shiftimm:=a;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                  end
+                else
+                 list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
+              end;
+            OP_ROL:
+              begin
+                if a>32 then
+                  internalerror(200308294);
+                if a<>0 then
+                  begin
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_ROR;
+                    so.shiftimm:=32-a;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                  end
+                else
+                 list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
+              end;
+            OP_ROR:
+              begin
+                if a>32 then
+                  internalerror(200308294);
+                if a<>0 then
+                  begin
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_ROR;
+                    so.shiftimm:=a;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                  end
+                else
+                 list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
+              end;
+            OP_SHR:
+              begin
+                if a>32 then
+                  internalerror(200308292);
+                shifterop_reset(so);
+                if a<>0 then
+                  begin
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=a;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                  end
+                else
+                 list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
+              end;
+            OP_SAR:
+              begin
+                if a>32 then
+                  internalerror(200308295);
+                if a<>0 then
+                  begin
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_ASR;
+                    so.shiftimm:=a;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                  end
+                else
+                 list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
+              end;
+            else
+              if (op in [OP_SUB, OP_ADD]) and
+                 ((a < 0) or
+                  (a > 4095)) then
+                begin
+                  tmpreg:=getintregister(list,size);
+                  a_load_const_reg(list, size, a, tmpreg);
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
+                   ));
+                end
+              else
+              list.concat(setoppostfix(
+                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
+              ));
+              if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
+                begin
+                  ovloc.loc:=LOC_FLAGS;
+                  case op of
+                    OP_ADD:
+                      ovloc.resflags:=F_CS;
+                    OP_SUB:
+                      ovloc.resflags:=F_CC;
+                  end;
+                end;
+          end
+        else
+          begin
+            { there could be added some more sophisticated optimizations }
+            if (op in [OP_MUL,OP_IMUL]) and (a=1) then
+              a_load_reg_reg(list,size,size,src,dst)
+            else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
+              a_load_const_reg(list,size,0,dst)
+            else if (op in [OP_IMUL]) and (a=-1) then
+              a_op_reg_reg(list,OP_NEG,size,src,dst)
+            { we do this here instead in the peephole optimizer because
+              it saves us a register }
+            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
+              a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
+            { for example : b=a*5 -> b=a*4+a with add instruction and shl }
+            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
+              begin
+                if l1>32 then{roozbeh does this ever happen?}
+                  internalerror(200308296);
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=l1;
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
+              end
+            else
+              begin
+                tmpreg:=getintregister(list,size);
+                a_load_const_reg(list,size,a,tmpreg);
+                a_op_reg_reg_reg_checkoverflow(list,op,size,tmpreg,src,dst,setflags,ovloc);
+              end;
+          end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    const
+      op_reg_reg_opcg2asmopThumb2: array[TOpCG] of tasmop =
+        (A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NONE,A_MVN,A_ORR,
+         A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR);
+
+
+    procedure Tthumb2cgarm.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; src1, src2, dst: tregister;setflags : boolean;var ovloc : tlocation);
+      var
+        so : tshifterop;
+        tmpreg,overflowreg : tregister;
+        asmop : tasmop;
+      begin
+        ovloc.loc:=LOC_VOID;
+        case op of
+           OP_NEG,OP_NOT,
+           OP_DIV,OP_IDIV:
+              internalerror(200308281);
+           OP_ROL:
+              begin
+                if not(size in [OS_32,OS_S32]) then
+                   internalerror(2008072801);
+                { simulate ROL by ror'ing 32-value }
+                tmpreg:=getintregister(list,OS_32);
+                list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
+                list.concat(taicpu.op_reg_reg_reg(A_SUB,src1,tmpreg,src1));
+                list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
+              end;
+           OP_ROR:
+              begin
+                if not(size in [OS_32,OS_S32]) then
+                   internalerror(2008072802);
+                list.concat(taicpu.op_reg_reg_reg(A_ROR, dst, src2, src1));
+              end;
+           OP_IMUL,
+           OP_MUL:
+              begin
+                if cgsetflags or setflags then
+                   begin
+                      overflowreg:=getintregister(list,size);
+                      if op=OP_IMUL then
+                        asmop:=A_SMULL
+                      else
+                        asmop:=A_UMULL;
+                      { the arm doesn't allow that rd and rm are the same }
+                      if dst=src2 then
+                        begin
+                           if dst<>src1 then
+                              list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src1,src2))
+                           else
+                              begin
+                                tmpreg:=getintregister(list,size);
+                                a_load_reg_reg(list,size,size,src2,dst);
+                                list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,tmpreg,src1));
+                              end;
+                        end
+                      else
+                        list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
+                      if op=OP_IMUL then
+                        begin
+                           shifterop_reset(so);
+                           so.shiftmode:=SM_ASR;
+                           so.shiftimm:=31;
+                           list.concat(taicpu.op_reg_reg_shifterop(A_CMP,overflowreg,dst,so));
+                        end
+                      else
+                        list.concat(taicpu.op_reg_const(A_CMP,overflowreg,0));
+
+                       ovloc.loc:=LOC_FLAGS;
+                       ovloc.resflags:=F_NE;
+                   end
+                else
+                   begin
+                      { the arm doesn't allow that rd and rm are the same }
+                      if dst=src2 then
+                        begin
+                           if dst<>src1 then
+                              list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src1,src2))
+                           else
+                              begin
+                                tmpreg:=getintregister(list,size);
+                                a_load_reg_reg(list,size,size,src2,dst);
+                                list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,tmpreg,src1));
+                              end;
+                        end
+                      else
+                        list.concat(taicpu.op_reg_reg_reg(A_MUL,dst,src2,src1));
+                   end;
+              end;
+           else
+              list.concat(setoppostfix(
+                   taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
+                ));
+        end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure Tthumb2cgarm.g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister);
+      var item: taicpu;
+      begin
+        item := setcondition(taicpu.op_reg_const(A_MOV,reg,1),flags_to_cond(f));
+        list.concat(item);
+        list.insertbefore(taicpu.op_cond(A_IT, flags_to_cond(f)), item);
+
+        item := setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f)));
+        list.concat(item);
+        list.insertbefore(taicpu.op_cond(A_IT, inverse_cond(flags_to_cond(f))), item);
+      end;
+
+
+    procedure Tthumb2cgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
+      var
+         ref : treference;
+         shift : byte;
+         firstfloatreg,lastfloatreg,
+         r : byte;
+         regs : tcpuregisterset;
+         stackmisalignment: pint;
+      begin
+        LocalSize:=align(LocalSize,4);
+        { call instruction does not put anything on the stack }
+        stackmisalignment:=0;
+        if not(nostackframe) then
+          begin
+            firstfloatreg:=RS_NO;
+            { save floating point registers? }
+            for r:=RS_F0 to RS_F7 do
+              if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
+                begin
+                  if firstfloatreg=RS_NO then
+                    firstfloatreg:=r;
+                  lastfloatreg:=r;
+                  inc(stackmisalignment,12);
+                end;
+
+            a_reg_alloc(list,NR_STACK_POINTER_REG);
+            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
+              begin
+                a_reg_alloc(list,NR_FRAME_POINTER_REG);
+                a_reg_alloc(list,NR_R12);
+
+                list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
+              end;
+            { save int registers }
+            reference_reset(ref,4);
+            ref.index:=NR_STACK_POINTER_REG;
+            ref.addressmode:=AM_PREINDEXED;
+
+            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
+
+            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
+              regs:=regs+[RS_R11,RS_R14]
+            else if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
+              include(regs,RS_R14);
+
+            if regs<>[] then
+              begin
+                for r:=RS_R0 to RS_R15 do
+                  if (r in regs) then
+                    inc(stackmisalignment,4);
+                list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,regs),PF_FD));
+              end;
+
+            if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
+              list.concat(taicpu.op_reg_reg(A_MOV,NR_FRAME_POINTER_REG,NR_R12));
+
+            stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
+            if (LocalSize<>0) or
+               ((stackmisalignment<>0) and
+                ((pi_do_call in current_procinfo.flags) or
+                 (po_assembler in current_procinfo.procdef.procoptions))) then
+              begin
+                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
+                if not(is_shifter_const(localsize,shift)) then
+                  begin
+                    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+                      a_reg_alloc(list,NR_R12);
+                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
+                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
+                    a_reg_dealloc(list,NR_R12);
+                  end
+                else
+                  begin
+                    a_reg_dealloc(list,NR_R12);
+                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
+                  end;
+              end;
+
+            if firstfloatreg<>RS_NO then
+              begin
+                reference_reset(ref,4);
+                if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
+                  begin
+                    a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
+                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
+                    ref.base:=NR_R12;
+                  end
+                else
+                  begin
+                    ref.base:=current_procinfo.framepointer;
+                    ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
+                  end;
+                list.concat(taicpu.op_reg_const_ref(A_SFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
+                  lastfloatreg-firstfloatreg+1,ref));
+              end;
+          end;
+      end;
+
+
+    procedure Tthumb2cgarm.g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean);
+      var
+         ref : treference;
+         firstfloatreg,lastfloatreg,
+         r : byte;
+         shift : byte;
+         regs : tcpuregisterset;
+         LocalSize : longint;
+         stackmisalignment: pint;
+      begin
+        if not(nostackframe) then
+          begin
+            stackmisalignment:=0;
+            { restore floating point register }
+            firstfloatreg:=RS_NO;
+            { save floating point registers? }
+            for r:=RS_F0 to RS_F7 do
+              if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
+                begin
+                  if firstfloatreg=RS_NO then
+                    firstfloatreg:=r;
+                  lastfloatreg:=r;
+                  { floating point register space is already included in
+                    localsize below by calc_stackframe_size
+                   inc(stackmisalignment,12);
+                  }
+                end;
+
+            if firstfloatreg<>RS_NO then
+              begin
+                reference_reset(ref,4);
+                if tg.direction*tarmprocinfo(current_procinfo).floatregstart>=1023 then
+                  begin
+                    a_load_const_reg(list,OS_ADDR,-tarmprocinfo(current_procinfo).floatregstart,NR_R12);
+                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_R12,current_procinfo.framepointer,NR_R12));
+                    ref.base:=NR_R12;
+                  end
+                else
+                  begin
+                    ref.base:=current_procinfo.framepointer;
+                    ref.offset:=tarmprocinfo(current_procinfo).floatregstart;
+                  end;
+                list.concat(taicpu.op_reg_const_ref(A_LFM,newreg(R_FPUREGISTER,firstfloatreg,R_SUBWHOLE),
+                  lastfloatreg-firstfloatreg+1,ref));
+              end;
+
+            regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
+            if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
+              begin
+                exclude(regs,RS_R14);
+                include(regs,RS_R15);
+              end;
+            if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
+              regs:=regs+[RS_R11,RS_R15];
+
+            for r:=RS_R0 to RS_R15 do
+              if (r in regs) then
+                inc(stackmisalignment,4);
+
+            stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
+            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+              begin
+                LocalSize:=current_procinfo.calc_stackframe_size;
+                if (LocalSize<>0) or
+                   ((stackmisalignment<>0) and
+                    ((pi_do_call in current_procinfo.flags) or
+                     (po_assembler in current_procinfo.procdef.procoptions))) then
+                  begin
+                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
+                    if not(is_shifter_const(LocalSize,shift)) then
+                      begin
+                        a_reg_alloc(list,NR_R12);
+                        a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
+                        list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
+                        a_reg_dealloc(list,NR_R12);
+                      end
+                    else
+                      begin
+                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
+                      end;
+                  end;
+
+                if regs=[] then
+                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
+                else
+                  begin
+                    reference_reset(ref,4);
+                    ref.index:=NR_STACK_POINTER_REG;
+                    ref.addressmode:=AM_PREINDEXED;
+                    list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,regs),PF_FD));
+                  end;
+              end
+            else
+              begin
+                { restore int registers and return }
+                list.concat(taicpu.op_reg_reg(A_MOV, NR_STACK_POINTER_REG, NR_R11));
+                
+                reference_reset(ref,4);
+                ref.index:=NR_STACK_POINTER_REG;
+                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,regs),PF_DB));
+              end;
+          end
+        else
+          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
+      end;
+
+
+   function Tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
+      var
+        tmpreg : tregister;
+        tmpref : treference;
+        l : tasmlabel;
+        so: tshifterop;
+      begin
+        tmpreg:=NR_NO;
+
+        { Be sure to have a base register }
+        if (ref.base=NR_NO) then
+          begin
+            if ref.shiftmode<>SM_None then
+              internalerror(200308294);
+            ref.base:=ref.index;
+            ref.index:=NR_NO;
+          end;
+
+        { absolute symbols can't be handled directly, we've to store the symbol reference
+          in the text segment and access it pc relative
+
+          For now, we assume that references where base or index equals to PC are already
+          relative, all other references are assumed to be absolute and thus they need
+          to be handled extra.
+
+          A proper solution would be to change refoptions to a set and store the information
+          if the symbol is absolute or relative there.
+        }
+
+        if (assigned(ref.symbol) and
+            not(is_pc(ref.base)) and
+            not(is_pc(ref.index))
+           ) or
+           { [#xxx] isn't a valid address operand }
+           ((ref.base=NR_NO) and (ref.index=NR_NO)) or
+           //(ref.offset<-4095) or
+           (ref.offset<-255) or
+           (ref.offset>4095) or
+           ((oppostfix in [PF_SB,PF_H,PF_SH]) and
+            ((ref.offset<-255) or
+             (ref.offset>255)
+            )
+           ) or
+           ((op in [A_LDF,A_STF]) and
+            ((ref.offset<-1020) or
+             (ref.offset>1020) or
+             { the usual pc relative symbol handling assumes possible offsets of +/- 4095 }
+             assigned(ref.symbol)
+            )
+           ) then
+          begin
+            reference_reset(tmpref,4);
+
+            { load symbol }
+            tmpreg:=getintregister(list,OS_INT);
+            if assigned(ref.symbol) then
+              begin
+                current_asmdata.getjumplabel(l);
+                cg.a_label(current_procinfo.aktlocaldata,l);
+                tmpref.symboldata:=current_procinfo.aktlocaldata.last;
+
+                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
+
+                { load consts entry }
+                tmpref.symbol:=l;
+                tmpref.base:=NR_R15;
+                list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
+
+                { in case of LDF/STF, we got rid of the NR_R15 }
+                if is_pc(ref.base) then
+                  ref.base:=NR_NO;
+                if is_pc(ref.index) then
+                  ref.index:=NR_NO;
+              end
+            else
+              a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
+
+            if (ref.base<>NR_NO) then
+              begin
+                if ref.index<>NR_NO then
+                  begin
+                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
+                    ref.base:=tmpreg;
+                  end
+                else
+                  begin
+                    ref.index:=tmpreg;
+                    ref.shiftimm:=0;
+                    ref.signindex:=1;
+                    ref.shiftmode:=SM_None;
+                  end;
+              end
+            else
+              ref.base:=tmpreg;
+            ref.offset:=0;
+            ref.symbol:=nil;
+          end;
+
+        if (ref.base<>NR_NO) and (ref.index<>NR_NO) and (ref.offset<>0) then
+          begin
+            if tmpreg<>NR_NO then
+              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg,tmpreg)
+            else
+              begin
+                tmpreg:=getintregister(list,OS_ADDR);
+                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,tmpreg);
+                ref.base:=tmpreg;
+              end;
+            ref.offset:=0;
+          end;
+
+        { Hack? Thumb2 doesn't allow PC indexed addressing modes(although it does in the specification) }
+        if (ref.base=NR_R15) and (ref.index<>NR_NO) and (ref.shiftmode <> sm_none) then
+          begin
+            tmpreg:=getintregister(list,OS_ADDR);
+
+            list.concat(taicpu.op_reg_reg(A_MOV, tmpreg, NR_R15));
+        
+            ref.base := tmpreg;
+          end;
+
+        { floating point operations have only limited references
+          we expect here, that a base is already set }
+        if (op in [A_LDF,A_STF]) and (ref.index<>NR_NO) then
+          begin
+            if ref.shiftmode<>SM_none then
+              internalerror(200309121);
+            if tmpreg<>NR_NO then
+              begin
+                if ref.base=tmpreg then
+                  begin
+                    if ref.signindex<0 then
+                      list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,tmpreg,ref.index))
+                    else
+                      list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,tmpreg,ref.index));
+                    ref.index:=NR_NO;
+                  end
+                else
+                  begin
+                    if ref.index<>tmpreg then
+                      internalerror(200403161);
+                    if ref.signindex<0 then
+                      list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
+                    else
+                      list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
+                    ref.base:=tmpreg;
+                    ref.index:=NR_NO;
+                  end;
+              end
+            else
+              begin
+                tmpreg:=getintregister(list,OS_ADDR);
+                list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,ref.index));
+                ref.base:=tmpreg;
+                ref.index:=NR_NO;
+              end;
+          end;
+        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
+        Result := ref;
+      end;
+
+
+    procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
+      var tmpreg: tregister;
+      begin
+        case op of
+          OP_NEG:
+            begin
+              list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
+              tmpreg:=cg.getintregister(list,OS_32);
+              list.concat(taicpu.op_reg_const(A_MOV,tmpreg,0));
+              list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,tmpreg,regsrc.reghi));
+            end;
+          else
+            inherited a_op64_reg_reg(list, op, size, regsrc, regdst);
+        end;
+      end;
+
+
+    procedure create_codegen;
+      begin
+        if current_settings.cputype in cpu_thumb2 then
+          begin
+            cg:=tthumb2cgarm.create;
+            cg64:=tthumb2cg64farm.create;
+
+            casmoptimizer:=TCpuThumb2AsmOptimizer;
+          end
+        else
+          begin
+            cg:=tarmcgarm.create;
+            cg64:=tcg64farm.create;
+
+            casmoptimizer:=TCpuAsmOptimizer;
+          end;
+      end;
+
 end.

+ 25 - 10
compiler/arm/cpubase.pas

@@ -503,16 +503,31 @@ unit cpubase;
       var
          i : longint;
       begin
-         for i:=0 to 15 do
-           begin
-              if (dword(d) and not(rotl($ff,i*2)))=0 then
-                begin
-                   imm_shift:=i*2;
-                   result:=true;
-                   exit;
-                end;
-           end;
-         result:=false;
+        if current_settings.cputype in cpu_thumb2 then
+          begin
+            for i:=0 to 24 do
+              begin
+                 if (dword(d) and not($ff shl i))=0 then
+                   begin
+                     imm_shift:=i;
+                     result:=true;
+                     exit;
+                   end;
+              end;
+          end
+        else
+          begin
+            for i:=0 to 15 do
+              begin
+                 if (dword(d) and not(rotl($ff,i*2)))=0 then
+                   begin
+                      imm_shift:=i*2;
+                      result:=true;
+                      exit;
+                   end;
+              end;
+          end;
+        result:=false;
       end;
 
 

+ 21 - 6
compiler/arm/cpuinfo.pas

@@ -34,9 +34,17 @@ Type
       (cpu_none,
        cpu_armv3,
        cpu_armv4,
-       cpu_armv5
+       cpu_armv5,
+       cpu_armv7m,
+       cpu_cortexm3
       );
 
+Const
+   cpu_arm = [cpu_none,cpu_armv3,cpu_armv4,cpu_armv5];
+   cpu_thumb = [];
+   cpu_thumb2 = [cpu_armv7m,cpu_cortexm3];
+
+Type
    tfputype =
      (fpu_none,
       fpu_soft,
@@ -59,7 +67,10 @@ Type
       ct_at91sam7s256,
       ct_at91sam7se256,
       ct_at91sam7x256,
-      ct_at91sam7xc256
+      ct_at91sam7xc256,
+		
+      { STMicroelectronics }
+      ct_stm32f103re
      );
 
 Const
@@ -83,10 +94,12 @@ Const
      pocall_softfloat
    ];
 
-   cputypestr : array[tcputype] of string[5] = ('',
+   cputypestr : array[tcputype] of string[8] = ('',
      'ARMV3',
      'ARMV4',
-     'ARMV5'
+     'ARMV5',
+     'ARMV7M',
+     'CORTEXM3'
    );
 
    fputypestr : array[tfputype] of string[6] = ('',
@@ -106,7 +119,8 @@ Const
       'AT91SAM7S256',
       'AT91SAM7SE256',
       'AT91SAM7X256',
-      'AT91SAM7XC256'
+      'AT91SAM7XC256',
+      'STM32F103RE'
      );
 
    controllerunitstr : array[tcontrollertype] of string[20] =
@@ -117,7 +131,8 @@ Const
       'AT91SAM7x256',
       'AT91SAM7x256',
       'AT91SAM7x256',
-      'AT91SAM7x256'
+      'AT91SAM7x256',
+      'STM32F103'
      );
 
    { Supported optimizations, only used for information }

+ 2 - 0
compiler/arm/narmadd.pas

@@ -257,6 +257,8 @@ interface
             location_reset(location,LOC_FLAGS,OS_NO);
             location.resflags:=getresflags(unsigned);
             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reghi,right.location.register64.reghi));
+            if current_settings.cputype in cpu_thumb2 then
+              current_asmdata.CurrAsmList.concat(taicpu.op_cond(A_IT, C_EQ));
             current_asmdata.CurrAsmList.concat(setcondition(taicpu.op_reg_reg(A_CMP,left.location.register64.reglo,right.location.register64.reglo),C_EQ));
           end
         else

+ 33 - 0
compiler/arm/raarmgas.pas

@@ -645,6 +645,32 @@ Unit raarmgas;
           end;
 
 
+        function is_ConditionCode(hs: string): boolean;
+          var icond: tasmcond;
+          begin
+            is_ConditionCode := false;
+            
+            if actopcode in [A_IT,A_ITE,A_ITT,
+                             A_ITEE,A_ITTE,A_ITET,A_ITTT,
+                             A_ITEEE,A_ITTEE,A_ITETE,A_ITTTE,A_ITEET,A_ITTET,A_ITETT,A_ITTTT] then
+              begin
+                { search for condition, conditions are always 2 chars }
+                if length(hs)>1 then
+                  begin
+                    for icond:=low(tasmcond) to high(tasmcond) do
+                      begin
+                        if copy(hs,1,2)=uppercond2str[icond] then
+                          begin
+                            //actcondition:=icond;
+                            oper.opr.typ := OPR_COND;
+                            oper.opr.cc := icond;
+                            exit(true);
+                          end;
+                      end;
+                  end;
+              end;
+          end;
+
       var
         tempreg : tregister;
         ireg : tsuperregister;
@@ -687,6 +713,12 @@ Unit raarmgas;
           *)
           AS_ID: { A constant expression, or a Variable ref.  }
             Begin
+              { Condition code? }
+              if is_conditioncode(actasmpattern) then
+              begin
+                consume(AS_ID);
+              end
+              else
               { Local Label ? }
               if is_locallabel(actasmpattern) then
                begin
@@ -970,6 +1002,7 @@ Unit raarmgas;
           end;
         if actopcode=A_NONE then
           exit;
+			 
         { search for condition, conditions are always 2 chars }
         if length(hs)>1 then
           begin

+ 111 - 0
compiler/arm/rgcpu.pas

@@ -39,6 +39,11 @@ unit rgcpu;
          procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
        end;
 
+       trgcputhumb2 = class(trgobj)
+         procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+         procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+       end;
+
        trgintcpu = class(trgcpu)
          procedure add_cpu_interferences(p : tai);override;
        end;
@@ -157,6 +162,112 @@ unit rgcpu;
       end;
 
 
+    procedure trgcputhumb2.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      var
+        tmpref : treference;
+        helplist : TAsmList;
+        l : tasmlabel;
+        hreg : tregister;
+      begin
+        { don't load spilled register between
+          mov lr,pc
+          mov pc,r4
+          but befure the mov lr,pc
+        }
+        if assigned(pos.previous) and
+          (pos.typ=ait_instruction) and
+          (taicpu(pos).opcode=A_MOV) and
+          (taicpu(pos).oper[0]^.typ=top_reg) and
+          (taicpu(pos).oper[0]^.reg=NR_R14) and
+          (taicpu(pos).oper[1]^.typ=top_reg) and
+          (taicpu(pos).oper[1]^.reg=NR_PC) then
+          pos:=tai(pos.previous);
+
+        if (spilltemp.offset>4095) or (spilltemp.offset<-255) then
+          begin
+            helplist:=TAsmList.create;
+            reference_reset(tmpref,sizeof(aint));
+            { create consts entry }
+            current_asmdata.getjumplabel(l);
+            cg.a_label(current_procinfo.aktlocaldata,l);
+            tmpref.symboldata:=current_procinfo.aktlocaldata.last;
+
+            current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(spilltemp.offset));
+
+            { load consts entry }
+            if getregtype(tempreg)=R_INTREGISTER then
+              hreg:=getregisterinline(helplist,R_SUBWHOLE)
+            else
+              hreg:=cg.getintregister(helplist,OS_ADDR);
+
+            tmpref.symbol:=l;
+            tmpref.base:=NR_R15;
+            helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
+
+            reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint));
+            tmpref.index:=hreg;
+
+            if spilltemp.index<>NR_NO then
+              internalerror(200401263);
+
+            helplist.concat(spilling_create_load(tmpref,tempreg));
+            if getregtype(tempreg)=R_INTREGISTER then
+              ungetregisterinline(helplist,hreg);
+
+            list.insertlistafter(pos,helplist);
+            helplist.free;
+          end
+        else
+          inherited do_spill_read(list,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcputhumb2.do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      var
+        tmpref : treference;
+        helplist : TAsmList;
+        l : tasmlabel;
+        hreg : tregister;
+      begin
+        if (spilltemp.offset>4095) or (spilltemp.offset<-255) then
+          begin
+            helplist:=TAsmList.create;
+            reference_reset(tmpref,sizeof(aint));
+            { create consts entry }
+            current_asmdata.getjumplabel(l);
+            cg.a_label(current_procinfo.aktlocaldata,l);
+            tmpref.symboldata:=current_procinfo.aktlocaldata.last;
+
+            current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(spilltemp.offset));
+
+            { load consts entry }
+            if getregtype(tempreg)=R_INTREGISTER then
+              hreg:=getregisterinline(helplist,R_SUBWHOLE)
+            else
+              hreg:=cg.getintregister(helplist,OS_ADDR);
+            tmpref.symbol:=l;
+            tmpref.base:=NR_R15;
+            helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
+
+            if spilltemp.index<>NR_NO then
+              internalerror(200401263);
+
+            reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(pint));
+            tmpref.index:=hreg;
+
+            helplist.concat(spilling_create_store(tempreg,tmpref));
+
+            if getregtype(tempreg)=R_INTREGISTER then
+              ungetregisterinline(helplist,hreg);
+
+            list.insertlistafter(pos,helplist);
+            helplist.free;
+          end
+        else
+          inherited do_spill_written(list,pos,spilltemp,tempreg);
+      end;
+
+
     procedure trgintcpu.add_cpu_interferences(p : tai);
       var
         r : tregister;

+ 8 - 3
compiler/avr/cgcpu.pas

@@ -110,6 +110,8 @@ unit cgcpu;
         procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
         procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
       end;
+      
+    procedure create_codegen;
 
     const
       OpCmp2AsmCond : Array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
@@ -829,7 +831,10 @@ unit cgcpu;
       end;
 
 
-begin
-  cg:=tcgavr.create;
-  cg64:=tcg64favr.create;
+    procedure create_codegen;
+      begin
+        cg:=tcgavr.create;
+        cg64:=tcg64favr.create;
+      end;
+      
 end.

+ 9 - 7
compiler/cgobj.pas

@@ -582,6 +582,7 @@ unit cgobj;
        cg64 : tcg64;
 {$endif cpu64bitalu}
 
+    procedure destroy_codegen;
 
 implementation
 
@@ -3996,16 +3997,17 @@ implementation
             internalerror(2006082211);
         end;
       end;
-
 {$endif cpu64bitalu}
 
 
-
-initialization
-    ;
-finalization
-  cg.free;
+    procedure destroy_codegen;
+      begin
+        cg.free;
+        cg:=nil;
 {$ifndef cpu64bitalu}
-  cg64.free;
+        cg64.free;
+        cg64:=nil;
 {$endif cpu64bitalu}
+      end;
+
 end.

+ 8 - 3
compiler/i386/cgcpu.pas

@@ -63,6 +63,8 @@ unit cgcpu;
       private
         procedure get_64bit_ops(op:TOpCG;var op1,op2:TAsmOp);
       end;
+      
+    procedure create_codegen;
 
   implementation
 
@@ -851,7 +853,10 @@ unit cgcpu;
         end;
       end;
 
-begin
-  cg := tcg386.create;
-  cg64 := tcg64f386.create;
+    procedure create_codegen;
+      begin
+        cg := tcg386.create;
+        cg64 := tcg64f386.create;
+      end;
+      
 end.

+ 8 - 3
compiler/m68k/cgcpu.pas

@@ -130,6 +130,7 @@ unit cgcpu;
          S_FS,S_FD,S_FX,S_NO,S_NO,
          S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
 
+    procedure create_codegen;
 
   implementation
 
@@ -1738,8 +1739,12 @@ unit cgcpu;
         end;
     end; { end case }
   end;
+  
+  
+procedure create_codegen;
+  begin
+    cg := tcg68k.create;
+    cg64 :=tcg64f68k.create;
+  end;
 
-begin
-  cg := tcg68k.create;
-  cg64 :=tcg64f68k.create;
 end.

+ 1 - 1
compiler/ncgld.pas

@@ -247,7 +247,7 @@ implementation
                        if tabsolutevarsym(symtableentry).absseg then
                          location.reference.segment:=NR_FS;
 {$endif i386}
-                       location.reference.offset:=tabsolutevarsym(symtableentry).addroffset;
+                       location.reference.offset:=aint(tabsolutevarsym(symtableentry).addroffset);
                      end;
                    toasm :
                      location.reference.symbol:=current_asmdata.RefAsmSymbol(tabsolutevarsym(symtableentry).mangledname);

+ 9 - 2
compiler/ncgutil.pas

@@ -160,7 +160,7 @@ implementation
     regvars,dbgbase,
     pass_1,pass_2,
     nbas,ncon,nld,nmem,nutils,
-    tgobj,cgobj
+    tgobj,cgobj,cgcpu
 {$ifdef powerpc}
     , cpupi
 {$endif}
@@ -2103,6 +2103,10 @@ implementation
         item := TCmdStrListItem(current_procinfo.procdef.aliasnames.first);
         while assigned(item) do
           begin
+{$ifdef arm}
+            if current_settings.cputype in cpu_thumb2 then
+              list.concat(tai_thumb_func.create);
+{$endif arm}
             { "double link" all procedure entry symbols via .reference }
             { directives on darwin, because otherwise the linker       }
             { sometimes strips the procedure if only on of the symbols }
@@ -2123,7 +2127,6 @@ implementation
             previtem:=item;
             item := TCmdStrListItem(item.next);
           end;
-
         current_procinfo.procdef.procstarttai:=tai(list.last);
       end;
 
@@ -2271,6 +2274,7 @@ implementation
 
     procedure gen_external_stub(list:TAsmList;pd:tprocdef;const externalname:string);
       begin
+        create_codegen;
         { add the procedure to the al_procedures }
         maybe_new_object_file(list);
         new_section(list,sec_code,lower(pd.mangledname),current_settings.alignment.procalign);
@@ -2281,6 +2285,7 @@ implementation
           list.concat(Tai_symbol.createname(pd.mangledname,AT_FUNCTION,0));
 
         cg.g_external_wrapper(list,pd,externalname);
+        destroy_codegen;
       end;
 
 {****************************************************************************
@@ -2811,12 +2816,14 @@ implementation
         i   : longint;
         def : tdef;
       begin
+        create_codegen;
         for i:=0 to st.DefList.Count-1 do
           begin
             def:=tdef(st.DefList[i]);
             if is_class(def) then
               gen_intf_wrapper(list,tobjectdef(def));
           end;
+        destroy_codegen;
       end;
 
 

+ 1 - 2
compiler/options.pas

@@ -82,8 +82,7 @@ uses
   comphook,
   symtable,scanner,rabase,
   wpobase,
-  i_bsd
-  ;
+  i_bsd;
 
 const
   page_size = 24;

+ 7 - 3
compiler/powerpc/cgcpu.pas

@@ -115,6 +115,7 @@ unit cgcpu;
        procedure a_op64_reg_reg_reg(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64);override;
      end;
 
+  procedure create_codegen;
 
 const
   TOpCG2AsmOpConstLo: Array[topcg] of TAsmOp = (A_NONE,A_MR,A_ADDI,A_ANDI_,A_DIVWU,
@@ -1870,7 +1871,10 @@ const
       end;
 
 
-begin
-  cg := tcgppc.create;
-  cg64 :=tcg64fppc.create;
+    procedure create_codegen;
+      begin
+        cg := tcgppc.create;
+        cg64 :=tcg64fppc.create;
+      end;
+      
 end.

+ 6 - 0
compiler/powerpc64/cgcpu.pas

@@ -140,6 +140,8 @@ type
     procedure profilecode_savepara(para : tparavarsym; list : TAsmList);
     procedure profilecode_restorepara(para : tparavarsym; list : TAsmList);
   end;
+  
+  procedure create_codegen;
 
 const
   TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
@@ -2158,6 +2160,10 @@ begin
   cg.a_load_ref_reg(list, OS_INT, OS_INT, ref, reg);
 end;
 
+
+procedure create_codegen;
 begin
   cg := tcgppc.create;
+end;
+
 end.

+ 5 - 2
compiler/psub.pas

@@ -82,7 +82,7 @@ implementation
        globtype,tokens,verbose,comphook,constexp,
        systems,
        { aasm }
-       cpubase,aasmbase,aasmtai,aasmdata,
+       cpuinfo,cpubase,aasmbase,aasmtai,aasmdata,
        { symtable }
        symconst,symbase,symsym,symtype,symtable,defutil,
        paramgr,
@@ -101,7 +101,7 @@ implementation
        scanner,import,gendef,
        pbase,pstatmnt,pdecl,pdecsub,pexports,
        { codegen }
-       tgobj,cgbase,cgobj,dbgbase,
+       tgobj,cgbase,cgobj,cgcpu,dbgbase,
        ncgutil,regvars,
        optbase,
        opttail,
@@ -819,6 +819,8 @@ implementation
         { only do secondpass if there are no errors }
         if (ErrorCount=0) then
           begin
+            create_codegen;
+
             { set the start offset to the start of the temp area in the stack }
             tg:=ttgobj.create;
 
@@ -1136,6 +1138,7 @@ implementation
             { stop tempgen and ra }
             tg.free;
             cg.done_register_allocators;
+            destroy_codegen;
             tg:=nil;
           end;
 

+ 3 - 0
compiler/psystem.pas

@@ -579,6 +579,9 @@ implementation
 {$ifdef SPARC}
 //        aiclass[ait_labeled_instruction]:=tai_labeled_instruction;
 {$endif SPARC}
+{$ifdef arm}
+        aiclass[ait_thumb_func]:=tai_thumb_func;
+{$endif arm}
         aiclass[ait_cutobject]:=tai_cutobject;
         aiclass[ait_regalloc]:=tai_regalloc;
         aiclass[ait_tempalloc]:=tai_tempalloc;

+ 3 - 0
compiler/rautils.pas

@@ -87,6 +87,7 @@ type
 {$ifdef arm}
       OPR_REGSET    : (regset : tcpuregisterset);
       OPR_SHIFTEROP : (shifterop : tshifterop);
+      OPR_COND      : (cc : tasmcond);
 {$endif arm}
   end;
 
@@ -1062,6 +1063,8 @@ end;
                 ai.loadregset(i-1,regset);
               OPR_SHIFTEROP:
                 ai.loadshifterop(i-1,shifterop);
+              OPR_COND:
+                ai.loadconditioncode(i-1,cc);
 {$endif ARM}
               { ignore wrong operand }
               OPR_NONE:

+ 8 - 3
compiler/sparc/cgcpu.pas

@@ -106,6 +106,8 @@ interface
         procedure a_op64_const_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;value : int64;regsrc,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
         procedure a_op64_reg_reg_reg_checkoverflow(list: TAsmList;op:TOpCG;size : tcgsize;regsrc1,regsrc2,regdst : tregister64;setflags : boolean;var ovloc : tlocation);override;
       end;
+      
+    procedure create_codegen;
 
     const
       TOpCG2AsmOp : array[topcg] of TAsmOp=(
@@ -1535,7 +1537,10 @@ implementation
       end;
 
 
-begin
-  cg:=TCgSparc.Create;
-  cg64:=TCg64Sparc.Create;
+    procedure create_codegen;
+      begin
+        cg:=TCgSparc.Create;
+        cg64:=TCg64Sparc.Create;
+      end;
+      
 end.

+ 11 - 0
compiler/systems/t_embed.pas

@@ -245,6 +245,17 @@ begin
           Add('}');
           Add('_stack_top = 0x20FFFC;');
         end;
+      ct_stm32f103re:
+      with linkres do
+        begin
+          Add('ENTRY(_START)');
+          Add('MEMORY');
+          Add('{');
+          Add('    flash : ORIGIN = 0x08000000, LENGTH = 512K');
+          Add('    ram : ORIGIN = 0x20000000, LENGTH = 64K');
+          Add('}');
+          Add('_stack_top = 0x2000FFFC;');
+        end;
 
     else
       internalerror(200902011);

+ 7 - 3
compiler/x86_64/cgcpu.pas

@@ -43,6 +43,7 @@ unit cgcpu;
         procedure a_param_ref(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
       end;
 
+    procedure create_codegen;
 
   implementation
 
@@ -245,7 +246,10 @@ unit cgcpu;
         List.concat(Tai_symbol_end.Createname(labelname));
       end;
 
-
-begin
-  cg:=tcgx86_64.create;
+      
+    procedure create_codegen;
+      begin
+        cg:=tcgx86_64.create;
+      end;
+      
 end.

+ 96 - 0
rtl/arm/divide.inc

@@ -41,6 +41,10 @@
 function fpc_div_dword(n,z:dword):dword;[public,alias: 'FPC_DIV_DWORD'];assembler;nostackframe;
 
 asm
+  {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
+  {$ifdef CPUCORTEXM3}
+  udiv r0, r1, r0
+  {$else}
   mov r3, #0
   rsbs r2, r0, r1, LSR#3
   bcc .Ldiv_3bits
@@ -55,6 +59,7 @@ asm
   mov r0, r0, LSL#8
   orr r3, r3, #0x00FF0000
   rsbs r2, r0, r1, LSR#8
+  itt cs
   movcs r0, r0, LSL#8
   orrcs r3, r3, #0x0000FF00
   rsbs r2, r0, r1, LSR#4
@@ -62,32 +67,41 @@ asm
   rsbs r2, r0, #0
   bcs .Ldiv_by_0
 .Ldiv_loop:
+  it cs
   movcs r0, r0, LSR#8
 .Ldiv_8bits:
   rsbs r2, r0, r1, LSR#7
+  it cs
   subcs r1, r1, r0, LSL#7
   adc r3, r3, r3
   rsbs r2, r0, r1, LSR#6
+  it cs
   subcs r1, r1, r0, LSL#6
   adc r3, r3, r3
   rsbs r2, r0, r1, LSR#5
+  it cs
   subcs r1, r1, r0, LSL#5
   adc r3, r3, r3
   rsbs r2, r0, r1, LSR#4
+  it cs
   subcs r1, r1, r0, LSL#4
   adc r3, r3, r3
 .Ldiv_4bits:
   rsbs r2, r0, r1, LSR#3
+  it cs
   subcs r1, r1, r0, LSL#3
   adc r3, r3, r3
 .Ldiv_3bits:
   rsbs r2, r0, r1, LSR#2
+  it cs
   subcs r1, r1, r0, LSL#2
   adc r3, r3, r3
   rsbs r2, r0, r1, LSR#1
+  it cs
   subcs r1, r1, r0, LSL#1
   adc r3, r3, r3
   rsbs r2, r0, r1
+  it cs
   subcs r1, r1, r0
   adcs r3, r3, r3
 .Ldiv_next:
@@ -99,6 +113,67 @@ asm
   mov r1, r11
   bl handleerrorframe
   mov pc, lr
+  {$endif}
+  {$else}
+  mov r3, #0
+  rsbs r2, r0, r1, LSR#3
+  bcc .Ldiv_3bits
+  rsbs r2, r0, r1, LSR#8
+  bcc .Ldiv_8bits
+  mov r0, r0, LSL#8
+  orr r3, r3, #0xFF000000
+  rsbs r2, r0, r1, LSR#4
+  bcc .Ldiv_4bits
+  rsbs r2, r0, r1, LSR#8
+  bcc .Ldiv_8bits
+  mov r0, r0, LSL#8
+  orr r3, r3, #0x00FF0000
+  rsbs r2, r0, r1, LSR#8
+  movcs r0, r0, LSL#8
+  orrcs r3, r3, #0x0000FF00
+  rsbs r2, r0, r1, LSR#4
+  bcc .Ldiv_4bits
+  rsbs r2, r0, #0
+  bcs .Ldiv_by_0
+.Ldiv_loop:
+  movcs r0, r0, LSR#8
+.Ldiv_8bits:
+  rsbs r2, r0, r1, LSR#7
+  subcs r1, r1, r0, LSL#7
+  adc r3, r3, r3
+  rsbs r2, r0, r1, LSR#6
+  subcs r1, r1, r0, LSL#6
+  adc r3, r3, r3
+  rsbs r2, r0, r1, LSR#5
+  subcs r1, r1, r0, LSL#5
+  adc r3, r3, r3
+  rsbs r2, r0, r1, LSR#4
+  subcs r1, r1, r0, LSL#4
+  adc r3, r3, r3
+.Ldiv_4bits:
+  rsbs r2, r0, r1, LSR#3
+  subcs r1, r1, r0, LSL#3
+  adc r3, r3, r3
+.Ldiv_3bits:
+  rsbs r2, r0, r1, LSR#2
+  subcs r1, r1, r0, LSL#2
+  adc r3, r3, r3
+  rsbs r2, r0, r1, LSR#1
+  subcs r1, r1, r0, LSL#1
+  adc r3, r3, r3
+  rsbs r2, r0, r1
+  subcs r1, r1, r0
+  adcs r3, r3, r3
+.Ldiv_next:
+  bcs .Ldiv_loop
+  mov r0, r3
+  mov pc, lr
+.Ldiv_by_0:
+  mov r0, #200
+  mov r1, r11
+  bl handleerrorframe
+  mov pc, lr
+  {$endif}
 end;
 
 {It is a compilerproc (systemh.inc), make an alias for internal use.}
@@ -110,6 +185,26 @@ function fpc_div_dword(n,z:dword):dword;external name 'FPC_DIV_DWORD';
 function fpc_div_longint(n,z:longint):longint;[public,alias: 'FPC_DIV_LONGINT'];assembler;nostackframe;
 
 asm
+  {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
+  {$ifdef CPUCORTEXM3}
+  sdiv r0, r1, r0
+  {$else}
+  stmfd sp!, {lr}
+  ands r12, r0, #1<<31       (* r12:=r0 and $80000000 *)
+  it mi
+  rsbmi r0, r0, #0           (* if signed(r0) then r0:=0-r0 *)
+  eors r12, r12, r1, ASR#32  (* r12:=r12 xor (r1 asr 32) *)
+  it cs
+  rsbcs r1, r1, #0           (* if signed(r12) then r1:=0-r1 *)
+  bl fpc_div_dword
+  movs r12, r12, LSL#1       (* carry:=sign(r12) *)
+  it cs
+  rsbcs r0, r0, #0
+  it mi
+  rsbmi r1, r1, #0
+  ldmfd sp!, {pc}
+  {$endif}
+  {$else}
   stmfd sp!, {lr}
   ands r12, r0, #1<<31       (* r12:=r0 and $80000000 *)
   rsbmi r0, r0, #0           (* if signed(r0) then r0:=0-r0 *)
@@ -120,6 +215,7 @@ asm
   rsbcs r0, r0, #0
   rsbmi r1, r1, #0
   ldmfd sp!, {pc}
+  {$endif}
 end;
 
 {It is a compilerproc (systemh.inc), make an alias for internal use.}

+ 24 - 4
rtl/arm/setjump.inc

@@ -16,18 +16,38 @@
 
 function setjmp(var S : jmp_buf) : longint;assembler;[Public, alias : 'FPC_SETJMP'];nostackframe;
   asm
+    {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
+    stmia	r0!, {v1-v6, sl, fp}
+    mov	   r2, sp
+    stmia	r0!, {r2, lr}
+
+    mov     r0,#0
+    mov     pc,lr
+    {$else}
     stmia   r0,{v1-v6, sl, fp, sp, lr}
     mov     r0,#0
     mov     pc,lr
+    {$endif}
   end;
 
 
 procedure longjmp(var S : jmp_buf;value : longint);assembler;[Public, alias : 'FPC_LONGJMP'];
   asm
-     mov     ip, r0
-     movs    r0, r1
-     moveq   r0, #1
-     ldmia   ip,{v1-v6, sl, fp, sp, pc}
+    {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
+    mov     ip, r0
+    movs    r0, r1
+    it eq
+    moveq   r0, #1
+    ldmia   ip,{v1-v6, sl, fp}
+    ldr		 sp, [ip]
+    add		 ip, ip, #4
+    ldr		 pc, [ip]
+    {$else}
+    mov     ip, r0
+    movs    r0, r1
+    moveq   r0, #1
+    ldmia   ip,{v1-v6, sl, fp, sp, pc}
+    {$endif}
   end;
 
 

+ 659 - 0
rtl/arm/thumb2.inc

@@ -0,0 +1,659 @@
+{
+
+    This file is part of the Free Pascal run time library.
+    Copyright (c) 2003 by the Free Pascal development team.
+
+    Processor dependent implementation for the system unit for
+    ARM
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+{$asmmode gas}
+
+{$ifndef FPC_SYSTEM_HAS_MOVE}
+{$define FPC_SYSTEM_FPC_MOVE}
+{$endif FPC_SYSTEM_HAS_MOVE}
+
+{$ifdef FPC_SYSTEM_FPC_MOVE}
+const
+  cpu_has_edsp : boolean = false;
+  in_edsp_test : boolean = false;
+{$endif FPC_SYSTEM_FPC_MOVE}
+
+{$if not(defined(wince)) and not(defined(gba)) and not(defined(nds)) and not(defined(FPUSOFT)) and not(defined(FPULIBGCC))}
+{$define FPC_SYSTEM_HAS_SYSINITFPU}
+Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
+begin
+  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
+  asm
+    rfs r0
+    and r0,r0,#0xffe0ffff
+    orr r0,r0,#0x00070000
+    wfs r0
+  end;
+end;
+{$endif}
+
+procedure fpc_cpuinit;
+begin
+  SysInitFPU;
+end;
+
+{$ifdef wince}
+function _controlfp(new: DWORD; mask: DWORD): DWORD; cdecl; external 'coredll';
+
+{$define FPC_SYSTEM_HAS_SYSRESETFPU}
+Procedure SysResetFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
+begin
+  softfloat_exception_flags:=0;
+end;
+
+{$define FPC_SYSTEM_HAS_SYSINITFPU}
+Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
+begin
+  softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
+  { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
+  { FPU precision 64 bit, rounding to nearest, affine infinity }
+  _controlfp($000C0003, $030F031F);
+end;
+{$endif wince}
+
+{****************************************************************************
+                       stack frame related stuff
+****************************************************************************}
+
+{$IFNDEF INTERNAL_BACKTRACE}
+{$define FPC_SYSTEM_HAS_GET_FRAME}
+function get_frame:pointer;assembler;nostackframe;
+asm
+  mov    r0,r11
+end;
+{$ENDIF not INTERNAL_BACKTRACE}
+
+{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
+function get_caller_addr(framebp:pointer):pointer;assembler;
+asm
+  movs r0,r0
+  beq .Lg_a_null
+  ldr r0,[r0,#-4]
+.Lg_a_null:
+end;
+
+
+{$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
+function get_caller_frame(framebp:pointer):pointer;assembler;
+asm
+  movs r0,r0
+  beq .Lgnf_null
+  ldr r0,[r0,#-12]
+.Lgnf_null:
+end;
+
+
+{$define FPC_SYSTEM_HAS_SPTR}
+Function Sptr : pointer;assembler;
+asm
+  mov    r0,sp
+end;
+
+
+{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
+{$define FPC_SYSTEM_HAS_FILLCHAR}
+Procedure FillChar(var x;count:longint;value:byte);assembler;nostackframe;
+asm
+        // less than 0?
+        cmp r1,#0
+        it lt
+        movlt pc,lr
+        mov     r3,r0
+        cmp     r1,#8           // at least 8 bytes to do?
+        blt     .LFillchar2
+        orr r2,r2,r2,lsl #8
+        orr r2,r2,r2,lsl #16
+.LFillchar0:
+        tst     r3,#3           // aligned yet?
+        itt ne
+        strneb r2,[r3],#1
+        subne   r1,r1,#1
+        bne     .LFillchar0
+        mov     ip,r2
+.LFillchar1:
+        cmp     r1,#8           // 8 bytes still to do?
+        blt     .LFillchar2
+        stmia   r3!,{r2,ip}
+        sub     r1,r1,#8
+        cmp     r1,#8           // 8 bytes still to do?
+        blt     .LFillchar2
+        stmia   r3!,{r2,ip}
+        sub     r1,r1,#8
+        cmp     r1,#8           // 8 bytes still to do?
+        blt     .LFillchar2
+        stmia   r3!,{r2,ip}
+        sub     r1,r1,#8
+        cmp     r1,#8           // 8 bytes still to do?
+        itt ge
+        stmgeia r3!,{r2,ip}
+        subge   r1,r1,#8
+        bge     .LFillchar1
+.LFillchar2:
+        movs r1,r1              // anything left?
+        it eq
+        moveq pc,lr
+        rsb     r1,r1,#7
+        add     pc,pc,r1,lsl #2
+        mov     r0,r0
+        strb r2,[r3],#1
+        strb r2,[r3],#1
+        strb r2,[r3],#1
+        strb r2,[r3],#1
+        strb r2,[r3],#1
+        strb r2,[r3],#1
+        strb r2,[r3],#1
+        mov pc,lr
+end;
+{$endif FPC_SYSTEM_HAS_FILLCHAR}
+
+{$ifndef FPC_SYSTEM_HAS_MOVE}
+{$define FPC_SYSTEM_HAS_MOVE}
+procedure Move_pld(const source;var dest;count:longint);assembler;nostackframe;
+asm
+  pld [r0]
+  pld [r1]
+  // count <=0 ?
+  cmp r2,#0
+  it le
+  movle pc,lr
+  // overlap?
+  cmp r1,r0
+  bls .Lnooverlap
+  add r3,r0,r2
+  cmp r3,r1
+  bls .Lnooverlap
+  // overlap, copy backward
+.Loverlapped:
+  subs r2,r2,#1
+  ldrb r3,[r0,r2]
+  strb r3,[r1,r2]
+  bne .Loverlapped
+  mov pc,lr
+.Lnooverlap:
+  // less then 16 bytes to copy?
+  cmp r2,#8
+  // yes, the forget about the whole optimizations
+  // and do a bytewise copy
+  blt .Lbyteloop
+
+  // both aligned?
+  orr r3,r0,r1
+  tst r3,#3
+
+  bne .Lbyteloop
+(*
+  // yes, then align
+  // alignment to 4 byte boundries is enough
+  ldrb ip,[r0],#1
+  sub r2,r2,#1
+  stb ip,[r1],#1
+  tst r3,#2
+  bne .Ldifferentaligned
+  ldrh ip,[r0],#2
+  sub r2,r2,#2
+  sth ip,[r1],#2
+
+.Ldifferentaligned
+  // qword aligned?
+  orrs r3,r0,r1
+  tst r3,#7
+  bne .Ldwordloop
+*)
+  pld [r0,#32]
+  pld [r1,#32]
+.Ldwordloop:
+  sub r2,r2,#4
+  ldr r3,[r0],#4
+  // preload
+  pld [r0,#64]
+  pld [r1,#64]
+  cmp r2,#4
+  str r3,[r1],#4
+  bcs .Ldwordloop
+  cmp r2,#0
+  it eq
+  moveq pc,lr
+.Lbyteloop:
+  subs r2,r2,#1
+  ldrb r3,[r0],#1
+  strb r3,[r1],#1
+  bne .Lbyteloop
+  mov pc,lr
+end;
+
+procedure Move_blended(const source;var dest;count:longint);assembler;nostackframe;
+asm
+  // count <=0 ?
+  cmp r2,#0
+  it le
+  movle pc,lr
+  // overlap?
+  cmp r1,r0
+  bls .Lnooverlap
+  add r3,r0,r2
+  cmp r3,r1
+  bls .Lnooverlap
+  // overlap, copy backward
+.Loverlapped:
+  subs r2,r2,#1
+  ldrb r3,[r0,r2]
+  strb r3,[r1,r2]
+  bne .Loverlapped
+  mov pc,lr
+.Lnooverlap:
+  // less then 16 bytes to copy?
+  cmp r2,#8
+  // yes, the forget about the whole optimizations
+  // and do a bytewise copy
+  blt .Lbyteloop
+
+  // both aligned?
+  orr r3,r0,r1
+  tst r3,#3
+
+  bne .Lbyteloop
+(*
+  // yes, then align
+  // alignment to 4 byte boundries is enough
+  ldrb ip,[r0],#1
+  sub r2,r2,#1
+  stb ip,[r1],#1
+  tst r3,#2
+  bne .Ldifferentaligned
+  ldrh ip,[r0],#2
+  sub r2,r2,#2
+  sth ip,[r1],#2
+
+.Ldifferentaligned
+  // qword aligned?
+  orrs r3,r0,r1
+  tst r3,#7
+  bne .Ldwordloop
+*)
+.Ldwordloop:
+  sub r2,r2,#4
+  ldr r3,[r0],#4
+  cmp r2,#4
+  str r3,[r1],#4
+  bcs .Ldwordloop
+  cmp r2,#0
+  it eq
+  moveq pc,lr
+.Lbyteloop:
+  subs r2,r2,#1
+  ldrb r3,[r0],#1
+  strb r3,[r1],#1
+  bne .Lbyteloop
+  mov pc,lr
+end;
+
+
+const
+  moveproc : pointer = @move_blended;
+
+procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
+asm
+  ldr ip,.Lmoveproc
+  ldr pc,[ip]
+.Lmoveproc:
+  .long moveproc
+end;
+
+{$endif FPC_SYSTEM_HAS_MOVE}
+
+{****************************************************************************
+                                 String
+****************************************************************************}
+
+{$ifndef FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
+{$define FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
+
+{$ifndef FPC_STRTOSHORTSTRINGPROC}
+function fpc_shortstr_to_shortstr(len:longint;const sstr:shortstring):shortstring;assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
+{$else}
+procedure fpc_shortstr_to_shortstr(out res:shortstring;const sstr:shortstring);assembler;nostackframe;[public,alias: 'FPC_SHORTSTR_TO_SHORTSTR'];compilerproc;
+{$endif}
+{r0: __RESULT
+ r1: len
+ r2: sstr}
+
+asm
+    ldrb r12,[r2],#1
+    cmp  r12,r1
+    it gt
+    movgt r12,r1
+    strb r12,[r0],#1
+    cmp  r12,#6 (* 6 seems to be the break even point. *)
+    blt  .LStartTailCopy
+    (* Align destination on 32bits. This is the only place where unrolling
+       really seems to help, since in the common case, sstr is aligned on
+       32 bits, therefore in the common case we need to copy 3 bytes to
+       align, i.e. in the case of a loop, you wouldn't branch out early.*)
+    rsb  r3,r0,#0
+    ands  r3,r3,#3
+    sub  r12,r12,r3
+    itttt ne
+    ldrneb r1,[r2],#1
+    strneb r1,[r0],#1
+    subnes  r3,r3,#1
+    ldrneb r1,[r2],#1
+    itttt ne
+    strneb r1,[r0],#1
+    subnes  r3,r3,#1
+    ldrneb r1,[r2],#1
+    strneb r1,[r0],#1
+    it ne
+    subnes  r3,r3,#1
+.LDoneAlign:
+    (* Destination should be aligned now, but source might not be aligned,
+       if this is the case, do a byte-per-byte copy. *)
+    tst r2,#3
+    bne .LStartTailCopy
+    (* Start the main copy, 32 bit at a time. *)
+    movs r3,r12,lsr #2
+    and r12,r12,#3
+    beq  .LStartTailCopy
+.LNext4bytes:
+    (* Unrolling this loop would save a little bit of time for long strings
+       (>20 chars), but alas, it hurts for short strings and they are the
+       common case.*)
+    ittt ne
+    ldrne r1,[r2],#4
+    strne r1,[r0],#4
+    subnes  r3,r3,#1
+    bne .LNext4bytes
+.LStartTailCopy:
+    (* Do remaining bytes. *)
+    cmp r12,#0
+    beq .LDoneTail
+.LNextChar3:
+    ldrb r1,[r2],#1
+    strb r1,[r0],#1
+    subs  r12,r12,#1
+    bne .LNextChar3
+.LDoneTail:
+end;
+
+procedure fpc_shortstr_assign(len:longint;sstr,dstr:pointer);assembler;nostackframe;[public,alias:'FPC_SHORTSTR_ASSIGN'];compilerproc;
+
+{r0: len
+ r1: sstr
+ r2: dstr}
+
+asm
+    ldrb r12,[r1],#1
+    cmp  r12,r0
+    it gt
+    movgt r12,r0
+    strb r12,[r2],#1
+    cmp  r12,#6 (* 6 seems to be the break even point. *)
+    blt  .LStartTailCopy
+    (* Align destination on 32bits. This is the only place where unrolling
+       really seems to help, since in the common case, sstr is aligned on
+       32 bits, therefore in the common case we need to copy 3 bytes to
+       align, i.e. in the case of a loop, you wouldn't branch out early.*)
+    rsb  r3,r2,#0
+    ands  r3,r3,#3
+    sub  r12,r12,r3
+    itttt ne
+    ldrneb r0,[r1],#1
+    strneb r0,[r2],#1
+    subnes  r3,r3,#1
+    ldrneb r0,[r1],#1
+    itttt ne
+    strneb r0,[r2],#1
+    subnes  r3,r3,#1
+    ldrneb r0,[r1],#1
+    strneb r0,[r2],#1
+    it ne
+    subnes  r3,r3,#1
+.LDoneAlign:
+    (* Destination should be aligned now, but source might not be aligned,
+       if this is the case, do a byte-per-byte copy. *)
+    tst r1,#3
+    bne .LStartTailCopy
+    (* Start the main copy, 32 bit at a time. *)
+    movs r3,r12,lsr #2
+    and r12,r12,#3
+    beq  .LStartTailCopy
+.LNext4bytes:
+    (* Unrolling this loop would save a little bit of time for long strings
+       (>20 chars), but alas, it hurts for short strings and they are the
+       common case.*)
+    ittt ne
+    ldrne r0,[r1],#4
+    strne r0,[r2],#4
+    subnes  r3,r3,#1
+    bne .LNext4bytes
+.LStartTailCopy:
+    (* Do remaining bytes. *)
+    cmp r12,#0
+    beq .LDoneTail
+.LNextChar3:
+    ldrb r0,[r1],#1
+    strb r0,[r2],#1
+    subs  r12,r12,#1
+    bne .LNextChar3
+.LDoneTail:
+end;
+{$endif FPC_SYSTEM_HAS_FPC_SHORTSTR_ASSIGN}
+
+{$ifndef FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
+{$define FPC_SYSTEM_HAS_FPC_PCHAR_LENGTH}
+function fpc_Pchar_length(p:Pchar):longint;assembler;nostackframe;[public,alias:'FPC_PCHAR_LENGTH'];compilerproc;
+
+asm
+    cmp r0,#0
+    mov r1,r0
+    beq .Ldone
+.Lnextchar:
+    (*Are we aligned?*)
+    tst r1,#3
+    bne .Ltest_unaligned    (*No, do byte per byte.*)
+    ldr r3,.L01010101
+.Ltest_aligned:
+    (*Aligned, load 4 bytes at a time.*)
+    ldr r12,[r1],#4
+    (*Check wether r12 contains a 0 byte.*)
+    sub r2,r12,r3
+    mvn r12,r12
+    and r2,r2,r12
+    ands r2,r2,r3,lsl #7    (*r3 lsl 7 = $80808080*)
+    beq .Ltest_aligned      (*No 0 byte, repeat.*)
+    sub r1,r1,#4
+.Ltest_unaligned:
+    ldrb r12,[r1],#1
+    cmp r12,#1              (*r12<1 same as r12=0, but result in carry flag*)
+    bcs .Lnextchar
+    (*Dirty trick: we need to subtract 1 extra because we have counted the
+      terminating 0, due to the known carry flag sbc can do this.*)
+    sbc r0,r1,r0
+.Ldone:
+    mov pc,lr
+.L01010101:
+    .long 0x01010101
+end;
+{$endif}
+
+
+var
+  fpc_system_lock: longint; export name 'fpc_system_lock';
+
+function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
+asm
+// lock
+  ldr r3, .Lfpc_system_lock
+  mov r1, #1
+.Lloop:
+  ldrex r2, [r3]
+  cmp r2, #0
+  itt eq
+  strexeq r2, r1, [r3]
+  cmpeq r2, #0
+  bne .Lloop
+// do the job
+  ldr r1, [r0]
+  sub r1, r1, #1
+  str r1, [r0]
+  mov r0, r1
+// unlock and return
+  str r2, [r3]
+  mov pc, lr
+  
+.Lfpc_system_lock:
+  .long fpc_system_lock
+end;
+
+
+function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
+asm
+// lock
+  ldr r3, .Lfpc_system_lock
+  mov r1, #1
+.Lloop:
+  ldrex r2, [r3]
+  cmp r2, #0
+  itt eq
+  strexeq r2, r1, [r3]
+  cmpeq r2, #0
+  bne .Lloop
+// do the job
+  ldr r1, [r0]
+  add r1, r1, #1
+  str r1, [r0]
+  mov r0, r1
+// unlock and return
+  str r2, [r3]
+  mov pc, lr
+
+.Lfpc_system_lock:
+  .long fpc_system_lock
+end;
+
+
+function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
+asm
+
+// lock
+  ldr r3, .Lfpc_system_lock
+  mov r2, #1
+.Lloop:
+  ldrex r2, [r3]
+  cmp r2, #0
+  itt eq
+  strexeq r2, r12, [r3]
+  cmpeq r2, #0
+  bne .Lloop
+// do the job
+  ldr r2, [r0]
+  str r1, [r0]
+  mov r0, r2
+// unlock and return
+  mov r2, #0
+  str r2, [r3]
+  mov pc, lr
+
+.Lfpc_system_lock:
+  .long fpc_system_lock
+end;
+
+function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
+asm
+// lock
+  ldr r3, .Lfpc_system_lock
+  mov r2, #1
+.Lloop:
+  ldrex r2, [r3]
+  cmp r2, #0
+  itt eq
+  strexeq r2, r12, [r3]
+  cmpeq r2, #0
+  bne .Lloop
+// do the job
+  ldr r2, [r0]
+  add r1, r1, r2
+  str r1, [r0]
+  mov r0, r2
+// unlock and return
+  mov r2, #0
+  str r2, [r3]
+  mov pc, lr
+
+.Lfpc_system_lock:
+  .long fpc_system_lock
+end;
+
+
+function InterlockedCompareExchange(var Target: longint; NewValue: longint; Comperand: longint): longint; assembler; nostackframe;
+asm
+// lock
+  ldr r12, .Lfpc_system_lock
+  mov r3, #1
+.Lloop:
+  ldrex r2, [r12]
+  cmp r2, #0
+  itt eq
+  strexeq r2, r1, [r12]
+  cmpeq r2, #0
+  bne .Lloop
+// do the job
+  ldr r3, [r0]
+  cmp r3, r2
+  it eq
+  streq r1, [r0]
+  mov r0, r3
+// unlock and return
+  mov r3, #0
+  str r3, [r12]
+  mov pc, lr
+
+.Lfpc_system_lock:
+  .long fpc_system_lock
+end;
+
+{$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
+function declocked(var l: longint) : boolean; inline;
+begin
+  Result:=InterLockedDecrement(l) = 0;
+end;
+
+{$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
+procedure inclocked(var l: longint); inline;
+begin
+  InterLockedIncrement(l);
+end;
+
+procedure fpc_cpucodeinit;
+begin
+{$ifdef FPC_SYSTEM_FPC_MOVE}
+  cpu_has_edsp:=true;
+  in_edsp_test:=true;
+  asm
+    bic r0,sp,#7
+    ldrd r0,[r0]
+  end;
+  in_edsp_test:=false;
+  if cpu_has_edsp then
+    moveproc:=@move_pld
+  else
+    moveproc:=@move_blended;
+{$endif FPC_SYSTEM_FPC_MOVE}
+end;
+
+{include hand-optimized assembler division code}
+{$i divide.inc}
+

+ 626 - 0
rtl/embedded/arm/stm32f103.pp

@@ -0,0 +1,626 @@
+{
+Register definitions and utility code for STM32F103
+Preliminary startup code - TODO: interrupt handler variables
+
+Created by Jeppe Johansen 2009 - [email protected]
+}
+unit stm32f103;
+
+{$goto on}
+
+interface
+
+type
+ TBitvector32 = bitpacked array[0..31] of 0..1;
+
+{$PACKRECORDS 2}
+const
+ PeripheralBase 	= $40000000;
+ 
+ FSMCBase			= $60000000;
+ 
+ APB1Base 			= PeripheralBase;
+ APB2Base 			= PeripheralBase+$10000;
+ AHBBase 			= PeripheralBase+$20000;
+ 
+ { FSMC }
+ FSMCBank1NOR1		= FSMCBase+$00000000;
+ FSMCBank1NOR2		= FSMCBase+$04000000;
+ FSMCBank1NOR3		= FSMCBase+$08000000;
+ FSMCBank1NOR4		= FSMCBase+$0C000000;
+ 
+ FSMCBank1PSRAM1	= FSMCBase+$00000000;
+ FSMCBank1PSRAM2	= FSMCBase+$04000000;
+ FSMCBank1PSRAM3	= FSMCBase+$08000000;
+ FSMCBank1PSRAM4	= FSMCBase+$0C000000;
+ 
+ FSMCBank2NAND1	= FSMCBase+$10000000;
+ FSMCBank3NAND2	= FSMCBase+$20000000;
+ 
+ FSMCBank4PCCARD	= FSMCBase+$30000000;
+
+type
+ TTimerRegisters = record
+  CR1, res1,
+  CR2, res2,
+  SMCR, res3,
+  DIER, res4,
+  SR, res5,
+  EGR, res,
+  CCMR1, res6,
+  CCMR2, res7,
+  CCER, res8,
+  CNT, res9,
+  PSC, res10,
+  ARR, res11,
+  RCR, res12,
+  CCR1, res13,
+  CCR2, res14,
+  CCR3, res15,
+  CCR4, res16,
+  BDTR, res17,
+  DCR, res18,
+  DMAR, res19: Word;
+ end;
+ 
+ TRTCRegisters = record
+  CRH, res1,
+  CRL, res2,
+  PRLH, res3,
+  PRLL, res4,
+  DIVH, res5,
+  DIVL, res6,
+  CNTH, res7,
+  CNTL, res8,
+  ALRH, res9,
+  ALRL, res10: Word;
+ end;
+ 
+ TIWDGRegisters = record
+  KR, res1,
+  PR, res2,
+  RLR, res3,
+  SR, res4: word;
+ end;
+ 
+ TWWDGRegisters = record
+  CR, res2,
+  CFR, res3,
+  SR, res4: word;
+ end;
+ 
+ TSPIRegisters = record
+  CR1, res1,
+  CR2, res2,
+  SR, res3,
+  DR, res4,
+  CRCPR, res5,
+  RXCRCR, res6,
+  TXCRCR, res7,
+  I2SCFGR, res8,
+  I2SPR, res9: Word;
+ end;
+ 
+ TUSARTRegisters = record
+  SR, res1,
+  DR, res2,
+  BRR, res3,
+  CR1, res4,
+  CR2, res5,
+  CR3, res6,
+  GTPR, res7: Word;
+ end;
+ 
+ TI2CRegisters = record
+  CR1, res1,
+  CR2, res2,
+  OAR1, res3,
+  OAR2, res4,
+  DR, res5,
+  SR1, res6,
+  SR2, res7,
+  CCR, res8: word;
+  TRISE: byte;
+ end;
+ 
+ TUSBRegisters = record
+  EPR: array[0..7] of DWord;
+  
+  res: array[0..7] of dword;
+  
+  CNTR, res1,
+  ISTR, res2,
+  FNR, res3: Word;
+  DADDR: byte; res4: word; res5: byte;
+  BTABLE: Word;
+ end;
+ 
+ TUSBMem = packed array[0..511] of byte;
+ 
+ TCANMailbox = record
+  IR,
+  DTR,
+  DLR,
+  DHR: DWord;
+ end;
+ 
+ TCANRegisters = record
+  MCR,
+  MSR,
+  TSR,
+  RF0R,
+  RF1R,
+  IER,
+  ESR,
+  BTR: DWord;
+  
+  res5: array[$020..$17F] of byte;
+  
+  TX: array[0..2] of TCANMailbox;
+  RX: array[0..2] of TCANMailbox;
+  
+  res6: array[$1D0..$1FF] of byte;
+  
+  FMR,
+  FM1R,
+  res9: DWord;
+  FS1R, res10: word;
+  res11: DWord;
+  FFA1R, res12: word;
+  res13: DWord;
+  FA1R, res14: word;
+  res15: array[$220..$23F] of byte;
+  
+  FOR1,
+  FOR2: DWord;
+  
+  FB: array[1..13] of array[1..2] of DWord;
+ end;
+ 
+ TBKPRegisters = record
+  DR: array[1..10] of record data, res: word; end;
+  
+  RTCCR,
+  CR,
+  CSR,
+  res1,res2: DWord;
+  
+  DR2: array[11..42] of record data, res: word; end;
+ end;
+ 
+ TPwrRegisters = record
+  CR, res: word;
+  CSR: Word;
+ end;
+ 
+ TDACRegisters = record
+  CR,
+  SWTRIGR: DWord;
+  
+  DHR12R1, res2,
+  DHR12L1, res3,
+  DHR8R1, res4,
+  DHR12R2, res5,
+  DHR12L2, res6,
+  DHR8R2, res7: word;
+  
+  DHR12RD,
+  DHR12LD: DWord;
+  
+  DHR8RD, res8,
+  
+  DOR1, res9,
+  DOR2, res10: Word;
+ end;
+ 
+ TAFIORegisters = record
+  EVCR,
+  MAPR: DWord;
+  EXTICR: array[0..3] of DWord;
+ end;
+ 
+ TEXTIRegisters = record
+  IMR,
+  EMR,
+  RTSR,
+  FTSR,
+  SWIER,
+  PR: DWord;
+ end;
+ 
+ TPortRegisters = record
+  CRL,
+  CRH,
+  IDR,
+  ODR,
+  BSRR,
+  BRR,
+  LCKR: DWord;
+ end;
+ 
+ TADCRegisters = record
+  SR,
+  CR1,
+  CR2,
+  SMPR1,
+  SMPR2: DWord;
+  JOFR1, res2,
+  JOFR2, res3,
+  JOFR3, res4,
+  JOFR4, res5,
+  HTR, res6,
+  LTR, res7: word;
+  SQR1,
+  SQR2,
+  SQR3,
+  JSQR: DWord;
+  JDR1, res8,
+  JDR2, res9,
+  JDR3, res10,
+  JDR4, res11: Word;
+  DR: DWord;
+ end;
+ 
+ TSDIORegisters = record
+  POWER,
+  CLKCR,
+  ARG: DWord;
+  CMD, res3,
+  RESPCMD, res4: Word;
+  RESP1,
+  RESP2,
+  RESP3,
+  RESP4,
+  DTIMER,
+  DLEN: DWord;
+  DCTRL, res5: word;
+  DCOUNT,
+  STA,
+  ICR,
+  MASK,
+  FIFOCNT,
+  FIFO: DWord;
+ end;
+ 
+ TDMAChannel = record
+  CCR, res1,
+  CNDTR, res2: word;
+  CPAR,
+  CMAR,
+  res: DWord;
+ end;
+ 
+ TDMARegisters = record
+  ISR,
+  IFCR: DWord;
+  Channel: array[0..7] of TDMAChannel;
+ end;
+ 
+ TRCCRegisters = record
+  CR,
+  CFGR,
+  CIR,
+  APB2RSTR,
+  APB1RSTR,
+  AHBENR,
+  APB2ENR,
+  APB1ENR,
+  BDCR,
+  CSR: DWord;
+ end;
+ 
+ TCRCRegisters = record
+  DR: DWord;
+  IDR: byte; res1: word; res2: byte;
+  CR: byte;
+ end;
+ 
+ TFSMCRegisters = record
+  nothingyet: byte;
+ end;
+ 
+ TFlashRegisters = record
+  ACR,
+  KEYR,
+  OPTKEYR,
+  SR,
+  CR,
+  AR,
+  res,
+  OBR,
+  WRPR: DWord;
+ end;
+
+{$ALIGN 2}
+var
+ { Timers }
+ Timer1: TTimerRegisters 	absolute (APB2Base+$2C00);
+ Timer2: TTimerRegisters 	absolute (APB1Base+$0000);
+ Timer3: TTimerRegisters 	absolute (APB1Base+$0400);
+ Timer4: TTimerRegisters 	absolute (APB1Base+$0800);
+ Timer5: TTimerRegisters 	absolute (APB1Base+$0C00);
+ Timer6: TTimerRegisters 	absolute (APB1Base+$1000);
+ Timer7: TTimerRegisters 	absolute (APB1Base+$1400);
+ Timer8: TTimerRegisters 	absolute (APB2Base+$3400);
+ 
+ { RTC }
+ RTC: TRTCRegisters 			absolute (APB1Base+$2800);
+ 
+ { WDG }
+ WWDG: TWWDGRegisters 		absolute (APB1Base+$2C00);
+ IWDG: TIWDGRegisters 		absolute (APB1Base+$3000);
+ 
+ { SPI }
+ SPI1: TSPIRegisters			absolute (APB2Base+$3000);
+ SPI2: TSPIRegisters			absolute (APB1Base+$3800);
+ SPI3: TSPIRegisters			absolute (APB1Base+$3C00);
+ 
+ { USART/UART }
+ USART1: TUSARTRegisters	absolute (APB2Base+$3800);
+ USART2: TUSARTRegisters	absolute (APB1Base+$4400);
+ USART3: TUSARTRegisters	absolute (APB1Base+$4800);
+ UART4: TUSARTRegisters		absolute (APB1Base+$4C00);
+ UART5: TUSARTRegisters		absolute (APB1Base+$5000);
+ 
+ { I2C }
+ I2C1: TI2CRegisters			absolute (APB1Base+$5400);
+ I2C2: TI2CRegisters			absolute (APB1Base+$5800);
+ 
+ { USB }
+ USB: TUSBRegisters			absolute (APB1Base+$5C00);
+ USBMem: TUSBMem				absolute (APB1Base+$5C00);
+ 
+ { CAN }
+ CAN: TCANRegisters			absolute (APB1Base+$6800);
+ 
+ { BKP }
+ BKP: TBKPRegisters			absolute (APB1Base+$6C00);
+ 
+ { PWR }
+ PWR: TPwrRegisters			absolute (APB1Base+$7000);
+ 
+ { DAC }
+ DAC: TDACRegisters			absolute (APB1Base+$7400);
+ 
+ { GPIO }
+ AFIO: TAFIORegisters		absolute (APB2Base+$0);
+ EXTI: TEXTIRegisters		absolute (APB2Base+$0400);
+ 
+ PortA: TPortRegisters		absolute (APB2Base+$0800);
+ PortB: TPortRegisters		absolute (APB2Base+$0C00);
+ PortC: TPortRegisters		absolute (APB2Base+$1000);
+ PortD: TPortRegisters		absolute (APB2Base+$1400);
+ PortE: TPortRegisters		absolute (APB2Base+$1800);
+ PortF: TPortRegisters		absolute (APB2Base+$1C00);
+ PortG: TPortRegisters		absolute (APB2Base+$2000);
+ 
+ { ADC }
+ ADC1: TADCRegisters			absolute (APB2Base+$2400);
+ ADC2: TADCRegisters			absolute (APB2Base+$2800);
+ ADC3: TADCRegisters			absolute (APB2Base+$3C00);
+ 
+ { SDIO }
+ SDIO: TSDIORegisters		absolute (APB2Base+$8000);
+ 
+ { DMA }
+ DMA1: TDMARegisters			absolute (AHBBase+$0000);
+ DMA2: TDMARegisters			absolute (AHBBase+$0400);
+ 
+ { RCC }
+ RCC: TRCCRegisters			absolute (AHBBase+$1000);
+ 
+ { Flash }
+ Flash: TFlashRegisters		absolute (AHBBase+$2000);
+ 
+ { CRC }
+ CRC: TCRCRegisters			absolute (AHBBase+$3000);
+
+var
+	NMI_Handler,
+	HardFault_Handler,
+	MemManage_Handler,
+	BusFault_Handler,
+	UsageFault_Handler,
+	SWI_Handler,
+	DebugMonitor_Handler,
+  PendingSV_Handler,
+  Systick_Handler: pointer;
+	
+implementation
+
+var
+	_data: record end; external name '_data';
+	_edata: record end; external name '_edata';
+	_etext: record end; external name '_etext';
+	_bss_start: record end; external name '_bss_start';
+	_bss_end: record end; external name '_bss_end';
+	_stack_top: record end; external name '_stack_top';
+
+procedure PASCALMAIN; external name 'PASCALMAIN';
+
+procedure _FPC_haltproc; assembler; nostackframe; public name '_haltproc';
+asm
+.Lhalt:
+	b .Lhalt
+end;
+
+procedure _FPC_start; assembler; nostackframe;
+label _start;
+asm
+	.init
+	.align 16
+	
+	.long _stack_top	 			// First entry in NVIC table is the new stack pointer
+	.long _start
+	//b   _start					// Reset
+	.long _start+1
+	//b	 .LNMI_Addr				// Non maskable interrupt. The RCC Clock Security System (CSS) is linked to the NMI vector.
+	.long _start+1
+	//b	 .LHardFault_Addr		// All class of fault
+	.long _start+1
+	//b	 .LMemManage_Addr		// Memory management
+	.long _start+1
+	//b	 .LBusFault_Addr		// Pre-fetch fault, memory access fault
+	.long _start+1
+	//b	 .LUsageFault_Addr	// Undefined instruction or illegal state
+	.long _start+1
+	//nop							// Reserved
+	.long _start+1
+	//nop							// Reserved
+	.long _start+1
+	//nop							// Reserved
+	.long _start+1
+	//nop							// Reserved
+	.long _start+1
+	//b	 .LSWI_Addr				// Software Interrupt vector
+	.long _start+1
+	//b	 .LDebugMonitor_Addr	// Debug Monitor
+	.long _start+1
+	//nop							// Reserved
+	.long _start+1
+	//b	 .LPendingSV_Addr		//	Pendable request for system service
+	.long _start+1
+	//b	 .LSystick_Addr		// System tick timer
+	//17
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	//20
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+	.long .LDefaultHandler+1
+
+.LNMI_Addr:
+	ldr r0,.L1
+	ldr pc,[r0]
+.LHardFault_Addr:
+	ldr r0,.L2
+	ldr pc,[r0]
+.LMemManage_Addr:
+	ldr r0,.L3
+	ldr pc,[r0]
+.LBusFault_Addr:
+	ldr r0,.L4
+	ldr pc,[r0]
+.LUsageFault_Addr:
+	ldr r0,.L5
+	ldr pc,[r0]
+.LSWI_Addr:
+	ldr r0,.L6
+	ldr pc,[r0]
+.LDebugMonitor_Addr:
+	ldr r0,.L7
+	ldr pc,[r0]
+.LPendingSV_Addr:
+	ldr r0,.L8
+	ldr pc,[r0]
+.LSystick_Addr:
+	ldr r0,.L9
+	ldr pc,[r0]
+
+.L1:
+	.long NMI_Handler
+.L2:
+	.long HardFault_Handler
+.L3:
+	.long MemManage_Handler
+.L4:
+	.long BusFault_Handler
+.L5:
+	.long UsageFault_Handler
+.L6:
+	.long SWI_Handler
+.L7:
+	.long DebugMonitor_Handler
+.L8:
+	.long PendingSV_Handler
+.L9:
+	.long Systick_Handler   
+
+	.globl _start
+	.text
+_start:
+	
+	// Copy initialized data to ram
+	ldr r1,.L_etext
+	ldr r2,.L_data
+	ldr r3,.L_edata
+.Lcopyloop:
+	cmp r2,r3
+	ittt ls
+	ldrls r0,[r1],#4
+	strls r0,[r2],#4
+	bls .Lcopyloop
+
+	// clear onboard ram
+	ldr r1,.L_bss_start
+	ldr r2,.L_bss_end
+	mov r0,#0
+.Lzeroloop:
+	cmp r1,r2
+	itt ls
+	strls r0,[r1],#4
+	bls .Lzeroloop
+
+	b PASCALMAIN
+	b _FPC_haltproc
+
+.L_bss_start:
+	.long _bss_start
+.L_bss_end:
+	.long _bss_end
+.L_etext:
+	.long _etext
+.L_data:
+	.long _data
+.L_edata:
+	.long _edata
+.LDefaultHandlerAddr:
+	.long .LDefaultHandler
+	// default irq handler just returns
+.LDefaultHandler:
+	mov pc,r14
+end;
+
+end.
+

+ 5 - 1
rtl/inc/system.inc

@@ -172,7 +172,11 @@ function do_isdevice(handle:thandle):boolean;forward;
   {$ifdef SYSPROCDEFINED}
     {$Error Can't determine processor type !}
   {$endif}
-  {$i arm.inc}  { Case dependent, don't change }
+  {$if defined(CPUCORTEXM3) or defined(CPUARMV7M)}
+    {$i thumb2.inc}  { Case dependent, don't change }
+  {$else}
+    {$i arm.inc}  { Case dependent, don't change }
+  {$endif}
   {$define SYSPROCDEFINED}
 {$endif cpuarm}