Pārlūkot izejas kodu

Implemented UAL syntax support in the ARM assembler reader. Can be toggled with a field for now, but not implemented yet. Still using pre-UAL syntax for now.
Switched codegeneration of VFPv2 and VFPv3 to use UAL mnemonics and syntax.
Updated VFP code in RTL to use UAL syntax too.
Added preliminary ELF support for ARM.
Added support for linking of WinCE COFF files. Should work for with a standard ARMv4-I target.

git-svn-id: branches/laksen/armiw@29247 -

Jeppe Johansen 10 gadi atpakaļ
vecāks
revīzija
9e5979e8be

+ 1 - 1
compiler/arm/agarmgas.pas

@@ -314,7 +314,7 @@ unit agarmgas;
                // writeln(taicpu(hp).fileinfo.line);
 
                { LDM and STM use references as first operand but they are written like a register }
-               if (i=0) and (op in [A_LDM,A_STM,A_FSTM,A_FLDM]) then
+               if (i=0) and (op in [A_LDM,A_STM,A_FSTM,A_FLDM,A_VSTM,A_VLDM]) then
                  begin
                    case taicpu(hp).oper[0]^.typ of
                      top_ref:

+ 1 - 1
compiler/arm/aoptcpu.pas

@@ -2353,7 +2353,7 @@ Implementation
     { set of opcode which might or do write to memory }
     { TODO : extend armins.dat to contain r/w info }
     opcode_could_mem_write = [A_B,A_BL,A_BLX,A_BKPT,A_BX,A_STR,A_STRB,A_STRBT,
-                              A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD];
+                              A_STRH,A_STRT,A_STF,A_SFM,A_STM,A_FSTS,A_FSTD,A_VSTR,A_VSTM];
 
 
   { adjust the register live information when swapping the two instructions p and hp1,

+ 34 - 61
compiler/arm/cgcpu.pas

@@ -2033,11 +2033,11 @@ unit cgcpu;
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      { FSTMX is deprecated on ARMv6 and later }
-                     if (current_settings.cputype<cpu_armv6) then
+                     {if (current_settings.cputype<cpu_armv6) then
                        postfix:=PF_IAX
                      else
-                       postfix:=PF_IAD;
-                     list.concat(setoppostfix(taicpu.op_ref_regset(A_FSTM,ref,R_MMREGISTER,R_SUBFD,mmregs),postfix));
+                       postfix:=PF_IAD;}
+                     list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end;
                end;
              end;
@@ -2134,11 +2134,11 @@ unit cgcpu;
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
                       { FLDMX is deprecated on ARMv6 and later }
-                      if (current_settings.cputype<cpu_armv6) then
+                      {if (current_settings.cputype<cpu_armv6) then
                         mmpostfix:=PF_IAX
                       else
-                        mmpostfix:=PF_IAD;
-                      list.concat(setoppostfix(taicpu.op_ref_regset(A_FLDM,ref,R_MMREGISTER,R_SUBFD,mmregs),mmpostfix));
+                        mmpostfix:=PF_IAD;}
+                      list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                     end;
                 end;
               end;
@@ -2912,8 +2912,8 @@ unit cgcpu;
     function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
       const
         convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
-          (A_FCPYS,A_FCVTSD,A_NONE,A_NONE,A_NONE),
-          (A_FCVTDS,A_FCPYD,A_NONE,A_NONE,A_NONE),
+          (A_VMOV,A_VCVT,A_NONE,A_NONE,A_NONE),
+          (A_VCVT,A_VMOV,A_NONE,A_NONE,A_NONE),
           (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
           (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
           (A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
@@ -2924,33 +2924,30 @@ unit cgcpu;
       end;
 
 
+    function get_scalar_mm_prefix(fromsize,tosize : tcgsize) : TOpPostfix;
+      const
+        convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of TOpPostfix = (
+          (PF_F32,   PF_F32F64,PF_None,PF_None,PF_None),
+          (PF_F64F32,PF_F64,   PF_None,PF_None,PF_None),
+          (PF_None,  PF_None,  PF_None,PF_None,PF_None),
+          (PF_None,  PF_None,  PF_None,PF_None,PF_None),
+          (PF_None,  PF_None,  PF_None,PF_None,PF_None));
+      begin
+        result:=convertop[fromsize,tosize];
+      end;
+
+
     procedure tbasecgarm.a_loadmm_reg_reg(list: tasmlist; fromsize,tosize: tcgsize; reg1,reg2: tregister; shuffle: pmmshuffle);
       var
         instr: taicpu;
       begin
-        if shuffle=nil then
-          begin
-            if fromsize=tosize then
-              { needs correct size in case of spilling }
-              case fromsize of
-                OS_F32:
-                  instr:=taicpu.op_reg_reg(A_FCPYS,reg2,reg1);
-                OS_F64:
-                  instr:=taicpu.op_reg_reg(A_FCPYD,reg2,reg1);
-                else
-                  internalerror(2009112405);
-              end
-            else
-              internalerror(2009112406);
-          end
-        else if shufflescalar(shuffle) then
-          instr:=taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1)
+        if (shuffle=nil) or shufflescalar(shuffle) then
+          instr:=setoppostfix(taicpu.op_reg_reg(get_scalar_mm_op(tosize,fromsize),reg2,reg1),get_scalar_mm_prefix(tosize,fromsize))
         else
           internalerror(2009112407);
         list.concat(instr);
         case instr.opcode of
-          A_FCPYS,
-          A_FCPYD:
+          A_VMOV:
             add_move_instruction(instr);
         end;
       end;
@@ -2961,7 +2958,6 @@ unit cgcpu;
         intreg,
         tmpmmreg : tregister;
         reg64    : tregister64;
-        op       : tasmop;
       begin
         if assigned(shuffle) and
            not(shufflescalar(shuffle)) then
@@ -3010,15 +3006,7 @@ unit cgcpu;
           end
         else
           begin
-             case fromsize of
-               OS_F32:
-                 op:=A_FLDS;
-               OS_F64:
-                 op:=A_FLDD;
-               else
-                 internalerror(2009112415);
-             end;
-             handle_load_store(list,op,PF_None,tmpmmreg,ref);
+             handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
           end;
 
         if (tmpmmreg<>reg) then
@@ -3031,7 +3019,6 @@ unit cgcpu;
         intreg,
         tmpmmreg : tregister;
         reg64    : tregister64;
-        op       : tasmop;
       begin
         if assigned(shuffle) and
            not(shufflescalar(shuffle)) then
@@ -3083,15 +3070,7 @@ unit cgcpu;
           end
         else
           begin
-             case fromsize of
-               OS_F32:
-                 op:=A_FSTS;
-               OS_F64:
-                 op:=A_FSTD;
-               else
-                 internalerror(2009112418);
-             end;
-             handle_load_store(list,op,PF_None,tmpmmreg,ref);
+             handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
           end;
       end;
 
@@ -3107,7 +3086,7 @@ unit cgcpu;
         if assigned(shuffle) and
            not shufflescalar(shuffle) then
           internalerror(2009112516);
-        list.concat(taicpu.op_reg_reg(A_FMSR,mmreg,intreg));
+        list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
       end;
 
 
@@ -3122,7 +3101,7 @@ unit cgcpu;
         if assigned(shuffle) and
            not shufflescalar(shuffle) then
           internalerror(2009112514);
-        list.concat(taicpu.op_reg_reg(A_FMRS,intreg,mmreg));
+        list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
       end;
 
 
@@ -3144,9 +3123,9 @@ unit cgcpu;
                 a_load_const_reg(list,OS_32,0,tmpreg);
                 case size of
                   OS_F32:
-                    list.concat(taicpu.op_reg_reg(A_FMSR,dst,tmpreg));
+                    list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
                   OS_F64:
-                    list.concat(taicpu.op_reg_reg_reg(A_FMDRR,dst,tmpreg,tmpreg));
+                    list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
                   else
                     internalerror(2009112908);
                 end;
@@ -3408,7 +3387,7 @@ unit cgcpu;
           conversions }
         if (mmsize<>OS_F64) then
           internalerror(2009112405);
-        list.concat(taicpu.op_reg_reg_reg(A_FMDRR,mmreg,intreg.reglo,intreg.reghi));
+        list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
       end;
 
 
@@ -3418,7 +3397,7 @@ unit cgcpu;
           conversions }
         if (mmsize<>OS_F64) then
           internalerror(2009112406);
-        list.concat(taicpu.op_reg_reg_reg(A_FMRRD,intreg.reglo,intreg.reghi,mmreg));
+        list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
       end;
 
 
@@ -5276,19 +5255,13 @@ unit cgcpu;
 
     procedure tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
       begin
-        if fromsize=OS_F32 then
-          handle_load_store(list,A_VLDR,PF_F32,reg,ref)
-        else
-          handle_load_store(list,A_VLDR,PF_F64,reg,ref);
+        handle_load_store(list,A_VLDR,PF_None,reg,ref);
       end;
 
 
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
       begin
-        if fromsize=OS_F32 then
-          handle_load_store(list,A_VSTR,PF_F32,reg,ref)
-        else
-          handle_load_store(list,A_VSTR,PF_F64,reg,ref);
+        handle_load_store(list,A_VSTR,PF_None,reg,ref);
       end;
 
 

+ 20 - 0
compiler/arm/cpuelf.pas

@@ -325,8 +325,12 @@ implementation
           result:=R_ARM_ABS32;
         RELOC_RELATIVE:
           result:=R_ARM_REL32;
+        RELOC_RELATIVE_24,
+        RELOC_RELATIVE_24_THUMB:
+          result:=R_ARM_CALL;
       else
         result:=0;
+        writeln(objrel.typ);
         InternalError(2012110602);
       end;
     end;
@@ -926,7 +930,23 @@ implementation
         loadsection:       @elf_arm_loadSection;
       );
 
+    as_arm_elf32_info : tasminfo =
+       (
+         id     : as_arm_elf32;
+         idtxt  : 'ELF';
+         asmbin : '';
+         asmcmd : '';
+         supported_targets : [system_arm_embedded,system_arm_darwin,
+                              system_arm_linux,system_arm_gba,
+                              system_arm_nds];
+         flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
+         labelprefix : '.L';
+         comment : '';
+         dollarsign: '$';
+       );
+
 initialization
+  RegisterAssembler(as_arm_elf32_info,TElfAssembler);
   ElfTarget:=elf_target_arm;
   ElfExeOutputClass:=TElfExeOutputARM;
 

+ 23 - 28
compiler/arm/narmadd.pas

@@ -162,6 +162,7 @@ interface
       var
         op : TAsmOp;
         singleprec: boolean;
+        pf: TOpPostfix;
       begin
         pass_left_right;
         if (nf_swapped in flags) then
@@ -210,33 +211,25 @@ interface
               location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
 
               singleprec:=tfloatdef(left.resultdef).floattype=s32real;
+              if singleprec then
+                pf:=PF_F32
+              else
+                pf:=PF_F64;
               case nodetype of
                 addn :
-                  if singleprec then
-                    op:=A_FADDS
-                  else
-                    op:=A_FADDD;
+                  op:=A_VADD;
                 muln :
-                  if singleprec then
-                    op:=A_FMULS
-                  else
-                    op:=A_FMULD;
+                  op:=A_VMUL;
                 subn :
-                  if singleprec then
-                    op:=A_FSUBS
-                  else
-                    op:=A_FSUBD;
+                  op:=A_VSUB;
                 slashn :
-                  if singleprec then
-                    op:=A_FDIVS
-                  else
-                    op:=A_FDIVD;
+                  op:=A_VDIV;
                 else
                   internalerror(2009111401);
               end;
 
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
-                 location.register,left.location.register,right.location.register));
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op,
+                 location.register,left.location.register,right.location.register),pf));
             end;
           fpu_fpv4_s16:
             begin
@@ -275,6 +268,7 @@ interface
     procedure tarmaddnode.second_cmpfloat;
       var
         op: TAsmOp;
+        pf: TOpPostfix;
       begin
         pass_left_right;
         if (nf_swapped in flags) then
@@ -310,19 +304,20 @@ interface
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
+              if nodetype in [equaln,unequaln] then
+                op:=A_VCMP
+              else
+                op:=A_VCMPE;
+
               if (tfloatdef(left.resultdef).floattype=s32real) then
-                if nodetype in [equaln,unequaln] then
-                  op:=A_FCMPS
-                 else
-                   op:=A_FCMPES
-              else if nodetype in [equaln,unequaln] then
-                op:=A_FCMPD
+                pf:=PF_F32
               else
-                op:=A_FCMPED;
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
-                left.location.register,right.location.register));
+                pf:=PF_F64;
+
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(op,
+                left.location.register,right.location.register), pf));
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-              current_asmdata.CurrAsmList.concat(taicpu.op_none(A_FMSTAT));
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               location.resflags:=GetFpuResFlags;
             end;
           fpu_fpv4_s16:

+ 6 - 5
compiler/arm/narmcnv.pas

@@ -170,9 +170,9 @@ implementation
 
     procedure tarmtypeconvnode.second_int_to_real;
       const
-        signedprec2vfpop: array[boolean,OS_F32..OS_F64] of tasmop =
-          ((A_FUITOS,A_FUITOD),
-           (A_FSITOS,A_FSITOD));
+        signedprec2vfppf: array[boolean,OS_F32..OS_F64] of toppostfix =
+          ((PF_F32U32,PF_F64U32),
+           (PF_F32S32,PF_F64S32));
       var
         instr : taicpu;
         href : treference;
@@ -253,8 +253,9 @@ implementation
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size)
               else
                 location.register:=left.location.register;
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(
-                signedprec2vfpop[signed,location.size],location.register,left.location.register));
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,
+                location.register,left.location.register),
+                signedprec2vfppf[signed,location.size]));
             end;
           fpu_fpv4_s16:
             begin

+ 9 - 16
compiler/arm/narminl.pas

@@ -234,7 +234,7 @@ implementation
     procedure tarminlinenode.second_abs_real;
       var
         singleprec: boolean;
-        op: TAsmOp;
+        pf: TOpPostfix;
       begin
         load_fpu_location(singleprec);
         case current_settings.fputype of
@@ -247,10 +247,10 @@ implementation
           fpu_vfpv3_d16:
             begin
               if singleprec then
-                op:=A_FABSS
+                pf:=PF_F32
               else
-                op:=A_FABSD;
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
+                pf:=PF_F64;
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
             end;
           fpu_fpv4_s16:
             current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
@@ -270,7 +270,7 @@ implementation
     procedure tarminlinenode.second_sqr_real;
       var
         singleprec: boolean;
-        op: TAsmOp;
+        pf: TOpPostfix;
       begin
         load_fpu_location(singleprec);
         case current_settings.fputype of
@@ -283,10 +283,10 @@ implementation
           fpu_vfpv3_d16:
             begin
               if singleprec then
-                op:=A_FMULS
+                pf:=PF_F32
               else
-                op:=A_FMULD;
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,left.location.register,left.location.register));
+                pf:=PF_F64;
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
             end;
           fpu_fpv4_s16:
             current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
@@ -309,14 +309,7 @@ implementation
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
           fpu_vfpv2,
           fpu_vfpv3,
-          fpu_vfpv3_d16:
-            begin
-              if singleprec then
-                op:=A_FSQRTS
-              else
-                op:=A_FSQRTD;
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
-            end;
+          fpu_vfpv3_d16,
           fpu_fpv4_s16:
             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register));
         else

+ 1 - 5
compiler/arm/narmmat.pas

@@ -432,11 +432,7 @@ implementation
               location:=left.location;
               if (left.location.loc=LOC_CMMREGISTER) then
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
-              if (location.size=OS_F32) then
-                op:=A_FNEGS
-              else
-                op:=A_FNEGD;
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VNEG,
                 location.register,left.location.register));
             end;
           fpu_fpv4_s16:

+ 168 - 67
compiler/arm/raarmgas.pas

@@ -30,7 +30,10 @@ Unit raarmgas;
       cpubase;
 
     type
+      tarmsyntax = (asm_legacy, asm_unified);
+
       tarmattreader = class(tattreader)
+        asmsyntax : tarmsyntax;
         actoppostfix : TOpPostfix;
         actwideformat : boolean;
         function is_asmopcode(const s: string):boolean;override;
@@ -66,12 +69,12 @@ Unit raarmgas;
     function tarmattreader.is_register(const s:string):boolean;
       type
         treg2str = record
-          name : string[2];
+          name : string[3];
           reg : tregister;
         end;
 
       const
-        extraregs : array[0..19] of treg2str = (
+        extraregs : array[0..19+16] of treg2str = (
           (name: 'A1'; reg : NR_R0),
           (name: 'A2'; reg : NR_R1),
           (name: 'A3'; reg : NR_R2),
@@ -91,7 +94,25 @@ Unit raarmgas;
           (name: 'IP'; reg : NR_R12),
           (name: 'SP'; reg : NR_R13),
           (name: 'LR'; reg : NR_R14),
-          (name: 'PC'; reg : NR_R15));
+          (name: 'PC'; reg : NR_R15),
+
+          (name: 'C0'; reg : NR_CR0),
+          (name: 'C1'; reg : NR_CR1),
+          (name: 'C2'; reg : NR_CR2),
+          (name: 'C3'; reg : NR_CR3),
+          (name: 'C4'; reg : NR_CR4),
+          (name: 'C5'; reg : NR_CR5),
+          (name: 'C6'; reg : NR_CR6),
+          (name: 'C7'; reg : NR_CR7),
+          (name: 'C8'; reg : NR_CR8),
+          (name: 'C9'; reg : NR_CR9),
+          (name: 'C10'; reg : NR_CR10),
+          (name: 'C11'; reg : NR_CR11),
+          (name: 'C12'; reg : NR_CR12),
+          (name: 'C13'; reg : NR_CR13),
+          (name: 'C14'; reg : NR_CR14),
+          (name: 'C15'; reg : NR_CR15)
+          );
 
       var
         i : longint;
@@ -101,7 +122,7 @@ Unit raarmgas;
         { reg found?
           possible aliases are always 2 char
         }
-        if result or (length(s)<>2) then
+        if result or (not (length(s) in [2,3])) then
           exit;
         for i:=low(extraregs) to high(extraregs) do
           begin
@@ -241,7 +262,9 @@ Unit raarmgas;
                           do_error;
                         oper.opr.ref.shiftimm := shift;
                         test_end(require_rbracket);
-                      end;
+                      end
+                    else
+                      test_end(require_rbracket);
                    end
                  else
                    begin
@@ -785,6 +808,18 @@ Unit raarmgas;
           end;
 
 
+      function getregsetindex(reg: tregister): integer;
+        begin
+          if getsubreg(reg)=R_SUBFS then
+            begin
+              result:=getsupreg(reg)*2;
+              if result>32 then
+                result:=result-63;
+            end
+          else
+            result:=getsupreg(reg);
+        end;
+
       var
         tempreg : tregister;
         ireg : tsuperregister;
@@ -958,7 +993,7 @@ Unit raarmgas;
                   oper.opr.typ:=OPR_REGISTER;
                   oper.opr.reg:=tempreg;
                 end
-              else if (actasmtoken=AS_NOT) and (actopcode in [A_LDM,A_STM,A_FLDM,A_FSTM]) then
+              else if (actasmtoken=AS_NOT) and (actopcode in [A_LDM,A_STM,A_FLDM,A_FSTM,A_VLDM,A_VSTM]) then
                 begin
                   consume(AS_NOT);
                   oper.opr.typ:=OPR_REFERENCE;
@@ -976,11 +1011,11 @@ Unit raarmgas;
               registerset:=[];
               regtype:=R_INVALIDREGISTER;
               subreg:=R_SUBNONE;
-              while true do
+              while actasmtoken<>AS_RSBRACKET do
                 begin
                   if actasmtoken=AS_REGISTER then
                     begin
-                      include(registerset,getsupreg(actasmregister));
+                      include(registerset,getregsetindex(actasmregister));
                       if regtype<>R_INVALIDREGISTER then
                         begin
                           if (getregtype(actasmregister)<>regtype) or
@@ -997,7 +1032,7 @@ Unit raarmgas;
                       if actasmtoken=AS_MINUS then
                         begin
                           consume(AS_MINUS);
-                          for ireg:=getsupreg(tempreg) to getsupreg(actasmregister) do
+                          for ireg:=getregsetindex(tempreg) to getregsetindex(actasmregister) do
                             include(registerset,ireg);
                           consume(AS_REGISTER);
                         end;
@@ -1137,8 +1172,16 @@ Unit raarmgas;
           case actasmtoken of
             AS_COMMA: { Operand delimiter }
               Begin
-                if ((instr.opcode in [A_MOV, A_MVN, A_CMP, A_CMN, A_TST, A_TEQ]) and (operandnum=2)) or
-                  ((operandnum=3) and not(instr.opcode in [A_UMLAL,A_UMULL,A_SMLAL,A_SMULL,A_MLA,A_MRC,A_MCR,A_MCRR,A_MRRC])) then
+                if ((instr.opcode in [A_MOV,A_MVN,A_CMP,A_CMN,A_TST,A_TEQ,
+                                      A_UXTB,A_UXTH,A_UXTB16,
+                                      A_SXTB,A_SXTH,A_SXTB16]) and
+                    (operandnum=2)) or
+                  ((operandnum=3) and not(instr.opcode in [A_UMLAL,A_UMULL,A_SMLAL,A_SMULL,A_MLA,A_UMAAL,A_MLS,
+                                                           A_SMLABB,A_SMLABT,A_SMLATB,A_SMLATT,A_SMMLA,A_SMMLS,A_SMLAD,A_SMLALD,A_SMLSD,
+                                                           A_SMLALBB,A_SMLALBT,A_SMLALTB,A_SMLALTT,A_SMLSLD,
+                                                           A_MRC,A_MCR,A_MCRR,A_MRRC,A_STREXD,A_STRD,
+                                                           A_VMOV,
+                                                           A_SBFX,A_UBFX,A_BFI])) then
                   begin
                     Consume(AS_COMMA);
                     if not(TryBuildShifterOp(instr.Operands[operandnum+1] as tarmoperand)) then
@@ -1174,25 +1217,34 @@ Unit raarmgas;
 
       const
         { sorted by length so longer postfixes will match first }
-        postfix2strsorted : array[1..31] of string[3] = (
-          'IAD','DBD','FDD','EAD',
-          'IAS','DBS','FDS','EAS',
-          'IAX','DBX','FDX','EAX',
-          'EP','SB','BT','SH',
-          'IA','IB','DA','DB','FD','FA','ED','EA',
-          'B','D','E','P','T','H','S');
-
-        postfixsorted : array[1..31] of TOpPostfix = (
-          PF_IAD,PF_DBD,PF_FDD,PF_EAD,
-          PF_IAS,PF_DBS,PF_FDS,PF_EAS,
-          PF_IAX,PF_DBX,PF_FDX,PF_EAX,
-          PF_EP,PF_SB,PF_BT,PF_SH,
-          PF_IA,PF_IB,PF_DA,PF_DB,PF_FD,PF_FA,PF_ED,PF_EA,
-          PF_B,PF_D,PF_E,PF_P,PF_T,PF_H,PF_S);
+        postfix2strsorted : array[1..70] of string[9] = (
+          '.F32.S32','.F32.U32','.S32.F32','.U32.F32','.F64.S32','.F64.U32','.S32.F64','.U32.F64',
+          '.F32.S16','.F32.U16','.S16.F32','.U16.F32','.F64.S16','.F64.U16','.S16.F64','.U16.F64',
+          '.F32.F64','.F64.F32',
+          '.I16','.I32','.I64','.S16','.S32','.S64','.U16','.U32','.U64','.F32','.F64',
+          'IAD','DBD','FDD','EAD','IAS','DBS','FDS','EAS','IAX','DBX','FDX','EAX',
+          '.16','.32','.64','.I8','.S8','.U8','.P8',
+          'EP','SB','BT','SH','IA','IB','DA','DB','FD','FA','ED','EA',
+          '.8','S','D','E','P','X','R','B','H','T');
+
+        postfixsorted : array[1..70] of TOpPostfix = (
+          PF_F32S32,PF_F32U32,PF_S32F32,PF_U32F32,PF_F64S32,PF_F64U32,PF_S32F64,PF_U32F64,
+          PF_F32S16,PF_F32U16,PF_S16F32,PF_U16F32,PF_F64S16,PF_F64U16,PF_S16F64,PF_U16F64,
+          PF_F32F64,PF_F64F32,
+          PF_I16,PF_I32,
+          PF_I64,PF_S16,PF_S32,PF_S64,PF_U16,PF_U32,PF_U64,PF_F32,
+          PF_F64,PF_IAD,PF_DBD,PF_FDD,PF_EAD,
+          PF_IAS,PF_DBS,PF_FDS,PF_EAS,PF_IAX,
+          PF_DBX,PF_FDX,PF_EAX,PF_16,PF_32,
+          PF_64,PF_I8,PF_S8,PF_U8,PF_P8,
+          PF_EP,PF_SB,PF_BT,PF_SH,PF_IA,
+          PF_IB,PF_DA,PF_DB,PF_FD,PF_FA,
+          PF_ED,PF_EA,PF_8,PF_S,PF_D,PF_E,
+          PF_P,PF_X,PF_R,PF_B,PF_H,PF_T);
 
       var
-        j  : longint;
-        hs : string;
+        j, j2 : longint;
+        hs,hs2 : string;
         maxlen : longint;
         icond : tasmcond;
       Begin
@@ -1222,59 +1274,106 @@ Unit raarmgas;
           end;
         maxlen:=max(length(hs),5);
         actopcode:=A_NONE;
-        for j:=maxlen downto 1 do
+        j2:=maxlen;
+        hs2:=hs;
+        while j2>1 do
           begin
-            actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j))));
-            if actopcode<>A_NONE then
+            hs:=hs2;
+            while j2>=1 do
               begin
-                actasmtoken:=AS_OPCODE;
-                { strip op code }
-                delete(hs,1,j);
-                break;
+                actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j2))));
+                if actopcode<>A_NONE then
+                  begin
+                    actasmtoken:=AS_OPCODE;
+                    { strip op code }
+                    delete(hs,1,j2);
+                    dec(j2);
+                    break;
+                  end;
+                dec(j2);
               end;
-          end;
-        if actopcode=A_NONE then
-          exit;
+            if actopcode=A_NONE then
+              exit;
 
-        { search for condition, conditions are always 2 chars }
-        if length(hs)>1 then
-          begin
-            for icond:=low(tasmcond) to high(tasmcond) do
+            if asmsyntax=asm_unified then
               begin
-                if copy(hs,1,2)=uppercond2str[icond] then
+                { check for postfix }
+                if (length(hs)>0) and (actoppostfix=PF_None) then
                   begin
-                    actcondition:=icond;
-                    { strip condition }
-                    delete(hs,1,2);
-                    break;
+                    for j:=low(postfixsorted) to high(postfixsorted) do
+                      begin
+                        if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                          begin
+                            if not ((length(hs)-length(postfix2strsorted[j])) in [0,2,4]) then
+                              continue;
+
+                            actoppostfix:=postfixsorted[j];
+                            { strip postfix }
+                            delete(hs,1,length(postfix2strsorted[j]));
+                            break;
+                          end;
+                      end;
                   end;
-              end;
-          end;
-        { check for postfix }
-        if length(hs)>0 then
-          begin
-            for j:=low(postfixsorted) to high(postfixsorted) do
+                { search for condition, conditions are always 2 chars }
+                if length(hs)>1 then
+                  begin
+                    for icond:=low(tasmcond) to high(tasmcond) do
+                      begin
+                        if copy(hs,1,2)=uppercond2str[icond] then
+                          begin
+                            actcondition:=icond;
+                            { strip condition }
+                            delete(hs,1,2);
+                            break;
+                          end;
+                      end;
+                  end;
+              end
+            else
               begin
-                if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                { search for condition, conditions are always 2 chars }
+                if length(hs)>1 then
                   begin
-                    actoppostfix:=postfixsorted[j];
-                    { strip postfix }
-                    delete(hs,1,length(postfix2strsorted[j]));
-                    break;
+                    for icond:=low(tasmcond) to high(tasmcond) do
+                      begin
+                        if copy(hs,1,2)=uppercond2str[icond] then
+                          begin
+                            actcondition:=icond;
+                            { strip condition }
+                            delete(hs,1,2);
+                            break;
+                          end;
+                      end;
+                  end;
+                { check for postfix }
+                if (length(hs)>0) and (actoppostfix=PF_None) then
+                  begin
+                    for j:=low(postfixsorted) to high(postfixsorted) do
+                      begin
+                        if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                          begin
+                            actoppostfix:=postfixsorted[j];
+                            { strip postfix }
+                            delete(hs,1,length(postfix2strsorted[j]));
+                            break;
+                          end;
+                      end;
                   end;
               end;
-          end;
-        { check for format postfix }
-        if length(hs)>0 then
-          begin
-            if upcase(copy(hs,1,2)) = '.W' then
+            { check for format postfix }
+            if length(hs)>0 then
               begin
-                actwideformat:=true;
-                delete(hs,1,2);
+                if copy(hs,1,2) = '.W' then
+                  begin
+                    actwideformat:=true;
+                    delete(hs,1,2);
+                  end;
               end;
+            { if we stripped all postfixes, it's a valid opcode }
+            is_asmopcode:=length(hs)=0;
+            if is_asmopcode = true then
+              break;
           end;
-        { if we stripped all postfixes, it's a valid opcode }
-        is_asmopcode:=length(hs)=0;
       end;
 
 
@@ -1337,6 +1436,8 @@ Unit raarmgas;
         instr.Free;
         actoppostfix:=PF_None;
         actwideformat:=false;
+
+        asmsyntax:=asm_legacy;
       end;
 
 

+ 1 - 0
compiler/ogbase.pas

@@ -68,6 +68,7 @@ interface
 {$endif i386}
 {$ifdef arm}
          RELOC_RELATIVE_24,
+         RELOC_RELATIVE_24_THUMB,
 {$endif arm}
          { Relative relocation }
          RELOC_RELATIVE,

+ 33 - 7
compiler/ogcoff.pas

@@ -261,9 +261,9 @@ interface
        TLSDIR_SIZE      = $18;
 {$endif i386}
 {$ifdef arm}
-       COFF_MAGIC       = $1c0;
        COFF_OPT_MAGIC   = $10b;
        TLSDIR_SIZE      = $18;
+       function COFF_MAGIC: word;
 {$endif arm}
 {$ifdef x86_64}
        COFF_MAGIC       = $8664;
@@ -422,6 +422,11 @@ implementation
        IMAGE_REL_ARM_BLX11         = $0009;
        IMAGE_REL_ARM_SECTION       = $000E;     { Section table index }
        IMAGE_REL_ARM_SECREL        = $000F;     { Offset within section }
+       IMAGE_REL_ARM_MOV32A        = $0010;     { 32-bit VA applied to MOVW+MOVT pair, added to existing imm (ARM) }
+       IMAGE_REL_ARM_MOV32T        = $0011;     { 32-bit VA applied to MOVW+MOVT pair, added to existing imm (THUMB) }
+       IMAGE_REL_ARM_BRANCH20T     = $0012;     { Thumb: 20 most significant bits of 32 bit B cond instruction }
+       IMAGE_REL_ARM_BRANCH24T     = $0014;     { Thumb: 24 most significant bits of 32 bit B uncond instruction }
+       IMAGE_REL_ARM_BLX23T        = $0015;     { 23 most significant bits of 32 bit BL/BLX instruction. Transformed to BLX if target is Thumb }
 {$endif arm}
 
 {$ifdef i386}
@@ -913,6 +918,14 @@ const pemagic : array[0..3] of byte = (
                     if (relocval<>$3f) and (relocval<>0) then
                       internalerror(200606085);  { offset overflow }
                   end;
+                RELOC_RELATIVE_24_THUMB:
+                  begin
+                    relocval:=longint(relocval - objsec.mempos - objreloc.dataoffset) shr 1 - 4;
+                    address:=address or ((relocval shr 1) and $ffffff) or ((relocval and 1) shl 24);
+                    relocval:=relocval shr 25;
+                    if (relocval<>$3f) and (relocval<>0) then
+                      internalerror(200606085);  { offset overflow }
+                  end;
 {$endif arm}
 {$ifdef x86_64}
                 { 64 bit coff only }
@@ -1009,10 +1022,9 @@ const pemagic : array[0..3] of byte = (
           result:=aname
         else
           begin
-            { non-PECOFF targets lack rodata support.
-              TODO: WinCE likely supports it, but needs testing. }
+            { non-PECOFF targets lack rodata support }
             if (atype in [sec_rodata,sec_rodata_norel]) and
-               not (target_info.system in systems_windows) then
+               not (target_info.system in systems_all_windows) then
               atype:=sec_data;
             secname:=coffsecnames[atype];
             if create_smartlink_sections and
@@ -1038,8 +1050,7 @@ const pemagic : array[0..3] of byte = (
       begin
         if (aType in [sec_rodata,sec_rodata_norel]) then
           begin
-            { TODO: WinCE needs testing }
-            if (target_info.system in systems_windows) then
+            if (target_info.system in systems_all_windows) then
               aType:=sec_rodata_norel
             else
               aType:=sec_data;
@@ -1279,6 +1290,10 @@ const pemagic : array[0..3] of byte = (
                 rel.reloctype:=IMAGE_REL_ARM_ADDR32NB;
               RELOC_SECREL32 :
                 rel.reloctype:=IMAGE_REL_ARM_SECREL;
+              RELOC_RELATIVE_24 :
+                rel.reloctype:=IMAGE_REL_ARM_BRANCH24;
+              RELOC_RELATIVE_24_THUMB:
+                rel.reloctype:=IMAGE_REL_ARM_BLX24;
 {$endif arm}
 {$ifdef i386}
               RELOC_RELATIVE :
@@ -1597,6 +1612,8 @@ const pemagic : array[0..3] of byte = (
                rel_type:=RELOC_RELATIVE_24;
              IMAGE_REL_ARM_SECREL:
                rel_type:=RELOC_SECREL32;
+             IMAGE_REL_ARM_BLX24:
+               rel_type:=RELOC_RELATIVE_24_THUMB;
 {$endif arm}
 {$ifdef i386}
              IMAGE_REL_I386_PCRLONG :
@@ -2962,6 +2979,15 @@ const pemagic : array[0..3] of byte = (
         DLLReader.Free;
       end;
 
+{$ifdef arm}
+    function COFF_MAGIC: word;
+      begin
+        if GenerateThumb2Code and (current_settings.cputype>=cpu_armv7) then
+          COFF_MAGIC:=$1c4 // IMAGE_FILE_MACHINE_ARMNT
+        else
+          COFF_MAGIC:=$1c0; // IMAGE_FILE_MACHINE_ARM
+      end;
+{$endif arm}
 
 {*****************************************************************************
                                   Initialize
@@ -3045,7 +3071,7 @@ const pemagic : array[0..3] of byte = (
             asmbin : '';
             asmcmd : '';
             supported_targets : [system_arm_wince];
-            flags : [af_outputbinary];
+            flags : [af_outputbinary,af_smartlink_sections];
             labelprefix : '.L';
             comment : '';
             dollarsign: '$';

+ 15 - 3
compiler/raatt.pas

@@ -290,12 +290,24 @@ unit raatt;
            end;
 {$endif POWERPC}
 {$if defined(ARM)}
-           { Thumb-2 instructions can have a .W postfix to indicate 32bit instructions
+           {
+             Thumb-2 instructions can have a .W postfix to indicate 32bit instructions,
+             Also in unified syntax sizes and types are indicated with something like a .<dt> prefix for example
            }
            case c of
              '.':
                begin
-                 actasmpattern:=actasmpattern+c;
+                 if len>1 then
+                   begin
+                     while c in ['A'..'Z','a'..'z','0'..'9','_','.'] do
+                       begin
+                         inc(len);
+                         actasmpattern[len]:=c;
+                         c:=current_scanner.asmgetchar;
+                       end;
+                     actasmpattern[0]:=chr(len);
+                   end;
+                 {actasmpattern:=actasmpattern+c;
                  c:=current_scanner.asmgetchar;
 
                  if upcase(c) = 'W' then
@@ -304,7 +316,7 @@ unit raatt;
                      c:=current_scanner.asmgetchar;
                    end
                  else
-                   internalerror(2010122301);
+                   internalerror(2010122301);}
                end
            end;
 {$endif ARM}

+ 1 - 0
compiler/systems.inc

@@ -211,6 +211,7 @@
              ,as_i8086_nasm
              ,as_i8086_nasmobj
              ,as_gas_powerpc_xcoff
+             ,as_arm_elf32
        );
 
        tlink = (ld_none,

+ 4 - 4
compiler/systems/i_linux.pas

@@ -582,7 +582,7 @@ unit i_linux;
             Cprefix      : '';
             newline      : #10;
             dirsep       : '/';
-            assem        : as_gas;
+            assem        : as_arm_elf32;
             assemextern  : as_gas;
             link         : ld_none;
             linkextern   : ld_linux;
@@ -648,7 +648,7 @@ unit i_linux;
             Cprefix      : '';
             newline      : #10;
             dirsep       : '/';
-            assem        : as_gas;
+            assem        : as_arm_elf32;
             assemextern  : as_gas;
             link         : ld_none;
             linkextern   : ld_linux;
@@ -714,7 +714,7 @@ unit i_linux;
             Cprefix      : '';
             newline      : #10;
             dirsep       : '/';
-            assem        : as_gas;
+            assem        : as_arm_elf32;
             assemextern  : as_gas;
             link         : ld_none;
             linkextern   : ld_linux;
@@ -778,7 +778,7 @@ unit i_linux;
             Cprefix      : '';
             newline      : #10;
             dirsep       : '/';
-            assem        : as_gas;
+            assem        : as_arm_elf32;
             assemextern  : as_gas;
             link         : ld_none;
             linkextern   : ld_linux;

+ 1 - 1
compiler/systems/i_win.pas

@@ -198,7 +198,7 @@ unit i_win;
             Cprefix      : '';
             newline      : #13#10;
             dirsep       : '\';
-            assem        : as_gas;
+            assem        : as_arm_pecoffwince;
             assemextern  : as_gas;
             link         : ld_int_windows;
             linkextern   : ld_windows;

+ 2 - 2
rtl/arm/arm.inc

@@ -46,7 +46,7 @@ Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
   asm
-    fmrx r0,fpscr
+    vmrs r0,fpscr
     // set "round to nearest" mode
     and  r0,r0,#0xff3fffff
     // mask "exception happened" and overflow flags
@@ -61,7 +61,7 @@ begin
     // enable invalid operation, div-by-zero and overflow exceptions
     orr  r0,r0,#0x00000700
 {$endif}
-    fmxr fpscr,r0
+    vmsr fpscr,r0
   end;
 end;
 {$endif}

+ 2 - 2
rtl/arm/mathu.inc

@@ -161,13 +161,13 @@ const
 
 function VFP_GetCW : dword; nostackframe; assembler;
   asm
-    fmrx r0,fpscr
+    vmrs r0,fpscr
   end;
 
 
 procedure VFP_SetCW(cw : dword); nostackframe; assembler;
   asm
-    fmxr fpscr,r0
+    vmsr fpscr,r0
   end;
 
 

+ 5 - 5
rtl/arm/setjump.inc

@@ -18,12 +18,12 @@ function fpc_setjmp(var S : jmp_buf) : longint;assembler;[Public, alias : 'FPC_S
   asm
     {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$if defined(CPUARMV3) or defined(CPUARMV4) or defined(CPUARMV5)}
-    fstmiax r0!, {d8-d15}
+    vstmia r0!, {d8-d15}
     // according to the ARM Developer Suite Assembler Guide Version 1.2
     // fstmiad increases the address register always by 2n+1 words, so fix this
     sub r0,r0,#4
     {$else}
-    fstmiad r0!, {d8-d15}
+    vstmia r0!, {d8-d15}
     {$endif}
     {$endif}
 
@@ -70,7 +70,7 @@ procedure fpc_longjmp(var S : jmp_buf;value : longint);assembler;[Public, alias
     it eq
     moveq   r0, #1
     {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
-    fldmiad ip!, {d8-d15}
+    vldmia ip!, {d8-d15}
     {$endif}
     ldmia   ip!, {v1-v6, sl, fp}
     ldr     sp, [ip]
@@ -103,12 +103,12 @@ procedure fpc_longjmp(var S : jmp_buf;value : longint);assembler;[Public, alias
     moveq   r0, #1
     {$if defined(FPUVFPV2) or defined(FPUVFPV3) or defined(FPUVFPV3_D16)}
     {$if defined(CPUARMV3) or defined(CPUARMV4) or defined(CPUARMV5)}
-    fldmiax ip!, {d8-d15}
+    vldmia ip!, {d8-d15}
     // according to the ARM Developer Suite Assembler Guide Version 1.2
     // increases fldmiax the address register always by 2n+1 words, so fix this
     sub ip,ip,#4
     {$else}
-    fldmiad ip!, {d8-d15}
+    vldmia ip!, {d8-d15}
     {$endif}
     {$endif}
     ldmia   ip,{v1-v6, sl, fp, sp, pc}