Browse Source

Added initial support for the Cortex-M4F FPv4_S16 FPU

git-svn-id: branches/laksen/arm-embedded@22597 -
Jeppe Johansen 13 years ago
parent
commit
a8f9b0dac4

+ 5 - 1
compiler/arm/agarmgas.pas

@@ -106,6 +106,8 @@ unit agarmgas;
           result:='-mfpu=vfpv3 '+result;
           result:='-mfpu=vfpv3 '+result;
         if (current_settings.fputype = fpu_vfpv3_d16) then
         if (current_settings.fputype = fpu_vfpv3_d16) then
           result:='-mfpu=vfpv3-d16 '+result;
           result:='-mfpu=vfpv3-d16 '+result;
+        if (current_settings.fputype = fpu_fpv4_s16) then
+          result:='-mfpu=fpv4-sp-d16 '+result;
 
 
         if current_settings.cputype=cpu_armv7m then
         if current_settings.cputype=cpu_armv7m then
           result:='-march=armv7m -mthumb -mthumb-interwork '+result
           result:='-march=armv7m -mthumb -mthumb-interwork '+result
@@ -292,8 +294,10 @@ unit agarmgas;
 
 
           if taicpu(hp).ops = 0 then
           if taicpu(hp).ops = 0 then
             s:=#9+gas_op2str[op]+' '+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
             s:=#9+gas_op2str[op]+' '+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
+          else if (taicpu(hp).opcode>=A_VABS) and (taicpu(hp).opcode<=A_VSUB) then
+            s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
           else
           else
-            s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+postfix+cond2str[taicpu(hp).condition]; // Conditional infixes are deprecated in unified syntax
+            s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+cond2str[taicpu(hp).condition]+postfix; // Conditional infixes are deprecated in unified syntax
         end
         end
       else
       else
         s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix];
         s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix];

+ 137 - 3
compiler/arm/cgcpu.pas

@@ -161,6 +161,12 @@ unit cgcpu;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
 
 
         function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
         function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
+
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
       end;
       end;
 
 
       tthumb2cg64farm = class(tcg64farm)
       tthumb2cg64farm = class(tcg64farm)
@@ -3120,10 +3126,17 @@ unit cgcpu;
           rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
           rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
               [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
               [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
                RS_R10,RS_R12,RS_R14],first_int_imreg,[]);
                RS_R10,RS_R12,RS_R14],first_int_imreg,[]);
-        rg[R_FPUREGISTER]:=trgcputhumb2.create(R_FPUREGISTER,R_SUBNONE,
+        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
-        rg[R_MMREGISTER]:=trgcputhumb2.create(R_MMREGISTER,R_SUBNONE,
-            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
+
+        if current_settings.fputype=fpu_fpv4_s16 then
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
+              [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
+               RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
+              ],first_mm_imreg,[])
+        else
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
+              [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
       end;
       end;
 
 
 
 
@@ -3959,6 +3972,127 @@ unit cgcpu;
         Result := ref;
         Result := ref;
       end;
       end;
 
 
+     procedure Tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
+      { Copies the MM register reg1 into reg2.
+
+        Code is only emitted when both fromsize and tosize are OS_F32: a
+        single VMOV with the ".f32" postfix (destination reg2, source reg1)
+        is appended to list and then handed to add_move_instruction.
+
+        For every other size combination nothing is appended to list, and
+        the shuffle argument is never inspected.
+        NOTE(review): OS_F64->OS_F64 and OS_F32->OS_F64 were explicit empty
+        branches with disabled VMOV/VCVT attempts; presumably doubles are
+        not expected to reach this routine on this FPU - confirm, because a
+        caller that does get here silently receives no move at all. }
+      var
+        movinstr: taicpu;
+      begin
+        if (fromsize<>OS_F32) or
+          (tosize<>OS_F32) then
+          exit;
+
+        movinstr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
+        list.Concat(movinstr);
+        add_move_instruction(movinstr);
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+      { Loads the MM register reg from the memory location described by ref.
+
+        VLDR only accepts the form [base, #imm] where imm is a multiple of 4
+        in the range -1020..1020, so a working copy of ref is reduced to
+        that form before the load is emitted:
+          1. base+index (optionally shifted) is summed into a temporary
+             integer register which becomes the new base;
+          2. a symbolic address is materialised with a_loadaddr_ref_reg;
+          3. an offset that VLDR cannot encode is folded into a temporary
+             base register the same way.
+
+        fromsize=OS_F32 emits VLDR with the ".f32" postfix, every other
+        fromsize emits it with ".f64".  tosize and shuffle are not
+        inspected. }
+      var
+        href: treference;
+        tmpreg: TRegister;
+        so: tshifterop;
+      begin
+        href:=ref;
+
+        if (href.base<>NR_NO) and
+          (href.index<>NR_NO) then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            if href.shiftmode<>SM_None then
+              begin
+                so.rs:=href.index;
+                so.shiftimm:=href.shiftimm;
+                so.shiftmode:=href.shiftmode;
+                list.concat(taicpu.op_reg_reg_shifterop(A_ADD,tmpreg,href.base,so));
+              end
+            else
+              a_op_reg_reg_reg(list,OP_ADD,OS_INT,href.index,href.base,tmpreg);
+
+            reference_reset_base(href,tmpreg,href.offset,0);
+            { reference_reset_base is expected to clear every field it is
+              not given (NOTE(review): confirm against cgutils), which would
+              drop the symbol of a symbol+base+index reference and make the
+              load address the wrong location.  Restoring it is harmless if
+              the reset keeps the symbol after all. }
+            href.symbol:=ref.symbol;
+          end;
+
+        if assigned(href.symbol) then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            a_loadaddr_ref_reg(list,href,tmpreg);
+
+            reference_reset_base(href,tmpreg,0,0);
+          end;
+
+        { VLDR immediate: word aligned and within +/-1020 bytes.  Anything
+          else cannot be encoded, so compute the address into a register. }
+        if (href.offset<-1020) or
+          (href.offset>1020) or
+          ((href.offset and 3)<>0) then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            a_loadaddr_ref_reg(list,href,tmpreg);
+
+            reference_reset_base(href,tmpreg,0,0);
+          end;
+
+        if fromsize=OS_F32 then
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VLDR,reg,href), PF_F32))
+        else
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VLDR,reg,href), PF_F64));
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+      { Stores the MM register reg to the memory location described by ref.
+
+        VSTR only accepts the form [base, #imm] where imm is a multiple of 4
+        in the range -1020..1020, so a working copy of ref is reduced to
+        that form before the store is emitted:
+          1. base+index (optionally shifted) is summed into a temporary
+             integer register which becomes the new base;
+          2. a symbolic address is materialised with a_loadaddr_ref_reg;
+          3. an offset that VSTR cannot encode is folded into a temporary
+             base register the same way.
+
+        fromsize=OS_F32 emits VSTR with the ".32" postfix, every other
+        fromsize emits it with ".64".  tosize and shuffle are not
+        inspected. }
+      var
+        href: treference;
+        so: tshifterop;
+        tmpreg: TRegister;
+      begin
+        href:=ref;
+
+        if (href.base<>NR_NO) and
+          (href.index<>NR_NO) then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            if href.shiftmode<>SM_None then
+              begin
+                so.rs:=href.index;
+                so.shiftimm:=href.shiftimm;
+                so.shiftmode:=href.shiftmode;
+                list.concat(taicpu.op_reg_reg_shifterop(A_ADD,tmpreg,href.base,so));
+              end
+            else
+              a_op_reg_reg_reg(list,OP_ADD,OS_INT,href.index,href.base,tmpreg);
+
+            reference_reset_base(href,tmpreg,href.offset,0);
+            { reference_reset_base is expected to clear every field it is
+              not given (NOTE(review): confirm against cgutils), which would
+              drop the symbol of a symbol+base+index reference and make the
+              store hit the wrong location.  Restoring it is harmless if
+              the reset keeps the symbol after all. }
+            href.symbol:=ref.symbol;
+          end;
+
+        if assigned(href.symbol) then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            a_loadaddr_ref_reg(list,href,tmpreg);
+
+            reference_reset_base(href,tmpreg,0,0);
+          end;
+
+        { VSTR immediate: word aligned and within +/-1020 bytes.  Anything
+          else cannot be encoded, so compute the address into a register. }
+        if (href.offset<-1020) or
+          (href.offset>1020) or
+          ((href.offset and 3)<>0) then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            a_loadaddr_ref_reg(list,href,tmpreg);
+
+            reference_reset_base(href,tmpreg,0,0);
+          end;
+
+        if fromsize=OS_F32 then
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VSTR,reg,href), PF_32))
+        else
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VSTR,reg,href), PF_64));
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
+      { Transfers the integer register intreg into the MM register mmreg by
+        appending a VMOV (destination mmreg, source intreg, no postfix) to
+        list.
+
+        Only tosize=OS_F32 is handled; any other tosize raises internal
+        error 2012100813.  fromsize and shuffle are not inspected (a check
+        on shuffle=nil was left disabled in the first version). }
+      begin
+        if tosize<>OS_F32 then
+          internalerror(2012100813)
+        else
+          list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+      { Transfers the MM register mmreg into the integer register intreg by
+        appending a VMOV (destination intreg, source mmreg, no postfix) to
+        list.
+
+        Only fromsize=OS_F32 is handled; any other fromsize raises internal
+        error 2012100814.  tosize and shuffle are not inspected (a check on
+        shuffle=nil was left disabled in the first version). }
+      begin
+        if fromsize<>OS_F32 then
+          internalerror(2012100814)
+        else
+          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
+      end;
+
 
 
     procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
     procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       var tmpreg: tregister;
       var tmpreg: tregister;

+ 11 - 4
compiler/arm/cpubase.pas

@@ -139,7 +139,11 @@ unit cpubase;
         { multiple load/store vfp address modes }
         { multiple load/store vfp address modes }
         PF_IAD,PF_DBD,PF_FDD,PF_EAD,
         PF_IAD,PF_DBD,PF_FDD,PF_EAD,
         PF_IAS,PF_DBS,PF_FDS,PF_EAS,
         PF_IAS,PF_DBS,PF_FDS,PF_EAS,
-        PF_IAX,PF_DBX,PF_FDX,PF_EAX
+        PF_IAX,PF_DBX,PF_FDX,PF_EAX,
+        { FPv4 postfixes }
+        PF_32,PF_64,PF_F32,PF_F64,
+        PF_F32S32,PF_F32U32,
+        PF_S32F32,PF_U32F32
       );
       );
 
 
       TOpPostfixes = set of TOpPostfix;
       TOpPostfixes = set of TOpPostfix;
@@ -152,14 +156,17 @@ unit cpubase;
         PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
         PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
         PF_S,PF_D,PF_E,PF_None,PF_None);
         PF_S,PF_D,PF_E,PF_None,PF_None);
 
 
-      oppostfix2str : array[TOpPostfix] of string[3] = ('',
+      oppostfix2str : array[TOpPostfix] of string[8] = ('',
         's',
         's',
         'd','e','p','ep',
         'd','e','p','ep',
         'b','sb','bt','h','sh','t',
         'b','sb','bt','h','sh','t',
         'ia','ib','da','db','fd','fa','ed','ea',
         'ia','ib','da','db','fd','fa','ed','ea',
         'iad','dbd','fdd','ead',
         'iad','dbd','fdd','ead',
         'ias','dbs','fds','eas',
         'ias','dbs','fds','eas',
-        'iax','dbx','fdx','eax');
+        'iax','dbx','fdx','eax',
+        '.32','.64','.f32','.f64',
+        '.f32.s32','.f32.u32',
+        '.s32.f32','.u32.f32');
 
 
       roundingmode2str : array[TRoundingMode] of string[1] = ('',
       roundingmode2str : array[TRoundingMode] of string[1] = ('',
         'p','m','z');
         'p','m','z');
@@ -371,7 +378,7 @@ unit cpubase;
 
 
 
 
     const
     const
-      std_regname_table : array[tregisterindex] of string[7] = (
+      std_regname_table : array[tregisterindex] of string[10] = (
         {$i rarmstd.inc}
         {$i rarmstd.inc}
       );
       );
 
 

+ 5 - 3
compiler/arm/cpuinfo.pas

@@ -65,7 +65,8 @@ Type
       fpu_fpa11,
       fpu_fpa11,
       fpu_vfpv2,
       fpu_vfpv2,
       fpu_vfpv3,
       fpu_vfpv3,
-      fpu_vfpv3_d16
+      fpu_vfpv3_d16,
+      fpu_fpv4_s16
      );
      );
 
 
    tcontrollertype =
    tcontrollertype =
@@ -227,7 +228,8 @@ Const
      'FPA11',
      'FPA11',
      'VFPV2',
      'VFPV2',
      'VFPV3',
      'VFPV3',
-     'VFPV3_D16'
+     'VFPV3_D16',
+     'FPV4_S16'
    );
    );
 
 
 
 
@@ -1004,7 +1006,7 @@ Const
         )
         )
     );
     );
 
 
-   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16];
+   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16];
 
 
    { Supported optimizations, only used for information }
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
    supported_optimizerswitches = genericlevel1optimizerswitches+

+ 2 - 2
compiler/arm/cpupara.pas

@@ -124,7 +124,7 @@ unit cpupara;
                 getparaloc:=LOC_MMREGISTER
                 getparaloc:=LOC_MMREGISTER
               else if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
               else if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
+                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but Mac OS X doesn't seem to do that and linux only does it if
                   but Mac OS X doesn't seem to do that and linux only does it if
                   built with the "-mfloat-abi=hard" option }
                   built with the "-mfloat-abi=hard" option }
@@ -608,7 +608,7 @@ unit cpupara;
               end
               end
             else if (p.proccalloption in [pocall_softfloat]) or
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
+               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
               begin
               begin
                 case retcgsize of
                 case retcgsize of
                   OS_64,
                   OS_64,

+ 8 - 0
compiler/arm/cpupi.pas

@@ -118,6 +118,14 @@ unit cpupi;
                 if r in regs then
                 if r in regs then
                   inc(floatsavesize,8);
                   inc(floatsavesize,8);
             end;
             end;
+          fpu_fpv4_s16:
+            begin
+              floatsavesize:=0;
+              regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
+              for r:=RS_D0 to RS_D15 do
+                if r in regs then
+                  inc(floatsavesize,8);
+            end;
         end;
         end;
         floatsavesize:=align(floatsavesize,max(current_settings.alignment.localalignmin,4));
         floatsavesize:=align(floatsavesize,max(current_settings.alignment.localalignmin,4));
         result:=Align(tg.direction*tg.lasttemp,max(current_settings.alignment.localalignmin,4))+maxpushedparasize+aint(floatsavesize);
         result:=Align(tg.direction*tg.lasttemp,max(current_settings.alignment.localalignmin,4))+maxpushedparasize+aint(floatsavesize);

+ 1 - 1
compiler/arm/itcpugas.pas

@@ -46,7 +46,7 @@ implementation
       cutils,verbose;
       cutils,verbose;
 
 
     const
     const
-      gas_regname_table : array[tregisterindex] of string[7] = (
+      gas_regname_table : array[tregisterindex] of string[10] = (
         {$i rarmstd.inc}
         {$i rarmstd.inc}
       );
       );
 
 

+ 125 - 2
compiler/arm/narmadd.pas

@@ -35,6 +35,7 @@ interface
        public
        public
           function pass_1 : tnode;override;
           function pass_1 : tnode;override;
        protected
        protected
+          function first_addfloat: tnode; override;
           procedure second_addfloat;override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpordinal;override;
           procedure second_cmpordinal;override;
@@ -48,12 +49,12 @@ interface
       globtype,systems,
       globtype,systems,
       cutils,verbose,globals,
       cutils,verbose,globals,
       constexp,
       constexp,
-      symconst,symdef,paramgr,
+      symconst,symdef,paramgr,symtable,symtype,
       aasmbase,aasmtai,aasmdata,aasmcpu,defutil,htypechk,
       aasmbase,aasmtai,aasmdata,aasmcpu,defutil,htypechk,
       cgbase,cgutils,cgcpu,
       cgbase,cgutils,cgcpu,
       cpuinfo,pass_1,pass_2,regvars,procinfo,
       cpuinfo,pass_1,pass_2,regvars,procinfo,
       cpupara,
       cpupara,
-      ncon,nset,nadd,
+      ncon,nset,nadd,ncnv,ncal,nmat,
       ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32,
       ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32,
       hlcgobj
       hlcgobj
       ;
       ;
@@ -212,6 +213,36 @@ interface
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
                  location.register,left.location.register,right.location.register));
                  location.register,left.location.register,right.location.register));
             end;
             end;
+          fpu_fpv4_s16:
+            begin
+              { force mmreg as location, left right doesn't matter
+                as both will be in a fpureg }
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+
+              location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+              if left.location.loc<>LOC_CMMREGISTER then
+                location.register:=left.location.register
+              else if right.location.loc<>LOC_CMMREGISTER then
+                location.register:=right.location.register
+              else
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+              case nodetype of
+                addn :
+                  op:=A_VADD;
+                muln :
+                  op:=A_VMUL;
+                subn :
+                  op:=A_VSUB;
+                slashn :
+                  op:=A_VDIV;
+                else
+                  internalerror(2009111401);
+              end;
+
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
+            end;
           fpu_soft:
           fpu_soft:
             { this case should be handled already by pass1 }
             { this case should be handled already by pass1 }
             internalerror(200308252);
             internalerror(200308252);
@@ -273,6 +304,21 @@ interface
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.concat(taicpu.op_none(A_FMSTAT));
               current_asmdata.CurrAsmList.concat(taicpu.op_none(A_FMSTAT));
             end;
             end;
+          fpu_fpv4_s16:
+            begin
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+
+              if nodetype in [equaln,unequaln] then
+                op:=A_VCMP
+              else
+                op:=A_VCMPE;
+
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
+                left.location.register,right.location.register));
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+              current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
+            end;
           fpu_soft:
           fpu_soft:
             { this case should be handled already by pass1 }
             { this case should be handled already by pass1 }
             internalerror(2009112404);
             internalerror(2009112404);
@@ -464,6 +510,83 @@ interface
           end;
           end;
       end;
       end;
 
 
+    function tarmaddnode.first_addfloat: tnode;
+      { First-pass handling of a floating point add/sub/mul/div/compare node.
+
+        For every FPU type other than fpu_fpv4_s16 the inherited
+        implementation decides.  With fpu_fpv4_s16 selected:
+          - if the left operand is not s64real, nil is returned and the
+            node is left untouched;
+          - if the left operand is s64real, the node is replaced by a call
+            to a "float64_*" helper.  Both operands are converted to the
+            system type FLOAT64, the call result is converted back to
+            resultdef, and for comparisons resultdef is set to pasbool8type
+            first.
+
+        ">" , ">=" and "<>" have no helper of their own: they call the
+        helper for "<=", "<" and "=" respectively and wrap the result in a
+        not-node.  Any other node type reports
+        type_e_operator_not_supported_for_types. }
+      var
+        helpername : string[31];
+        { true when the helper computes the opposite relation }
+        invert : boolean;
+        softdef : tdef;
+        params : tnode;
+      begin
+        if current_settings.fputype<>fpu_fpv4_s16 then
+          begin
+            result:=inherited first_addfloat;
+            exit;
+          end;
+
+        result:=nil;
+        if tfloatdef(left.resultdef).floattype<>s64real then
+          exit;
+
+        softdef:=search_system_type('FLOAT64').typedef;
+        invert:=nodetype in [gtn,gten,unequaln];
+
+        helpername:='float64';
+        case nodetype of
+          addn:
+            helpername:=helpername+'_add';
+          muln:
+            helpername:=helpername+'_mul';
+          subn:
+            helpername:=helpername+'_sub';
+          slashn:
+            helpername:=helpername+'_div';
+          ltn,gten:
+            helpername:=helpername+'_lt';
+          lten,gtn:
+            helpername:=helpername+'_le';
+          equaln,unequaln:
+            helpername:=helpername+'_eq';
+          else
+            CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),left.resultdef.typename,right.resultdef.typename);
+        end;
+
+        if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
+          resultdef:=pasbool8type;
+
+        { parameter list is chained back to front: left is the innermost
+          (last) entry, right is the head of the list }
+        params:=ccallparanode.create(ctypeconvnode.create_internal(left,softdef),nil);
+        params:=ccallparanode.create(ctypeconvnode.create_internal(right,softdef),params);
+        result:=ctypeconvnode.create_internal(ccallnode.createintern(helpername,params),resultdef);
+
+        { both operands now belong to the call node }
+        left:=nil;
+        right:=nil;
+
+        if invert then
+          result:=cnotnode.create(result);
+      end;
+
 
 
     procedure tarmaddnode.second_cmpordinal;
     procedure tarmaddnode.second_cmpordinal;
       var
       var

+ 1 - 1
compiler/arm/narmcal.pas

@@ -49,7 +49,7 @@ implementation
       if (realresdef.typ=floatdef) and 
       if (realresdef.typ=floatdef) and 
          (target_info.abi <> abi_eabihf) and
          (target_info.abi <> abi_eabihf) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16])) then
+          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16])) then
         begin
         begin
           { keep the fpu values in integer registers for now, the code
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary
             generator will move them to memory or an mmregister when necessary

+ 64 - 3
compiler/arm/narmcnv.pas

@@ -32,6 +32,7 @@ interface
        tarmtypeconvnode = class(tcgtypeconvnode)
        tarmtypeconvnode = class(tcgtypeconvnode)
          protected
          protected
            function first_int_to_real: tnode;override;
            function first_int_to_real: tnode;override;
+           function first_real_to_real: tnode; override;
          { procedure second_int_to_int;override; }
          { procedure second_int_to_int;override; }
          { procedure second_string_to_string;override; }
          { procedure second_string_to_string;override; }
          { procedure second_cstring_to_pchar;override; }
          { procedure second_cstring_to_pchar;override; }
@@ -58,7 +59,7 @@ implementation
 
 
    uses
    uses
       verbose,globtype,globals,systems,
       verbose,globtype,globals,systems,
-      symconst,symdef,aasmbase,aasmtai,aasmdata,
+      symconst,symdef,aasmbase,aasmtai,aasmdata,symtable,
       defutil,
       defutil,
       cgbase,cgutils,
       cgbase,cgutils,
       pass_1,pass_2,procinfo,
       pass_1,pass_2,procinfo,
@@ -76,7 +77,8 @@ implementation
       var
       var
         fname: string[19];
         fname: string[19];
       begin
       begin
-        if cs_fp_emulation in current_settings.moduleswitches then
+        if (cs_fp_emulation in current_settings.moduleswitches) or
+          (current_settings.fputype=fpu_fpv4_s16) then
           result:=inherited first_int_to_real
           result:=inherited first_int_to_real
         else
         else
           begin
           begin
@@ -117,7 +119,8 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
               fpu_vfpv2,
               fpu_vfpv3,
               fpu_vfpv3,
-              fpu_vfpv3_d16:
+              fpu_vfpv3_d16,
+              fpu_fpv4_s16:
                 expectloc:=LOC_MMREGISTER;
                 expectloc:=LOC_MMREGISTER;
               else
               else
                 internalerror(2009112702);
                 internalerror(2009112702);
@@ -125,6 +128,48 @@ implementation
           end;
           end;
       end;
       end;
 
 
+    function tarmtypeconvnode.first_real_to_real: tnode;
+      { First-pass handling of a float-to-float type conversion.
+
+        For every FPU type other than fpu_fpv4_s16 the inherited
+        implementation decides.  With fpu_fpv4_s16 selected the conversion
+        is rewritten depending on source and destination float type:
+          s32real -> s64real : call to 'float32_to_float64', operand
+                               converted to the system type FLOAT32REC
+          s64real -> s32real : call to 'float64_to_float32', operand
+                               converted to the system type FLOAT64
+          same type          : the operand itself becomes the result
+        In the two call cases the call result is converted to resultdef.
+        Any other source or destination float type raises an internal
+        error.  The replacement node is run through firstpass before it is
+        returned, and left is cleared because the operand now lives inside
+        the replacement. }
+      begin
+        if current_settings.fputype<>fpu_fpv4_s16 then
+          begin
+            Result := inherited first_real_to_real;
+            exit;
+          end;
+
+        case tfloatdef(left.resultdef).floattype of
+          s32real:
+            case tfloatdef(resultdef).floattype of
+              s64real:
+                result:=ctypeconvnode.create_explicit(ccallnode.createintern('float32_to_float64',ccallparanode.create(
+                  ctypeconvnode.create_internal(left,search_system_type('FLOAT32REC').typedef),nil)),resultdef);
+              s32real:
+                result:=left;
+              else
+                internalerror(200610151);
+            end;
+          s64real:
+            case tfloatdef(resultdef).floattype of
+              s32real:
+                result:=ctypeconvnode.create_explicit(ccallnode.createintern('float64_to_float32',ccallparanode.create(
+                  ctypeconvnode.create_internal(left,search_system_type('FLOAT64').typedef),nil)),resultdef);
+              s64real:
+                result:=left;
+              else
+                internalerror(200610152);
+            end;
+          else
+            internalerror(200610153);
+        end;
+
+        { in every branch the operand has been moved into result }
+        left:=nil;
+        firstpass(result);
+      end;
+
 
 
     procedure tarmtypeconvnode.second_int_to_real;
     procedure tarmtypeconvnode.second_int_to_real;
       const
       const
@@ -214,6 +259,22 @@ implementation
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(
                 signedprec2vfpop[signed,location.size],location.register,left.location.register));
                 signedprec2vfpop[signed,location.size],location.register,left.location.register));
             end;
             end;
+          fpu_fpv4_s16:
+            begin
+              location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+              signed:=left.location.size=OS_S32;
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+              if (left.location.size<>OS_F32) then
+                internalerror(2009112703);
+              if left.location.size<>location.size then
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size)
+              else
+                location.register:=left.location.register;
+              if signed then
+                current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32S32))
+              else
+                current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
+            end;
         end;
         end;
       end;
       end;
 
 

+ 29 - 1
compiler/arm/narminl.pas

@@ -91,7 +91,8 @@ implementation
             end;
             end;
           fpu_vfpv2,
           fpu_vfpv2,
           fpu_vfpv3,
           fpu_vfpv3,
-          fpu_vfpv3_d16:
+          fpu_vfpv3_d16,
+          fpu_fpv4_s16:
             begin
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_copy(location,left.location);
               location_copy(location,left.location);
@@ -123,6 +124,13 @@ implementation
               fpu_vfpv3,
               fpu_vfpv3,
               fpu_vfpv3_d16:
               fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
                 expectloc:=LOC_MMREGISTER;
+              fpu_fpv4_s16:
+                begin
+                  if tfloatdef(left.resultdef).floattype=s32real then
+                    expectloc:=LOC_MMREGISTER
+                  else
+                    exit(inherited first_abs_real);
+                end;
               else
               else
                 internalerror(2009112401);
                 internalerror(2009112401);
             end;
             end;
@@ -146,6 +154,13 @@ implementation
               fpu_vfpv3,
               fpu_vfpv3,
               fpu_vfpv3_d16:
               fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
                 expectloc:=LOC_MMREGISTER;
+              fpu_fpv4_s16:
+                begin
+                  if tfloatdef(left.resultdef).floattype=s32real then
+                    expectloc:=LOC_MMREGISTER
+                  else
+                    exit(inherited first_sqr_real);
+                end;
               else
               else
                 internalerror(2009112402);
                 internalerror(2009112402);
             end;
             end;
@@ -169,6 +184,13 @@ implementation
               fpu_vfpv3,
               fpu_vfpv3,
               fpu_vfpv3_d16:
               fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
                 expectloc:=LOC_MMREGISTER;
+              fpu_fpv4_s16:
+                begin
+                  if tfloatdef(left.resultdef).floattype=s32real then
+                    expectloc:=LOC_MMREGISTER
+                  else
+                    exit(inherited first_sqrt_real);
+                end;
               else
               else
                 internalerror(2009112403);
                 internalerror(2009112403);
             end;
             end;
@@ -227,6 +249,8 @@ implementation
                 op:=A_FABSD;
                 op:=A_FABSD;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
             end;
             end;
+          fpu_fpv4_s16:
+            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
         else
         else
           internalerror(2009111402);
           internalerror(2009111402);
         end;
         end;
@@ -254,6 +278,8 @@ implementation
                 op:=A_FMULD;
                 op:=A_FMULD;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,left.location.register,left.location.register));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,left.location.register,left.location.register));
             end;
             end;
+          fpu_fpv4_s16:
+            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
         else
         else
           internalerror(2009111403);
           internalerror(2009111403);
         end;
         end;
@@ -281,6 +307,8 @@ implementation
                 op:=A_FSQRTD;
                 op:=A_FSQRTD;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
             end;
             end;
+          fpu_fpv4_s16:
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register));
         else
         else
           internalerror(2009111402);
           internalerror(2009111402);
         end;
         end;

+ 52 - 1
compiler/arm/narmmat.pas

@@ -39,6 +39,7 @@ interface
       end;
       end;
 
 
       tarmunaryminusnode = class(tcgunaryminusnode)
       tarmunaryminusnode = class(tcgunaryminusnode)
+        function pass_1: tnode; override;
         procedure second_float;override;
         procedure second_float;override;
       end;
       end;
 
 
@@ -54,9 +55,10 @@ implementation
       cutils,verbose,globals,constexp,
       cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
       defutil,
+      symtype,symconst,symtable,
       cgbase,cgobj,hlcgobj,cgutils,
       cgbase,cgobj,hlcgobj,cgutils,
       pass_2,procinfo,
       pass_2,procinfo,
-      ncon,
+      ncon,ncnv,ncal,
       cpubase,cpuinfo,
       cpubase,cpuinfo,
       ncgutil,cgcpu,
       ncgutil,cgcpu,
       nadd,pass_1,symdef;
       nadd,pass_1,symdef;
@@ -326,6 +328,46 @@ implementation
                                TARMUNARYMINUSNODE
                                TARMUNARYMINUSNODE
 *****************************************************************************}
 *****************************************************************************}
 
 
+    { First pass of unary minus for the FPv4-S16 FPU. Float results other than
+      s32real are rewritten into a float64_sub softfloat helper call taking the
+      operand and a zero constant; everything else goes to the inherited pass_1. }
+    function tarmunaryminusnode.pass_1: tnode;
+      var
+        procname: string[31];
+        fdef : tdef;
+      begin
+        { Check the def kind before the tfloatdef cast: resultdef need not be a
+          floatdef (orddef operands are handled further down as well), and
+          reading floattype from another def kind is meaningless. }
+        if (current_settings.fputype<>fpu_fpv4_s16) or
+          (resultdef.typ<>floatdef) or
+          (tfloatdef(resultdef).floattype=s32real) then
+          exit(inherited pass_1);
+
+        result:=nil;
+        firstpass(left);
+        if codegenerror then
+          exit;
+
+        if (left.resultdef.typ=floatdef) then
+          begin
+            { only s64real has a helper mapped here; any other float type is
+              reported as an internal error }
+            case tfloatdef(resultdef).floattype of
+              s64real:
+                begin
+                  procname:='float64_sub';
+                  fdef:=search_system_type('FLOAT64').typedef;
+                end;
+              else
+                internalerror(2005082801);
+            end;
+            result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
+              ctypeconvnode.create_internal(left,fDef),
+              ccallparanode.create(ctypeconvnode.create_internal(crealconstnode.create(0,resultdef),fdef),nil))),resultdef);
+
+            { left is now referenced by the replacement node tree, so drop
+              this node's reference to it }
+            left:=nil;
+          end
+        { the original nested floatdef test in this branch could never be
+          true, so only the orddef case remains }
+        else if (left.resultdef.typ=orddef) then
+          expectloc:=LOC_REGISTER;
+      end;
+
     procedure tarmunaryminusnode.second_float;
     procedure tarmunaryminusnode.second_float;
       var
       var
         op: tasmop;
         op: tasmop;
@@ -357,6 +399,15 @@ implementation
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
                 location.register,left.location.register));
                 location.register,left.location.register));
             end;
             end;
+          fpu_fpv4_s16:
+            begin
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              location:=left.location;
+              if (left.location.loc=LOC_CMMREGISTER) then
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
+                location.register,left.location.register), PF_F32));
+            end
           else
           else
             internalerror(2009112602);
             internalerror(2009112602);
         end;
         end;

+ 11 - 13
compiler/nadd.pas

@@ -2608,7 +2608,11 @@ implementation
         { In non-emulation mode, real opcodes are
         { In non-emulation mode, real opcodes are
           emitted for floating point values.
           emitted for floating point values.
         }
         }
-        if not (cs_fp_emulation in current_settings.moduleswitches) then
+        if not ((cs_fp_emulation in current_settings.moduleswitches)
+{$ifdef cpufpemu}
+                or (current_settings.fputype=fpu_soft)
+{$endif cpufpemu}
+                ) then
           exit;
           exit;
 
 
         if not(target_info.system in systems_wince) then
         if not(target_info.system in systems_wince) then
@@ -2768,12 +2772,9 @@ implementation
          if nodetype=slashn then
          if nodetype=slashn then
            begin
            begin
 {$ifdef cpufpemu}
 {$ifdef cpufpemu}
-             if (current_settings.fputype=fpu_soft) or (cs_fp_emulation in current_settings.moduleswitches) then
-               begin
-                 result:=first_addfloat;
-                 if assigned(result) then
-                   exit;
-               end;
+             result:=first_addfloat;
+             if assigned(result) then
+               exit;
 {$endif cpufpemu}
 {$endif cpufpemu}
              expectloc:=LOC_FPUREGISTER;
              expectloc:=LOC_FPUREGISTER;
            end
            end
@@ -2984,12 +2985,9 @@ implementation
          else if (rd.typ=floatdef) or (ld.typ=floatdef) then
          else if (rd.typ=floatdef) or (ld.typ=floatdef) then
             begin
             begin
 {$ifdef cpufpemu}
 {$ifdef cpufpemu}
-             if (current_settings.fputype=fpu_soft) or (cs_fp_emulation in current_settings.moduleswitches) then
-               begin
-                 result:=first_addfloat;
-                 if assigned(result) then
-                   exit;
-               end;
+             result:=first_addfloat;
+             if assigned(result) then
+               exit;
 {$endif cpufpemu}
 {$endif cpufpemu}
               if nodetype in [addn,subn,muln,andn,orn,xorn] then
               if nodetype in [addn,subn,muln,andn,orn,xorn] then
                 expectloc:=LOC_FPUREGISTER
                 expectloc:=LOC_FPUREGISTER

+ 1 - 1
compiler/rgbase.pas

@@ -29,7 +29,7 @@ interface
       cpuBase,cgBase;
       cpuBase,cgBase;
 
 
     type
     type
-      TRegNameTable = array[tregisterindex] of string[7];
+      TRegNameTable = array[tregisterindex] of string[10];
       TRegisterIndexTable = array[tregisterindex] of tregisterindex;
       TRegisterIndexTable = array[tregisterindex] of tregisterindex;
 
 
     function findreg_by_number_table(r:Tregister;const regnumber_index:TRegisterIndexTable):tregisterindex;
     function findreg_by_number_table(r:Tregister;const regnumber_index:TRegisterIndexTable):tregisterindex;

+ 9 - 0
rtl/arm/thumb2.inc

@@ -33,10 +33,19 @@ Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
 begin
 begin
   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
   { Enable FPU exceptions, but disable INEXACT, UNDERFLOW, DENORMAL }
   asm
   asm
+    {$IFDEF FPUFPV4_S16}
+    movw r0, #(0xed88)
+    movt r0, #(0xe000)
+    ldr r1, [r0]
+    orr r1, r1, #(0xF << 20)
+    str r1, [r0]
+    bx lr
+    {$ELSE FPUFPV4_S16}
     rfs r0
     rfs r0
     and r0,r0,#0xffe0ffff
     and r0,r0,#0xffe0ffff
     orr r0,r0,#0x00070000
     orr r0,r0,#0x00070000
     wfs r0
     wfs r0
+    {$endif FPUFPV4_S16}
   end;
   end;
 end;
 end;
 {$endif}
 {$endif}