Browse Source

+ fpu_fpv4_sp_d16
* some fixes to make fpv4-sp-d16 work

git-svn-id: trunk@44702 -

florian 5 years ago
parent
commit
497ff94cb0
4 changed files with 49 additions and 32 deletions
  1. 1 0
      compiler/arm/aasmcpu.pas
  2. 27 16
      compiler/arm/cgcpu.pas
  3. 20 16
      compiler/arm/cpuinfo.pas
  4. 1 0
      compiler/arm/narmutil.pas

+ 1 - 0
compiler/arm/aasmcpu.pas

@@ -2234,6 +2234,7 @@ implementation
             { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
             { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
             { fpu_fpv4_s16   } IF_NONE,
             { fpu_fpv4_s16   } IF_NONE,
             { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
             { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
+            { fpu_fpv4_sp_d16 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
             { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
             { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
           );
           );
       begin
       begin

+ 27 - 16
compiler/arm/cgcpu.pas

@@ -2084,7 +2084,7 @@ unit cgcpu;
              begin
              begin
                reference_reset(ref,4,[]);
                reference_reset(ref,4,[]);
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                 (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                  begin
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
                      begin
@@ -2115,14 +2115,16 @@ unit cgcpu;
                    begin
                    begin
                      ref.index:=ref.base;
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
                      ref.base:=NR_NO;
-                     { FSTMX is deprecated on ARMv6 and later }
-                     {if (current_settings.cputype<cpu_armv6) then
-                       postfix:=PF_IAX
-                     else
-                       postfix:=PF_IAD;}
                      if mmregs<>[] then
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end
                    end
+                 else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                   begin
+                     ref.index:=ref.base;
+                     ref.base:=NR_NO;
+                     if mmregs<>[] then
+                       list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
+                   end
                  else
                  else
                    internalerror(2019050923);
                    internalerror(2019050923);
                end;
                end;
@@ -2176,7 +2178,7 @@ unit cgcpu;
                         }
                         }
                       end;
                       end;
                 end;
                 end;
-              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                 begin
                   { restore vfp registers? }
                   { restore vfp registers? }
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
@@ -2193,7 +2195,7 @@ unit cgcpu;
               begin
               begin
                 reference_reset(ref,4,[]);
                 reference_reset(ref,4,[]);
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                   (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+                   (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                   begin
                   begin
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                       begin
                       begin
@@ -2223,13 +2225,15 @@ unit cgcpu;
                     begin
                     begin
                       ref.index:=ref.base;
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
                       ref.base:=NR_NO;
-                      { FLDMX is deprecated on ARMv6 and later }
-                      {if (current_settings.cputype<cpu_armv6) then
-                        mmpostfix:=PF_IAX
-                      else
-                        mmpostfix:=PF_IAD;}
-                     if mmregs<>[] then
-                       list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
+                      if mmregs<>[] then
+                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
+                    end
+                  else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                    begin
+                      ref.index:=ref.base;
+                      ref.base:=NR_NO;
+                      if mmregs<>[] then
+                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                     end
                     end
                   else
                   else
                     internalerror(2019050921);
                     internalerror(2019050921);
@@ -4328,12 +4332,19 @@ unit cgcpu;
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
 
 
-        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
+        if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) and
+          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
               ],first_mm_imreg,[])
               ],first_mm_imreg,[])
+        else if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) then
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
+              [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
+               RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
+               RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
+              ],first_mm_imreg,[])
         else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
         else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,

+ 20 - 16
compiler/arm/cpuinfo.pas

@@ -73,8 +73,9 @@ Type
       fpu_vfpv3,
       fpu_vfpv3,
       fpu_neon_vfpv3,
       fpu_neon_vfpv3,
       fpu_vfpv3_d16,
       fpu_vfpv3_d16,
-      fpu_fpv4_s16,
+      fpu_fpv4_s16,     { same as fpu_fpv4_sp_d16, kept for backwards compatibility }
       fpu_vfpv4,
       fpu_vfpv4,
+      fpu_fpv4_sp_d16,  { 32 registers single precision, for load/store/move they can be accessed as 16 double registers }
       fpu_neon_vfpv4
       fpu_neon_vfpv4
       { when new elements added afterwards, update also fpu_vfp_last below and
       { when new elements added afterwards, update also fpu_vfp_last below and
         update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
         update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
@@ -84,7 +85,7 @@ Const
    fpu_vfp_first = fpu_vfpv2;
    fpu_vfp_first = fpu_vfpv2;
    fpu_vfp_last  = fpu_neon_vfpv4;
    fpu_vfp_last  = fpu_neon_vfpv4;
 
 
-  fputypestrllvm : array[tfputype] of string[14] = ('',
+  fputypestrllvm : array[tfputype] of string[15] = ('',
     '',
     '',
     '',
     '',
     '',
     '',
@@ -96,6 +97,7 @@ Const
     'fpu=vfpv3-d16',
     'fpu=vfpv3-d16',
     'fpu=vfpv4-s16',
     'fpu=vfpv4-s16',
     'fpu=vfpv4',
     'fpu=vfpv4',
+    'fpu=fpv4-sp-d16',
     'fpu=neon-vfpv4'
     'fpu=neon-vfpv4'
   );
   );
 
 
@@ -570,7 +572,7 @@ Const
      'ARMV7EM'
      'ARMV7EM'
    );
    );
 
 
-   fputypestr : array[tfputype] of string[10] = (
+   fputypestr : array[tfputype] of string[11] = (
      'NONE',
      'NONE',
      'SOFT',
      'SOFT',
      'LIBGCC',
      'LIBGCC',
@@ -583,6 +585,7 @@ Const
      'VFPV3_D16',
      'VFPV3_D16',
      'FPV4_S16',
      'FPV4_S16',
      'VFPV4',
      'VFPV4',
+     'FPV4_SP_D16',
      'NEON_VFPV4'
      'NEON_VFPV4'
    );
    );
 
 
@@ -1105,19 +1108,20 @@ Const
      );
      );
 
 
      fpu_capabilities : array[tfputype] of set of tfpuflags =
      fpu_capabilities : array[tfputype] of set of tfpuflags =
-       ( { fpu_none       } [],
-         { fpu_soft       } [],
-         { fpu_libgcc     } [],
-         { fpu_fpa        } [FPUARM_HAS_FPA],
-         { fpu_fpa10      } [FPUARM_HAS_FPA],
-         { fpu_fpa11      } [FPUARM_HAS_FPA],
-         { fpu_vfpv2      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
-         { fpu_vfpv3      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
-         { fpu_neon_vfpv3 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
-         { fpu_vfpv3_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
-         { fpu_fpv4_s16   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_SINGLE_ONLY,FPUARM_HAS_VMOV_CONST],
-         { fpu_vfpv4      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
-         { fpu_neon_vfpv4 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
+       ( { fpu_none         } [],
+         { fpu_soft         } [],
+         { fpu_libgcc       } [],
+         { fpu_fpa          } [FPUARM_HAS_FPA],
+         { fpu_fpa10        } [FPUARM_HAS_FPA],
+         { fpu_fpa11        } [FPUARM_HAS_FPA],
+         { fpu_vfpv2        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
+         { fpu_vfpv3        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
+         { fpu_neon_vfpv3   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
+         { fpu_vfpv3_d16    } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
+         { fpu_fpv4_s16     } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_SINGLE_ONLY,FPUARM_HAS_VMOV_CONST],
+         { fpu_vfpv4        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_fpv4_sp_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_neon_vfpv4   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
        );
        );
 
 
    { contains all CPU supporting any kind of thumb instruction set }
    { contains all CPU supporting any kind of thumb instruction set }

+ 1 - 0
compiler/arm/narmutil.pas

@@ -207,6 +207,7 @@ interface
                 current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,3));
                 current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,3));
               fpu_vfpv3_d16:
               fpu_vfpv3_d16:
                 current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,4));
                 current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,4));
+              fpu_fpv4_sp_d16,
               fpu_fpv4_s16:
               fpu_fpv4_s16:
                 current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,6));
                 current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,6));
               fpu_vfpv4,
               fpu_vfpv4,