Browse Source

+ make use of avx-512 instructions vcvtusi2s* if possible

git-svn-id: trunk@47078 -
florian 4 years ago
parent
commit
a8b387a166
3 changed files with 32 additions and 17 deletions
  1. 25 12
      compiler/x86/nx86cnv.pas
  2. 3 2
      compiler/x86_64/cpuinfo.pas
  3. 4 3
      compiler/x86_64/nx64cnv.pas

+ 25 - 12
compiler/x86/nx86cnv.pas

@@ -262,7 +262,10 @@ implementation
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
         if use_vectorfpu(resultdef) and
         if use_vectorfpu(resultdef) and
 {$ifdef cpu64bitalu}
 {$ifdef cpu64bitalu}
-           (torddef(left.resultdef).ordtype in [s32bit,s64bit]) then
+           ((torddef(left.resultdef).ordtype in [s32bit,s64bit]) or
+            ((torddef(left.resultdef).ordtype in [u32bit,u64bit]) and
+             (FPUX86_HAS_AVX512F in fpu_capabilities[current_settings.fputype]))
+           ) then
 {$else cpu64bitalu}
 {$else cpu64bitalu}
            (torddef(left.resultdef).ordtype=s32bit) then
            (torddef(left.resultdef).ordtype=s32bit) then
 {$endif cpu64bitalu}
 {$endif cpu64bitalu}
@@ -272,29 +275,39 @@ implementation
             if UseAVX then
             if UseAVX then
               case location.size of
               case location.size of
                 OS_F32:
                 OS_F32:
-                  op:=A_VCVTSI2SS;
+                  if is_signed(left.resultdef) then
+                    op:=A_VCVTSI2SS
+                  else
+                    op:=A_VCVTUSI2SS;
                 OS_F64:
                 OS_F64:
-                  op:=A_VCVTSI2SD;
+                  if is_signed(left.resultdef) then
+                    op:=A_VCVTSI2SD
+                  else
+                    op:=A_VCVTUSI2SD;
                 else
                 else
                   internalerror(2007120902);
                   internalerror(2007120902);
               end
               end
             else
             else
-              case location.size of
-                OS_F32:
-                  op:=A_CVTSI2SS;
-                OS_F64:
-                  op:=A_CVTSI2SD;
-                else
-                  internalerror(2007120902);
+              begin
+                if not(is_signed(left.resultdef)) then
+                  Internalerror(2020101001);
+                case location.size of
+                  OS_F32:
+                    op:=A_CVTSI2SS;
+                  OS_F64:
+                    op:=A_CVTSI2SD;
+                  else
+                    internalerror(2007120902);
+                end;
               end;
               end;
 
 
             { don't use left.location.size, because that one may be OS_32/OS_64
             { don't use left.location.size, because that one may be OS_32/OS_64
               if the lower bound of the orddef >= 0
               if the lower bound of the orddef >= 0
             }
             }
             case torddef(left.resultdef).ordtype of
             case torddef(left.resultdef).ordtype of
-              s32bit:
+              s32bit,u32bit:
                 opsize:=S_L;
                 opsize:=S_L;
-              s64bit:
+              s64bit,u64bit:
                 opsize:=S_Q;
                 opsize:=S_Q;
               else
               else
                 internalerror(2007120903);
                 internalerror(2007120903);

+ 3 - 2
compiler/x86_64/cpuinfo.pas

@@ -181,7 +181,8 @@ type
 
 
    tfpuflags =
    tfpuflags =
       (FPUX86_HAS_AVXUNIT,
       (FPUX86_HAS_AVXUNIT,
-       FPUX86_HAS_32MMREGS
+       FPUX86_HAS_32MMREGS,
+       FPUX86_HAS_AVX512F
       );
       );
 
 
  const
  const
@@ -202,7 +203,7 @@ type
       { fpu_sse42    } [],
       { fpu_sse42    } [],
       { fpu_avx      } [FPUX86_HAS_AVXUNIT],
       { fpu_avx      } [FPUX86_HAS_AVXUNIT],
       { fpu_avx2     } [FPUX86_HAS_AVXUNIT],
       { fpu_avx2     } [FPUX86_HAS_AVXUNIT],
-      { fpu_avx512   } [FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS]
+      { fpu_avx512   } [FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS,FPUX86_HAS_AVX512F]
    );
    );
 
 
 Implementation
 Implementation

+ 4 - 3
compiler/x86_64/nx64cnv.pas

@@ -64,7 +64,7 @@ implementation
       symconst,symdef,
       symconst,symdef,
       cgbase,cga,
       cgbase,cga,
       ncnv,
       ncnv,
-      cpubase,
+      cpubase,cpuinfo,
       cgutils,cgobj,hlcgobj,cgx86;
       cgutils,cgobj,hlcgobj,cgx86;
 
 
 
 
@@ -72,7 +72,8 @@ implementation
       begin
       begin
         result:=nil;
         result:=nil;
         if use_vectorfpu(resultdef) and
         if use_vectorfpu(resultdef) and
-           (torddef(left.resultdef).ordtype=u32bit) then
+           (torddef(left.resultdef).ordtype=u32bit) and
+           not(FPUX86_HAS_AVX512F in fpu_capabilities[current_settings.fputype]) then
           begin
           begin
             inserttypeconv(left,s64inttype);
             inserttypeconv(left,s64inttype);
             firstpass(left);
             firstpass(left);
@@ -90,7 +91,7 @@ implementation
          l1,l2 : tasmlabel;
          l1,l2 : tasmlabel;
          op : tasmop;
          op : tasmop;
       begin
       begin
-        if use_vectorfpu(resultdef) then
+        if use_vectorfpu(resultdef) and not(FPUX86_HAS_AVX512F in fpu_capabilities[current_settings.fputype]) then
           begin
           begin
             if is_double(resultdef) then
             if is_double(resultdef) then
               op:=A_CVTSI2SD
               op:=A_CVTSI2SD