Browse Source

+ x86 makes use of fpu_capabilities
* moved CPUX86_HAS_AVXUNIT to FPUX86_HAS_AVXUNIT
+ mm register allocator can be initialized with 32 mm registers of AVX512

git-svn-id: trunk@42707 -

florian 6 years ago
parent
commit
ba203c0564

+ 2 - 0
compiler/fpcdefs.inc

@@ -96,6 +96,7 @@
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_GET_FRAME}
   {$define SUPPORT_GET_FRAME}
   {$define cpucapabilities}
   {$define cpucapabilities}
+  {$define fpucapabilities}
   {$define cpucg64shiftsupport}
   {$define cpucg64shiftsupport}
 {$endif i386}
 {$endif i386}
 
 
@@ -113,6 +114,7 @@
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_GET_FRAME}
   {$define SUPPORT_GET_FRAME}
   {$define cpucapabilities}
   {$define cpucapabilities}
+  {$define fpucapabilities}
 {$endif x86_64}
 {$endif x86_64}
 
 
 {$ifdef sparc}
 {$ifdef sparc}

+ 19 - 3
compiler/i386/cpuinfo.pas

@@ -165,13 +165,17 @@ type
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_POPCNT,
        CPUX86_HAS_POPCNT,
-       CPUX86_HAS_AVXUNIT,
        CPUX86_HAS_LZCNT,
        CPUX86_HAS_LZCNT,
        CPUX86_HAS_MOVBE,
        CPUX86_HAS_MOVBE,
        CPUX86_HAS_FMA,
        CPUX86_HAS_FMA,
        CPUX86_HAS_FMA4
        CPUX86_HAS_FMA4
       );
       );
 
 
+   tfpuflags =
+      (FPUX86_HAS_AVXUNIT,
+       FPUX86_HAS_32MMREGS
+      );
+
  const
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags = (
    cpu_capabilities : array[tcputype] of set of tcpuflags = (
      { cpu_none      } [],
      { cpu_none      } [],
@@ -183,10 +187,22 @@ type
      { cpu_Pentium4  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
      { cpu_Pentium4  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
      { cpu_PentiumM  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
      { cpu_PentiumM  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
      { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
      { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
-     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
-     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
+     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
    );
    );
 
 
+   fpu_capabilities : array[tfputype] of set of tfpuflags = (
+      { fpu_none     } [],
+      { fpu_x87      } [],
+      { fpu_sse      } [],
+      { fpu_sse2     } [],
+      { fpu_sse3     } [],
+      { fpu_ssse3    } [],
+      { fpu_sse41    } [],
+      { fpu_sse42    } [],
+      { fpu_avx      } [FPUX86_HAS_AVXUNIT],
+      { fpu_avx2     } [FPUX86_HAS_AVXUNIT]
+   );
 
 
 Implementation
 Implementation
 
 

+ 2 - 2
compiler/x86/cgx86.pas

@@ -204,7 +204,7 @@ unit cgx86;
 
 
     function UseAVX: boolean;
     function UseAVX: boolean;
       begin
       begin
-        Result:=(current_settings.fputype in fpu_avx_instructionsets) {$ifndef i8086}or (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]){$endif i8086};
+        Result:={$ifdef i8086}false{$else i8086}(FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]){$endif i8086};
       end;
       end;
 
 
 
 
@@ -2882,7 +2882,7 @@ unit cgx86;
 {$ifndef i8086}
 {$ifndef i8086}
       { avx helps only to reduce size, using it in general does at least not help on
       { avx helps only to reduce size, using it in general does at least not help on
         an i7-4770 (FK) }
         an i7-4770 (FK) }
-      if (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]) and
+      if (FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]) and
         // (cs_opt_size in current_settings.optimizerswitches) and
         // (cs_opt_size in current_settings.optimizerswitches) and
          ({$ifdef i386}(len=8) or{$endif i386}(len=16) or (len=24) or (len=32) { or (len=40) or (len=48)}) then
          ({$ifdef i386}(len=8) or{$endif i386}(len=16) or (len=24) or (len=32) { or (len=40) or (len=48)}) then
          cm:=copy_avx
          cm:=copy_avx

+ 16 - 0
compiler/x86/cpubase.pas

@@ -143,6 +143,22 @@ uses
       RS_XMM13       = $0d;
       RS_XMM13       = $0d;
       RS_XMM14       = $0e;
       RS_XMM14       = $0e;
       RS_XMM15       = $0f;
       RS_XMM15       = $0f;
+      RS_XMM16       = $10;
+      RS_XMM17       = $11;
+      RS_XMM18       = $12;
+      RS_XMM19       = $13;
+      RS_XMM20       = $14;
+      RS_XMM21       = $15;
+      RS_XMM22       = $16;
+      RS_XMM23       = $17;
+      RS_XMM24       = $18;
+      RS_XMM25       = $19;
+      RS_XMM26       = $1a;
+      RS_XMM27       = $1b;
+      RS_XMM28       = $1c;
+      RS_XMM29       = $1d;
+      RS_XMM30       = $1e;
+      RS_XMM31       = $1f;
 
 
 {$if defined(x86_64)}
 {$if defined(x86_64)}
       RS_RFLAGS      = $06;
       RS_RFLAGS      = $06;

+ 8 - 2
compiler/x86_64/cgcpu.pas

@@ -60,6 +60,7 @@ unit cgcpu;
 
 
     uses
     uses
        globtype,globals,verbose,systems,cutils,cclasses,
        globtype,globals,verbose,systems,cutils,cclasses,
+       cpuinfo,
        symtable,paramgr,cpupi,
        symtable,paramgr,cpupi,
        rgcpu,ncgutil;
        rgcpu,ncgutil;
 
 
@@ -86,8 +87,13 @@ unit cgcpu;
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
             RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
             RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
 
 
-        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
-          RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
+        if FPUX86_HAS_32MMREGS in fpu_capabilities[current_settings.fputype] then
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
+            RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15,RS_XMM16,RS_XMM17,RS_XMM18,RS_XMM19,RS_XMM20,
+            RS_XMM21,RS_XMM22,RS_XMM23,RS_XMM24,RS_XMM25,RS_XMM26,RS_XMM27,RS_XMM28,RS_XMM29,RS_XMM30,RS_XMM31],first_mm_imreg,[])
+        else
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
+            RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
         rgfpu:=Trgx86fpu.create;
         rgfpu:=Trgx86fpu.create;
       end;
       end;
 
 

+ 30 - 11
compiler/x86_64/cpuinfo.pas

@@ -62,7 +62,8 @@ Type
       fpu_sse41,
       fpu_sse41,
       fpu_sse42,
       fpu_sse42,
       fpu_avx,
       fpu_avx,
-      fpu_avx2
+      fpu_avx2,
+      fpu_avx512f
      );
      );
 
 
    tcontrollertype =
    tcontrollertype =
@@ -119,7 +120,7 @@ Const
      'COREAVX2'
      'COREAVX2'
    );
    );
 
 
-   fputypestr : array[tfputype] of string[6] = ('',
+   fputypestr : array[tfputype] of string[7] = ('',
 //     'SOFT',
 //     'SOFT',
      'SSE64',
      'SSE64',
      'SSE3',
      'SSE3',
@@ -127,10 +128,11 @@ Const
      'SSE41',
      'SSE41',
      'SSE42',
      'SSE42',
      'AVX',
      'AVX',
-     'AVX2'
+     'AVX2',
+     'AVX512F'
    );
    );
 
 
-   fputypestrllvm : array[tfputype] of string[6] = ('',
+   fputypestrllvm : array[tfputype] of string[7] = ('',
 //     'SOFT',
 //     'SOFT',
      '',
      '',
      'sse3',
      'sse3',
@@ -138,13 +140,14 @@ Const
      'sse4.1',
      'sse4.1',
      'sse4.2',
      'sse4.2',
      'avx',
      'avx',
-     'avx2'
+     'avx2',
+     'avx512f'
    );
    );
 
 
-   sse_singlescalar = [fpu_sse64..fpu_avx2];
-   sse_doublescalar = [fpu_sse64..fpu_avx2];
+   sse_singlescalar = [fpu_sse64..fpu_avx512f];
+   sse_doublescalar = [fpu_sse64..fpu_avx512f];
 
 
-   fpu_avx_instructionsets = [fpu_avx,fpu_avx2];
+   fpu_avx_instructionsets = [fpu_avx,fpu_avx2,fpu_avx512f];
 
 
    { Supported optimizations, only used for information }
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
    supported_optimizerswitches = genericlevel1optimizerswitches+
@@ -169,20 +172,36 @@ type
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_POPCNT,
        CPUX86_HAS_POPCNT,
-       CPUX86_HAS_AVXUNIT,
        CPUX86_HAS_LZCNT,
        CPUX86_HAS_LZCNT,
        CPUX86_HAS_MOVBE,
        CPUX86_HAS_MOVBE,
        CPUX86_HAS_FMA,
        CPUX86_HAS_FMA,
        CPUX86_HAS_FMA4
        CPUX86_HAS_FMA4
       );
       );
 
 
+   tfpuflags =
+      (FPUX86_HAS_AVXUNIT,
+       FPUX86_HAS_32MMREGS
+      );
+
  const
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags = (
    cpu_capabilities : array[tcputype] of set of tcpuflags = (
      { cpu_none      } [],
      { cpu_none      } [],
      { Athlon64      } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
      { Athlon64      } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
      { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
      { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
-     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
-     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
+     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+   );
+
+   fpu_capabilities : array[tfputype] of set of tfpuflags = (
+      { fpu_none     } [],
+      { fpu_sse64    } [],
+      { fpu_sse3     } [],
+      { fpu_ssse3    } [],
+      { fpu_sse41    } [],
+      { fpu_sse42    } [],
+      { fpu_avx      } [FPUX86_HAS_AVXUNIT],
+      { fpu_avx2     } [FPUX86_HAS_AVXUNIT],
+      { fpu_avx512f  } [FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS]
    );
    );
 
 
 Implementation
 Implementation