浏览代码

+ cpu_capabilities for x86_64 and i386
* take advantage of bmi2 instruction rorx

git-svn-id: trunk@26482 -

florian 11 年之前
父节点
当前提交
e210d5f30e
共有 5 个文件被更改,包括 63 次插入5 次删除
  1. 2 0
      compiler/fpcdefs.inc
  2. 25 2
      compiler/i386/cpuinfo.pas
  3. 13 0
      compiler/x86/cgx86.pas
  4. 3 1
      compiler/x86/rgx86.pas
  5. 20 2
      compiler/x86_64/cpuinfo.pas

+ 2 - 0
compiler/fpcdefs.inc

@@ -74,6 +74,7 @@
   {$define cpurox}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_GET_FRAME}
+  {$define cpucapabilities}
 {$endif i386}
 
 {$ifdef x86_64}
@@ -89,6 +90,7 @@
   {$define cpurefshaveindexreg}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_GET_FRAME}
+  {$define cpucapabilities}
 {$endif x86_64}
 
 {$ifdef ia64}

+ 25 - 2
compiler/i386/cpuinfo.pas

@@ -46,7 +46,9 @@ Type
        cpu_Pentium2,
        cpu_Pentium3,
        cpu_Pentium4,
-       cpu_PentiumM
+       cpu_PentiumM,
+       cpu_core_avx,
+       cpu_core_avx2
       );
 
    tfputype =
@@ -85,7 +87,9 @@ Const
      'PENTIUM2',
      'PENTIUM3',
      'PENTIUM4',
-     'PENTIUMM'
+     'PENTIUMM',
+     'COREAVX',
+     'COREAVX2'
    );
 
    fputypestr : array[tfputype] of string[6] = ('',
@@ -123,6 +127,25 @@ Const
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
 
+type
+   tcpuflags =  { capability flags; cpu_capabilities holds a set of these per tcputype }
+      (CPUX86_HAS_BMI1,
+       CPUX86_HAS_BMI2  { tested in cgx86 before A_RORX is emitted for rotates }
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags = (  { entries follow tcputype declaration order }
+     { cpu_none      } [],
+     { cpu_386       } [],
+     { cpu_Pentium   } [],
+     { cpu_Pentium2  } [],
+     { cpu_Pentium3  } [],
+     { cpu_Pentium4  } [],
+     { cpu_PentiumM  } [],
+     { cpu_core_avx  } [],
+     { cpu_core_avx2 } [CPUX86_HAS_BMI1,CPUX86_HAS_BMI2]
+   );
+
 Implementation
 
 end.

+ 13 - 0
compiler/x86/cgx86.pas

@@ -1606,6 +1606,19 @@ unit cgx86;
             reference_reset_base(href,src,-a,0);
             list.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[size],href,dst));
           end
+        else if (op in [OP_ROR,OP_ROL]) and
+          (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
+          (size in [OS_32,OS_S32
+{$ifdef x86_64}
+            ,OS_64,OS_S64
+{$endif x86_64}
+          ]) then
+          begin
+            if op=OP_ROR then
+              list.concat(taicpu.op_const_reg_reg(A_RORX,TCgSize2OpSize[size], a,src,dst))
+            else
+              list.concat(taicpu.op_const_reg_reg(A_RORX,TCgSize2OpSize[size],TCgSize2Size[size]*8-a,src,dst));
+          end
         else
           inherited a_op_const_reg_reg(list,op,size,a,src,dst);
       end;

+ 3 - 1
compiler/x86/rgx86.pas

@@ -151,7 +151,9 @@ implementation
                           (get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[2]^.reg)))
                          ) and
                          (get_alias(getsupreg(oper[0]^.reg))=orgreg) then
-                        replaceoper:=0;
+                        replaceoper:=0
+                      else if (opcode=A_RORX) then
+                        replaceoper:=1;
                     end
                   else
                     begin

+ 20 - 2
compiler/x86_64/cpuinfo.pas

@@ -40,7 +40,9 @@ Type
 
    tcputype =
       (cpu_none,
-       cpu_athlon64
+       cpu_athlon64,
+       cpu_core_avx,
+       cpu_core_avx2
       );
 
    tfputype =
@@ -77,7 +79,9 @@ Const
    ];
 
    cputypestr : array[tcputype] of string[10] = ('',
-     'ATHLON64'
+     'ATHLON64',
+     'COREAVX',
+     'COREAVX2'
    );
 
    fputypestr : array[tfputype] of string[6] = ('',
@@ -111,6 +115,20 @@ Const
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
+type
+   tcpuflags =  { capability flags; cpu_capabilities holds a set of these per tcputype }
+      (CPUX86_HAS_BMI1,
+       CPUX86_HAS_BMI2  { tested in cgx86 before A_RORX is emitted for rotates }
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags = (  { entries follow tcputype declaration order }
+     { cpu_none      } [],
+     { cpu_athlon64  } [],
+     { cpu_core_avx  } [],
+     { cpu_core_avx2 } [CPUX86_HAS_BMI1,CPUX86_HAS_BMI2]
+   );
+
 Implementation
 
 end.