浏览代码

+ basic Neon support in the assembler writer
+ make use of VEOR to clear double-precision (OS_F64) VFP registers when the selected FPU supports NEON

git-svn-id: trunk@42682 -

florian 6 年之前
父节点
当前提交
867df5362c

+ 18 - 13
compiler/arm/aasmcpu.pas

@@ -138,6 +138,10 @@ uses
 
       IF_NONE   = $00000000;
 
+      IF_EXTENSIONS = $0000000F;
+
+      IF_NEON       = $00000001;
+
       IF_ARMMASK    = $000F0000;
       IF_ARM32      = $00010000;
       IF_THUMB      = $00020000;
@@ -861,6 +865,7 @@ implementation
             A_UXTB,A_UXTH,A_SXTB,A_SXTH,
             A_NEG,
             A_VABS,A_VADD,A_VCVT,A_VDIV,A_VLDR,A_VMOV,A_VMUL,A_VNEG,A_VSQRT,A_VSUB,
+            A_VEOR,
             A_MRS,A_MSR:
               if opnr=0 then
                 result:=operand_write
@@ -2211,19 +2216,19 @@ implementation
 
         FPUMasks: array[tfputype] of longword =
           (
-            IF_NONE,
-            IF_NONE,
-            IF_NONE,
-            IF_FPA,
-            IF_FPA,
-            IF_FPA,
-            IF_VFPv2,
-            IF_VFPv2 or IF_VFPv3,
-            IF_VFPv2 or IF_VFPv3,
-            IF_VFPv2 or IF_VFPv3,
-            IF_NONE,
-            IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
-            IF_VFPv2 or IF_VFPv3 or IF_VFPv4
+            { fpu_none       } IF_NONE,
+            { fpu_soft       } IF_NONE,
+            { fpu_libgcc     } IF_NONE,
+            { fpu_fpa        } IF_FPA,
+            { fpu_fpa10      } IF_FPA,
+            { fpu_fpa11      } IF_FPA,
+            { fpu_vfpv2      } IF_VFPv2,
+            { fpu_vfpv3      } IF_VFPv2 or IF_VFPv3,
+            { fpu_neon_vfpv3 } IF_VFPv2 or IF_VFPv3 or IF_NEON,
+            { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
+            { fpu_fpv4_s16   } IF_NONE,
+            { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
+            { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
           );
       begin
         fArmVMask:=Masks[current_settings.cputype] or FPUMasks[current_settings.fputype];

+ 1 - 0
compiler/arm/armatt.inc

@@ -324,6 +324,7 @@
 'svc',
 'bxj',
 'udf',
+'veor',
 'tan',
 'sqt',
 'suf',

+ 1 - 0
compiler/arm/armatts.inc

@@ -353,5 +353,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 5 - 0
compiler/arm/armins.dat

@@ -1731,6 +1731,11 @@ reg32              \x3\x01\x2F\xFF\x20             ARM32,ARMv5TEJ
 immshifter           \x61\xDE\x0                   THUMB,ARMv4T
 void                 void                          ARM32,ARMv4T
 
+; NEON/Advanced SIMD
+
+[VEOR]
+vreg,vreg,vreg              \x42\xF3\x00\x01\x10   ARM32,NEON
+
 ; FPA
 
 

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
-961;
+962;

+ 1 - 0
compiler/arm/armop.inc

@@ -324,6 +324,7 @@ A_NEG,
 A_SVC,
 A_BXJ,
 A_UDF,
+A_VEOR,
 A_TAN,
 A_SQT,
 A_SUF,

+ 7 - 0
compiler/arm/armtab.inc

@@ -6342,6 +6342,13 @@
     code    : #0;
     flags   : if_arm32 or if_armv4t
   ),
+  (
+    opcode  : A_VEOR;
+    ops     : 3;
+    optypes : (ot_vreg,ot_vreg,ot_vreg,ot_none,ot_none,ot_none);
+    code    : #66#243#0#1#16;
+    flags   : if_arm32 or if_neon
+  ),
   (
     opcode  : A_TAN;
     ops     : 2;

+ 24 - 14
compiler/arm/cgcpu.pas

@@ -3265,20 +3265,30 @@ unit cgcpu;
           case op of
             OP_XOR:
               begin
-                if (src<>dst) or
-                   (reg_cgsize(src)<>size) or
-                   assigned(shuffle) then
-                  internalerror(2009112907);
-                tmpreg:=getintregister(list,OS_32);
-                a_load_const_reg(list,OS_32,0,tmpreg);
-                case size of
-                  OS_F32:
-                    list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
-                  OS_F64:
-                    list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
-                  else
-                    internalerror(2009112908);
-                end;
+                if (FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype]) and (size in [OS_F64]) then
+                  begin
+                    if (reg_cgsize(src)<>size) or
+                       assigned(shuffle) then
+                      internalerror(2019081301);
+                    list.concat(taicpu.op_reg_reg_reg(A_VEOR,dst,dst,src));
+                  end
+                else
+                  begin
+                    if (src<>dst) or
+                       (reg_cgsize(src)<>size) or
+                       assigned(shuffle) then
+                      internalerror(2009112907);
+                    tmpreg:=getintregister(list,OS_32);
+                    a_load_const_reg(list,OS_32,0,tmpreg);
+                    case size of
+                      OS_F32:
+                        list.concat(taicpu.op_reg_reg(A_VMOV,dst,tmpreg));
+                      OS_F64:
+                        list.concat(taicpu.op_reg_reg_reg(A_VMOV,dst,tmpreg,tmpreg));
+                      else
+                        internalerror(2009112908);
+                    end;
+                  end;
               end
             else
               internalerror(2009112906);