Browse Source

+ copy mode avx512

git-svn-id: trunk@47314 -
florian cách đây 4 năm
mục cha
commit
60bb8600a1
1 tập tin đã thay đổi với 20 bổ sung và 3 xóa
  1. 20 3
      compiler/x86/cgx86.pas

+ 20 - 3
compiler/x86/cgx86.pas

@@ -2718,7 +2718,8 @@ unit cgx86;
         push_segment_size = S_W;
 {$endif}
 
-    type  copymode=(copy_move,copy_mmx,copy_string,copy_mm,copy_avx);
+    type
+      copymode=(copy_move,copy_mmx,copy_string,copy_mm,copy_avx,copy_avx512);
 
     var srcref,dstref,tmpref:Treference;
         r,r0,r1,r2,r3:Tregister;
@@ -2777,11 +2778,15 @@ unit cgx86;
       if cs_opt_size in current_settings.optimizerswitches then
         helpsize:=2*sizeof(aword);
 {$ifndef i8086}
+      if (FPUX86_HAS_AVX512F in fpu_capabilities[current_settings.fputype]) and
+         ((len mod 8)=0) and (len<=128) then
+         cm:=copy_avx512
+      else
       { avx helps only to reduce size, using it in general does at least not help on
         an i7-4770
-        but using the xmm registers reduces register pressure(FK) }
+        but using the xmm registers reduces register pressure (FK) }
       if (FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]) and
-         ({$ifdef i386}(len=8) or{$endif i386}(len=16) or (len=24) or (len=32) or (len=40) or (len=48)) then
+         ((len mod 8)=0) and (len<=48) {$ifndef i386}and (len<>8){$endif i386} then
          cm:=copy_avx
       else
       { I am not sure what CPUs would benefit from using sse instructions for moves
@@ -2949,9 +2954,21 @@ unit cgx86;
               end;
           end;
 
+        copy_avx512,
         copy_avx:
           begin
             hlist:=TAsmList.create;
+            if cm=copy_avx512 then
+              while len>=64 do
+                begin
+                  r0:=getmmregister(list,OS_M512);
+                  a_loadmm_ref_reg(list,OS_M512,OS_M512,srcref,r0,nil);
+                  a_loadmm_reg_ref(hlist,OS_M512,OS_M512,r0,dstref,nil);
+                  inc(srcref.offset,64);
+                  inc(dstref.offset,64);
+                  dec(len,64);
+                  Include(current_procinfo.flags,pi_uses_ymm);
+                end;
             while len>=32 do
               begin
                 r0:=getmmregister(list,OS_M256);