Bläddra i källkod

+ applied remaining patches of Torsten Grundke: adds gather instructions of avx2

git-svn-id: trunk@29745 -
florian 10 år sedan
förälder
incheckning
d6e4af8279

+ 8 - 0
compiler/i386/i386att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',

+ 8 - 0
compiler/i386/i386atts.inc

@@ -1016,5 +1016,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 8 - 0
compiler/i386/i386int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1926;
+1942;

+ 8 - 0
compiler/i386/i386op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD231PD,

+ 8 - 0
compiler/i386/i386prop.inc

@@ -956,6 +956,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),

+ 112 - 0
compiler/i386/i386tab.inc

@@ -12796,6 +12796,118 @@
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
     opcode  : A_VFMADD132PD;
     ops     : 3;

+ 8 - 0
compiler/i8086/i8086att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'add4s',
 'brkem',
 'clr1',

+ 8 - 0
compiler/i8086/i8086atts.inc

@@ -1030,5 +1030,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 8 - 0
compiler/i8086/i8086int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'add4s',
 'brkem',
 'clr1',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1954;
+1970;

+ 8 - 0
compiler/i8086/i8086op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_ADD4S,
 A_BRKEM,
 A_CLR1,

+ 8 - 0
compiler/i8086/i8086prop.inc

@@ -958,6 +958,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),

+ 112 - 0
compiler/i8086/i8086tab.inc

@@ -12796,6 +12796,118 @@
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
     opcode  : A_ADD4S;
     ops     : 0;

+ 169 - 23
compiler/x86/aasmcpu.pas

@@ -151,10 +151,17 @@ interface
       { register class 5: XMM (both reg and r/m) }
       OT_XMMREG    = OT_REGNORM or otf_reg_xmm;
       OT_XMMRM     = OT_REGMEM or otf_reg_xmm;
+      OT_XMEM32    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS32;
+      OT_XMEM64    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS64;
 
       { register class 5: XMM (both reg and r/m) }
       OT_YMMREG    = OT_REGNORM or otf_reg_ymm;
       OT_YMMRM     = OT_REGMEM or otf_reg_ymm;
+      OT_YMEM32    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS32;
+      OT_YMEM64    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS64;
+
+      { Vector-Memory operands }
+      OT_VMEM_ANY  = OT_XMEM32 or OT_XMEM64 or OT_YMEM32 or OT_YMEM64;
 
       { Memory operands }
       OT_MEM8      = OT_MEMORY or OT_BITS8;
@@ -228,7 +235,9 @@ interface
                          msiMultiple, msiMultiple8, msiMultiple16, msiMultiple32,
                          msiMultiple64, msiMultiple128, msiMultiple256,
                          msiMemRegSize, msiMemRegx16y32, msiMemRegx32y64, msiMemRegx64y128, msiMemRegx64y256,
-                         msiMem8, msiMem16, msiMem32, msiMem64, msiMem128, msiMem256);
+                         msiMem8, msiMem16, msiMem32, msiMem64, msiMem128, msiMem256,
+                         msiXMem32, msiXMem64, msiYMem32, msiYMem64,
+                         msiVMemMultiple, msiVMemRegSize);
 
       TConstSizeInfo  = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
 
@@ -242,8 +251,10 @@ interface
       MemRefMultiples: set of TMemRefSizeInfo = [msiMultiple, msiMultiple8,
                                                  msiMultiple16, msiMultiple32,
                                                  msiMultiple64, msiMultiple128,
-                                                 msiMultiple256];
+                                                 msiMultiple256, msiVMemMultiple];
 
+      MemRefSizeInfoVMems: Set of TMemRefSizeInfo = [msiXMem32, msiXMem64, msiYMem32, msiYMem64,
+                                                     msiVMemMultiple, msiVMemRegSize];
 
       InsProp : array[tasmop] of TInsProp =
 {$if defined(x86_64)}
@@ -1139,7 +1150,22 @@ implementation
                      then
                     begin
                       { create ot field }
-                      if (ot and OT_SIZE_MASK)=0 then
+                      if (reg_ot_table[findreg_by_number(ref^.base)] and OT_REG_GPR = OT_REG_GPR) and
+                         ((reg_ot_table[findreg_by_number(ref^.index)] = OT_XMMREG) or
+                          (reg_ot_table[findreg_by_number(ref^.index)] = OT_YMMREG)
+                         ) then
+                        // AVX2 - vector memory reference: GPR base + XMM/YMM index (e.g. vgatherdpd xmm0, [rax + xmm1], xmm2)
+                        ot := (reg_ot_table[findreg_by_number(ref^.base)] and OT_REG_GPR) or
+                              (reg_ot_table[findreg_by_number(ref^.index)])
+                      else if (ref^.base = NR_NO) and
+                              ((reg_ot_table[findreg_by_number(ref^.index)] = OT_XMMREG) or
+                               (reg_ot_table[findreg_by_number(ref^.index)] = OT_YMMREG)
+                              ) then
+                        // AVX2 - vector memory reference without a base register (e.g. vgatherdpd xmm0, [xmm1], xmm2)
+                        ot := (OT_REG_GPR) or
+                              (reg_ot_table[findreg_by_number(ref^.index)])
+
+                      else if (ot and OT_SIZE_MASK)=0 then
                         ot:=OT_MEMORY_ANY or opsize_2_type[i,opsize]
                       else
                         ot:=OT_MEMORY_ANY or (ot and OT_SIZE_MASK);
@@ -1289,6 +1315,7 @@ implementation
          begin
            insot:=p^.optypes[i];
            currot:=oper[i]^.ot;
+
            { Check the operand flags }
            if (insot and (not currot) and OT_NON_SIZE)<>0 then
              exit;
@@ -1653,7 +1680,11 @@ implementation
         s:=input.ref^.scalefactor;
         o:=input.ref^.offset;
         sym:=input.ref^.symbol;
-        if ((ir<>NR_NO) and (getregtype(ir)<>R_INTREGISTER)) or
+
+        //if ((ir<>NR_NO) and (getregtype(ir)<>R_INTREGISTER)) or
+        //   ((br<>NR_NO) and (br<>NR_RIP) and (getregtype(br)<>R_INTREGISTER)) then
+        if ((ir<>NR_NO) and (getregtype(ir)=R_MMREGISTER) and (br<>NR_NO) and (getregtype(br)<>R_INTREGISTER)) or // vector memory (AVX2)
+           ((ir<>NR_NO) and (getregtype(ir)<>R_INTREGISTER) and (getregtype(ir)<>R_MMREGISTER)) or
            ((br<>NR_NO) and (br<>NR_RIP) and (getregtype(br)<>R_INTREGISTER)) then
           internalerror(200301081);
         { it's direct address }
@@ -1675,9 +1706,18 @@ implementation
         { it's an indirection }
          begin
            { 16 bit? }
-           if ((ir<>NR_NO) and (isub<>R_SUBADDR) and (isub<>R_SUBD)) or
-              ((br<>NR_NO) and (bsub<>R_SUBADDR) and (bsub<>R_SUBD)) then
+
+           if ((ir<>NR_NO) and (isub in [R_SUBMMX,R_SUBMMY]) and
+               (br<>NR_NO) and (bsub=R_SUBADDR)
+              ) then
+           begin
+             // vector memory operand (AVX2): XMM/YMM index with an address-sized base register =>> skip the 16-bit/32-bit check below
+           end
+           else if ((ir<>NR_NO) and (isub<>R_SUBADDR) and (isub<>R_SUBD)) or
+                   ((br<>NR_NO) and (bsub<>R_SUBADDR) and (bsub<>R_SUBD)) then
+           begin
              message(asmw_e_16bit_32bit_not_supported);
+           end;
 
            { wrong, for various reasons }
            if (ir=NR_ESP) or ((s<>1) and (s<>2) and (s<>4) and (s<>8) and (ir<>NR_NO)) then
@@ -1730,35 +1770,67 @@ implementation
              NR_R8D,
              NR_EAX,
              NR_R8,
-             NR_RAX : index:=0;
+             NR_RAX,
+             NR_XMM0,
+             NR_XMM8,
+             NR_YMM0,
+             NR_YMM8  : index:=0;
              NR_R9D,
              NR_ECX,
              NR_R9,
-             NR_RCX : index:=1;
+             NR_RCX,
+             NR_XMM1,
+             NR_XMM9,
+             NR_YMM1,
+             NR_YMM9  : index:=1;
              NR_R10D,
              NR_EDX,
              NR_R10,
-             NR_RDX : index:=2;
+             NR_RDX,
+             NR_XMM2,
+             NR_XMM10,
+             NR_YMM2,
+             NR_YMM10 : index:=2;
              NR_R11D,
              NR_EBX,
              NR_R11,
-             NR_RBX : index:=3;
+             NR_RBX,
+             NR_XMM3,
+             NR_XMM11,
+             NR_YMM3,
+             NR_YMM11 : index:=3;
              NR_R12D,
              NR_ESP,
              NR_R12,
-             NR_NO  : index:=4;
+             NR_NO,
+             NR_XMM4,
+             NR_XMM12,
+             NR_YMM4,
+             NR_YMM12 : index:=4;
              NR_R13D,
              NR_EBP,
              NR_R13,
-             NR_RBP : index:=5;
+             NR_RBP,
+             NR_XMM5,
+             NR_XMM13,
+             NR_YMM5,
+             NR_YMM13: index:=5;
              NR_R14D,
              NR_ESI,
              NR_R14,
-             NR_RSI : index:=6;
+             NR_RSI,
+             NR_XMM6,
+             NR_XMM14,
+             NR_YMM6,
+             NR_YMM14: index:=6;
              NR_R15D,
              NR_EDI,
              NR_R15,
-             NR_RDI : index:=7;
+             NR_RDI,
+             NR_XMM7,
+             NR_XMM15,
+             NR_YMM7,
+             NR_YMM15: index:=7;
            else
              exit;
            end;
@@ -3254,6 +3326,10 @@ implementation
       actRegTypes  : int64;
       actRegMemTypes: int64;
       NewRegSize: int64;
+
+      actVMemCount  : integer;
+      actVMemTypes  : int64;
+
       RegMMXSizeMask: int64;
       RegXMMSizeMask: int64;
       RegYMMSizeMask: int64;
@@ -3309,19 +3385,32 @@ implementation
             actMemCount      := 0;
             actRegMemTypes   := 0;
 
+            actVMemCount     := 0;
+            actVMemTypes     := 0;
+
             actConstSize     := 0;
             actConstCount    := 0;
 
-            if asmop = a_vpmovzxbq then
-            begin
-              RegXMMSizeMask := RegXMMSizeMask;
-            end;
-
             for j := 0 to insentry^.ops -1 do
             begin
-              if (insentry^.optypes[j] and OT_REGISTER) = OT_REGISTER then
-                begin
-                  inc(actRegCount);
+              if ((insentry^.optypes[j] and OT_XMEM32) = OT_XMEM32) OR
+                 ((insentry^.optypes[j] and OT_XMEM64) = OT_XMEM64) OR
+                 ((insentry^.optypes[j] and OT_YMEM32) = OT_YMEM32) OR
+                 ((insentry^.optypes[j] and OT_YMEM64) = OT_YMEM64) then
+              begin
+                inc(actVMemCount);
+
+                case insentry^.optypes[j] and (OT_XMEM32 OR OT_XMEM64 OR OT_YMEM32 OR OT_YMEM64) of
+                  OT_XMEM32: actVMemTypes := actVMemTypes or OT_XMEM32;
+                  OT_XMEM64: actVMemTypes := actVMemTypes or OT_XMEM64;
+                  OT_YMEM32: actVMemTypes := actVMemTypes or OT_YMEM32;
+                  OT_YMEM64: actVMemTypes := actVMemTypes or OT_YMEM64;
+                        else InternalError(777206);
+                end;
+              end
+              else if (insentry^.optypes[j] and OT_REGISTER) = OT_REGISTER then
+              begin
+                inc(actRegCount);
 
                   NewRegSize := (insentry^.optypes[j] and OT_SIZE_MASK);
                   if NewRegSize = 0 then
@@ -3384,7 +3473,57 @@ implementation
               end;
             end;
 
+            if actVMemCount > 0 then
+            begin
+              if actVMemCount = 1 then
+              begin
+                if actVMemTypes > 0 then
+                begin
+                  case actVMemTypes of
+                    OT_XMEM32: MRefInfo := msiXMem32;
+                    OT_XMEM64: MRefInfo := msiXMem64;
+                    OT_YMEM32: MRefInfo := msiYMem32;
+                    OT_YMEM64: MRefInfo := msiYMem64;
+                          else InternalError(777208);
+                  end;
+
+                  case actRegTypes of
+                    OT_XMMREG: case MRefInfo of
+                                 msiXMem32,
+                                 msiXMem64: RegXMMSizeMask := RegXMMSizeMask or OT_BITS128;
+                                 msiYMem32,
+                                 msiYMem64: RegXMMSizeMask := RegXMMSizeMask or OT_BITS256;
+                                       else InternalError(777210);
+                               end;
+                    OT_YMMREG: case MRefInfo of
+                                 msiXMem32,
+                                 msiXMem64: RegYMMSizeMask := RegYMMSizeMask or OT_BITS128;
+                                 msiYMem32,
+                                 msiYMem64: RegYMMSizeMask := RegYMMSizeMask or OT_BITS256;
+                                       else InternalError(777211);
+                               end;
+                          //else InternalError(777209);
+                  end;
+
+
+                  if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiUnkown then
+                  begin
+                    InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := MRefInfo;
+                  end
+                  else if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then
+                  begin
+                    if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize in [msiXMem32, msiXMem64, msiYMem32, msiYMem64] then
+                    begin
+                      InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemMultiple;
+                    end
+                    else InternalError(777212);
+                  end;
 
+                end;
+              end
+              else InternalError(777207);
+            end
+            else
             case actMemCount of
                 0: ; // nothing todo
                 1: begin
@@ -3469,7 +3608,14 @@ implementation
                             OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y256;
                           end;
               OT_BITS128: begin
-                            if RegMMXSizeMask = 0 then
+                            if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiVMemMultiple then
+                            begin
+                              // vector-memory-operand AVX2 (e.g. VGATHER..)
+                              case RegYMMSizeMask of
+                                OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemRegSize;
+                              end;
+                            end
+                            else if RegMMXSizeMask = 0 then
                             begin
                               case RegYMMSizeMask of
                                 OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;

+ 32 - 0
compiler/x86/rax86int.pas

@@ -1976,6 +1976,7 @@ Unit Rax86int;
       var
         PrefixOp,OverrideOp: tasmop;
         operandnum : longint;
+        t: TRegister;
         is_far_const:boolean;
         i:byte;
         tmp: toperand;
@@ -2147,6 +2148,37 @@ Unit Rax86int;
                   instr.opsize:=S_FAR;
                 end;
 {$endif i8086}
+        if (MemRefInfo(instr.opcode).ExistsSSEAVX) and
+           (MemRefInfo(instr.opcode).MemRefSize in MemRefSizeInfoVMems) then
+        begin
+          for i:=1 to operandnum do
+          begin
+            if (instr.operands[i].opr.typ = OPR_REFERENCE) and
+               (getregtype(instr.operands[i].opr.ref.base) = R_MMREGISTER) and
+               (instr.operands[i].opr.ref.index = NR_NO) then
+            begin
+              instr.operands[i].opr.ref.index := instr.operands[i].opr.ref.base;
+              instr.operands[i].opr.ref.base  := NR_NO;
+            end
+            else if (instr.operands[i].opr.typ = OPR_REFERENCE) and
+                    (getregtype(instr.operands[i].opr.ref.base) = R_MMREGISTER) and
+                    (getregtype(instr.operands[i].opr.ref.index) = R_INTREGISTER) and
+                    (getsubreg(instr.operands[i].opr.ref.index) = R_SUBADDR) then
+            begin
+              // swap base and index so that the vector register becomes the index register
+              // e.g. VGATHERDPD  XMM0, [XMM1 + RAX], XMM2 =>> VGATHERDPD  XMM0, [RAX + XMM1], XMM2
+              // e.g. VGATHERDPD  XMM0, [XMM1 + RAX * 2], XMM2 =>> rejected (scale factor > 1 on the general-purpose register)
+              // e.g. VGATHERDPD  XMM0, [XMM1 + RAX + 16], XMM2 =>> VGATHERDPD  XMM0, [RAX + XMM1 + 16], XMM2
+              if instr.operands[i].opr.ref.scalefactor > 1 then Message(asmr_e_invalid_reference_syntax)
+              else
+              begin
+                t := instr.operands[i].opr.ref.base;
+                instr.operands[i].opr.ref.base := instr.operands[i].opr.ref.index;
+                instr.operands[i].opr.ref.index := t;
+              end;
+            end;
+          end;
+        end;
       end;
 
 

+ 40 - 0
compiler/x86/x86ins.dat

@@ -4956,6 +4956,46 @@ xmmreg,xmmreg,xmmrm                  \361\362\371\1\x45\75\120            AVX2
 ymmreg,ymmreg,ymmrm                  \361\362\363\364\371\1\x45\75\120    AVX2
 xmmreg,xmmreg,xmmrm                  \361\362\363\371\1\x45\75\120        AVX2
 
+[VGATHERDPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x92\76\110        AVX2
+ymmreg,xmem64,ymmreg                 \361\362\363\364\371\1\x92\76\110    AVX2
+
+[VGATHERDPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x92\76\110            AVX2
+ymmreg,ymem32,ymmreg                 \361\362\364\371\1\x92\76\110        AVX2
+
+[VGATHERQPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x93\76\110        AVX2
+ymmreg,ymem64,ymmreg                 \361\362\363\364\371\1\x93\76\110    AVX2
+
+[VGATHERQPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x93\76\110            AVX2
+xmmreg,ymem32,xmmreg                 \361\362\364\371\1\x93\76\110        AVX2
+
+[VPGATHERDD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x90\76\110            AVX2
+ymmreg,ymem32,ymmreg                 \361\362\364\371\1\x90\76\110        AVX2
+
+[VPGATHERDQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x90\76\110        AVX2
+ymmreg,xmem64,ymmreg                 \361\362\363\364\371\1\x90\76\110    AVX2
+
+[VPGATHERQD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem32,xmmreg                 \361\362\371\1\x91\76\110            AVX2
+xmmreg,ymem32,xmmreg                 \361\362\364\371\1\x91\76\110        AVX2
+
+[VPGATHERQQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmem64,xmmreg                 \361\362\363\371\1\x91\76\110        AVX2
+ymmreg,ymem64,ymmreg                 \361\362\363\364\371\1\x91\76\110    AVX2
+
 ;*******************************************************************************
 ;********** NEC V20/V30 ********************************************************
 ;*******************************************************************************

+ 8 - 0
compiler/x86_64/x8664ats.inc

@@ -1010,5 +1010,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 8 - 0
compiler/x86_64/x8664att.inc

@@ -950,6 +950,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',

+ 8 - 0
compiler/x86_64/x8664int.inc

@@ -950,6 +950,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1947;
+1963;

+ 8 - 0
compiler/x86_64/x8664op.inc

@@ -950,6 +950,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD231PD,

+ 8 - 0
compiler/x86_64/x8664pro.inc

@@ -950,6 +950,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),

+ 112 - 0
compiler/x86_64/x8664tab.inc

@@ -12943,6 +12943,118 @@
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
     opcode  : A_VFMADD132PD;
     ops     : 3;