Browse Source

support vector operand writemask and zero flag

git-svn-id: branches/tg74/avx512@39359 -
tg74 7 years ago
parent
commit
4dc5442fa5
54 changed files with 837 additions and 141 deletions
  1. 3 0
      compiler/aasmtai.pas
  2. 2 2
      compiler/i386/i386tab.inc
  3. 8 0
      compiler/i386/r386ari.inc
  4. 9 1
      compiler/i386/r386att.inc
  5. 8 0
      compiler/i386/r386con.inc
  6. 9 1
      compiler/i386/r386dwrf.inc
  7. 9 1
      compiler/i386/r386int.inc
  8. 8 0
      compiler/i386/r386iri.inc
  9. 9 1
      compiler/i386/r386nasm.inc
  10. 1 1
      compiler/i386/r386nor.inc
  11. 8 0
      compiler/i386/r386nri.inc
  12. 9 1
      compiler/i386/r386num.inc
  13. 9 1
      compiler/i386/r386ot.inc
  14. 9 1
      compiler/i386/r386rni.inc
  15. 8 0
      compiler/i386/r386sri.inc
  16. 9 1
      compiler/i386/r386stab.inc
  17. 9 1
      compiler/i386/r386std.inc
  18. 2 2
      compiler/i8086/i8086tab.inc
  19. 8 0
      compiler/i8086/r8086ari.inc
  20. 9 1
      compiler/i8086/r8086att.inc
  21. 8 0
      compiler/i8086/r8086con.inc
  22. 9 1
      compiler/i8086/r8086dwrf.inc
  23. 9 1
      compiler/i8086/r8086int.inc
  24. 8 0
      compiler/i8086/r8086iri.inc
  25. 9 1
      compiler/i8086/r8086nasm.inc
  26. 1 1
      compiler/i8086/r8086nor.inc
  27. 8 0
      compiler/i8086/r8086nri.inc
  28. 9 1
      compiler/i8086/r8086num.inc
  29. 9 1
      compiler/i8086/r8086ot.inc
  30. 9 1
      compiler/i8086/r8086rni.inc
  31. 8 0
      compiler/i8086/r8086sri.inc
  32. 9 1
      compiler/i8086/r8086stab.inc
  33. 9 1
      compiler/i8086/r8086std.inc
  34. 17 3
      compiler/scanner.pas
  35. 261 60
      compiler/x86/aasmcpu.pas
  36. 14 2
      compiler/x86/rax86.pas
  37. 168 31
      compiler/x86/rax86int.pas
  38. 2 1
      compiler/x86/x86ins.dat
  39. 8 8
      compiler/x86/x86reg.dat
  40. 8 0
      compiler/x86_64/r8664ari.inc
  41. 9 1
      compiler/x86_64/r8664att.inc
  42. 8 0
      compiler/x86_64/r8664con.inc
  43. 9 1
      compiler/x86_64/r8664dwrf.inc
  44. 9 1
      compiler/x86_64/r8664int.inc
  45. 8 0
      compiler/x86_64/r8664iri.inc
  46. 9 1
      compiler/x86_64/r8664nasm.inc
  47. 1 1
      compiler/x86_64/r8664nor.inc
  48. 9 1
      compiler/x86_64/r8664num.inc
  49. 9 1
      compiler/x86_64/r8664ot.inc
  50. 9 1
      compiler/x86_64/r8664rni.inc
  51. 8 0
      compiler/x86_64/r8664sri.inc
  52. 9 1
      compiler/x86_64/r8664stab.inc
  53. 9 1
      compiler/x86_64/r8664std.inc
  54. 2 2
      compiler/x86_64/x8664tab.inc

+ 3 - 0
compiler/aasmtai.pas

@@ -419,6 +419,9 @@ interface
         { please keep the size of this record <=12 bytes and keep it properly aligned }
         { please keep the size of this record <=12 bytes and keep it properly aligned }
         toper = record
         toper = record
           ot : longint;
           ot : longint;
+        {$ifdef x86}
+          vopext: byte;
+        {$ENDIF}
           case typ : toptype of
           case typ : toptype of
             top_none   : ();
             top_none   : ();
             top_reg    : (reg:tregister);
             top_reg    : (reg:tregister);

+ 2 - 2
compiler/i386/i386tab.inc

@@ -13807,8 +13807,8 @@
   (
   (
     opcode  : A_VPERMD;
     opcode  : A_VPERMD;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #241#242#244#249#1#54#61#80;
+    optypes : (ot_ymmregmz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#236#241#242#244#249#1#54#61#80;
     flags   : [if_avx2]
     flags   : [if_avx2]
   ),
   ),
   (
   (

+ 8 - 0
compiler/i386/r386ari.inc

@@ -43,6 +43,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i386/r386att.inc

@@ -96,4 +96,12 @@
 '%zmm4',
 '%zmm4',
 '%zmm5',
 '%zmm5',
 '%zmm6',
 '%zmm6',
-'%zmm7'
+'%zmm7',
+'%k0',
+'%k1',
+'%k2',
+'%k3',
+'%k4',
+'%k5',
+'%k6',
+'%k7'

+ 8 - 0
compiler/i386/r386con.inc

@@ -97,3 +97,11 @@ NR_ZMM4 = tregister($040E0004);
 NR_ZMM5 = tregister($040E0005);
 NR_ZMM5 = tregister($040E0005);
 NR_ZMM6 = tregister($040E0006);
 NR_ZMM6 = tregister($040E0006);
 NR_ZMM7 = tregister($040E0007);
 NR_ZMM7 = tregister($040E0007);
+NR_K0 = tregister($06000000);
+NR_K1 = tregister($06000001);
+NR_K2 = tregister($06000002);
+NR_K3 = tregister($06000003);
+NR_K4 = tregister($06000004);
+NR_K5 = tregister($06000005);
+NR_K6 = tregister($06000006);
+NR_K7 = tregister($06000007);

+ 9 - 1
compiler/i386/r386dwrf.inc

@@ -96,4 +96,12 @@
 25,
 25,
 26,
 26,
 27,
 27,
-28
+28,
+118,
+119,
+120,
+121,
+122,
+123,
+124,
+125

+ 9 - 1
compiler/i386/r386int.inc

@@ -96,4 +96,12 @@
 'zmm4',
 'zmm4',
 'zmm5',
 'zmm5',
 'zmm6',
 'zmm6',
-'zmm7'
+'zmm7',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 8 - 0
compiler/i386/r386iri.inc

@@ -44,6 +44,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i386/r386nasm.inc

@@ -96,4 +96,12 @@
 'zmm4',
 'zmm4',
 'zmm5',
 'zmm5',
 'zmm6',
 'zmm6',
-'zmm7'
+'zmm7',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 1 - 1
compiler/i386/r386nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86reg.dat }
 { don't edit, this file is generated from x86reg.dat }
-98
+106

+ 8 - 0
compiler/i386/r386nri.inc

@@ -44,6 +44,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i386/r386num.inc

@@ -96,4 +96,12 @@ tregister($040E0003),
 tregister($040E0004),
 tregister($040E0004),
 tregister($040E0005),
 tregister($040E0005),
 tregister($040E0006),
 tregister($040E0006),
-tregister($040E0007)
+tregister($040E0007),
+tregister($06000000),
+tregister($06000001),
+tregister($06000002),
+tregister($06000003),
+tregister($06000004),
+tregister($06000005),
+tregister($06000006),
+tregister($06000007)

+ 9 - 1
compiler/i386/r386ot.inc

@@ -96,4 +96,12 @@ OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
-OT_ZMMREG
+OT_ZMMREG,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK

+ 9 - 1
compiler/i386/r386rni.inc

@@ -96,4 +96,12 @@
 37,
 37,
 38,
 38,
 39,
 39,
-40
+40,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105

+ 8 - 0
compiler/i386/r386sri.inc

@@ -44,6 +44,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i386/r386stab.inc

@@ -96,4 +96,12 @@
 25,
 25,
 26,
 26,
 27,
 27,
-28
+28,
+-1,
+-1,
+-1,
+-1,
+-1,
+-1,
+-1,
+-1

+ 9 - 1
compiler/i386/r386std.inc

@@ -96,4 +96,12 @@
 'zmm4',
 'zmm4',
 'zmm5',
 'zmm5',
 'zmm6',
 'zmm6',
-'zmm7'
+'zmm7',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 2 - 2
compiler/i8086/i8086tab.inc

@@ -13835,8 +13835,8 @@
   (
   (
     opcode  : A_VPERMD;
     opcode  : A_VPERMD;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #241#242#244#249#1#54#61#80;
+    optypes : (ot_ymmregmz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#236#241#242#244#249#1#54#61#80;
     flags   : [if_avx2]
     flags   : [if_avx2]
   ),
   ),
   (
   (

+ 8 - 0
compiler/i8086/r8086ari.inc

@@ -43,6 +43,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i8086/r8086att.inc

@@ -96,4 +96,12 @@
 '%zmm4',
 '%zmm4',
 '%zmm5',
 '%zmm5',
 '%zmm6',
 '%zmm6',
-'%zmm7'
+'%zmm7',
+'%k0',
+'%k1',
+'%k2',
+'%k3',
+'%k4',
+'%k5',
+'%k6',
+'%k7'

+ 8 - 0
compiler/i8086/r8086con.inc

@@ -97,3 +97,11 @@ NR_ZMM4 = tregister($040E0004);
 NR_ZMM5 = tregister($040E0005);
 NR_ZMM5 = tregister($040E0005);
 NR_ZMM6 = tregister($040E0006);
 NR_ZMM6 = tregister($040E0006);
 NR_ZMM7 = tregister($040E0007);
 NR_ZMM7 = tregister($040E0007);
+NR_K0 = tregister($06000000);
+NR_K1 = tregister($06000001);
+NR_K2 = tregister($06000002);
+NR_K3 = tregister($06000003);
+NR_K4 = tregister($06000004);
+NR_K5 = tregister($06000005);
+NR_K6 = tregister($06000006);
+NR_K7 = tregister($06000007);

+ 9 - 1
compiler/i8086/r8086dwrf.inc

@@ -96,4 +96,12 @@
 25,
 25,
 26,
 26,
 27,
 27,
-28
+28,
+118,
+119,
+120,
+121,
+122,
+123,
+124,
+125

+ 9 - 1
compiler/i8086/r8086int.inc

@@ -96,4 +96,12 @@
 'zmm4',
 'zmm4',
 'zmm5',
 'zmm5',
 'zmm6',
 'zmm6',
-'zmm7'
+'zmm7',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 8 - 0
compiler/i8086/r8086iri.inc

@@ -44,6 +44,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i8086/r8086nasm.inc

@@ -96,4 +96,12 @@
 'zmm4',
 'zmm4',
 'zmm5',
 'zmm5',
 'zmm6',
 'zmm6',
-'zmm7'
+'zmm7',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 1 - 1
compiler/i8086/r8086nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86reg.dat }
 { don't edit, this file is generated from x86reg.dat }
-98
+106

+ 8 - 0
compiler/i8086/r8086nri.inc

@@ -44,6 +44,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i8086/r8086num.inc

@@ -96,4 +96,12 @@ tregister($040E0003),
 tregister($040E0004),
 tregister($040E0004),
 tregister($040E0005),
 tregister($040E0005),
 tregister($040E0006),
 tregister($040E0006),
-tregister($040E0007)
+tregister($040E0007),
+tregister($06000000),
+tregister($06000001),
+tregister($06000002),
+tregister($06000003),
+tregister($06000004),
+tregister($06000005),
+tregister($06000006),
+tregister($06000007)

+ 9 - 1
compiler/i8086/r8086ot.inc

@@ -96,4 +96,12 @@ OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
-OT_ZMMREG
+OT_ZMMREG,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK

+ 9 - 1
compiler/i8086/r8086rni.inc

@@ -96,4 +96,12 @@
 37,
 37,
 38,
 38,
 39,
 39,
-40
+40,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105

+ 8 - 0
compiler/i8086/r8086sri.inc

@@ -44,6 +44,14 @@
 29,
 29,
 30,
 30,
 39,
 39,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
 66,
 66,
 67,
 67,
 68,
 68,

+ 9 - 1
compiler/i8086/r8086stab.inc

@@ -96,4 +96,12 @@
 25,
 25,
 26,
 26,
 27,
 27,
-28
+28,
+-1,
+-1,
+-1,
+-1,
+-1,
+-1,
+-1,
+-1

+ 9 - 1
compiler/i8086/r8086std.inc

@@ -96,4 +96,12 @@
 'zmm4',
 'zmm4',
 'zmm5',
 'zmm5',
 'zmm6',
 'zmm6',
-'zmm7'
+'zmm7',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 17 - 3
compiler/scanner.pas

@@ -39,7 +39,7 @@ interface
 
 
 
 
     type
     type
-       tcommentstyle = (comment_none,comment_tp,comment_oldtp,comment_delphi,comment_c);
+       tcommentstyle = (comment_none,comment_tp,comment_oldtp,comment_delphi,comment_c, comment_x86OpExt);
 
 
        tscannerfile = class;
        tscannerfile = class;
 
 
@@ -212,7 +212,7 @@ interface
           procedure readnumber;
           procedure readnumber;
           function  readid:string;
           function  readid:string;
           function  readval:longint;
           function  readval:longint;
-          function  readcomment:string;
+          function  readcomment(include_special_char: boolean = false):string;
           function  readquotedstring:string;
           function  readquotedstring:string;
           function  readstate:char;
           function  readstate:char;
           function  readoptionalstate(fallback:char):char;
           function  readoptionalstate(fallback:char):char;
@@ -4132,7 +4132,7 @@ type
       end;
       end;
 
 
 
 
-    function tscannerfile.readcomment:string;
+    function tscannerfile.readcomment(include_special_char: boolean):string;
       var
       var
         i : longint;
         i : longint;
       begin
       begin
@@ -4141,15 +4141,29 @@ type
           case c of
           case c of
             '{' :
             '{' :
               begin
               begin
+                if (include_special_char) and (i<255) then
+                begin
+                  inc(i);
+                  readcomment[i]:=c;
+                end;
+
                 if current_commentstyle=comment_tp then
                 if current_commentstyle=comment_tp then
                   inc_comment_level;
                   inc_comment_level;
               end;
               end;
             '}' :
             '}' :
               begin
               begin
+                if (include_special_char) and (i<255) then
+                begin
+                  inc(i);
+                  readcomment[i]:=c;
+                end;
+
                 if current_commentstyle=comment_tp then
                 if current_commentstyle=comment_tp then
                   begin
                   begin
                     readchar;
                     readchar;
                     dec_comment_level;
                     dec_comment_level;
+
+
                     if comment_level=0 then
                     if comment_level=0 then
                       break
                       break
                     else
                     else

+ 261 - 60
compiler/x86/aasmcpu.pas

@@ -57,6 +57,11 @@ interface
       OT_BITS256   = $40000000;  { 32 byte AVX }
       OT_BITS256   = $40000000;  { 32 byte AVX }
       OT_BITS512   = $80000000;  { 64 byte AVX512 }
       OT_BITS512   = $80000000;  { 64 byte AVX512 }
 
 
+      OT_VECTORMASK = $1000000000;  { OPTIONAL VECTORMASK AVX512}
+      OT_VECTORZERO = $2000000000;  { OPTIONAL ZERO-FLAG  AVX512}
+
+      OT_VECTOR_EXT_MASK = OT_VECTORMASK or OT_VECTORZERO;
+
       OT_BITS80    = $00000010;  { FPU only  }
       OT_BITS80    = $00000010;  { FPU only  }
       OT_FAR       = $00000020;  { this means 16:16 or 16:32, like in CALL/JMP }
       OT_FAR       = $00000020;  { this means 16:16 or 16:32, like in CALL/JMP }
       OT_NEAR      = $00000040;
       OT_NEAR      = $00000040;
@@ -88,12 +93,14 @@ interface
       otf_reg_cdt  = $00100000;
       otf_reg_cdt  = $00100000;
       otf_reg_gpr  = $00200000;
       otf_reg_gpr  = $00200000;
       otf_reg_sreg = $00400000;
       otf_reg_sreg = $00400000;
+      otf_reg_k    = $00800000;
       otf_reg_fpu  = $01000000;
       otf_reg_fpu  = $01000000;
       otf_reg_mmx  = $02000000;
       otf_reg_mmx  = $02000000;
       otf_reg_xmm  = $04000000;
       otf_reg_xmm  = $04000000;
       otf_reg_ymm  = $08000000;
       otf_reg_ymm  = $08000000;
       otf_reg_zmm  = $10000000;
       otf_reg_zmm  = $10000000;
 
 
+
       otf_reg_extra_mask = $0F000000;
       otf_reg_extra_mask = $0F000000;
       { Bits 16..19: subclasses, meaning depends on classes field }
       { Bits 16..19: subclasses, meaning depends on classes field }
       otf_sub0     = $00010000;
       otf_sub0     = $00010000;
@@ -105,7 +112,7 @@ interface
       //OT_REG_EXTRA_MASK = $0F000000;
       //OT_REG_EXTRA_MASK = $0F000000;
       OT_REG_EXTRA_MASK = $1F000000;
       OT_REG_EXTRA_MASK = $1F000000;
 
 
-      OT_REG_TYPMASK = otf_reg_cdt or otf_reg_gpr or otf_reg_sreg or otf_reg_extra_mask;
+      OT_REG_TYPMASK = otf_reg_cdt or otf_reg_gpr or otf_reg_sreg or otf_reg_k or otf_reg_extra_mask;
       { register class 0: CRx, DRx and TRx }
       { register class 0: CRx, DRx and TRx }
 {$ifdef x86_64}
 {$ifdef x86_64}
       OT_REG_CDT   = OT_REGISTER or otf_reg_cdt or OT_BITS64;
       OT_REG_CDT   = OT_REGISTER or otf_reg_cdt or OT_BITS64;
@@ -165,18 +172,31 @@ interface
       OT_XMEM32    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS32;
       OT_XMEM32    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS32;
       OT_XMEM64    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS64;
       OT_XMEM64    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS64;
 
 
+      OT_XMMREGM   = OT_XMMREG or OT_VECTORMASK;
+      OT_XMMREGMZ  = OT_XMMREG or OT_VECTORMASK or OT_VECTORZERO;
+
+
       { register class 5: YMM (both reg and r/m) }
       { register class 5: YMM (both reg and r/m) }
       OT_YMMREG    = OT_REGNORM or otf_reg_ymm;
       OT_YMMREG    = OT_REGNORM or otf_reg_ymm;
       OT_YMMRM     = OT_REGMEM or otf_reg_ymm;
       OT_YMMRM     = OT_REGMEM or otf_reg_ymm;
       OT_YMEM32    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS32;
       OT_YMEM32    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS32;
       OT_YMEM64    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS64;
       OT_YMEM64    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS64;
 
 
+      OT_YMMREGM   = OT_YMMREG or OT_VECTORMASK;
+      OT_YMMREGMZ  = OT_YMMREG or OT_VECTORMASK or OT_VECTORZERO;
+
+
       { register class 5: ZMM (both reg and r/m) }
       { register class 5: ZMM (both reg and r/m) }
       OT_ZMMREG    = OT_REGNORM or otf_reg_zmm;
       OT_ZMMREG    = OT_REGNORM or otf_reg_zmm;
       OT_ZMMRM     = OT_REGMEM or otf_reg_zmm;
       OT_ZMMRM     = OT_REGMEM or otf_reg_zmm;
       OT_ZMEM32    = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS32;
       OT_ZMEM32    = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS32;
       OT_ZMEM64    = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS64;
       OT_ZMEM64    = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS64;
 
 
+      OT_ZMMREGM   = OT_ZMMREG or OT_VECTORMASK;
+      OT_ZMMREGMZ  = OT_ZMMREG or OT_VECTORMASK or OT_VECTORZERO;
+
+
+      OT_REG_VECTORMASK = OT_REGNORM or otf_reg_k;
 
 
       { Vector-Memory operands }
       { Vector-Memory operands }
       OT_VMEM_ANY  = OT_XMEM32 or OT_XMEM64 or OT_YMEM32 or OT_YMEM64 or OT_ZMEM32 or OT_ZMEM64;
       OT_VMEM_ANY  = OT_XMEM32 or OT_XMEM64 or OT_YMEM32 or OT_YMEM64 or OT_ZMEM32 or OT_ZMEM64;
@@ -206,6 +226,13 @@ interface
       OT_ONENESS   = otf_sub0;  { special type of immediate operand  }
       OT_ONENESS   = otf_sub0;  { special type of immediate operand  }
       OT_UNITY     = OT_IMMEDIATE or OT_ONENESS;  { for shift/rotate instructions  }
       OT_UNITY     = OT_IMMEDIATE or OT_ONENESS;  { for shift/rotate instructions  }
 
 
+      OTVE_VECTOR_SAE              = 1 shl 4;
+      OTVE_VECTOR_ER               = 1 shl 5;
+      OTVE_VECTORMASK_ZERO         = 1 shl 6;
+      OTVE_VECTORMASK_WRITEMASK    = 1 shl 7;
+
+      OTVE_VECTOR_MASK = OTVE_VECTOR_SAE or OTVE_VECTOR_ER or OTVE_VECTORMASK_ZERO or OTVE_VECTORMASK_WRITEMASK;
+
       { Size of the instruction table converted by nasmconv.pas }
       { Size of the instruction table converted by nasmconv.pas }
 {$if defined(x86_64)}
 {$if defined(x86_64)}
       instabentries = {$i x8664nop.inc}
       instabentries = {$i x8664nop.inc}
@@ -297,6 +324,8 @@ interface
                                                      msiZMem32, msiZMem64,
                                                      msiZMem32, msiZMem64,
                                                      msiVMemMultiple, msiVMemRegSize];
                                                      msiVMemMultiple, msiVMemRegSize];
 
 
+
+
       InsProp : array[tasmop] of TInsProp =
       InsProp : array[tasmop] of TInsProp =
 {$if defined(x86_64)}
 {$if defined(x86_64)}
         {$i x8664pro.inc}
         {$i x8664pro.inc}
@@ -1535,6 +1564,7 @@ implementation
         currot: int64;
         currot: int64;
         i,j,asize,oprs : longint;
         i,j,asize,oprs : longint;
         insflags:tinsflags;
         insflags:tinsflags;
+        vopext: int64;
         siz : array[0..max_operands-1] of longint;
         siz : array[0..max_operands-1] of longint;
       begin
       begin
         result:=false;
         result:=false;
@@ -1670,8 +1700,27 @@ implementation
                 ((insot and currot and OT_SIZE_MASK)<>(currot and OT_SIZE_MASK)) then
                 ((insot and currot and OT_SIZE_MASK)<>(currot and OT_SIZE_MASK)) then
                exit;
                exit;
            end;
            end;
+        end;
 
 
+
+        if (InsTabMemRefSizeInfoCache^[opcode].ExistsSSEAVX) then
+        begin
+          for i:=0 to p^.ops-1 do
+           begin
+             // check vector-operand extension, e.g. {k1} {z}
+
+             vopext := 0;
+             if (oper[i]^.vopext and OTVE_VECTORMASK_WRITEMASK) = OTVE_VECTORMASK_WRITEMASK then
+              vopext := vopext or OT_VECTORMASK;
+             if (oper[i]^.vopext and OTVE_VECTORMASK_ZERO) = OTVE_VECTORMASK_ZERO then
+              vopext := vopext or OT_VECTORZERO;
+
+             //TG TODO SAE,ER
+             if p^.optypes[i] and vopext <> vopext then
+              exit;
+           end;
         end;
         end;
+
         result:=true;
         result:=true;
       end;
       end;
 
 
@@ -1727,6 +1776,7 @@ implementation
            { We need intel style operands }
            { We need intel style operands }
            SetOperandOrder(op_intel);
            SetOperandOrder(op_intel);
            { create the .ot fields }
            { create the .ot fields }
+
            create_ot(objdata);
            create_ot(objdata);
           { set the file position }
           { set the file position }
          end
          end
@@ -1765,7 +1815,6 @@ implementation
       i: integer;
       i: integer;
     begin
     begin
       result := false;
       result := false;
-
       for i := 0 to ops - 1 do
       for i := 0 to ops - 1 do
       begin
       begin
         if (oper[i]^.typ=top_reg) and
         if (oper[i]^.typ=top_reg) and
@@ -1773,11 +1822,15 @@ implementation
          if getsupreg(oper[i]^.reg)>=16 then
          if getsupreg(oper[i]^.reg)>=16 then
           result := true;
           result := true;
 
 
+        if (oper[i]^.vopext and OTVE_VECTOR_MASK) <> 0 then
+         result := true;
+
         //TG TODO k1 + z
         //TG TODO k1 + z
       end;
       end;
     end;
     end;
 
 
 
 
+
     function taicpu.Pass1(objdata:TObjData):longint;
     function taicpu.Pass1(objdata:TObjData):longint;
       begin
       begin
         Pass1:=0;
         Pass1:=0;
@@ -1791,6 +1844,7 @@ implementation
         { Get InsEntry }
         { Get InsEntry }
         if FindInsEntry(ObjData) then
         if FindInsEntry(ObjData) then
          begin
          begin
+
            { Calculate instruction size }
            { Calculate instruction size }
            InsSize:=calcsize(insentry);
            InsSize:=calcsize(insentry);
            if segprefix<>NR_NO then
            if segprefix<>NR_NO then
@@ -2014,7 +2068,7 @@ implementation
       end;
       end;
 
 
 
 
-    function taicpu.needaddrprefix(opidx:byte):boolean;
+        function taicpu.NeedAddrPrefix(opidx: byte): boolean;
       begin
       begin
 {$if defined(x86_64)}
 {$if defined(x86_64)}
         result:=(oper[opidx]^.typ=top_ref) and is_32_bit_ref(oper[opidx]^.ref^);
         result:=(oper[opidx]^.typ=top_ref) and is_32_bit_ref(oper[opidx]^.ref^);
@@ -2052,7 +2106,7 @@ implementation
         maxsupreg: array[tregistertype] of tsuperregister=
         maxsupreg: array[tregistertype] of tsuperregister=
 {$ifdef x86_64}
 {$ifdef x86_64}
           //(0, 16, 9, 8, 16, 32, 0, 0);
           //(0, 16, 9, 8, 16, 32, 0, 0);
-          (0, 16, 9, 8, 32, 32, 0, 0); //TG TODO check
+          (0, 16, 9, 8, 32, 32, 0, 0); //TG
 {$else x86_64}
 {$else x86_64}
           (0,  8, 9, 8,  8, 32, 0, 0);
           (0,  8, 9, 8,  8, 32, 0, 0);
 {$endif x86_64}
 {$endif x86_64}
@@ -2671,6 +2725,13 @@ implementation
         omit_rexw : boolean;
         omit_rexw : boolean;
 {$endif x86_64}
 {$endif x86_64}
       begin
       begin
+        //TG TODO delete
+        if p^.opcode = a_VPERMD then
+         begin
+           len:=0;
+         end;
+
+
         len:=0;
         len:=0;
         codes:=@p^.code[0];
         codes:=@p^.code[0];
         exists_vex := false;
         exists_vex := false;
@@ -2844,6 +2905,13 @@ implementation
 {$endif x86_64}
 {$endif x86_64}
 
 
               end;
               end;
+            &350:
+              begin
+                exists_evex := true;
+              end;
+            &351: ; // EVEX length bit 512
+            &352: ; // EVEX W1
+            &354: ; // EVEX brc-memoperand
             &362: // VEX prefix for AVX (length = 2 or 3 bytes, dependens on REX.XBW or opcode-prefix ($0F38 or $0F3A))
             &362: // VEX prefix for AVX (length = 2 or 3 bytes, dependens on REX.XBW or opcode-prefix ($0F38 or $0F3A))
                   // =>> DEFAULT = 2 Bytes
                   // =>> DEFAULT = 2 Bytes
               begin
               begin
@@ -2859,13 +2927,12 @@ implementation
               begin
               begin
                 if not(exists_vex_extension) then
                 if not(exists_vex_extension) then
                 begin
                 begin
-                  inc(len);
+                  //inc(len);
                   exists_vex_extension := true;
                   exists_vex_extension := true;
                 end;
                 end;
               end;
               end;
             &364: ; // VEX length bit 256
             &364: ; // VEX length bit 256
-            &351: ; // EVEX length bit 512
-            &352: ; // EVEX W1
+
             &366, // operand 2 (ymmreg) encoded immediate byte (bit 4-7)
             &366, // operand 2 (ymmreg) encoded immediate byte (bit 4-7)
             &367: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
             &367: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
             &370: // VEX-Extension prefix $0F
             &370: // VEX-Extension prefix $0F
@@ -2876,14 +2943,10 @@ implementation
               begin
               begin
                 if not(exists_vex_extension) then
                 if not(exists_vex_extension) then
                 begin
                 begin
-                  inc(len);
+                  //inc(len);
                   exists_vex_extension := true;
                   exists_vex_extension := true;
                 end;
                 end;
               end;
               end;
-            &350:
-              begin
-                exists_evex := true;
-              end;
             &300,&301,&302:
             &300,&301,&302:
               begin
               begin
 {$if defined(x86_64) or defined(i8086)}
 {$if defined(x86_64) or defined(i8086)}
@@ -2947,14 +3010,15 @@ implementation
         begin
         begin
           if exists_vex then
           if exists_vex then
           begin
           begin
-          if exists_prefix_66 then dec(len);
-          if exists_prefix_F2 then dec(len);
-          if exists_prefix_F3 then dec(len);
+            if exists_prefix_66 then dec(len);
+            if exists_prefix_F2 then dec(len);
+            if exists_prefix_F3 then dec(len);
 
 
-  {$ifdef x86_64}
-          if not(exists_vex_extension) then
-            if rex and $0B <> 0 then inc(len);  // REX.WXB <> 0 =>> needed VEX-Extension
-  {$endif x86_64}
+            if exists_vex_extension then inc(len);
+    {$ifdef x86_64}
+            if not(exists_vex_extension) then
+              if rex and $0B <> 0 then inc(len);  // REX.WXB <> 0 =>> needed VEX-Extension
+    {$endif x86_64}
 
 
           end;
           end;
 
 
@@ -3048,6 +3112,7 @@ implementation
        * \350          - EVEX prefix for AVX instructions
        * \350          - EVEX prefix for AVX instructions
        * \351          - EVEX Vector length 512
        * \351          - EVEX Vector length 512
        * \352          - EVEX W1
        * \352          - EVEX W1
+       * \354          - EVEX brc-memoperand
 
 
        * \361          - 0x66 prefix for SSE instructions
        * \361          - 0x66 prefix for SSE instructions
 
 
@@ -3230,12 +3295,11 @@ implementation
       begin
       begin
         { safety check }
         { safety check }
 
 
-        //TG TODO delete
+        // TODO delete
         i := longword(insoffset);
         i := longword(insoffset);
 
 
         if objdata.currobjsec.size<>longword(insoffset) then
         if objdata.currobjsec.size<>longword(insoffset) then
         begin
         begin
-        //TG TODO delete
           Message1(asmw_e_invalid_opcode_and_operands,GetString);
           Message1(asmw_e_invalid_opcode_and_operands,GetString);
 
 
           internalerror(200130121);
           internalerror(200130121);
@@ -3361,7 +3425,7 @@ implementation
                    // AVX 512 - EVEX
                    // AVX 512 - EVEX
                    // check operands
                    // check operands
 
 
-                   //TG TODO delete
+                   // TODO delete
                    pins := insentry^;
                    pins := insentry^;
 
 
 
 
@@ -3392,7 +3456,9 @@ implementation
                  end;
                  end;
            &350: needed_EVEX            := true;            // AVX512 instruction or AVX128/256/512-instruction (depended on operands [x,y,z]mm16..)
            &350: needed_EVEX            := true;            // AVX512 instruction or AVX128/256/512-instruction (depended on operands [x,y,z]mm16..)
            &351: EVEXll                 := $02;             // vectorlength = 512 bits AND no scalar
            &351: EVEXll                 := $02;             // vectorlength = 512 bits AND no scalar
-           &352: EVEXw1                 := $01;
+           &352: EVEXw0                 := $01;
+           &353: EVEXw1                 := $01;
+           &354: EVEXb                  := $01;             //TG TODO anpassen - nur zum Testen
            &361: begin
            &361: begin
                    VEXvvvv              := VEXvvvv  OR $01; // set SIMD-prefix $66
                    VEXvvvv              := VEXvvvv  OR $01; // set SIMD-prefix $66
                    VEXpp                := $01;             // set SIMD-prefix $66
                    VEXpp                := $01;             // set SIMD-prefix $66
@@ -3496,7 +3562,25 @@ implementation
 
 
           if needed_EVEX then
           if needed_EVEX then
           begin
           begin
+            for i := 0 to ops - 1 do
+             if (oper[i]^.vopext and OTVE_VECTOR_MASK) <> 0 then
+             begin
+               if oper[i]^.vopext and OTVE_VECTORMASK_WRITEMASK = OTVE_VECTORMASK_WRITEMASK then
+                EVEXaaa := oper[i]^.vopext and $07;
+               if oper[i]^.vopext and OTVE_VECTORMASK_ZERO = OTVE_VECTORMASK_ZERO then
+                EVEXz := 1;
+
+               //TG TODO ER, SAE
+               break;
+             end;
 
 
+            // if (insentry.optypes[i] and OT_VECTORMASK) = OT_VECTORMASK then
+            // begin
+            //   if oper[opidx]^.ot and OT_VECTORMASK = OT_VECTORMASK then
+            //   begin
+            //
+            //   end;
+            // end;
 
 
             bytes[0] := $62;
             bytes[0] := $62;
 
 
@@ -3938,6 +4022,7 @@ implementation
                   Internalerror(2014032001);
                   Internalerror(2014032001);
               end;
               end;
             &350..&352: ; // EVEX flags =>> nothing todo
             &350..&352: ; // EVEX flags =>> nothing todo
+            &354: ;       // EVEX flags =>> nothing todo
             &370..&372: ; // VEX flags =>> nothing todo
             &370..&372: ; // VEX flags =>> nothing todo
             &37:
             &37:
               begin
               begin
@@ -4407,6 +4492,13 @@ implementation
           RegYMMSizeMask := 0;
           RegYMMSizeMask := 0;
           RegZMMSizeMask := 0;
           RegZMMSizeMask := 0;
 
 
+
+          //TG TODO delete
+          if AsmOp = a_vpermd then
+          begin
+            RegMMXSizeMask := 0;
+          end;
+
           while (insentry^.opcode=AsmOp) do
           while (insentry^.opcode=AsmOp) do
           begin
           begin
             MRefInfo         := msiUnkown;
             MRefInfo         := msiUnkown;
@@ -4650,6 +4742,7 @@ implementation
                                         RegMMXSizeMask := not(0);
                                         RegMMXSizeMask := not(0);
                                         RegXMMSizeMask := not(0);
                                         RegXMMSizeMask := not(0);
                                         RegYMMSizeMask := not(0);
                                         RegYMMSizeMask := not(0);
+                                        RegZMMSizeMask := not(0);
                                       end;
                                       end;
                          end;
                          end;
                        end;
                        end;
@@ -4664,44 +4757,152 @@ implementation
           if (InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize in MemRefMultiples) and
           if (InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize in MemRefMultiples) and
              (InsTabMemRefSizeInfoCache^[AsmOp].ExistsSSEAVX)then
              (InsTabMemRefSizeInfoCache^[AsmOp].ExistsSSEAVX)then
           begin
           begin
-            case RegXMMSizeMask of
-              OT_BITS16: case RegYMMSizeMask of
-                           OT_BITS32: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx16y32;
-                        end;
-               OT_BITS32: case RegYMMSizeMask of
-                            OT_BITS64: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx32y64;
-                         end;
-               OT_BITS64: case RegYMMSizeMask of
-                            OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;
-                            OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y256;
-                          end;
-              OT_BITS128: begin
-                            if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiVMemMultiple then
-                            begin
-                              // vector-memory-operand AVX2 (e.g. VGATHER..)
-                              case RegYMMSizeMask of
-                                OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemRegSize;
-                              end;
-                            end
-                            else if RegMMXSizeMask = 0 then
-                            begin
-                              case RegYMMSizeMask of
-                                OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;
-                                OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
-                              end;
-                            end
-                            else if RegYMMSizeMask = 0 then
-                            begin
-                              case RegMMXSizeMask of
-                                OT_BITS64: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
-                              end;
-                            end
-                            else InternalError(777205);
-                          end;
-
-                // TG TODO
-
+            if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiVMemMultiple then
+            begin
+              if ((RegXMMSizeMask = OT_BITS128) or (RegXMMSizeMask = 0))     and
+                 ((RegYMMSizeMask = OT_BITS256) or (RegYMMSizeMask = 0))     and
+                 ((RegZMMSizeMask = OT_BITS512) or (RegZMMSizeMask = 0))     and
+                 ((RegXMMSizeMask or RegYMMSizeMask or RegZMMSizeMask) <> 0) then
+              begin
+                InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemRegSize;
+              end
+              else
+              begin
+                //TG TODO delete
+                if not((AsmOp = A_VGATHERQPS) or
+                       (AsmOp = A_VGATHERQPS) or
+                       (AsmOp = A_VPGATHERQD))  then
+                begin
+                  RegZMMSizeMask := RegZMMSizeMask;
+                end;
+              end;
+            end
+            else if RegMMXSizeMask <> 0 then
+            begin
+              if (RegMMXSizeMask = OT_BITS64)  and
+                 (RegXMMSizeMask = OT_BITS128) and
+                 (RegYMMSizeMask = 0)          and
+                 (RegZMMSizeMask = 0) then
+              begin
+                InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
+              end
+              else
+              begin
+                //TG TODO delete
+                if not(InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize in [msiMultiple16]) then
+                 RegMMXSizeMask := RegMMXSizeMask;
+              end;
+            end
+            else if ((RegXMMSizeMask = OT_BITS128) or (RegXMMSizeMask = 0)) and
+                    ((RegYMMSizeMask = OT_BITS256) or (RegYMMSizeMask = 0)) and
+                    ((RegZMMSizeMask = OT_BITS512) or (RegZMMSizeMask = 0)) and
+                    ((RegXMMSizeMask or RegYMMSizeMask or RegZMMSizeMask) <> 0) then
+            begin
+              InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
+            end
+            else if (RegXMMSizeMask = OT_BITS16) and
+                    (RegYMMSizeMask = OT_BITS32) then
+            begin
+              if (RegZMMSizeMask = 0) then
+              begin
+                InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx16y32;
+              end
+              else
+              begin
+                //TG TODO delete
+                RegZMMSizeMask := RegZMMSizeMask;
+              end;
+            end
+            else if (RegXMMSizeMask = OT_BITS32) and
+                    (RegYMMSizeMask = OT_BITS64) then
+            begin
+              if (RegZMMSizeMask = 0) then
+              begin
+                InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx32y64;
+              end
+              else
+              begin
+                //TG TODO delete
+                RegZMMSizeMask := RegZMMSizeMask;
+              end;
+            end
+            else if (RegXMMSizeMask = OT_BITS64) and
+                    ((RegYMMSizeMask = OT_BITS128) or
+                     (RegYMMSizeMask = OT_BITS256)) then
+            begin
+              if (RegZMMSizeMask = 0) then
+              begin
+                case RegYMMSizeMask of
+                  OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;
+                  OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y256;
+                end;
+              end
+              else
+              begin
+                //TG TODO delete
+                RegZMMSizeMask := RegZMMSizeMask;
+              end;
+            end
+            else
+            begin
+              if not(
+                     (AsmOp = A_CVTSI2SS) or
+                     (AsmOp = A_CVTSI2SD) or
+                     (AsmOp = A_CVTPD2DQ) or
+                     (AsmOp = A_VCVTPD2DQ) or
+                     (AsmOp = A_VCVTPD2PS) or
+                     (AsmOp = A_VCVTSI2SD) or
+                     (AsmOp = A_VCVTSI2SS) or
+                     (AsmOp = A_VCVTTPD2DQ)
+
+                    ) then
+
+              InternalError(777205);
             end;
             end;
+
+            //begin
+
+              //case RegXMMSizeMask of
+                //OT_BITS16: case RegYMMSizeMask of
+                //             OT_BITS32: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx16y32;
+                //          end;
+                // OT_BITS32: case RegYMMSizeMask of
+                //              OT_BITS64: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx32y64;
+                //           end;
+                // OT_BITS64: case RegYMMSizeMask of
+                //              OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;
+                //              OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y256;
+                //            end;
+                //OT_BITS128: begin
+                //              if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiVMemMultiple then
+                //              begin
+                //                // vector-memory-operand AVX2 (e.g. VGATHER..)
+                //                case RegYMMSizeMask of
+                //                  OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiVMemRegSize;
+                //                end;
+                //              end
+                //              else if RegMMXSizeMask = 0 then
+                //              begin
+                //                case RegYMMSizeMask of
+                //                  OT_BITS128: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegx64y128;
+                //                  OT_BITS256: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
+                //                end;
+                //              end
+                //              else if RegYMMSizeMask = 0 then
+                //              begin
+                //                case RegMMXSizeMask of
+                //                  OT_BITS64: InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
+                //                end;
+                //              end
+                //              else InternalError(777205);
+                //            end;
+
+                  // TG TODO
+
+              //end;
+
+
+            //end;
           end;
           end;
         end;
         end;
       end;
       end;

+ 14 - 2
compiler/x86/rax86.pas

@@ -42,6 +42,7 @@ Procedure FWaitWarning;
 type
 type
   Tx86Operand=class(TOperand)
   Tx86Operand=class(TOperand)
     opsize  : topsize;
     opsize  : topsize;
+    vopext  : byte;      // bitmask: vector-operand extention AVX512 (e.g. vaddps xmm0 {k1} {z})
     Procedure SetSize(_size:longint;force:boolean);override;
     Procedure SetSize(_size:longint;force:boolean);override;
     Procedure SetCorrectSize(opcode:tasmop);override;
     Procedure SetCorrectSize(opcode:tasmop);override;
     Function CheckOperand: boolean; override;
     Function CheckOperand: boolean; override;
@@ -1034,9 +1035,11 @@ end;
 
 
 function Tx86Instruction.ConcatInstruction(p : TAsmList) : tai;
 function Tx86Instruction.ConcatInstruction(p : TAsmList) : tai;
 var
 var
-  siz  : topsize;
-  i,asize : longint;
+  siz   : topsize;
+  i     : longint;
+  asize : int64;
   ai   : taicpu;
   ai   : taicpu;
+
 begin
 begin
   ConcatInstruction:=nil;
   ConcatInstruction:=nil;
 
 
@@ -1245,6 +1248,9 @@ begin
   ai.Ops:=Ops;
   ai.Ops:=Ops;
   ai.Allocate_oper(Ops);
   ai.Allocate_oper(Ops);
   for i:=1 to Ops do
   for i:=1 to Ops do
+  begin
+    ai.oper[i-1]^.vopext := (operands[i] as tx86operand).vopext;
+
     case operands[i].opr.typ of
     case operands[i].opr.typ of
        OPR_CONSTANT :
        OPR_CONSTANT :
          ai.loadconst(i-1,operands[i].opr.val);
          ai.loadconst(i-1,operands[i].opr.val);
@@ -1311,6 +1317,9 @@ begin
                      asize := OT_BITS128;
                      asize := OT_BITS128;
                    OS_M256,OS_MS256:
                    OS_M256,OS_MS256:
                      asize := OT_BITS256;
                      asize := OT_BITS256;
+                   OS_M512,OS_MS512:
+                     asize := OT_BITS512;
+
                  end;
                  end;
                if asize<>0 then
                if asize<>0 then
                  ai.oper[i-1]^.ot:=(ai.oper[i-1]^.ot and not OT_SIZE_MASK) or asize;
                  ai.oper[i-1]^.ot:=(ai.oper[i-1]^.ot and not OT_SIZE_MASK) or asize;
@@ -1318,6 +1327,9 @@ begin
          end;
          end;
     end;
     end;
 
 
+
+  end;
+
  { Condition ? }
  { Condition ? }
   if condition<>C_None then
   if condition<>C_None then
    ai.SetCondition(condition);
    ai.SetCondition(condition);

+ 168 - 31
compiler/x86/rax86int.pas

@@ -40,12 +40,15 @@ Unit Rax86int;
       AS_COMMA,AS_LBRACKET,AS_RBRACKET,AS_LPAREN,
       AS_COMMA,AS_LBRACKET,AS_RBRACKET,AS_LPAREN,
       AS_RPAREN,AS_COLON,AS_DOT,AS_PLUS,AS_MINUS,AS_STAR,
       AS_RPAREN,AS_COLON,AS_DOT,AS_PLUS,AS_MINUS,AS_STAR,
       AS_SEPARATOR,AS_ID,AS_REGISTER,AS_OPCODE,AS_SLASH,
       AS_SEPARATOR,AS_ID,AS_REGISTER,AS_OPCODE,AS_SLASH,
+      AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,
        {------------------ Assembler directives --------------------}
        {------------------ Assembler directives --------------------}
       AS_ALIGN,AS_DB,AS_DW,AS_DD,AS_DQ,AS_PUBLIC,AS_END,
       AS_ALIGN,AS_DB,AS_DW,AS_DD,AS_DQ,AS_PUBLIC,AS_END,
        {------------------ Assembler Operators  --------------------}
        {------------------ Assembler Operators  --------------------}
       AS_BYTE,AS_WORD,AS_DWORD,AS_QWORD,AS_TBYTE,AS_DQWORD,AS_OWORD,AS_XMMWORD,AS_YWORD,AS_YMMWORD,AS_ZWORD,AS_ZMMWORD,AS_NEAR,AS_FAR,
       AS_BYTE,AS_WORD,AS_DWORD,AS_QWORD,AS_TBYTE,AS_DQWORD,AS_OWORD,AS_XMMWORD,AS_YWORD,AS_YMMWORD,AS_ZWORD,AS_ZMMWORD,AS_NEAR,AS_FAR,
       AS_HIGH,AS_LOW,AS_OFFSET,AS_SIZEOF,AS_VMTOFFSET,AS_SEG,AS_TYPE,AS_PTR,AS_MOD,AS_SHL,AS_SHR,AS_NOT,
       AS_HIGH,AS_LOW,AS_OFFSET,AS_SIZEOF,AS_VMTOFFSET,AS_SEG,AS_TYPE,AS_PTR,AS_MOD,AS_SHL,AS_SHR,AS_NOT,
-      AS_AND,AS_OR,AS_XOR,AS_WRT,AS___GOTPCREL,AS_TARGET_DIRECTIVE);
+      AS_AND,AS_OR,AS_XOR,AS_WRT,AS___GOTPCREL,AS_TARGET_DIRECTIVE
+      ,AS_BROADCAST
+      );
 
 
     type
     type
        { input flags for BuildConstSymbolExpression }
        { input flags for BuildConstSymbolExpression }
@@ -66,9 +69,13 @@ Unit Rax86int;
          cseof_hasofs
          cseof_hasofs
        );
        );
        tconstsymbolexpressionoutputflags = set of tconstsymbolexpressionoutputflag;
        tconstsymbolexpressionoutputflags = set of tconstsymbolexpressionoutputflag;
+
+       { tx86intreader }
+
        tx86intreader = class(tasmreader)
        tx86intreader = class(tasmreader)
          actasmpattern_origcase : string;
          actasmpattern_origcase : string;
          actasmtoken : tasmtoken;
          actasmtoken : tasmtoken;
+         actoperextention: string;
          prevasmtoken : tasmtoken;
          prevasmtoken : tasmtoken;
          ActOpsize : topsize;
          ActOpsize : topsize;
          inexpression : boolean;
          inexpression : boolean;
@@ -79,8 +86,9 @@ Unit Rax86int;
          function is_register(const s:string):boolean;
          function is_register(const s:string):boolean;
          function is_locallabel(const s:string):boolean;
          function is_locallabel(const s:string):boolean;
          function Assemble: tlinkedlist;override;
          function Assemble: tlinkedlist;override;
-         procedure GetToken;
-         function consume(t : tasmtoken):boolean;
+         procedure GetToken(check_operand_extention: boolean = false);
+         function consume(t : tasmtoken; check_operand_extention: boolean = false):boolean;
+         //procedure ConsumeOperExtention(oper: tx86operand; const aOperExtention: string);
          procedure RecoverConsume(allowcomma:boolean);
          procedure RecoverConsume(allowcomma:boolean);
          procedure AddReferences(dest,src : tx86operand);
          procedure AddReferences(dest,src : tx86operand);
          procedure SetSegmentOverride(oper:tx86operand;seg:tregister);
          procedure SetSegmentOverride(oper:tx86operand;seg:tregister);
@@ -93,6 +101,8 @@ Unit Rax86int;
          procedure BuildConstantOperand(oper: tx86operand);
          procedure BuildConstantOperand(oper: tx86operand);
          procedure BuildOpCode(instr : tx86instruction);
          procedure BuildOpCode(instr : tx86instruction);
          procedure BuildConstant(constsize: byte);
          procedure BuildConstant(constsize: byte);
+         procedure consume_voperand_ext(aop: tx86operand);
+
 
 
          function is_targetdirective(const s: string): boolean;virtual;
          function is_targetdirective(const s: string): boolean;virtual;
          procedure HandleTargetDirective;virtual;
          procedure HandleTargetDirective;virtual;
@@ -123,6 +133,9 @@ Unit Rax86int;
        rautils,itx86int,
        rautils,itx86int,
        { codegen }
        { codegen }
        procinfo,paramgr
        procinfo,paramgr
+
+       //TG TODO delete
+       ,aasmtai
        ;
        ;
 
 
     type
     type
@@ -152,13 +165,13 @@ Unit Rax86int;
         ',','[',']','(',
         ',','[',']','(',
         ')',':','.','+','-','*',
         ')',':','.','+','-','*',
         ';','identifier','register','opcode','/',
         ';','identifier','register','opcode','/',
+        '','','','',
         '','','','','','','END',
         '','','','','','','END',
         '','','','','','','','','','','','','','',
         '','','','','','','','','','','','','','',
         '','','','sizeof','vmtoffset','','type','ptr','mod','shl','shr','not',
         '','','','sizeof','vmtoffset','','type','ptr','mod','shl','shr','not',
-        'and','or','xor','wrt','..gotpcrel',''
+        'and','or','xor','wrt','..gotpcrel','', '{1to8}'
       );
       );
 
 
-
     constructor tx86intreader.create;
     constructor tx86intreader.create;
       var
       var
         i : tasmop;
         i : tasmop;
@@ -167,6 +180,8 @@ Unit Rax86int;
         iasmops:=TFPHashList.create;
         iasmops:=TFPHashList.create;
         for i:=firstop to lastop do
         for i:=firstop to lastop do
           iasmops.Add(upper(std_op2str[i]),Pointer(PtrInt(i)));
           iasmops.Add(upper(std_op2str[i]),Pointer(PtrInt(i)));
+
+        actoperextention := '';
       end;
       end;
 
 
 
 
@@ -236,7 +251,7 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Function tx86intreader.is_asmdirective(const s: string):boolean;
+        function tx86intreader.is_asmdirective(const s: string): boolean;
       var
       var
         i : tasmtoken;
         i : tasmtoken;
       Begin
       Begin
@@ -262,15 +277,6 @@ Unit Rax86int;
           begin
           begin
             is_register:=true;
             is_register:=true;
             actasmtoken:=AS_REGISTER;
             actasmtoken:=AS_REGISTER;
-
-            //TG TODO CHECK
-            if getregtype(actasmregister) = R_MMREGISTER then
-            begin
-//                      actasmpattern:=actasmpattern + c;
-//                       c:=current_scanner.asmgetchar;
-            end;
-
-
           end;
           end;
       end;
       end;
 
 
@@ -287,18 +293,23 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    procedure tx86intreader.handletargetdirective;
+        procedure tx86intreader.HandleTargetDirective;
       begin
       begin
       end;
       end;
 
 
 
 
-    Procedure tx86intreader.GetToken;
+        procedure tx86intreader.GetToken(check_operand_extention: boolean);
       var
       var
         len : longint;
         len : longint;
         forcelabel : boolean;
         forcelabel : boolean;
         srsym : tsym;
         srsym : tsym;
         srsymtable : TSymtable;
         srsymtable : TSymtable;
+        scomment: string;
+        schar: char;
+        old_commentstyle: tcommentstyle;
       begin
       begin
+        actoperextention := '';
+
         c:=scanner.c;
         c:=scanner.c;
         { save old token and reset new token }
         { save old token and reset new token }
         prevasmtoken:=actasmtoken;
         prevasmtoken:=actasmtoken;
@@ -450,6 +461,40 @@ Unit Rax86int;
                        c:=current_scanner.asmgetchar;
                        c:=current_scanner.asmgetchar;
                      end;
                      end;
                   end;
                   end;
+                 if prevasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] then
+                  begin
+                    { allow spaces }
+                    while (c in [' ',#9]) do
+                      c:=current_scanner.asmgetchar;
+
+                    if c = '}' then
+                     begin
+                       current_scanner.readchar;
+                       case prevasmtoken of
+                         AS_LOPMASK: if (length(actasmpattern) = 2) and
+                                        (actasmpattern[2] in ['1'..'7']) then
+                                      begin
+                                        actasmtoken := AS_VOPMASK;
+                                        exit;
+                                      end;
+                         AS_LOPZEROMASK:
+                                      if (actasmpattern = 'z') or
+                                         (actasmpattern = 'Z') then
+                                      begin
+                                        actasmtoken := AS_VOPZEROMASK;
+                                        exit;
+                                      end;
+                                 else ; // is completely comment =>> nothing todo
+
+                       end;
+                       exit;
+                     end
+                    else
+                     begin
+                       if c = '{' then current_scanner.inc_comment_level;
+                       current_scanner.skipcomment(false); // is comment
+                     end;
+                  end;
                  if is_asmdirective(actasmpattern) then
                  if is_asmdirective(actasmpattern) then
                   exit;
                   exit;
                  if is_asmoperator(actasmpattern) then
                  if is_asmoperator(actasmpattern) then
@@ -786,7 +831,31 @@ Unit Rax86int;
 
 
              '{':
              '{':
                begin
                begin
-                 current_scanner.skipcomment(true);
+                 if not(check_operand_extention) then current_scanner.skipcomment(true)
+                  else  // exists operand extention e.g. AVX512 {k1..k7} or {z} or {1to8}
+                  begin
+                    case current_scanner.asmgetchar of
+                      '{': begin
+                             current_scanner.inc_comment_level;
+                             current_scanner.skipcomment(true);
+                           end;
+                      '}': ; // local comment closed
+                      'k',
+                      'K': begin
+                             actasmtoken := AS_LOPMASK;
+                             exit;
+                           end;
+                      'z',
+                      'Z': begin
+                             actasmtoken := AS_LOPZEROMASK;
+                             exit;
+                           end;
+                      else begin
+                             current_scanner.skipcomment(false);
+                           end;
+                    end;
+                  end;
+
                  GetToken;
                  GetToken;
                end;
                end;
 
 
@@ -797,7 +866,7 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-  function tx86intreader.consume(t : tasmtoken):boolean;
+  function tx86intreader.consume(t : tasmtoken; check_operand_extention: boolean):boolean;
     begin
     begin
       Consume:=true;
       Consume:=true;
       if t<>actasmtoken then
       if t<>actasmtoken then
@@ -806,10 +875,38 @@ Unit Rax86int;
          Consume:=false;
          Consume:=false;
        end;
        end;
       repeat
       repeat
-        gettoken;
+        gettoken(check_operand_extention);
       until actasmtoken<>AS_NONE;
       until actasmtoken<>AS_NONE;
     end;
     end;
 
 
+  //procedure tx86intreader.ConsumeOperExtention(oper: tx86operand; const aOperExtention: string);
+  //begin
+  //  //if oper.reg
+  //end;
+
+  procedure tx86intreader.consume_voperand_ext(aop: tx86operand);  // parse one AVX512 operand extension ({k1}..{k7} or {z}) and record it in aop.vopext
+  var
+    kreg: tregister;
+  begin
+    Consume(actasmtoken);  // step past the opening token; callers enter here on AS_LOPMASK / AS_LOPZEROMASK
+    if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK] then
+    begin
+      case actasmtoken of
+            AS_VOPMASK: begin
+                          kreg := masm_regnum_search(lower(actasmpattern));
+                          if (kreg >= NR_K1) and
+                             (kreg <= NR_K7) then  // a register outside k1..k7 is skipped here without any diagnostic
+                          begin
+                            aop.vopext := aop.vopext or (tregisterrec(kreg).supreg  and $07); //TG TODO check
+                            aop.vopext := aop.vopext or OTVE_VECTORMASK_WRITEMASK;
+                          end;
+                        end;
+        AS_VOPZEROMASK: aop.vopext := aop.vopext or OTVE_VECTORMASK_ZERO;
+      end;
+
+      Consume(actasmtoken, true);  // true = check_operand_extention, so a following '{...}' can be scanned as another extension
+    end;
+  end;
 
 
   procedure tx86intreader.RecoverConsume(allowcomma:boolean);
   procedure tx86intreader.RecoverConsume(allowcomma:boolean);
     begin
     begin
@@ -1010,7 +1107,9 @@ Unit Rax86int;
     { This routine builds up a record offset after a AS_DOT
     { This routine builds up a record offset after a AS_DOT
       token is encountered.
       token is encountered.
       On entry actasmtoken should be equal to AS_DOT                     }
       On entry actasmtoken should be equal to AS_DOT                     }
-    Procedure tx86intreader.BuildRecordOffsetSize(const expr: string;out offset:tcgint;out size:tcgint; out mangledname: string; needvmtofs: boolean; out hastypecast: boolean);
+        procedure tx86intreader.BuildRecordOffsetSize(const expr: string; out
+      offset: tcgint; out size: tcgint; out mangledname: string;
+      needvmtofs: boolean; out hastypecast: boolean);
       var
       var
         s: string;
         s: string;
       Begin
       Begin
@@ -1039,7 +1138,10 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Procedure tx86intreader.BuildConstSymbolExpression(in_flags: tconstsymbolexpressioninputflags;out value:tcgint;out asmsym:string;out asmsymtyp:TAsmsymtype;out size:tcgint;out out_flags:tconstsymbolexpressionoutputflags);
+        procedure tx86intreader.BuildConstSymbolExpression(
+      in_flags: tconstsymbolexpressioninputflags; out value: tcgint; out
+      asmsym: string; out asmsymtyp: TAsmsymtype; out size: tcgint; out
+      out_flags: tconstsymbolexpressionoutputflags);
       var
       var
         tempstr,expr,hs,mangledname : string;
         tempstr,expr,hs,mangledname : string;
         parenlevel : longint;
         parenlevel : longint;
@@ -1467,7 +1569,7 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Function tx86intreader.BuildConstExpression:aint;
+        function tx86intreader.BuildConstExpression: aint;
       var
       var
         l,size : tcgint;
         l,size : tcgint;
         hs : string;
         hs : string;
@@ -1481,7 +1583,8 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Function tx86intreader.BuildRefConstExpression(out size:tcgint;startingminus:boolean):aint;
+        function tx86intreader.BuildRefConstExpression(out size: tcgint;
+      startingminus: boolean): aint;
       var
       var
         l : tcgint;
         l : tcgint;
         hs : string;
         hs : string;
@@ -1773,7 +1876,14 @@ Unit Rax86int;
             AS_REGISTER :
             AS_REGISTER :
               begin
               begin
                 hreg:=actasmregister;
                 hreg:=actasmregister;
-                Consume(AS_REGISTER);
+                Consume(AS_REGISTER, true);
+
+                //TG TODO check
+                while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] do
+                begin
+                  consume_voperand_ext(oper);
+                end;
+
                 if actasmtoken=AS_COLON then
                 if actasmtoken=AS_COLON then
                   begin
                   begin
                     Consume(AS_COLON);
                     Consume(AS_COLON);
@@ -1937,7 +2047,16 @@ Unit Rax86int;
               begin
               begin
                 if GotPlus or GotStar or BracketlessReference then
                 if GotPlus or GotStar or BracketlessReference then
                   Message(asmr_e_invalid_reference_syntax);
                   Message(asmr_e_invalid_reference_syntax);
-                Consume(AS_RBRACKET);
+
+                Consume(AS_RBRACKET, true);
+                //TG TODO check
+                while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] do
+                begin
+                  consume_voperand_ext(oper);
+                end;
+
+
+
                 if actasmtoken=AS_LBRACKET then
                 if actasmtoken=AS_LBRACKET then
                   begin
                   begin
                     tmpoper:=Tx86Operand.create;
                     tmpoper:=Tx86Operand.create;
@@ -1971,7 +2090,7 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Procedure tx86intreader.BuildConstantOperand(oper: tx86operand);
+        procedure tx86intreader.BuildConstantOperand(oper: tx86operand);
       var
       var
         l,size : tcgint;
         l,size : tcgint;
         tempstr : string;
         tempstr : string;
@@ -2015,7 +2134,8 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Procedure tx86intreader.BuildOperand(oper: tx86operand;istypecast:boolean);
+        procedure tx86intreader.BuildOperand(oper: tx86operand; istypecast: boolean
+      );
 
 
         procedure AddLabelOperand(hl:tasmlabel);
         procedure AddLabelOperand(hl:tasmlabel);
         begin
         begin
@@ -2037,6 +2157,7 @@ Unit Rax86int;
            end;
            end;
         end;
         end;
 
 
+
       var
       var
         expr,
         expr,
         hs      : string;
         hs      : string;
@@ -2046,7 +2167,10 @@ Unit Rax86int;
         toffset,
         toffset,
         tsize   : tcgint;
         tsize   : tcgint;
         hastypecast: boolean;
         hastypecast: boolean;
+
       begin
       begin
+        oper.vopext := 0;
+
         expr:='';
         expr:='';
         repeat
         repeat
           if actasmtoken=AS_DOT then
           if actasmtoken=AS_DOT then
@@ -2340,7 +2464,17 @@ Unit Rax86int;
               begin
               begin
                 { save the type of register used. }
                 { save the type of register used. }
                 tempreg:=actasmregister;
                 tempreg:=actasmregister;
-                Consume(AS_REGISTER);
+                Consume(AS_REGISTER, true);
+
+                //TG TODO check
+                if (getregtype(tempreg) = R_MMREGISTER) then
+                 begin
+                  while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] do
+                  begin
+                    consume_voperand_ext(oper);
+                  end;
+                end;
+
                 if actasmtoken = AS_COLON then
                 if actasmtoken = AS_COLON then
                  Begin
                  Begin
                    Consume(AS_COLON);
                    Consume(AS_COLON);
@@ -2438,6 +2572,9 @@ Unit Rax86int;
               end;
               end;
           end;
           end;
         until false;
         until false;
+
+
+
         { End of operand, update size if a typecast is forced }
         { End of operand, update size if a typecast is forced }
         if (oper.typesize<>0) and
         if (oper.typesize<>0) and
            (oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL]) then
            (oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL]) then
@@ -2453,7 +2590,7 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Procedure tx86intreader.BuildOpCode(instr : tx86instruction);
+        procedure tx86intreader.BuildOpCode(instr: tx86instruction);
       var
       var
         PrefixOp,OverrideOp: tasmop;
         PrefixOp,OverrideOp: tasmop;
         operandnum : longint;
         operandnum : longint;
@@ -2752,7 +2889,7 @@ Unit Rax86int;
       end;
       end;
 
 
 
 
-    Procedure tx86intreader.BuildConstant(constsize: byte);
+        procedure tx86intreader.BuildConstant(constsize: byte);
       var
       var
         asmsymtyp : tasmsymtype;
         asmsymtyp : tasmsymtype;
         asmsym,
         asmsym,

+ 2 - 1
compiler/x86/x86ins.dat

@@ -5370,7 +5370,8 @@ ymmreg,ymmreg,ymmrm,imm8             \361\362\364\372\1\x46\75\120\27     AVX2
 
 
 [VPERMD]
 [VPERMD]
 (Ch_All)
 (Ch_All)
-ymmreg,ymmreg,ymmrm                  \361\362\364\371\1\x36\75\120        AVX2
+ymmregmz,ymmreg,ymmrm                \350\354\361\362\364\371\1\x36\75\120        AVX2
+
 
 
 [VPERMPD]
 [VPERMPD]
 (Ch_All)
 (Ch_All)

+ 8 - 8
compiler/x86/x86reg.dat

@@ -244,14 +244,14 @@ NR_ZMM29,$040E001D,zmm29,%zmm29,zmm29,zmm29,-1,-1,80,OT_ZMMREG,5,64
 NR_ZMM30,$040E001E,zmm30,%zmm30,zmm30,zmm30,-1,-1,81,OT_ZMMREG,6,64
 NR_ZMM30,$040E001E,zmm30,%zmm30,zmm30,zmm30,-1,-1,81,OT_ZMMREG,6,64
 NR_ZMM31,$040E001F,zmm31,%zmm31,zmm31,zmm31,-1,-1,82,OT_ZMMREG,7,64
 NR_ZMM31,$040E001F,zmm31,%zmm31,zmm31,zmm31,-1,-1,82,OT_ZMMREG,7,64
 
 
-; NR_K0,$06000000,k0,%k0,k0,k0,-1,118,118,OT_REG_VECTORMASK,0
-; NR_K1,$06000001,k1,%k1,k1,k1,-1,119,119,OT_REG_VECTORMASK,1
-; NR_K2,$06000002,k2,%k2,k2,k2,-1,120,120,OT_REG_VECTORMASK,2
-; NR_K3,$06000003,k3,%k3,k3,k3,-1,121,121,OT_REG_VECTORMASK,3
-; NR_K4,$06000004,k4,%k4,k4,k4,-1,122,122,OT_REG_VECTORMASK,4
-; NR_K5,$06000005,k5,%k5,k5,k5,-1,123,123,OT_REG_VECTORMASK,5
-; NR_K6,$06000006,k6,%k6,k6,k6,-1,124,124,OT_REG_VECTORMASK,6
-; NR_K7,$06000007,k7,%k7,k7,k7,-1,125,125,OT_REG_VECTORMASK,7
+NR_K0,$06000000,k0,%k0,k0,k0,-1,118,118,OT_REG_VECTORMASK,0
+NR_K1,$06000001,k1,%k1,k1,k1,-1,119,119,OT_REG_VECTORMASK,1
+NR_K2,$06000002,k2,%k2,k2,k2,-1,120,120,OT_REG_VECTORMASK,2
+NR_K3,$06000003,k3,%k3,k3,k3,-1,121,121,OT_REG_VECTORMASK,3
+NR_K4,$06000004,k4,%k4,k4,k4,-1,122,122,OT_REG_VECTORMASK,4
+NR_K5,$06000005,k5,%k5,k5,k5,-1,123,123,OT_REG_VECTORMASK,5
+NR_K6,$06000006,k6,%k6,k6,k6,-1,124,124,OT_REG_VECTORMASK,6
+NR_K7,$06000007,k7,%k7,k7,k7,-1,125,125,OT_REG_VECTORMASK,7
 
 
 ; NR_BND0,$07000000,bnd0,%bnd0,bnd0,bnd0,-1,126,126,OT_REG_BND,0
 ; NR_BND0,$07000000,bnd0,%bnd0,bnd0,bnd0,-1,126,126,OT_REG_BND,0
 ; NR_BND1,$07000001,bnd1,%bnd1,bnd1,bnd1,-1,127,127,OT_REG_BND,1
 ; NR_BND1,$07000001,bnd1,%bnd1,bnd1,bnd1,-1,127,127,OT_REG_BND,1

+ 8 - 0
compiler/x86_64/r8664ari.inc

@@ -45,6 +45,14 @@
 73,
 73,
 74,
 74,
 84,
 84,
+216,
+217,
+218,
+219,
+220,
+221,
+222,
+223,
 112,
 112,
 113,
 113,
 114,
 114,

+ 9 - 1
compiler/x86_64/r8664att.inc

@@ -214,4 +214,12 @@
 '%zmm28',
 '%zmm28',
 '%zmm29',
 '%zmm29',
 '%zmm30',
 '%zmm30',
-'%zmm31'
+'%zmm31',
+'%k0',
+'%k1',
+'%k2',
+'%k3',
+'%k4',
+'%k5',
+'%k6',
+'%k7'

+ 8 - 0
compiler/x86_64/r8664con.inc

@@ -215,3 +215,11 @@ NR_ZMM28 = tregister($040E001C);
 NR_ZMM29 = tregister($040E001D);
 NR_ZMM29 = tregister($040E001D);
 NR_ZMM30 = tregister($040E001E);
 NR_ZMM30 = tregister($040E001E);
 NR_ZMM31 = tregister($040E001F);
 NR_ZMM31 = tregister($040E001F);
+NR_K0 = tregister($06000000);
+NR_K1 = tregister($06000001);
+NR_K2 = tregister($06000002);
+NR_K3 = tregister($06000003);
+NR_K4 = tregister($06000004);
+NR_K5 = tregister($06000005);
+NR_K6 = tregister($06000006);
+NR_K7 = tregister($06000007);

+ 9 - 1
compiler/x86_64/r8664dwrf.inc

@@ -214,4 +214,12 @@
 79,
 79,
 80,
 80,
 81,
 81,
-82
+82,
+118,
+119,
+120,
+121,
+122,
+123,
+124,
+125

+ 9 - 1
compiler/x86_64/r8664int.inc

@@ -214,4 +214,12 @@
 'zmm28',
 'zmm28',
 'zmm29',
 'zmm29',
 'zmm30',
 'zmm30',
-'zmm31'
+'zmm31',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 8 - 0
compiler/x86_64/r8664iri.inc

@@ -46,6 +46,14 @@
 73,
 73,
 74,
 74,
 84,
 84,
+216,
+217,
+218,
+219,
+220,
+221,
+222,
+223,
 112,
 112,
 113,
 113,
 114,
 114,

+ 9 - 1
compiler/x86_64/r8664nasm.inc

@@ -214,4 +214,12 @@
 'zmm28',
 'zmm28',
 'zmm29',
 'zmm29',
 'zmm30',
 'zmm30',
-'zmm31'
+'zmm31',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 1 - 1
compiler/x86_64/r8664nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86reg.dat }
 { don't edit, this file is generated from x86reg.dat }
-216
+224

+ 9 - 1
compiler/x86_64/r8664num.inc

@@ -214,4 +214,12 @@ tregister($040E001B),
 tregister($040E001C),
 tregister($040E001C),
 tregister($040E001D),
 tregister($040E001D),
 tregister($040E001E),
 tregister($040E001E),
-tregister($040E001F)
+tregister($040E001F),
+tregister($06000000),
+tregister($06000001),
+tregister($06000002),
+tregister($06000003),
+tregister($06000004),
+tregister($06000005),
+tregister($06000006),
+tregister($06000007)

+ 9 - 1
compiler/x86_64/r8664ot.inc

@@ -214,4 +214,12 @@ OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
-OT_ZMMREG
+OT_ZMMREG,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK,
+OT_REG_VECTORMASK

+ 9 - 1
compiler/x86_64/r8664rni.inc

@@ -214,4 +214,12 @@
 82,
 82,
 83,
 83,
 84,
 84,
-85
+85,
+216,
+217,
+218,
+219,
+220,
+221,
+222,
+223

+ 8 - 0
compiler/x86_64/r8664sri.inc

@@ -46,6 +46,14 @@
 73,
 73,
 74,
 74,
 84,
 84,
+216,
+217,
+218,
+219,
+220,
+221,
+222,
+223,
 112,
 112,
 113,
 113,
 114,
 114,

+ 9 - 1
compiler/x86_64/r8664stab.inc

@@ -214,4 +214,12 @@
 79,
 79,
 80,
 80,
 81,
 81,
-82
+82,
+118,
+119,
+120,
+121,
+122,
+123,
+124,
+125

+ 9 - 1
compiler/x86_64/r8664std.inc

@@ -214,4 +214,12 @@
 'zmm28',
 'zmm28',
 'zmm29',
 'zmm29',
 'zmm30',
 'zmm30',
-'zmm31'
+'zmm31',
+'k0',
+'k1',
+'k2',
+'k3',
+'k4',
+'k5',
+'k6',
+'k7'

+ 2 - 2
compiler/x86_64/x8664tab.inc

@@ -14178,8 +14178,8 @@
   (
   (
     opcode  : A_VPERMD;
     opcode  : A_VPERMD;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #241#242#244#249#1#54#61#80;
+    optypes : (ot_ymmregmz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#236#241#242#244#249#1#54#61#80;
     flags   : [if_avx2]
     flags   : [if_avx2]
   ),
   ),
   (
   (