Prechádzať zdrojové kódy

support vector operand bcst,{sae},{er} + k-register

git-svn-id: branches/tg74/avx512@39457 -
tg74 7 rokov pred
rodič
commit
867d145e50

+ 1 - 1
compiler/aasmtai.pas

@@ -420,7 +420,7 @@ interface
         toper = record
           ot : longint;
         {$ifdef x86}
-          vopext: byte;
+          vopext: smallint;
         {$ENDIF}
           case typ : toptype of
             top_none   : ();

+ 2 - 1
compiler/i386/i386att.inc

@@ -1112,5 +1112,6 @@
 'rdrand',
 'rdseed',
 'xgetbv',
-'prefetchwt1'
+'prefetchwt1',
+'kandb'
 );

+ 2 - 1
compiler/i386/i386atts.inc

@@ -1112,5 +1112,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufINT
+attsufINT,
+attsufNONE
 );

+ 2 - 1
compiler/i386/i386int.inc

@@ -1112,5 +1112,6 @@
 'rdrand',
 'rdseed',
 'xgetbv',
-'prefetchwt1'
+'prefetchwt1',
+'kandb'
 );

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-2118;
+2127;

+ 2 - 1
compiler/i386/i386op.inc

@@ -1112,5 +1112,6 @@ A_XTEST,
 A_RDRAND,
 A_RDSEED,
 A_XGETBV,
-A_PREFETCHWT1
+A_PREFETCHWT1,
+A_KANDB
 );

+ 2 - 1
compiler/i386/i386prop.inc

@@ -1112,5 +1112,6 @@
 (Ch: [Ch_Wop1, Ch_WFlags]),
 (Ch: [Ch_Wop1, Ch_WFlags]),
 (Ch: [Ch_WEAX, Ch_WEDX, Ch_RECX]),
-(Ch: [Ch_All])
+(Ch: [Ch_All]),
+(Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1])
 );

+ 81 - 18
compiler/i386/i386tab.inc

@@ -8711,15 +8711,22 @@
   (
     opcode  : A_VADDPD;
     ops     : 3;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
-    code    : #241#242#248#1#88#61#80;
+    optypes : (ot_xmmreg_mz,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#234#241#242#248#1#88#61#80;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VADDPD;
     ops     : 3;
-    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #241#242#244#248#1#88#61#80;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#234#241#242#244#248#1#88#61#80;
+    flags   : [if_avx,if_sandybridge]
+  ),
+  (
+    opcode  : A_VADDPD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_zmmrm_er,ot_none);
+    code    : #232#233#234#241#248#1#88#61#80;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -8970,29 +8977,29 @@
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_ymmreg,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #241#242#244#249#1#24#72;
+    optypes : (ot_ymmreg_mz,ot_memory or ot_bits32,ot_none,ot_none);
+    code    : #232#241#242#244#249#1#24#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #241#242#249#1#24#72;
+    optypes : (ot_xmmreg_mz,ot_memory or ot_bits32,ot_none,ot_none);
+    code    : #232#241#242#249#1#24#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_ymmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#244#249#1#24#72;
+    optypes : (ot_ymmreg_mz,ot_xmmreg,ot_none,ot_none);
+    code    : #232#241#242#244#249#1#24#72;
     flags   : [if_avx2]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#249#1#24#72;
+    optypes : (ot_xmmreg_mz,ot_xmmreg,ot_none,ot_none);
+    code    : #232#241#242#249#1#24#72;
     flags   : [if_avx2]
   ),
   (
@@ -9919,6 +9926,13 @@
     code    : #242#244#248#1#194#61#80#23;
     flags   : [if_avx,if_sandybridge]
   ),
+  (
+    opcode  : A_VCMPPS;
+    ops     : 4;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_immediate or ot_bits8);
+    code    : #232#248#1#194#61#80#23;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VCMPSD;
     ops     : 4;
@@ -9971,8 +9985,8 @@
   (
     opcode  : A_VCOMISS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #242#248#1#47#72;
+    optypes : (ot_xmmreg,ot_xmmreg_sae,ot_none,ot_none);
+    code    : #232#242#248#1#47#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -10686,14 +10700,14 @@
     opcode  : A_VMOVD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_rm_gpr or ot_bits32,ot_none,ot_none);
-    code    : #241#242#248#1#110#72;
+    code    : #232#241#242#248#1#110#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VMOVD;
     ops     : 2;
     optypes : (ot_rm_gpr or ot_bits32,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#248#1#126#65;
+    code    : #232#241#242#248#1#126#65;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -12845,6 +12859,20 @@
     code    : #241#242#248#1#114#60#138#22;
     flags   : [if_avx,if_sandybridge]
   ),
+  (
+    opcode  : A_VPSRLD;
+    ops     : 3;
+    optypes : (ot_xmmreg_mz or ot_signed,ot_xmmrm,ot_immediate or ot_bits8,ot_none);
+    code    : #232#172#241#242#248#1#114#60#138#22;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPSRLD;
+    ops     : 3;
+    optypes : (ot_xmmreg_mz or ot_signed,ot_bmem32,ot_immediate or ot_bits8,ot_none);
+    code    : #232#172#241#242#248#1#114#60#138#22;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VPSRLD;
     ops     : 3;
@@ -13807,10 +13835,38 @@
   (
     opcode  : A_VPERMD;
     ops     : 3;
-    optypes : (ot_ymmregmz,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #232#236#241#242#244#249#1#54#61#80;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
     flags   : [if_avx2]
   ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_bmem32,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_bmem32,ot_none);
+    code    : #232#233#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VPERMPD;
     ops     : 3;
@@ -14825,5 +14881,12 @@
     optypes : (ot_memory,ot_none,ot_none,ot_none);
     code    : #2#15#13#130;
     flags   : [if_prefetchwt1]
+  ),
+  (
+    opcode  : A_KANDB;
+    ops     : 3;
+    optypes : (ot_kreg,ot_kreg,ot_kreg,ot_none);
+    code    : #241#242#244#248#1#65#61#80;
+    flags   : [if_avx512]
   )
 );

+ 8 - 8
compiler/i386/r386ot.inc

@@ -97,11 +97,11 @@ OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG

+ 2 - 1
compiler/i8086/i8086att.inc

@@ -1126,5 +1126,6 @@
 'rdrand',
 'rdseed',
 'xgetbv',
-'prefetchwt1'
+'prefetchwt1',
+'kandb'
 );

+ 2 - 1
compiler/i8086/i8086atts.inc

@@ -1126,5 +1126,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufINT
+attsufINT,
+attsufNONE
 );

+ 2 - 1
compiler/i8086/i8086int.inc

@@ -1126,5 +1126,6 @@
 'rdrand',
 'rdseed',
 'xgetbv',
-'prefetchwt1'
+'prefetchwt1',
+'kandb'
 );

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-2150;
+2159;

+ 2 - 1
compiler/i8086/i8086op.inc

@@ -1126,5 +1126,6 @@ A_XTEST,
 A_RDRAND,
 A_RDSEED,
 A_XGETBV,
-A_PREFETCHWT1
+A_PREFETCHWT1,
+A_KANDB
 );

+ 2 - 1
compiler/i8086/i8086prop.inc

@@ -1126,5 +1126,6 @@
 (Ch: [Ch_Wop1, Ch_WFlags]),
 (Ch: [Ch_Wop1, Ch_WFlags]),
 (Ch: [Ch_WEAX, Ch_WEDX, Ch_RECX]),
-(Ch: [Ch_All])
+(Ch: [Ch_All]),
+(Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1])
 );

+ 81 - 18
compiler/i8086/i8086tab.inc

@@ -8739,15 +8739,22 @@
   (
     opcode  : A_VADDPD;
     ops     : 3;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
-    code    : #241#242#248#1#88#61#80;
+    optypes : (ot_xmmreg_mz,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#234#241#242#248#1#88#61#80;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VADDPD;
     ops     : 3;
-    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #241#242#244#248#1#88#61#80;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#234#241#242#244#248#1#88#61#80;
+    flags   : [if_avx,if_sandybridge]
+  ),
+  (
+    opcode  : A_VADDPD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_zmmrm_er,ot_none);
+    code    : #232#233#234#241#248#1#88#61#80;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -8998,29 +9005,29 @@
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_ymmreg,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #241#242#244#249#1#24#72;
+    optypes : (ot_ymmreg_mz,ot_memory or ot_bits32,ot_none,ot_none);
+    code    : #232#241#242#244#249#1#24#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #241#242#249#1#24#72;
+    optypes : (ot_xmmreg_mz,ot_memory or ot_bits32,ot_none,ot_none);
+    code    : #232#241#242#249#1#24#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_ymmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#244#249#1#24#72;
+    optypes : (ot_ymmreg_mz,ot_xmmreg,ot_none,ot_none);
+    code    : #232#241#242#244#249#1#24#72;
     flags   : [if_avx2]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#249#1#24#72;
+    optypes : (ot_xmmreg_mz,ot_xmmreg,ot_none,ot_none);
+    code    : #232#241#242#249#1#24#72;
     flags   : [if_avx2]
   ),
   (
@@ -9947,6 +9954,13 @@
     code    : #242#244#248#1#194#61#80#23;
     flags   : [if_avx,if_sandybridge]
   ),
+  (
+    opcode  : A_VCMPPS;
+    ops     : 4;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_immediate or ot_bits8);
+    code    : #232#248#1#194#61#80#23;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VCMPSD;
     ops     : 4;
@@ -9999,8 +10013,8 @@
   (
     opcode  : A_VCOMISS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #242#248#1#47#72;
+    optypes : (ot_xmmreg,ot_xmmreg_sae,ot_none,ot_none);
+    code    : #232#242#248#1#47#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -10714,14 +10728,14 @@
     opcode  : A_VMOVD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_rm_gpr or ot_bits32,ot_none,ot_none);
-    code    : #241#242#248#1#110#72;
+    code    : #232#241#242#248#1#110#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VMOVD;
     ops     : 2;
     optypes : (ot_rm_gpr or ot_bits32,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#248#1#126#65;
+    code    : #232#241#242#248#1#126#65;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -12873,6 +12887,20 @@
     code    : #241#242#248#1#114#60#138#22;
     flags   : [if_avx,if_sandybridge]
   ),
+  (
+    opcode  : A_VPSRLD;
+    ops     : 3;
+    optypes : (ot_xmmreg_mz or ot_signed,ot_xmmrm,ot_immediate or ot_bits8,ot_none);
+    code    : #232#172#241#242#248#1#114#60#138#22;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPSRLD;
+    ops     : 3;
+    optypes : (ot_xmmreg_mz or ot_signed,ot_bmem32,ot_immediate or ot_bits8,ot_none);
+    code    : #232#172#241#242#248#1#114#60#138#22;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VPSRLD;
     ops     : 3;
@@ -13835,10 +13863,38 @@
   (
     opcode  : A_VPERMD;
     ops     : 3;
-    optypes : (ot_ymmregmz,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #232#236#241#242#244#249#1#54#61#80;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
     flags   : [if_avx2]
   ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_bmem32,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_bmem32,ot_none);
+    code    : #232#233#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VPERMPD;
     ops     : 3;
@@ -15049,5 +15105,12 @@
     optypes : (ot_memory,ot_none,ot_none,ot_none);
     code    : #2#15#13#130;
     flags   : [if_prefetchwt1]
+  ),
+  (
+    opcode  : A_KANDB;
+    ops     : 3;
+    optypes : (ot_kreg,ot_kreg,ot_kreg,ot_none);
+    code    : #241#242#244#248#1#65#61#80;
+    flags   : [if_avx512]
   )
 );

+ 8 - 8
compiler/i8086/r8086ot.inc

@@ -97,11 +97,11 @@ OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG

+ 1 - 0
compiler/msg/errore.msg

@@ -2834,6 +2834,7 @@ asmr_e_local_label_cannot_be_declared_public=07138_E_Local label $1 cannot be de
 asmr_e_multiple_segment_overrides=07139_E_Cannot use multiple segment overrides
 asmr_w_multiple_segment_overrides=07140_W_Multiple segment overrides (only the last one will take effect)
 asmr_w_segment_override_ignored_in_64bit_mode=07141_W_Segment base $1 will be generated, but is ignored by the CPU in 64-bit mode
+asmr_e_mismatch_broadcasting_elements=07142_E_Mismatch broadcasting elements (expected: {$1} found: {$2})
 #
 # Assembler/binary writers
 #

+ 3 - 2
compiler/msgidx.inc

@@ -828,6 +828,7 @@ const
   asmr_e_multiple_segment_overrides=07139;
   asmr_w_multiple_segment_overrides=07140;
   asmr_w_segment_override_ignored_in_64bit_mode=07141;
+  asmr_e_mismatch_broadcasting_elements=07142;
   asmw_f_too_many_asm_files=08000;
   asmw_f_assembler_output_not_supported=08001;
   asmw_f_comp_not_supported=08002;
@@ -1101,9 +1102,9 @@ const
   option_info=11024;
   option_help_pages=11025;
 
-  MsgTxtSize = 82280;
+  MsgTxtSize = 82348;
 
   MsgIdxMax : array[1..20] of longint=(
-    28,106,347,126,96,59,142,34,221,67,
+    28,106,347,126,96,59,143,34,221,67,
     62,20,30,1,1,1,1,1,1,1
   );

Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 294 - 295
compiler/msgtxt.inc


+ 275 - 107
compiler/x86/aasmcpu.pas

@@ -59,8 +59,16 @@ interface
 
       OT_VECTORMASK = $1000000000;  { OPTIONAL VECTORMASK AVX512}
       OT_VECTORZERO = $2000000000;  { OPTIONAL ZERO-FLAG  AVX512}
+      OT_VECTORBCST = $4000000000;  { BROADCAST-MEM-FLAG  AVX512}
+      OT_VECTORSAE  = $8000000000;  { OPTIONAL SAE-FLAG  AVX512}
+      OT_VECTORER   = $10000000000; { OPTIONAL ER-FLAG  AVX512}
 
-      OT_VECTOR_EXT_MASK = OT_VECTORMASK or OT_VECTORZERO;
+
+      OT_BITSB32    = OT_BITS32 or OT_VECTORBCST;
+      OT_BITSB64    = OT_BITS64 or OT_VECTORBCST;
+
+
+      OT_VECTOR_EXT_MASK = OT_VECTORMASK or OT_VECTORZERO or OT_VECTORBCST;
 
       OT_BITS80    = $00000010;  { FPU only  }
       OT_FAR       = $00000020;  { this means 16:16 or 16:32, like in CALL/JMP }
@@ -172,31 +180,44 @@ interface
       OT_XMEM32    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS32;
       OT_XMEM64    = OT_REGNORM or otf_reg_xmm or otf_reg_gpr or OT_BITS64;
 
-      OT_XMMREGM   = OT_XMMREG or OT_VECTORMASK;
-      OT_XMMREGMZ  = OT_XMMREG or OT_VECTORMASK or OT_VECTORZERO;
+      OT_XMMREG_M   = OT_XMMREG or OT_VECTORMASK;
+      OT_XMMREG_MZ  = OT_XMMREG or OT_VECTORMASK or OT_VECTORZERO;
+      OT_XMMREG_SAE = OT_XMMREG or OT_VECTORSAE;
+      OT_XMMRM_SAE  = OT_XMMRM  or OT_VECTORSAE;
+      OT_XMMREG_ER  = OT_XMMREG or OT_VECTORER;
+      OT_XMMRM_ER   = OT_XMMRM  or OT_VECTORER;
+
 
 
       { register class 5: YMM (both reg and r/m) }
-      OT_YMMREG    = OT_REGNORM or otf_reg_ymm;
-      OT_YMMRM     = OT_REGMEM or otf_reg_ymm;
-      OT_YMEM32    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS32;
-      OT_YMEM64    = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS64;
+      OT_YMMREG     = OT_REGNORM or otf_reg_ymm;
+      OT_YMMRM      = OT_REGMEM or otf_reg_ymm;
+      OT_YMEM32     = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS32;
+      OT_YMEM64     = OT_REGNORM or otf_reg_ymm or otf_reg_gpr or OT_BITS64;
 
-      OT_YMMREGM   = OT_YMMREG or OT_VECTORMASK;
-      OT_YMMREGMZ  = OT_YMMREG or OT_VECTORMASK or OT_VECTORZERO;
+      OT_YMMREG_M   = OT_YMMREG or OT_VECTORMASK;
+      OT_YMMREG_MZ  = OT_YMMREG or OT_VECTORMASK or OT_VECTORZERO;
+      OT_YMMREG_SAE = OT_YMMREG or OT_VECTORSAE;
+      OT_YMMRM_SAE  = OT_YMMRM  or OT_VECTORSAE;
+      OT_YMMREG_ER  = OT_YMMREG or OT_VECTORER;
+      OT_YMMRM_ER   = OT_YMMRM  or OT_VECTORER;
 
 
       { register class 5: ZMM (both reg and r/m) }
-      OT_ZMMREG    = OT_REGNORM or otf_reg_zmm;
-      OT_ZMMRM     = OT_REGMEM or otf_reg_zmm;
-      OT_ZMEM32    = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS32;
-      OT_ZMEM64    = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS64;
+      OT_ZMMREG     = OT_REGNORM or otf_reg_zmm;
+      OT_ZMMRM      = OT_REGMEM or otf_reg_zmm;
+      OT_ZMEM32     = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS32;
+      OT_ZMEM64     = OT_REGNORM or otf_reg_zmm or otf_reg_gpr or OT_BITS64;
 
-      OT_ZMMREGM   = OT_ZMMREG or OT_VECTORMASK;
-      OT_ZMMREGMZ  = OT_ZMMREG or OT_VECTORMASK or OT_VECTORZERO;
+      OT_ZMMREG_M   = OT_ZMMREG or OT_VECTORMASK;
+      OT_ZMMREG_MZ  = OT_ZMMREG or OT_VECTORMASK or OT_VECTORZERO;
+      OT_ZMMREG_SAE = OT_ZMMREG or OT_VECTORSAE;
+      OT_ZMMRM_SAE  = OT_ZMMRM  or OT_VECTORSAE;
+      OT_ZMMREG_ER  = OT_ZMMREG or OT_VECTORER;
+      OT_ZMMRM_ER   = OT_ZMMRM  or OT_VECTORER;
 
 
-      OT_REG_VECTORMASK = OT_REGNORM or otf_reg_k;
+      OT_KREG       = OT_REGNORM or otf_reg_k;
 
       { Vector-Memory operands }
       OT_VMEM_ANY  = OT_XMEM32 or OT_XMEM64 or OT_YMEM32 or OT_YMEM64 or OT_ZMEM32 or OT_ZMEM64;
@@ -205,12 +226,17 @@ interface
       OT_MEM8      = OT_MEMORY or OT_BITS8;
       OT_MEM16     = OT_MEMORY or OT_BITS16;
       OT_MEM32     = OT_MEMORY or OT_BITS32;
+      OT_BMEM32    = OT_MEMORY or OT_BITS32 or OT_VECTORBCST;
       OT_MEM64     = OT_MEMORY or OT_BITS64;
+      OT_BMEM64    = OT_MEMORY or OT_BITS64 or OT_VECTORBCST;
       OT_MEM128    = OT_MEMORY or OT_BITS128;
       OT_MEM256    = OT_MEMORY or OT_BITS256;
       OT_MEM512    = OT_MEMORY or OT_BITS512;
       OT_MEM80     = OT_MEMORY or OT_BITS80;
 
+
+
+
       OT_MEM_OFFS  = OT_MEMORY or otf_sub0;  { special type of EA  }
                                              { simple [address] offset  }
 
@@ -226,12 +252,25 @@ interface
       OT_ONENESS   = otf_sub0;  { special type of immediate operand  }
       OT_UNITY     = OT_IMMEDIATE or OT_ONENESS;  { for shift/rotate instructions  }
 
-      OTVE_VECTOR_SAE              = 1 shl 4;
-      OTVE_VECTOR_ER               = 1 shl 5;
-      OTVE_VECTORMASK_ZERO         = 1 shl 6;
-      OTVE_VECTORMASK_WRITEMASK    = 1 shl 7;
+      OTVE_VECTOR_SAE          = 1 shl 8;
+      OTVE_VECTOR_ER           = 1 shl 9;
+      OTVE_VECTOR_ZERO         = 1 shl 10;
+      OTVE_VECTOR_WRITEMASK    = 1 shl 11;
+      OTVE_VECTOR_BCST         = 1 shl 12;
+      OTVE_VECTOR_BCST2        = 0;
+      OTVE_VECTOR_BCST4        = 1 shl 4;
+      OTVE_VECTOR_BCST8        = 1 shl 5;
+      OTVE_VECTOR_BCST16       = 3 shl 4;
+      OTVE_VECTOR_RNSAE        = OTVE_VECTOR_ER or 0;
+      OTVE_VECTOR_RDSAE        = OTVE_VECTOR_ER or 1 shl 6;
+      OTVE_VECTOR_RUSAE        = OTVE_VECTOR_ER or 1 shl 7;
+      OTVE_VECTOR_RZSAE        = OTVE_VECTOR_ER or 3 shl 6;
+
 
-      OTVE_VECTOR_MASK = OTVE_VECTOR_SAE or OTVE_VECTOR_ER or OTVE_VECTORMASK_ZERO or OTVE_VECTORMASK_WRITEMASK;
+      OTVE_VECTOR_BCST_MASK    = OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16;
+      OTVE_VECTOR_ER_MASK      = OTVE_VECTOR_RNSAE or OTVE_VECTOR_RDSAE or OTVE_VECTOR_RUSAE or OTVE_VECTOR_RZSAE;
+
+      OTVE_VECTOR_MASK = OTVE_VECTOR_SAE or OTVE_VECTOR_ER or OTVE_VECTOR_ZERO or OTVE_VECTOR_WRITEMASK or OTVE_VECTOR_BCST;
 
       { Size of the instruction table converted by nasmconv.pas }
 {$if defined(x86_64)}
@@ -301,16 +340,20 @@ interface
                          msiMultiple, msiMultiple8, msiMultiple16, msiMultiple32,
                          msiMultiple64, msiMultiple128, msiMultiple256, msiMultiple512,
                          msiMemRegSize, msiMemRegx16y32, msiMemRegx32y64, msiMemRegx64y128, msiMemRegx64y256,
-                         msiMem8, msiMem16, msiMem32, msiMem64, msiMem128, msiMem256, msiMem512,
+                         msiMem8, msiMem16, msiMem32, msiBMem32, msiMem64, msiBMem64, msiMem128, msiMem256, msiMem512,
                          msiXMem32, msiXMem64, msiYMem32, msiYMem64, msiZMem32, msiZMem64,
                          msiVMemMultiple, msiVMemRegSize);
 
+      TMemRefSizeInfoBCST = (msbUnknown, msbBCST32, msbBCST64);
+
       TConstSizeInfo  = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
 
       TInsTabMemRefSizeInfoRec = record
-        MemRefSize  : TMemRefSizeInfo;
-        ExistsSSEAVX: boolean;
-        ConstSize   : TConstSizeInfo;
+        MemRefSize            : TMemRefSizeInfo;
+        MemRefSizeBCST        : TMemRefSizeInfoBCST;
+        BCSTXMMMultiplicator  : byte;
+        ExistsSSEAVX          : boolean;
+        ConstSize             : TConstSizeInfo;
       end;
 
     const
@@ -379,6 +422,7 @@ interface
         IF_SSE42,
         IF_AVX,
         IF_AVX2,
+        IF_AVX512,
         IF_BMI1,
         IF_BMI2,
         IF_16BITONLY,
@@ -1231,6 +1275,37 @@ implementation
                   if (ot and OT_SIGNED)<>0 then
                    s:=s+'s';
                 end;
+
+               if vopext <> 0 then
+                begin
+                  str(vopext and $07, regnr);
+                  if vopext and OTVE_VECTOR_WRITEMASK = OTVE_VECTOR_WRITEMASK then
+                    s := s + ' {k' + regnr + '}';
+
+                  if vopext and OTVE_VECTOR_ZERO = OTVE_VECTOR_ZERO then
+                    s := s + ' {z}';
+
+                  if vopext and OTVE_VECTOR_SAE = OTVE_VECTOR_SAE then
+                    s := s + ' {sae}';
+
+
+                  if vopext and OTVE_VECTOR_BCST = OTVE_VECTOR_BCST then
+                   case vopext and OTVE_VECTOR_BCST_MASK of
+                      OTVE_VECTOR_BCST2: s := s + ' {1to2}';
+                      OTVE_VECTOR_BCST4: s := s + ' {1to4}';
+                      OTVE_VECTOR_BCST8: s := s + ' {1to8}';
+                     OTVE_VECTOR_BCST16: s := s + ' {1to16}';
+                   end;
+
+                  if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then
+                   case vopext and OTVE_VECTOR_ER_MASK of
+                      OTVE_VECTOR_RNSAE: s := s + ' {rn-sae}';
+                      OTVE_VECTOR_RDSAE: s := s + ' {rd-sae}';
+                      OTVE_VECTOR_RUSAE: s := s + ' {ru-sae}';
+                      OTVE_VECTOR_RZSAE: s := s + ' {rz-sae}';
+                   end;
+
+                end;
              end;
          end;
         GetString:=s+']';
@@ -1710,12 +1785,23 @@ implementation
              // check vectoroperand-extention e.g. {k1} {z}
 
              vopext := 0;
-             if (oper[i]^.vopext and OTVE_VECTORMASK_WRITEMASK) = OTVE_VECTORMASK_WRITEMASK then
-              vopext := vopext or OT_VECTORMASK;
-             if (oper[i]^.vopext and OTVE_VECTORMASK_ZERO) = OTVE_VECTORMASK_ZERO then
-              vopext := vopext or OT_VECTORZERO;
+             if (oper[i]^.vopext and OTVE_VECTOR_WRITEMASK) = OTVE_VECTOR_WRITEMASK then
+             begin
+               vopext := vopext or OT_VECTORMASK;
+
+               if (oper[i]^.vopext and OTVE_VECTOR_ZERO) = OTVE_VECTOR_ZERO then
+                vopext := vopext or OT_VECTORZERO;
+             end;
+
+             if (oper[i]^.vopext and OTVE_VECTOR_BCST) = OTVE_VECTOR_BCST then
+              vopext := vopext or OT_VECTORBCST;
+
+             if (oper[i]^.vopext and OTVE_VECTOR_ER) = OTVE_VECTOR_ER then
+              vopext := vopext or OT_VECTORER;
+
+             if (oper[i]^.vopext and OTVE_VECTOR_SAE) = OTVE_VECTOR_SAE then
+              vopext := vopext or OT_VECTORSAE;
 
-             //TG TODO SAE,ER
              if p^.optypes[i] and vopext <> vopext then
               exit;
            end;
@@ -1824,8 +1910,6 @@ implementation
 
         if (oper[i]^.vopext and OTVE_VECTOR_MASK) <> 0 then
          result := true;
-
-        //TG TODO k1 + z
       end;
     end;
 
@@ -2106,7 +2190,7 @@ implementation
         maxsupreg: array[tregistertype] of tsuperregister=
 {$ifdef x86_64}
           //(0, 16, 9, 8, 16, 32, 0, 0);
-          (0, 16, 9, 8, 32, 32, 0, 0); //TG
+          (0, 16, 9, 8, 32, 32, 8, 0); //TG
 {$else x86_64}
           (0,  8, 9, 8,  8, 32, 0, 0);
 {$endif x86_64}
@@ -2473,7 +2557,7 @@ implementation
          begin
            { 16 bit address? }
 
-           if ((ir<>NR_NO) and (isub in [R_SUBMMX,R_SUBMMY]) and
+           if ((ir<>NR_NO) and (isub in [R_SUBMMX,R_SUBMMY,R_SUBMMZ]) and
                (br<>NR_NO) and (bsub=R_SUBD)
               ) then
            begin
@@ -2726,7 +2810,7 @@ implementation
 {$endif x86_64}
       begin
         //TG TODO delete
-        if p^.opcode = a_VPERMD then
+        if p^.opcode = a_VADDPS then
          begin
            len:=0;
          end;
@@ -3010,6 +3094,8 @@ implementation
         begin
           if exists_vex then
           begin
+            inc(len,2);
+
             if exists_prefix_66 then dec(len);
             if exists_prefix_F2 then dec(len);
             if exists_prefix_F3 then dec(len);
@@ -3432,15 +3518,13 @@ implementation
 
                    opidx := c and 7;
                    if ops > opidx then
-                    if (oper[opidx]^.typ=top_reg) and
-                       (getregtype(oper[opidx]^.reg) = R_MMREGISTER) then
+                    if (oper[opidx]^.typ=top_reg) then
                      if getsupreg(oper[opidx]^.reg) and $10 = $0 then EVEXr := 1;  //TG TODO check
 
 
                    opidx := (c shr 3) and 7;
                    if ops > opidx then
-                    if (oper[opidx]^.typ=top_reg) and
-                      (getregtype(oper[opidx]^.reg) = R_MMREGISTER) then
+                    if (oper[opidx]^.typ=top_reg) then
                      if getsupreg(oper[opidx]^.reg) and $10 = $0 then EVEXx := 1;  //TG TODO check
 
                  end;
@@ -3456,9 +3540,7 @@ implementation
                  end;
            &350: needed_EVEX            := true;            // AVX512 instruction or AVX128/256/512-instruction (depended on operands [x,y,z]mm16..)
            &351: EVEXll                 := $02;             // vectorlength = 512 bits AND no scalar
-           &352: EVEXw0                 := $01;
-           &353: EVEXw1                 := $01;
-           &354: EVEXb                  := $01;             //TG TODO anpassen - nur zum Testen
+           &352: EVEXw1                 := $01;
            &361: begin
                    VEXvvvv              := VEXvvvv  OR $01; // set SIMD-prefix $66
                    VEXpp                := $01;             // set SIMD-prefix $66
@@ -3514,6 +3596,7 @@ implementation
           begin
             VEXvvvv  := VEXvvvv or ($0F shl 3); // set VEXvvvv bits (bits 6-3) to 1
             EVEXvvvv := $0F; //TG TODO check
+            EVEXv    := 1;   //TG TODO check
           end
           else if oper[opmode]^.typ = top_reg then
           begin
@@ -3562,16 +3645,45 @@ implementation
 
           if needed_EVEX then
           begin
+            EVEXaaa:= 0;
+            EVEXz  := 0;
+
             for i := 0 to ops - 1 do
              if (oper[i]^.vopext and OTVE_VECTOR_MASK) <> 0 then
              begin
-               if oper[i]^.vopext and OTVE_VECTORMASK_WRITEMASK = OTVE_VECTORMASK_WRITEMASK then
-                EVEXaaa := oper[i]^.vopext and $07;
-               if oper[i]^.vopext and OTVE_VECTORMASK_ZERO = OTVE_VECTORMASK_ZERO then
-                EVEXz := 1;
+               if oper[i]^.vopext and OTVE_VECTOR_WRITEMASK = OTVE_VECTOR_WRITEMASK then
+               begin
+                 EVEXaaa := oper[i]^.vopext and $07;
+                 if oper[i]^.vopext and OTVE_VECTOR_ZERO = OTVE_VECTOR_ZERO then EVEXz := 1;
+               end;
+
+               if oper[i]^.vopext and OTVE_VECTOR_BCST = OTVE_VECTOR_BCST then
+               begin
+                 EVEXb := 1;
+               end;
+
+               // the EVEXb flag is shared by several features (broadcast, sae and er)
+               if oper[i]^.vopext and OTVE_VECTOR_SAE = OTVE_VECTOR_SAE then
+               begin
+                 EVEXb := 1;
+               end;
+
+               if oper[i]^.vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then
+               begin
+                 EVEXb := 1;
+
+                 case oper[i]^.vopext and OTVE_VECTOR_ER_MASK of
+                   OTVE_VECTOR_RNSAE: EVEXll := 0;
+                   OTVE_VECTOR_RDSAE: EVEXll := 1;
+                   OTVE_VECTOR_RUSAE: EVEXll := 2;
+                   OTVE_VECTOR_RZSAE: EVEXll := 3;
+                                 else EVEXll := 0;
+                 end;
+               end;
+
 
                //TG TODO ER, SAE
-               break;
+               //break;
              end;
 
             // if (insentry.optypes[i] and OT_VECTORMASK) = OT_VECTORMASK then
@@ -3587,9 +3699,9 @@ implementation
             i := rex and 7;
             //bytes[1] := ((VEXmmmmm and $03) shl 0)  or ((not(rex) and $07) shl 4) and EVEXr and EVEXb;
             bytes[1] := ((EVEXmm   and $03) shl 0)  or
-                        ((not(rex) and $07) shl 5)  or
-                        ((EVEXr    and $01) shl 4) ; // or
-                        //((EVEXx    and $01) shl 6);
+                        ((not(rex) and $05) shl 5)  or
+                        ((EVEXr    and $01) shl 4)  or
+                        ((EVEXx    and $01) shl 6);
 
             bytes[2] := ((EVEXpp   and $03) shl 0)  or
                         ((1        and $01) shl 2)  or  // fixed in AVX512
@@ -4452,6 +4564,10 @@ implementation
       RegXMMSizeMask: int64;
       RegYMMSizeMask: int64;
       RegZMMSizeMask: int64;
+      RegBCSTSizeMask: int64;
+      RegBCSTXMMSizeMask: int64;
+      RegBCSTYMMSizeMask: int64;
+      RegBCSTZMMSizeMask: int64;
 
       bitcount: integer;
 
@@ -4482,19 +4598,25 @@ implementation
 
         if i >= 0 then
         begin
-          InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize   := msiUnkown;
-          InsTabMemRefSizeInfoCache^[AsmOp].ConstSize    := csiUnkown;
-          InsTabMemRefSizeInfoCache^[AsmOp].ExistsSSEAVX := false;
+          InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize           := msiUnkown;
+          InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST       := msbUnknown;
+          InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 0;
+          InsTabMemRefSizeInfoCache^[AsmOp].ConstSize            := csiUnkown;
+          InsTabMemRefSizeInfoCache^[AsmOp].ExistsSSEAVX         := false;
 
           insentry:=@instab[i];
           RegMMXSizeMask := 0;
           RegXMMSizeMask := 0;
           RegYMMSizeMask := 0;
           RegZMMSizeMask := 0;
+          RegBCSTSizeMask:= 0;
+          RegBCSTXMMSizeMask := 0;
+          RegBCSTYMMSizeMask := 0;
+          RegBCSTZMMSizeMask := 0;
 
 
           //TG TODO delete
-          if AsmOp = a_vpermd then
+          if AsmOp = A_VPERMD then
           begin
             RegMMXSizeMask := 0;
           end;
@@ -4574,7 +4696,7 @@ implementation
                 begin
                   inc(actMemCount);
 
-                  actMemSize:=actMemSize or (insentry^.optypes[j] and OT_SIZE_MASK);
+                  actMemSize:=actMemSize or (insentry^.optypes[j] and (OT_SIZE_MASK OR OT_VECTORBCST));
                   if (insentry^.optypes[j] and OT_REGMEM) = OT_REGMEM then
                     begin
                       actRegMemTypes  := actRegMemTypes or insentry^.optypes[j];
@@ -4677,62 +4799,89 @@ implementation
             end
             else
               begin
-                if (actMemCount=2) and ((AsmOp=A_MOVS) or (AsmOp=A_CMPS)) then
-                  actMemCount:=1;
-            case actMemCount of
-                0: ; // nothing todo
-                1: begin
-                     MRefInfo := msiUnkown;
-                     case actRegMemTypes and (OT_MMXRM or OT_XMMRM or OT_YMMRM or OT_ZMMRM or OT_REG_EXTRA_MASK) of
-                       OT_MMXRM: actMemSize := actMemSize or OT_BITS64;
-                       OT_XMMRM: actMemSize := actMemSize or OT_BITS128;
-                       OT_YMMRM: actMemSize := actMemSize or OT_BITS256;
-                       OT_ZMMRM: actMemSize := actMemSize or OT_BITS512;
-                     end;
+                if (actMemCount=2) and ((AsmOp=A_MOVS) or (AsmOp=A_CMPS)) then actMemCount:=1;
+
+                case actMemCount of
+                  0: ; // nothing todo
+                  1: begin
+                       MRefInfo := msiUnkown;
+                       case actRegMemTypes and (OT_MMXRM or OT_XMMRM or OT_YMMRM or OT_ZMMRM or OT_REG_EXTRA_MASK) of
+                         OT_MMXRM: actMemSize := actMemSize or OT_BITS64;
+                         OT_XMMRM: actMemSize := actMemSize or OT_BITS128;
+                         OT_YMMRM: actMemSize := actMemSize or OT_BITS256;
+                         OT_ZMMRM: actMemSize := actMemSize or OT_BITS512;
+                       end;
 
-                     case actMemSize of
-                       0: MRefInfo := msiNoSize;
-                       OT_BITS8: MRefInfo := msiMem8;
-                       OT_BITS16: MRefInfo := msiMem16;
-                       OT_BITS32: MRefInfo := msiMem32;
-                       OT_BITS64: MRefInfo := msiMem64;
-                       OT_BITS128: MRefInfo := msiMem128;
-                       OT_BITS256: MRefInfo := msiMem256;
-                       OT_BITS512: MRefInfo := msiMem512;
-                       OT_BITS80,
-                       OT_FAR,
-                       OT_NEAR,
-                       OT_SHORT: ; // ignore
-                       else
-                         begin
-                           bitcount := bitcnt(actMemSize);
+                       case actMemSize of
+                                  0: MRefInfo := msiNoSize;
+                           OT_BITS8: MRefInfo := msiMem8;
+                          OT_BITS16: MRefInfo := msiMem16;
+                          OT_BITS32: MRefInfo := msiMem32;
+                         OT_BITSB32: MRefInfo := msiBMem32;
+                          OT_BITS64: MRefInfo := msiMem64;
+                         OT_BITSB64: MRefInfo := msiBMem64;
+                         OT_BITS128: MRefInfo := msiMem128;
+                         OT_BITS256: MRefInfo := msiMem256;
+                         OT_BITS512: MRefInfo := msiMem512;
+                         OT_BITS80,
+                         OT_FAR,
+                         OT_NEAR,
+                         OT_SHORT: ; // ignore
+                         else
+                           begin
+                             bitcount := bitcnt(actMemSize);
 
-                           if bitcount > 1 then MRefInfo := msiMultiple
-                           else InternalError(777203);
-                         end;
-                     end;
+                             if bitcount > 1 then MRefInfo := msiMultiple
+                             else InternalError(777203);
+                           end;
+                       end;
 
-                     if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiUnkown then
-                       begin
-                         InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := MRefInfo;
-                       end
-                     else if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then
+                       if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize = msiUnkown then
+                         begin
+                           InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := MRefInfo;
+                         end
+                       else
                        begin
-                         with InsTabMemRefSizeInfoCache^[AsmOp] do
+                         // ignore broadcast-memory
+                         if not(MRefInfo in [msiBMem32, msiBMem64]) then
                          begin
-                           if ((MemRefSize = msiMem8)        OR (MRefInfo = msiMem8))   then MemRefSize := msiMultiple8
-                           else if ((MemRefSize = msiMem16)  OR (MRefInfo = msiMem16))  then MemRefSize := msiMultiple16
-                           else if ((MemRefSize = msiMem32)  OR (MRefInfo = msiMem32))  then MemRefSize := msiMultiple32
-                           else if ((MemRefSize = msiMem64)  OR (MRefInfo = msiMem64))  then MemRefSize := msiMultiple64
-                           else if ((MemRefSize = msiMem128) OR (MRefInfo = msiMem128)) then MemRefSize := msiMultiple128
-                           else if ((MemRefSize = msiMem256) OR (MRefInfo = msiMem256)) then MemRefSize := msiMultiple256
-                           else if ((MemRefSize = msiMem512) OR (MRefInfo = msiMem512)) then MemRefSize := msiMultiple512
-                           else MemRefSize := msiMultiple;
+                           if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then
+                           begin
+                             with InsTabMemRefSizeInfoCache^[AsmOp] do
+                             begin
+                               if ((MemRefSize = msiMem8)        OR (MRefInfo = msiMem8))   then MemRefSize := msiMultiple8
+                               else if ((MemRefSize = msiMem16)  OR (MRefInfo = msiMem16))  then MemRefSize := msiMultiple16
+                               else if ((MemRefSize = msiMem32)  OR (MRefInfo = msiMem32))  then MemRefSize := msiMultiple32
+                               else if ((MemRefSize = msiMem64)  OR (MRefInfo = msiMem64))  then MemRefSize := msiMultiple64
+                               else if ((MemRefSize = msiMem128) OR (MRefInfo = msiMem128)) then MemRefSize := msiMultiple128
+                               else if ((MemRefSize = msiMem256) OR (MRefInfo = msiMem256)) then MemRefSize := msiMultiple256
+                               else if ((MemRefSize = msiMem512) OR (MRefInfo = msiMem512)) then MemRefSize := msiMultiple512
+                               else MemRefSize := msiMultiple;
+                             end;
+                           end;
                          end;
-                     end;
+                       end;
 
-                     if actRegCount > 0 then
+                       //if not(MRefInfo in [msiBMem32, msiBMem64]) and (actRegCount > 0) then
+                       if actRegCount > 0 then
                        begin
+                         if MRefInfo in [msiBMem32, msiBMem64] then
+                         begin
+                           // BROADCAST - OPERAND
+                           RegBCSTSizeMask := RegBCSTSizeMask or actMemSize;
+
+                           case actRegTypes and (OT_XMMREG or OT_YMMREG or OT_ZMMREG or OT_REG_EXTRA_MASK) of
+                             OT_XMMREG: RegBCSTXMMSizeMask := RegBCSTXMMSizeMask or actMemSize;
+                             OT_YMMREG: RegBCSTYMMSizeMask := RegBCSTYMMSizeMask or actMemSize;
+                             OT_ZMMREG: RegBCSTZMMSizeMask := RegBCSTZMMSizeMask or actMemSize;
+                                   else begin
+                                          RegBCSTXMMSizeMask := not(0);
+                                          RegBCSTYMMSizeMask := not(0);
+                                          RegBCSTZMMSizeMask := not(0);
+                                        end;
+                           end;
+                         end
+                         else
                          case actRegTypes and (OT_MMXREG or OT_XMMREG or OT_YMMREG or OT_ZMMREG or OT_REG_EXTRA_MASK) of
                            OT_MMXREG: RegMMXSizeMask := RegMMXSizeMask or actMemSize;
                            OT_XMMREG: RegXMMSizeMask := RegXMMSizeMask or actMemSize;
@@ -4745,10 +4894,13 @@ implementation
                                         RegZMMSizeMask := not(0);
                                       end;
                          end;
-                       end;
-                   end;
-              else InternalError(777202);
-            end;
+                       end
+                       else
+
+
+                     end
+                  else InternalError(777202);
+                end;
               end;
 
             inc(insentry);
@@ -4799,6 +4951,22 @@ implementation
                     ((RegXMMSizeMask or RegYMMSizeMask or RegZMMSizeMask) <> 0) then
             begin
               InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
+
+              case RegBCSTSizeMask of
+                        0: ; // ignore;
+                OT_BITSB32: begin
+                              InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST       := msbBCST32;
+                              InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 4;
+                            end;
+                OT_BITSB64: begin
+                              InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST       := msbBCST64;
+                              InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 2;
+                            end;
+                      else begin
+                             //TG TODO - mixed broadcast
+                             InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiMemRegSize;
+                           end;;
+              end;
             end
             else if (RegXMMSizeMask = OT_BITS16) and
                     (RegYMMSizeMask = OT_BITS32) then

+ 6 - 1
compiler/x86/cpubase.pas

@@ -485,7 +485,12 @@ implementation
 {$endif x86_64}
               else
                 reg_cgsize:=OS_32
-            end
+            end;
+          R_ADDRESSREGISTER:
+            case reg of
+              NR_K0..NR_K7: reg_cgsize:=OS_64;
+                       else internalerror(2003031801);
+            end;
           else
             internalerror(2003031801);
           end;

+ 373 - 181
compiler/x86/rax86.pas

@@ -42,7 +42,8 @@ Procedure FWaitWarning;
 type
   Tx86Operand=class(TOperand)
     opsize  : topsize;
-    vopext  : byte;      // bitmask: vector-operand extention AVX512 (e.g. vaddps xmm0 {k1} {z})
+    vopext  : smallint;      // bitmask: vector-operand extention AVX512 (e.g. vaddps xmm0 {k1} {z})
+    vbcst   : byte;
     Procedure SetSize(_size:longint;force:boolean);override;
     Procedure SetCorrectSize(opcode:tasmop);override;
     Function CheckOperand: boolean; override;
@@ -50,10 +51,15 @@ type
     Procedure SetupCode;
     { handles the @Data symbol }
     Procedure SetupData;
+
+    constructor create; override;
   end;
 
   { Operands are always in AT&T order.
     Intel reader attaches them right-to-left, then shifts to start with 1 }
+
+  { Tx86Instruction }
+
   Tx86Instruction=class(TInstruction)
     opsize  : topsize;
     constructor Create(optype : tcoperand);override;
@@ -66,6 +72,7 @@ type
     procedure FixupOpcode;virtual;
     { opcode adding }
     function ConcatInstruction(p : TAsmList) : tai;override;
+    function getstring: string;
   end;
 
 const
@@ -323,6 +330,13 @@ begin
 {$endif i8086}
 end;
 
+constructor Tx86Operand.create;  // runs the inherited constructor, then clears the two vector-operand fields
+begin
+  inherited;
+
+  vopext  := 0;  // vector-operand extension bitmask (AVX512): start with no bits set
+  vbcst   := 0;  // presumably the broadcast marker; the operand size check tests vbcst <> 0 -- confirm exact meaning
+end;
 
 {*****************************************************************************
                               T386Instruction
@@ -346,14 +360,20 @@ var
   ExistsMemRef: boolean;
   ExistsConstNoSize: boolean;
   ExistsLocalSymSize: boolean;
+  ExistsBCST: boolean;
   memrefsize: integer;
   memopsize: integer;
   memoffset: asizeint;
+  vbcst: byte;
+  mmregs: Set of TSubregister;
+  multiplicator: integer;
+  bcst1,bcst2: string;
 begin
   ExistsMemRefNoSize := false;
   ExistsMemRef       := false;
   ExistsConstNoSize  := false;
   ExistsLocalSymSize := false;
+  ExistsBCST         := false;
 
   // EXIST A MEMORY- OR CONSTANT-OPERAND WITHOUT SIZE ?
   for i := 1 to ops do
@@ -362,6 +382,9 @@ begin
     begin
       ExistsMemRef := true;
 
+      ExistsBCST := (MemRefInfo(opcode).ExistsSSEAVX) and
+                    (tx86operand(operands[i]).vbcst <> 0);
+
       if (tx86operand(operands[i]).opsize = S_NO) then
       begin
         ExistsMemRefNoSize := true;
@@ -370,7 +393,6 @@ begin
               OPR_LOCAL: ExistsLocalSymSize := tx86operand(operands[i]).opr.localsym.getsize > 0;
           OPR_REFERENCE: ExistsLocalSymSize := true;
         end;
-
       end;
     end
     else if operands[i].Opr.Typ in [OPR_CONSTANT] then
@@ -400,6 +422,14 @@ begin
         begin
           memrefsize := -1;
 
+          if ExistsBCST then
+          begin
+            case MemRefInfo(opcode).MemRefSizeBCST of
+              msbBCST32: memrefsize := 32;
+              msbBCST64: memrefsize := 64;
+            end;
+          end
+          else
           case MemRefInfo(opcode).MemRefSize of
               msiMem8: memrefsize := 8;
              msiMem16: memrefsize := 16;
@@ -407,8 +437,8 @@ begin
              msiMem64: memrefsize := 64;
             msiMem128: memrefsize := 128;
             msiMem256: memrefsize := 256;
-            msiMemRegSize
-                     : for j := 1 to ops do
+            msiMemRegSize:
+                       for j := 1 to ops do
                        begin
                          if operands[j].Opr.Typ = OPR_REGISTER then
                          begin
@@ -507,205 +537,221 @@ begin
       begin
         case operands[i].Opr.Typ of
           OPR_REFERENCE:
-                case MemRefInfo(opcode).MemRefSize of
-                    msiMem8:
-                            begin
-                              tx86operand(operands[i]).opsize := S_B;
-                              tx86operand(operands[i]).size   := OS_8;
-                            end;
-                    msiMultiple8:
-                            begin
-                              tx86operand(operands[i]).opsize := S_B;
-                              tx86operand(operands[i]).size   := OS_8;
+                begin
+                  if ExistsBCST then
+                  begin
+                    case MemRefInfo(opcode).MemRefSizeBCST of
+                      msbBCST32: begin
+                                   tx86operand(operands[i]).opsize := S_L;
+                                   tx86operand(operands[i]).size   := OS_32;
+                                 end;
+                      msbBCST64: begin
+                                   tx86operand(operands[i]).opsize := S_Q;
+                                   tx86operand(operands[i]).size   := OS_M64;
+                                 end;
+                    end;
+                  end
+                  else
 
-                              Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"8 bit memory operand"');
-                            end;
-                    msiMem16:
-                            begin
-                              tx86operand(operands[i]).opsize := S_W;
-                              tx86operand(operands[i]).size   := OS_16;
-                            end;
-                    msiMultiple16:
-                             begin
-                               tx86operand(operands[i]).opsize := S_W;
-                               tx86operand(operands[i]).size   := OS_16;
+                  case MemRefInfo(opcode).MemRefSize of
+                      msiMem8:
+                              begin
+                                tx86operand(operands[i]).opsize := S_B;
+                                tx86operand(operands[i]).size   := OS_8;
+                              end;
+                      msiMultiple8:
+                              begin
+                                tx86operand(operands[i]).opsize := S_B;
+                                tx86operand(operands[i]).size   := OS_8;
+
+                                Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"8 bit memory operand"');
+                              end;
+                      msiMem16:
+                              begin
+                                tx86operand(operands[i]).opsize := S_W;
+                                tx86operand(operands[i]).size   := OS_16;
+                              end;
+                      msiMultiple16:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_W;
+                                 tx86operand(operands[i]).size   := OS_16;
 
-                               Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"16 bit memory operand"');
-                             end;
-                    msiMem32:
-                             begin
-                               tx86operand(operands[i]).opsize := S_L;
-                               tx86operand(operands[i]).size   := OS_32;
-                             end;
-                    msiMultiple32:
-                             begin
-                               tx86operand(operands[i]).opsize := S_L;
-                               tx86operand(operands[i]).size   := OS_32;
+                                 Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"16 bit memory operand"');
+                               end;
+                      msiMem32:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_L;
+                                 tx86operand(operands[i]).size   := OS_32;
+                               end;
+                      msiMultiple32:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_L;
+                                 tx86operand(operands[i]).size   := OS_32;
 
-                               Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"32 bit memory operand"');
-                             end;
-                    msiMem64:
-                             begin
-                               tx86operand(operands[i]).opsize := S_Q;
-                               tx86operand(operands[i]).size   := OS_M64;
-                             end;
-                    msiMultiple64:
-                             begin
-                               tx86operand(operands[i]).opsize := S_Q;
-                               tx86operand(operands[i]).size   := OS_M64;
+                                 Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"32 bit memory operand"');
+                               end;
+                      msiMem64:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_Q;
+                                 tx86operand(operands[i]).size   := OS_M64;
+                               end;
+                      msiMultiple64:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_Q;
+                                 tx86operand(operands[i]).size   := OS_M64;
 
-                               Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"64 bit memory operand"');
-                             end;
-                    msiMem128:
-                             begin
-                               tx86operand(operands[i]).opsize := S_XMM;
-                               tx86operand(operands[i]).size   := OS_M128;
-                             end;
-                    msiMultiple128:
-                             begin
-                               tx86operand(operands[i]).opsize := S_XMM;
-                               tx86operand(operands[i]).size   := OS_M128;
+                                 Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"64 bit memory operand"');
+                               end;
+                      msiMem128:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_XMM;
+                                 tx86operand(operands[i]).size   := OS_M128;
+                               end;
+                      msiMultiple128:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_XMM;
+                                 tx86operand(operands[i]).size   := OS_M128;
 
-                               Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"128 bit memory operand"');
-                             end;
-                    msiMem256:
-                             begin
-                               tx86operand(operands[i]).opsize := S_YMM;
-                               tx86operand(operands[i]).size   := OS_M256;
-                               opsize := S_YMM;
-                             end;
-                    msiMultiple256:
-                             begin
-                               tx86operand(operands[i]).opsize := S_YMM;
-                               tx86operand(operands[i]).size   := OS_M256;
-                               opsize := S_YMM;
+                                 Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"128 bit memory operand"');
+                               end;
+                      msiMem256:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_YMM;
+                                 tx86operand(operands[i]).size   := OS_M256;
+                                 opsize := S_YMM;
+                               end;
+                      msiMultiple256:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_YMM;
+                                 tx86operand(operands[i]).size   := OS_M256;
+                                 opsize := S_YMM;
 
-                               Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"256 bit memory operand"');
-                             end;
-                    msiMem512:
-                             begin
-                               tx86operand(operands[i]).opsize := S_ZMM;
-                               tx86operand(operands[i]).size   := OS_M512;
-                               opsize := S_ZMM;
-                             end;
-                    msiMultiple512:
-                             begin
-                               tx86operand(operands[i]).opsize := S_ZMM;
-                               tx86operand(operands[i]).size   := OS_M512;
-                               opsize := S_ZMM;
+                                 Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"256 bit memory operand"');
+                               end;
+                      msiMem512:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_ZMM;
+                                 tx86operand(operands[i]).size   := OS_M512;
+                                 opsize := S_ZMM;
+                               end;
+                      msiMultiple512:
+                               begin
+                                 tx86operand(operands[i]).opsize := S_ZMM;
+                                 tx86operand(operands[i]).size   := OS_M512;
+                                 opsize := S_ZMM;
 
-                               Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"512 bit memory operand"');
-                             end;
+                                 Message2(asmr_w_check_mem_operand_automap_multiple_size, std_op2str[opcode], '"512 bit memory operand"');
+                               end;
 
-                  msiMemRegSize:
-                             begin
-                               // mem-ref-size = register size
-                               for j := 1 to ops do
+                    msiMemRegSize:
                                begin
-                                 if operands[j].Opr.Typ = OPR_REGISTER then
+                                 // mem-ref-size = register size
+                                 for j := 1 to ops do
                                  begin
-                                   if (tx86operand(operands[j]).opsize <> S_NO) and
-                                      (tx86operand(operands[j]).size <> OS_NO) then
+                                   if operands[j].Opr.Typ = OPR_REGISTER then
                                    begin
-                                     tx86operand(operands[i]).opsize := tx86operand(operands[j]).opsize;
-                                     tx86operand(operands[i]).size   := tx86operand(operands[j]).size;
-                                     break;
-                                   end
-                                   else Message(asmr_e_unable_to_determine_reference_size);
+                                     if (tx86operand(operands[j]).opsize <> S_NO) and
+                                        (tx86operand(operands[j]).size <> OS_NO) then
+                                     begin
+                                       tx86operand(operands[i]).opsize := tx86operand(operands[j]).opsize;
+                                       tx86operand(operands[i]).size   := tx86operand(operands[j]).size;
+                                       break;
+                                     end
+                                     else Message(asmr_e_unable_to_determine_reference_size);
+                                   end;
                                  end;
                                end;
-                             end;
-                    msiMemRegx16y32:
-                      begin
-                        for j := 1 to ops do
-                        begin
-                          if operands[j].Opr.Typ = OPR_REGISTER then
-                          begin
-                            case getsubreg(operands[j].opr.reg) of
-                              R_SUBMMX: begin
-                                          tx86operand(operands[i]).opsize := S_L;
-                                          tx86operand(operands[i]).size   := OS_M16;
-                                          break;
-                                        end;
-                              R_SUBMMY: begin
-                                          tx86operand(operands[i]).opsize := S_Q;
-                                          tx86operand(operands[i]).size   := OS_M32;
-                                          break;
-                                        end;
-                                   else Message(asmr_e_unable_to_determine_reference_size);
-                            end;
-                          end;
-                        end;
-                      end;
-
-                    msiMemRegx32y64:
-                      begin
-                        for j := 1 to ops do
-                        begin
-                          if operands[j].Opr.Typ = OPR_REGISTER then
-                          begin
-                            case getsubreg(operands[j].opr.reg) of
-                              R_SUBMMX: begin
-                                          tx86operand(operands[i]).opsize := S_L;
-                                          tx86operand(operands[i]).size   := OS_M32;
-                                          break;
-                                        end;
-                              R_SUBMMY: begin
-                                          tx86operand(operands[i]).opsize := S_Q;
-                                          tx86operand(operands[i]).size   := OS_M64;
-                                          break;
-                                        end;
-                                   else Message(asmr_e_unable_to_determine_reference_size);
-                            end;
-                          end;
-                        end;
-                      end;
-                   msiMemRegx64y128:
-                             begin
-                               for j := 1 to ops do
+                      msiMemRegx16y32:
                                begin
-                                 if operands[j].Opr.Typ = OPR_REGISTER then
+                                 for j := 1 to ops do
                                  begin
-                                   case getsubreg(operands[j].opr.reg) of
-                                     R_SUBMMX: begin
-                                                 tx86operand(operands[i]).opsize := S_Q;
-                                                 tx86operand(operands[i]).size   := OS_M64;
-                                                 break;
-                                               end;
-                                     R_SUBMMY: begin
-                                                 tx86operand(operands[i]).opsize := S_XMM;
-                                                 tx86operand(operands[i]).size   := OS_M128;
-                                                 break;
-                                               end;
-                                          else Message(asmr_e_unable_to_determine_reference_size);
+                                   if operands[j].Opr.Typ = OPR_REGISTER then
+                                   begin
+                                     case getsubreg(operands[j].opr.reg) of
+                                       R_SUBMMX: begin
+                                                   tx86operand(operands[i]).opsize := S_L;
+                                                   tx86operand(operands[i]).size   := OS_M16;
+                                                   break;
+                                                 end;
+                                       R_SUBMMY: begin
+                                                   tx86operand(operands[i]).opsize := S_Q;
+                                                   tx86operand(operands[i]).size   := OS_M32;
+                                                   break;
+                                                 end;
+                                            else Message(asmr_e_unable_to_determine_reference_size);
+                                     end;
                                    end;
                                  end;
                                end;
-                             end;
-                   msiMemRegx64y256:
-                             begin
-                               for j := 1 to ops do
+                      msiMemRegx32y64:
                                begin
-                                 if operands[j].Opr.Typ = OPR_REGISTER then
+                                 for j := 1 to ops do
                                  begin
-                                   case getsubreg(operands[j].opr.reg) of
-                                     R_SUBMMX: begin
-                                                 tx86operand(operands[i]).opsize := S_Q;
-                                                 tx86operand(operands[i]).size   := OS_M64;
-                                                 break;
-                                               end;
-                                     R_SUBMMY: begin
-                                                 tx86operand(operands[i]).opsize := S_YMM;
-                                                 tx86operand(operands[i]).size   := OS_M256;
-                                                 break;
-                                               end;
-                                          else Message(asmr_e_unable_to_determine_reference_size);
+                                   if operands[j].Opr.Typ = OPR_REGISTER then
+                                   begin
+                                     case getsubreg(operands[j].opr.reg) of
+                                       R_SUBMMX: begin
+                                                   tx86operand(operands[i]).opsize := S_L;
+                                                   tx86operand(operands[i]).size   := OS_M32;
+                                                   break;
+                                                 end;
+                                       R_SUBMMY: begin
+                                                   tx86operand(operands[i]).opsize := S_Q;
+                                                   tx86operand(operands[i]).size   := OS_M64;
+                                                   break;
+                                                 end;
+                                            else Message(asmr_e_unable_to_determine_reference_size);
+                                     end;
                                    end;
                                  end;
                                end;
-                             end;
-                   msiNoSize: ; //  all memory-sizes are ok
-                   msiMultiple: Message(asmr_e_unable_to_determine_reference_size); // TODO individual message
+                     msiMemRegx64y128:
+                               begin
+                                 for j := 1 to ops do
+                                 begin
+                                   if operands[j].Opr.Typ = OPR_REGISTER then
+                                   begin
+                                     case getsubreg(operands[j].opr.reg) of
+                                       R_SUBMMX: begin
+                                                   tx86operand(operands[i]).opsize := S_Q;
+                                                   tx86operand(operands[i]).size   := OS_M64;
+                                                   break;
+                                                 end;
+                                       R_SUBMMY: begin
+                                                   tx86operand(operands[i]).opsize := S_XMM;
+                                                   tx86operand(operands[i]).size   := OS_M128;
+                                                   break;
+                                                 end;
+                                            else Message(asmr_e_unable_to_determine_reference_size);
+                                     end;
+                                   end;
+                                 end;
+                               end;
+                     msiMemRegx64y256:
+                               begin
+                                 for j := 1 to ops do
+                                 begin
+                                   if operands[j].Opr.Typ = OPR_REGISTER then
+                                   begin
+                                     case getsubreg(operands[j].opr.reg) of
+                                       R_SUBMMX: begin
+                                                   tx86operand(operands[i]).opsize := S_Q;
+                                                   tx86operand(operands[i]).size   := OS_M64;
+                                                   break;
+                                                 end;
+                                       R_SUBMMY: begin
+                                                   tx86operand(operands[i]).opsize := S_YMM;
+                                                   tx86operand(operands[i]).size   := OS_M256;
+                                                   break;
+                                                 end;
+                                            else Message(asmr_e_unable_to_determine_reference_size);
+                                     end;
+                                   end;
+                                 end;
+                               end;
+                     msiNoSize: ; //  all memory-sizes are ok
+                     msiMultiple: Message(asmr_e_unable_to_determine_reference_size); // TODO individual message
+                  end;
                 end;
           OPR_CONSTANT:
                 case MemRefInfo(opcode).ConstSize of
@@ -821,6 +867,48 @@ begin
             end;
         end;
     end;
+
+  if MemRefInfo(opcode).ExistsSSEAVX then
+  begin
+    // Validate broadcast memory operands: the element count written as
+    // "{1toN}" has to fit the width of the vector registers that are used.
+    vbcst := 0;
+    mmregs := [];
+
+    // Pick up the broadcast count stored on the memory operand (0 = none) and
+    // collect the sub-register kinds (XMM/YMM/ZMM) of all register operands.
+    for i := 1 to ops do
+     if operands[i].Opr.Typ in [OPR_REFERENCE, OPR_LOCAL] then vbcst := tx86operand(operands[i]).vbcst
+      else if operands[i].Opr.Typ = OPR_REGISTER then
+      begin
+        if getsubreg(operands[i].opr.reg) in [R_SUBMMX, R_SUBMMY, R_SUBMMZ] then
+        begin
+          include(mmregs, getsubreg(operands[i].opr.reg));
+        end;
+      end;
+
+    if vbcst <> 0 then
+    begin
+      // found broadcast-memory-operand  (e.g. "{1to8}")
+      // check that its element count is correct
+
+      // Width factor relative to an XMM register: XMM = 1, YMM = 2, ZMM = 4.
+      // (A ZMM register is four times as wide as an XMM register, so the
+      // factor must be 4; with 3 no legal "{1toN}" count could ever match.)
+      multiplicator := 0;
+      if mmregs = [R_SUBMMX] then multiplicator := 1
+       else if mmregs = [R_SUBMMY] then multiplicator := 2
+       else if mmregs = [R_SUBMMZ] then multiplicator := 4
+       else
+        begin
+          //TG TODO
+          // NOTE(review): no or mixed vector register sizes leave the factor
+          // at 0, so the comparison below reports a mismatch against "1to0".
+
+        end;
+
+      // BCSTXMMMultiplicator is presumably the element count for an XMM-sized
+      // operand of this opcode - TODO confirm against MemRefInfo.
+      if MemRefInfo(opcode).BCSTXMMMultiplicator * multiplicator <> vbcst then
+      begin
+        str(MemRefInfo(opcode).BCSTXMMMultiplicator * multiplicator, bcst1);
+        str(vbcst, bcst2);
+        Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2);
+      end;
+    end;
+  end;
+
+
 end;
 
 
@@ -973,6 +1061,7 @@ begin
      else
        Message(asmr_e_size_suffix_and_dest_dont_match);
    end;
+
 end;
 
 
@@ -1040,6 +1129,8 @@ var
   asize : int64;
   ai   : taicpu;
 
+  //TG TODO delete
+  ocode: tasmop;
 begin
   ConcatInstruction:=nil;
 
@@ -1240,7 +1331,8 @@ begin
      Message(asmr_w_enter_not_supported_by_linux);
 
 
-
+  //TG TODO delete
+  oCode := opcode;
 
   ai:=taicpu.op_none(opcode,siz);
   ai.fileinfo:=filepos;
@@ -1346,4 +1438,104 @@ begin
   result:=ai;
 end;
 
+{ Returns a bracketed, human-readable description of the instruction:
+  '[' + mnemonic, a space, the comma separated operands, then ']'.
+
+  Each operand is rendered as
+    - its value for a constant,
+    - 'xmmreg'/'ymmreg'/'zmmreg' followed by the super-register number for
+      vector registers, 'mmxreg' or 'fpureg' for MMX/FPU registers,
+      'reg' plus tcgsize2size[size] for integer registers
+      (other register types add no text),
+    - 'mem' plus tcgsize2size[size] for references and locals,
+    - '???' for any other operand type,
+  followed by the vector operand extensions recorded in vopext
+  ({kN}, {z}, {sae}, {1toN}, {r?-sae}). }
+function Tx86Instruction.getstring: string;
+var
+  i : longint;
+  s, sval : string;
+  regnr: string;
+  addsize : boolean;
+begin
+  s:='['+std_op2str[opcode];
+  for i:=1 to ops do
+   begin
+     with operands[i] as Tx86Operand do
+       begin
+         { operands are numbered from 1: a blank separates the mnemonic from
+           the first operand, commas separate the remaining operands }
+         if i=1 then
+          s:=s+' '
+         else
+          s:=s+',';
+         { type }
+         addsize:=false;
+
+         case operands[i].opr.typ of
+            OPR_CONSTANT : begin
+                             str(operands[i].opr.val, sval);
+                             s:=s+ sval;
+                           end;
+            OPR_REGISTER : begin
+                             regnr := '';
+                             str(getsupreg(opr.reg),regnr);
+
+                             { vector registers carry their register number,
+                               the other register classes only their kind }
+                             if getsubreg(opr.reg)= R_SUBMMX then
+                              s:=s+'xmmreg' + regnr
+                             else
+                               if getsubreg(opr.reg)= R_SUBMMY then
+                                s:=s+'ymmreg' + regnr
+                             else
+                               if getsubreg(opr.reg)= R_SUBMMZ then
+                                s:=s+'zmmreg' + regnr
+                             else
+                               if getregtype(opr.reg)= R_MMXREGISTER then
+                                s:=s+'mmxreg'
+                             else
+                               if getregtype(opr.reg)= R_FPUREGISTER then
+                                s:=s+'fpureg'
+                             else
+                               if getregtype(opr.reg)=R_INTREGISTER then
+                               begin
+                                 s:=s+'reg';
+                                 addsize:=true;
+                               end;
+                           end;
+               OPR_LOCAL,
+            OPR_REFERENCE: begin
+                             s:=s + 'mem';
+                             addsize:=true;
+                           end;
+                      else s:=s + '???';
+         end;
+
+         { integer registers and memory operands get their size appended }
+         if addsize then
+          begin
+            sval := '';
+            str(tcgsize2size[size], sval);
+
+            s := s + sval;
+          end;
+
+         { vector operand extensions: write mask, zeroing, sae, broadcast
+           and embedded rounding }
+         if vopext <> 0 then
+          begin
+            { the low three bits of vopext hold the mask register number }
+            str(vopext and $07, regnr);
+            if vopext and OTVE_VECTOR_WRITEMASK = OTVE_VECTOR_WRITEMASK then
+              s := s + ' {k' + regnr + '}';
+
+            if vopext and OTVE_VECTOR_ZERO = OTVE_VECTOR_ZERO then
+              s := s + ' {z}';
+
+            if vopext and OTVE_VECTOR_SAE = OTVE_VECTOR_SAE then
+              s := s + ' {sae}';
+
+
+            if vopext and OTVE_VECTOR_BCST = OTVE_VECTOR_BCST then
+             case vopext and OTVE_VECTOR_BCST_MASK of
+                OTVE_VECTOR_BCST2: s := s + ' {1to2}';
+                OTVE_VECTOR_BCST4: s := s + ' {1to4}';
+                OTVE_VECTOR_BCST8: s := s + ' {1to8}';
+               OTVE_VECTOR_BCST16: s := s + ' {1to16}';
+             end;
+
+            if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then
+             case vopext and OTVE_VECTOR_ER_MASK of
+                OTVE_VECTOR_RNSAE: s := s + ' {rn-sae}';
+                OTVE_VECTOR_RDSAE: s := s + ' {rd-sae}';
+                OTVE_VECTOR_RUSAE: s := s + ' {ru-sae}';
+                OTVE_VECTOR_RZSAE: s := s + ' {rz-sae}';
+             end;
+
+          end;
+       end;
+   end;
+  GetString:=s+']';
+end;
+
 end.

+ 205 - 62
compiler/x86/rax86int.pas

@@ -40,14 +40,15 @@ Unit Rax86int;
       AS_COMMA,AS_LBRACKET,AS_RBRACKET,AS_LPAREN,
       AS_RPAREN,AS_COLON,AS_DOT,AS_PLUS,AS_MINUS,AS_STAR,
       AS_SEPARATOR,AS_ID,AS_REGISTER,AS_OPCODE,AS_SLASH,
-      AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,
+      AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,AS_LOPBCST,AS_OPBCST1TO2,AS_OPBCST1TO4,AS_OPBCST1TO8,AS_OPBCST1TO16,AS_LOPSAE,AS_OPSAE,
+      AS_LOPER,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE,
        {------------------ Assembler directives --------------------}
       AS_ALIGN,AS_DB,AS_DW,AS_DD,AS_DQ,AS_PUBLIC,AS_END,
        {------------------ Assembler Operators  --------------------}
       AS_BYTE,AS_WORD,AS_DWORD,AS_QWORD,AS_TBYTE,AS_DQWORD,AS_OWORD,AS_XMMWORD,AS_YWORD,AS_YMMWORD,AS_ZWORD,AS_ZMMWORD,AS_NEAR,AS_FAR,
       AS_HIGH,AS_LOW,AS_OFFSET,AS_SIZEOF,AS_VMTOFFSET,AS_SEG,AS_TYPE,AS_PTR,AS_MOD,AS_SHL,AS_SHR,AS_NOT,
-      AS_AND,AS_OR,AS_XOR,AS_WRT,AS___GOTPCREL,AS_TARGET_DIRECTIVE
-      ,AS_BROADCAST
+      AS_AND,AS_OR,AS_XOR,AS_WRT,AS___GOTPCREL,AS_TARGET_DIRECTIVE,AS_RNSAE
+
       );
 
     type
@@ -101,7 +102,7 @@ Unit Rax86int;
          procedure BuildConstantOperand(oper: tx86operand);
          procedure BuildOpCode(instr : tx86instruction);
          procedure BuildConstant(constsize: byte);
-         procedure consume_voperand_ext(aop: tx86operand);
+         procedure consume_voperand_ext(aop: tx86operand; aConsumeVOpExt: boolean = true);
 
 
          function is_targetdirective(const s: string): boolean;virtual;
@@ -165,11 +166,12 @@ Unit Rax86int;
         ',','[',']','(',
         ')',':','.','+','-','*',
         ';','identifier','register','opcode','/',
-        '','','','',
+        '','','','','','','','','','','',
+        '','','','','',
         '','','','','','','END',
         '','','','','','','','','','','','','','',
         '','','','sizeof','vmtoffset','','type','ptr','mod','shl','shr','not',
-        'and','or','xor','wrt','..gotpcrel','', '{1to8}'
+        'and','or','xor','wrt','..gotpcrel','','{RN-SAE}'
       );
 
     constructor tx86intreader.create;
@@ -461,8 +463,20 @@ Unit Rax86int;
                        c:=current_scanner.asmgetchar;
                      end;
                   end;
-                 if prevasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] then
+                 if prevasmtoken in [AS_LOPMASK,AS_LOPZEROMASK,AS_LOPBCST,AS_LOPSAE,AS_LOPER] then
                   begin
+                    if (prevasmtoken = AS_LOPER) and (c = '-') then
+                     begin
+                       actasmpattern := actasmpattern + c;
+                       c:=current_scanner.asmgetchar;
+                       while c in  ['A'..'Z','a'..'z'] do
+                       begin
+                         actasmpattern:=actasmpattern + c;
+                         c:=current_scanner.asmgetchar;
+                       end;
+                     end;
+
+
                     { allow spaces }
                     while (c in [' ',#9]) do
                       c:=current_scanner.asmgetchar;
@@ -475,15 +489,44 @@ Unit Rax86int;
                                         (actasmpattern[2] in ['1'..'7']) then
                                       begin
                                         actasmtoken := AS_VOPMASK;
-                                        exit;
                                       end;
                          AS_LOPZEROMASK:
                                       if (actasmpattern = 'z') or
                                          (actasmpattern = 'Z') then
                                       begin
                                         actasmtoken := AS_VOPZEROMASK;
-                                        exit;
                                       end;
+                             AS_LOPBCST:
+                                      begin
+                                        actasmpattern_origcase:=actasmpattern;
+                                        uppervar(actasmpattern);
+
+                                        if (actasmpattern = '1TO2') then actasmtoken := AS_OPBCST1TO2
+                                         else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4
+                                         else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8
+                                         else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16
+                                         else actasmpattern := actasmpattern_origcase;
+                                      end;
+                             AS_LOPSAE:
+                                      begin
+                                        actasmpattern_origcase:=actasmpattern;
+                                        uppervar(actasmpattern);
+
+                                        if (actasmpattern = 'SAE') then actasmtoken := AS_OPSAE
+                                         else actasmpattern := actasmpattern_origcase;
+                                      end;
+                              AS_LOPER:
+                                      begin
+                                        actasmpattern_origcase:=actasmpattern;
+                                        uppervar(actasmpattern);
+
+                                        if (actasmpattern = 'RD-SAE') then actasmtoken := AS_OPRDSAE
+                                         else if (actasmpattern = 'RN-SAE') then actasmtoken := AS_OPRNSAE
+                                         else if (actasmpattern = 'RU-SAE') then actasmtoken := AS_OPRUSAE
+                                         else if (actasmpattern = 'RZ-SAE') then actasmtoken := AS_OPRZSAE
+                                         else actasmpattern := actasmpattern_origcase;
+                                      end
+
                                  else ; // is completely comment =>> nothing todo
 
                        end;
@@ -764,52 +807,91 @@ Unit Rax86int;
 
              '0'..'9':
                begin
-                 actasmpattern:=c;
-                 c:=current_scanner.asmgetchar;
-                 { Get the possible characters }
-                 while c in ['0'..'9','A'..'F','a'..'f'] do
-                  begin
-                    actasmpattern:=actasmpattern + c;
+                 if prevasmtoken = AS_LOPBCST then
+                 begin
+                   actasmpattern:=c;
+                   c:=current_scanner.asmgetchar;
+                   { Get the possible characters }
+                   while c in ['1','2','4','6','8','t','T','o','O'] do
+                    begin
+                      actasmpattern:=actasmpattern + c;
+                      c:=current_scanner.asmgetchar;
+                    end;
+
+                   while (c in [' ',#9]) do
                     c:=current_scanner.asmgetchar;
-                  end;
-                 { Get ending character }
-                 actasmpattern_origcase:=actasmpattern;
-                 uppervar(actasmpattern);
-                 c:=upcase(c);
-                 { possibly a binary number. }
-                 if (actasmpattern[length(actasmpattern)] = 'B') and (c <> 'H') then
-                  Begin
-                    { Delete the last binary specifier }
-                    delete(actasmpattern,length(actasmpattern),1);
-                    actasmpattern:=tostr(ParseVal(actasmpattern,2));
-                    actasmtoken:=AS_INTNUM;
+
+                   if c = '}' then
+                    begin
+                      actasmpattern_origcase:=actasmpattern;
+                      uppervar(actasmpattern);
+
+                      if (actasmpattern = '1TO2') then actasmtoken := AS_OPBCST1TO2
+                       else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4
+                       else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8
+                       else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16
+                       else actasmpattern := actasmpattern_origcase;
+                       c:=current_scanner.asmgetchar;
+                    end
+                    else
+                    begin
+                      if c = '{' then current_scanner.inc_comment_level;
+                      current_scanner.skipcomment(false); // is comment
+                    end;
+
+                    actasmpattern := '';
                     exit;
-                  end
+                 end
                  else
-                  Begin
-                    case c of
-                      'O' :
-                        Begin
-                          actasmpattern:=tostr(ParseVal(actasmpattern,8));
-                          actasmtoken:=AS_INTNUM;
-                          c:=current_scanner.asmgetchar;
-                          exit;
-                        end;
-                      'H' :
-                        Begin
-                          actasmpattern:=tostr(ParseVal(actasmpattern,16));
-                          actasmtoken:=AS_INTNUM;
-                          c:=current_scanner.asmgetchar;
-                          exit;
-                        end;
-                      else { must be an integer number }
-                        begin
-                          actasmpattern:=tostr(ParseVal(actasmpattern,10));
-                          actasmtoken:=AS_INTNUM;
-                          exit;
-                        end;
+                 begin
+
+                   actasmpattern:=c;
+                   c:=current_scanner.asmgetchar;
+                   { Get the possible characters }
+                   while c in ['0'..'9','A'..'F','a'..'f'] do
+                    begin
+                      actasmpattern:=actasmpattern + c;
+                      c:=current_scanner.asmgetchar;
                     end;
-                  end;
+                   { Get ending character }
+                   actasmpattern_origcase:=actasmpattern;
+                   uppervar(actasmpattern);
+                   c:=upcase(c);
+                   { possibly a binary number. }
+                   if (actasmpattern[length(actasmpattern)] = 'B') and (c <> 'H') then
+                    Begin
+                      { Delete the last binary specifier }
+                      delete(actasmpattern,length(actasmpattern),1);
+                      actasmpattern:=tostr(ParseVal(actasmpattern,2));
+                      actasmtoken:=AS_INTNUM;
+                      exit;
+                    end
+                   else
+                    Begin
+                      case c of
+                        'O' :
+                          Begin
+                            actasmpattern:=tostr(ParseVal(actasmpattern,8));
+                            actasmtoken:=AS_INTNUM;
+                            c:=current_scanner.asmgetchar;
+                            exit;
+                          end;
+                        'H' :
+                          Begin
+                            actasmpattern:=tostr(ParseVal(actasmpattern,16));
+                            actasmtoken:=AS_INTNUM;
+                            c:=current_scanner.asmgetchar;
+                            exit;
+                          end;
+                        else { must be an integer number }
+                          begin
+                            actasmpattern:=tostr(ParseVal(actasmpattern,10));
+                            actasmtoken:=AS_INTNUM;
+                            exit;
+                          end;
+                      end;
+                   end;
+                 end;
                end;
 
              #13,#10:
@@ -850,13 +932,28 @@ Unit Rax86int;
                              actasmtoken := AS_LOPZEROMASK;
                              exit;
                            end;
+
+                      '1': begin
+                             actasmtoken := AS_LOPBCST;
+                             exit;
+                           end;
+                      's',
+                      'S': begin
+                             actasmtoken := AS_LOPSAE;
+                             exit;
+                           end;
+                      'r',
+                      'R': begin
+                             actasmtoken := AS_LOPER;
+                             exit;
+                           end;
                       else begin
                              current_scanner.skipcomment(false);
                            end;
                     end;
                   end;
 
-                 GetToken;
+                 GetToken(check_operand_extention);
                end;
 
               else
@@ -884,12 +981,13 @@ Unit Rax86int;
   //  //if oper.reg
   //end;
 
-  procedure tx86intreader.consume_voperand_ext(aop: tx86operand);
+  procedure tx86intreader.consume_voperand_ext(aop: tx86operand; aConsumeVOpExt: boolean);
   var
     kreg: tregister;
   begin
-    Consume(actasmtoken);
-    if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK] then
+    Consume(actasmtoken, true);
+    if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK, AS_OPBCST1TO2, AS_OPBCST1TO4, AS_OPBCST1TO8, AS_OPBCST1TO16,
+                       AS_OPSAE,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE] then
     begin
       case actasmtoken of
             AS_VOPMASK: begin
@@ -898,13 +996,35 @@ Unit Rax86int;
                              (kreg <= NR_K7) then
                           begin
                             aop.vopext := aop.vopext or (tregisterrec(kreg).supreg  and $07); //TG TODO check
-                            aop.vopext := aop.vopext or OTVE_VECTORMASK_WRITEMASK;
+                            aop.vopext := aop.vopext or OTVE_VECTOR_WRITEMASK;
                           end;
                         end;
-        AS_VOPZEROMASK: aop.vopext := aop.vopext or OTVE_VECTORMASK_ZERO;
+        AS_VOPZEROMASK: aop.vopext := aop.vopext or OTVE_VECTOR_ZERO;
+         AS_OPBCST1TO2: begin
+                          aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST2;
+                          aop.vbcst  := 2;
+                        end;
+         AS_OPBCST1TO4: begin
+                          aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST4;
+                          aop.vbcst  := 4;
+                        end;
+         AS_OPBCST1TO8: begin
+                          aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST8;
+                          aop.vbcst  := 8;
+                        end;
+        AS_OPBCST1TO16: begin
+                          aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST16;
+                          aop.vbcst  := 16;
+                        end;
+              AS_OPSAE: aop.vopext := aop.vopext or OTVE_VECTOR_SAE;
+            AS_OPRNSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RNSAE;
+            AS_OPRDSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RDSAE;
+            AS_OPRUSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RUSAE;
+            AS_OPRZSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RZSAE;
       end;
 
-      Consume(actasmtoken, true);
+      if aConsumeVOpExt then
+       Consume(actasmtoken, true);
     end;
   end;
 
@@ -2050,7 +2170,7 @@ Unit Rax86int;
 
                 Consume(AS_RBRACKET, true);
                 //TG TODO check
-                while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] do
+                while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK,AS_LOPBCST] do
                 begin
                   consume_voperand_ext(oper);
                 end;
@@ -2404,7 +2524,15 @@ Unit Rax86int;
                     { is it a normal variable ? }
                      Begin
                        expr:=actasmpattern;
-                       Consume(AS_ID);
+                       Consume(AS_ID, true);
+
+                       //TG TODO check
+                       while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK,AS_LOPBCST] do
+                       begin
+                         consume_voperand_ext(oper);
+                       end;
+
+
                        { typecasting? }
                        if SearchType(expr,l) then
                         begin
@@ -2469,7 +2597,7 @@ Unit Rax86int;
                 //TG TODO check
                 if (getregtype(tempreg) = R_MMREGISTER) then
                  begin
-                  while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK] do
+                  while actasmtoken in [AS_LOPMASK,AS_LOPZEROMASK, AS_LOPSAE, AS_LOPER] do
                   begin
                     consume_voperand_ext(oper);
                   end;
@@ -2728,7 +2856,7 @@ Unit Rax86int;
                   Message(asmr_e_too_many_operands)
                 else
                   Dec(operandnum);
-                Consume(AS_COMMA);
+                Consume(AS_COMMA, true); //TG CHECK
               end;
 
             {Far constant, i.e. jmp $0000:$11111111.}
@@ -2768,6 +2896,21 @@ Unit Rax86int;
                  end;
                 BuildOperand(instr.Operands[operandnum] as tx86operand,false);
               end;
+            AS_LOPSAE,
+            AS_LOPER:
+              if operandnum < max_operands then
+               begin
+                 consume_voperand_ext(instr.Operands[operandnum + 1] as tx86operand, false);
+                 if actasmtoken in [AS_OPSAE,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE] then
+                  begin
+                    consume(actasmtoken);
+                    // ignore operand
+                    if actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA] then inc(operandnum)
+                     else Message(asmr_e_syntax_error);
+                  end
+                   else Message(asmr_e_syntax_error);
+               end
+                else Message(asmr_e_syntax_error);
             else
               BuildOperand(instr.Operands[operandnum] as tx86operand,false);
           end; { end case }

+ 38 - 16
compiler/x86/x86ins.dat

@@ -3589,8 +3589,9 @@ void                   \326\1\xA7                                    X86_64
 
 [VADDPD]
 (Ch_Wop3, Ch_Rop2, Ch_Rop1)
-xmmreg,xmmreg,xmmrm                  \361\362\370\1\x58\75\120            AVX,SANDYBRIDGE
-ymmreg,ymmreg,ymmrm                  \361\362\364\370\1\x58\75\120        AVX,SANDYBRIDGE
+xmmreg_mz,xmmreg,xmmrm               \350\352\361\362\370\1\x58\75\120            AVX,SANDYBRIDGE
+ymmreg_mz,ymmreg,ymmrm               \350\352\361\362\364\370\1\x58\75\120        AVX,SANDYBRIDGE
+zmmreg_mz,zmmreg,zmmrm_er            \350\351\352\361\370\1\x58\75\120        AVX,SANDYBRIDGE
 
 [VADDPS]
 (Ch_Wop3, Ch_Rop2, Ch_Rop1)
@@ -3692,10 +3693,10 @@ ymmreg,xmmreg                        \361\362\364\371\1\x19\110           AVX2
 
 [VBROADCASTSS]
 (Ch_All)
-ymmreg,mem32                         \361\362\364\371\1\x18\110           AVX,SANDYBRIDGE
-xmmreg,mem32                         \361\362\371\1\x18\110               AVX,SANDYBRIDGE
-ymmreg,xmmreg                        \361\362\364\371\1\x18\110           AVX2
-xmmreg,xmmreg                        \361\362\371\1\x18\110               AVX2
+ymmreg_mz,mem32                      \350\361\362\364\371\1\x18\110       AVX,SANDYBRIDGE
+xmmreg_mz,mem32                      \350\361\362\371\1\x18\110           AVX,SANDYBRIDGE
+ymmreg_mz,xmmreg                     \350\361\362\364\371\1\x18\110       AVX2
+xmmreg_mz,xmmreg                     \350\361\362\371\1\x18\110           AVX2
 
 [VCMPEQPS]
 (Ch_All)
@@ -4028,10 +4029,12 @@ ymmreg,ymmreg,ymmrm             \361\362\364\370\1\xC2\75\120\1\x1F     AVX,SAND
 xmmreg,xmmreg,xmmrm,imm8             \361\362\370\1\xC2\75\120\27         AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm,imm8             \361\362\364\370\1\xC2\75\120\27     AVX,SANDYBRIDGE
 
+
 [VCMPPS]
 (Ch_All)
 xmmreg,xmmreg,xmmrm,imm8             \362\370\1\xC2\75\120\27             AVX,SANDYBRIDGE
 ymmreg,ymmreg,ymmrm,imm8             \362\364\370\1\xC2\75\120\27         AVX,SANDYBRIDGE
+kreg,xmmreg,xmmrm,imm8               \350\370\1\xC2\75\120\27             AVX512
 
 [VCMPSD]
 (Ch_All)
@@ -4051,7 +4054,8 @@ xmmreg,xmmreg                        \361\362\370\1\x2F\110               AVX,SA
 [VCOMISS]
 (Ch_Rop1, Ch_Rop2, Ch_WFlags)
 xmmreg,mem32                         \362\370\1\x2F\110                   AVX,SANDYBRIDGE
-xmmreg,xmmreg                        \362\370\1\x2F\110                   AVX,SANDYBRIDGE
+xmmreg,xmmreg_sae                    \350\362\370\1\x2F\110               AVX,SANDYBRIDGE
+
 
 [VCVTDQ2PD]
 (Ch_Wop2, Ch_Rop1)
@@ -4293,8 +4297,8 @@ ymmrm,ymmreg                         \362\364\370\1\x29\101               AVX,SA
 
 [VMOVD]
 (Ch_Wop2, Ch_Rop1)
-xmmreg,rm32                          \361\362\370\1\x6E\110               AVX,SANDYBRIDGE
-rm32,xmmreg                          \361\362\370\1\x7E\101               AVX,SANDYBRIDGE
+xmmreg,rm32                          \350\361\362\370\1\x6E\110           AVX,SANDYBRIDGE
+rm32,xmmreg                          \350\361\362\370\1\x7E\101           AVX,SANDYBRIDGE
 
 [VMOVDDUP]
 (Ch_Wop2, Ch_Rop1)
@@ -4380,12 +4384,12 @@ mem128,xmmreg                        \362\370\1\x2B\101                   AVX,SA
 
 [VMOVQ]
 (Ch_Wop2, Ch_Rop1)
-xmmreg,xmmreg                        \362\333\370\1\x7E\110               AVX,SANDYBRIDGE
-xmmreg,mem64                         \362\333\370\1\x7E\110               AVX,SANDYBRIDGE
-xmmreg,xmmreg                        \362\361\370\1\xD6\101               AVX,SANDYBRIDGE
-mem64,xmmreg                         \362\361\370\1\xD6\101               AVX,SANDYBRIDGE
-rm64,xmmreg                          \362\361\363\370\1\x7E\101           AVX,SANDYBRIDGE,X86_64
-xmmreg,rm64                          \362\361\363\370\1\x6E\110           AVX,SANDYBRIDGE,X86_64
+xmmreg,xmmreg                        \350\362\333\370\1\x7E\110           AVX,SANDYBRIDGE
+xmmreg,mem64                         \350\362\333\370\1\x7E\110           AVX,SANDYBRIDGE
+xmmreg,xmmreg                        \350\362\361\370\1\xD6\101           AVX,SANDYBRIDGE
+mem64,xmmreg                         \350\362\361\370\1\xD6\101           AVX,SANDYBRIDGE
+rm64,xmmreg                          \350\362\361\363\370\1\x7E\101       AVX,SANDYBRIDGE,X86_64
+xmmreg,rm64                          \350\362\361\363\370\1\x6E\110       AVX,SANDYBRIDGE,X86_64
 
 [VMOVSD]
 ; the three ops must be handle by the compiler internally
@@ -5006,11 +5010,14 @@ ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xE1\75\120        AVX2
 [VPSRLD]
 (Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x72\74\212\26         AVX,SANDYBRIDGE
+xmmreg_mz,xmmrm,imm8                  \350\254\361\362\370\1\x72\74\212\26         AVX512
+xmmreg_mz,bmem32,imm8                 \350\254\361\362\370\1\x72\74\212\26         AVX512
 xmmreg,xmmreg,xmmrm                  \361\362\370\1\xD2\75\120            AVX,SANDYBRIDGE
 ymmreg,ymmreg,imm8                   \361\362\364\370\1\x72\74\212\26     AVX2
 ymmreg,ymmreg,xmmrm                  \361\362\364\370\1\xD2\75\120        AVX2
 
 
+
 [VPSRLDQ]
 (Ch_Rop1, Ch_Rop2, Ch_Wop3)
 xmmreg,xmmreg,imm8                   \361\362\370\1\x73\74\213\26         AVX,SANDYBRIDGE
@@ -5370,7 +5377,11 @@ ymmreg,ymmreg,ymmrm,imm8             \361\362\364\372\1\x46\75\120\27     AVX2
 
 [VPERMD]
 (Ch_All)
-ymmregmz,ymmreg,ymmrm                \350\354\361\362\364\371\1\x36\75\120        AVX2
+ymmreg,ymmreg,ymmrm                  \350\361\362\364\371\1\x36\75\120        AVX2
+ymmreg_mz,ymmreg,ymmrm                \350\361\362\364\371\1\x36\75\120      AVX512
+ymmreg_mz,ymmreg,bmem32               \350\361\362\364\371\1\x36\75\120      AVX512
+zmmreg_mz,zmmreg,zmmrm                \350\351\361\362\364\371\1\x36\75\120      AVX512
+zmmreg_mz,zmmreg,bmem32               \350\351\361\362\364\371\1\x36\75\120      AVX512
 
 
 [VPERMPD]
@@ -5877,3 +5888,14 @@ void                  \3\x0F\x01\xD0                                      XSAVE
 (Ch_All)
 mem                   \2\x0F\x0D\202                                      PREFETCHWT1
 
+
+;*******************************************************************************
+;********** AVX 512 - MASKRegister *********************************************
+;*******************************************************************************
+
+[KANDB]
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
+kreg,kreg,kreg                  \361\362\364\370\1\x41\75\120             AVX512
+
+
+

+ 8 - 8
compiler/x86/x86reg.dat

@@ -244,14 +244,14 @@ NR_ZMM29,$040E001D,zmm29,%zmm29,zmm29,zmm29,-1,-1,80,OT_ZMMREG,5,64
 NR_ZMM30,$040E001E,zmm30,%zmm30,zmm30,zmm30,-1,-1,81,OT_ZMMREG,6,64
 NR_ZMM31,$040E001F,zmm31,%zmm31,zmm31,zmm31,-1,-1,82,OT_ZMMREG,7,64
 
-NR_K0,$06000000,k0,%k0,k0,k0,-1,118,118,OT_REG_VECTORMASK,0
-NR_K1,$06000001,k1,%k1,k1,k1,-1,119,119,OT_REG_VECTORMASK,1
-NR_K2,$06000002,k2,%k2,k2,k2,-1,120,120,OT_REG_VECTORMASK,2
-NR_K3,$06000003,k3,%k3,k3,k3,-1,121,121,OT_REG_VECTORMASK,3
-NR_K4,$06000004,k4,%k4,k4,k4,-1,122,122,OT_REG_VECTORMASK,4
-NR_K5,$06000005,k5,%k5,k5,k5,-1,123,123,OT_REG_VECTORMASK,5
-NR_K6,$06000006,k6,%k6,k6,k6,-1,124,124,OT_REG_VECTORMASK,6
-NR_K7,$06000007,k7,%k7,k7,k7,-1,125,125,OT_REG_VECTORMASK,7
+NR_K0,$06000000,k0,%k0,k0,k0,-1,118,118,OT_KREG,0
+NR_K1,$06000001,k1,%k1,k1,k1,-1,119,119,OT_KREG,1
+NR_K2,$06000002,k2,%k2,k2,k2,-1,120,120,OT_KREG,2
+NR_K3,$06000003,k3,%k3,k3,k3,-1,121,121,OT_KREG,3
+NR_K4,$06000004,k4,%k4,k4,k4,-1,122,122,OT_KREG,4
+NR_K5,$06000005,k5,%k5,k5,k5,-1,123,123,OT_KREG,5
+NR_K6,$06000006,k6,%k6,k6,k6,-1,124,124,OT_KREG,6
+NR_K7,$06000007,k7,%k7,k7,k7,-1,125,125,OT_KREG,7
 
 ; NR_BND0,$07000000,bnd0,%bnd0,bnd0,bnd0,-1,126,126,OT_REG_BND,0
 ; NR_BND1,$07000001,bnd1,%bnd1,bnd1,bnd1,-1,127,127,OT_REG_BND,1

+ 8 - 8
compiler/x86_64/r8664ot.inc

@@ -215,11 +215,11 @@ OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
 OT_ZMMREG,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK,
-OT_REG_VECTORMASK
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG,
+OT_KREG

+ 2 - 1
compiler/x86_64/x8664ats.inc

@@ -1108,5 +1108,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufINT
+attsufINT,
+attsufNONE
 );

+ 2 - 1
compiler/x86_64/x8664att.inc

@@ -1108,5 +1108,6 @@
 'rdrand',
 'rdseed',
 'xgetbv',
-'prefetchwt1'
+'prefetchwt1',
+'kandb'
 );

+ 2 - 1
compiler/x86_64/x8664int.inc

@@ -1108,5 +1108,6 @@
 'rdrand',
 'rdseed',
 'xgetbv',
-'prefetchwt1'
+'prefetchwt1',
+'kandb'
 );

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-2171;
+2180;

+ 2 - 1
compiler/x86_64/x8664op.inc

@@ -1108,5 +1108,6 @@ A_XTEST,
 A_RDRAND,
 A_RDSEED,
 A_XGETBV,
-A_PREFETCHWT1
+A_PREFETCHWT1,
+A_KANDB
 );

+ 2 - 1
compiler/x86_64/x8664pro.inc

@@ -1108,5 +1108,6 @@
 (Ch: [Ch_Wop1, Ch_WFlags]),
 (Ch: [Ch_Wop1, Ch_WFlags]),
 (Ch: [Ch_WEAX, Ch_WEDX, Ch_RECX]),
-(Ch: [Ch_All])
+(Ch: [Ch_All]),
+(Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1])
 );

+ 81 - 18
compiler/x86_64/x8664tab.inc

@@ -9012,15 +9012,22 @@
   (
     opcode  : A_VADDPD;
     ops     : 3;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
-    code    : #241#242#248#1#88#61#80;
+    optypes : (ot_xmmreg_mz,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#234#241#242#248#1#88#61#80;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VADDPD;
     ops     : 3;
-    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #241#242#244#248#1#88#61#80;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#234#241#242#244#248#1#88#61#80;
+    flags   : [if_avx,if_sandybridge]
+  ),
+  (
+    opcode  : A_VADDPD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_zmmrm_er,ot_none);
+    code    : #232#233#234#241#248#1#88#61#80;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -9271,29 +9278,29 @@
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_ymmreg,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #241#242#244#249#1#24#72;
+    optypes : (ot_ymmreg_mz,ot_memory or ot_bits32,ot_none,ot_none);
+    code    : #232#241#242#244#249#1#24#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #241#242#249#1#24#72;
+    optypes : (ot_xmmreg_mz,ot_memory or ot_bits32,ot_none,ot_none);
+    code    : #232#241#242#249#1#24#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_ymmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#244#249#1#24#72;
+    optypes : (ot_ymmreg_mz,ot_xmmreg,ot_none,ot_none);
+    code    : #232#241#242#244#249#1#24#72;
     flags   : [if_avx2]
   ),
   (
     opcode  : A_VBROADCASTSS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#249#1#24#72;
+    optypes : (ot_xmmreg_mz,ot_xmmreg,ot_none,ot_none);
+    code    : #232#241#242#249#1#24#72;
     flags   : [if_avx2]
   ),
   (
@@ -10220,6 +10227,13 @@
     code    : #242#244#248#1#194#61#80#23;
     flags   : [if_avx,if_sandybridge]
   ),
+  (
+    opcode  : A_VCMPPS;
+    ops     : 4;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_immediate or ot_bits8);
+    code    : #232#248#1#194#61#80#23;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VCMPSD;
     ops     : 4;
@@ -10272,8 +10286,8 @@
   (
     opcode  : A_VCOMISS;
     ops     : 2;
-    optypes : (ot_xmmreg,ot_xmmreg,ot_none,ot_none);
-    code    : #242#248#1#47#72;
+    optypes : (ot_xmmreg,ot_xmmreg_sae,ot_none,ot_none);
+    code    : #232#242#248#1#47#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -11001,14 +11015,14 @@
     opcode  : A_VMOVD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_rm_gpr or ot_bits32,ot_none,ot_none);
-    code    : #241#242#248#1#110#72;
+    code    : #232#241#242#248#1#110#72;
     flags   : [if_avx,if_sandybridge]
   ),
   (
     opcode  : A_VMOVD;
     ops     : 2;
     optypes : (ot_rm_gpr or ot_bits32,ot_xmmreg,ot_none,ot_none);
-    code    : #241#242#248#1#126#65;
+    code    : #232#241#242#248#1#126#65;
     flags   : [if_avx,if_sandybridge]
   ),
   (
@@ -13174,6 +13188,20 @@
     code    : #241#242#248#1#114#60#138#22;
     flags   : [if_avx,if_sandybridge]
   ),
+  (
+    opcode  : A_VPSRLD;
+    ops     : 3;
+    optypes : (ot_xmmreg_mz or ot_signed,ot_xmmrm,ot_immediate or ot_bits8,ot_none);
+    code    : #232#172#241#242#248#1#114#60#138#22;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPSRLD;
+    ops     : 3;
+    optypes : (ot_xmmreg_mz or ot_signed,ot_bmem32,ot_immediate or ot_bits8,ot_none);
+    code    : #232#172#241#242#248#1#114#60#138#22;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VPSRLD;
     ops     : 3;
@@ -14178,10 +14206,38 @@
   (
     opcode  : A_VPERMD;
     ops     : 3;
-    optypes : (ot_ymmregmz,ot_ymmreg,ot_ymmrm,ot_none);
-    code    : #232#236#241#242#244#249#1#54#61#80;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
     flags   : [if_avx2]
   ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_ymmreg_mz,ot_ymmreg,ot_bmem32,ot_none);
+    code    : #232#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
+  (
+    opcode  : A_VPERMD;
+    ops     : 3;
+    optypes : (ot_zmmreg_mz,ot_zmmreg,ot_bmem32,ot_none);
+    code    : #232#233#241#242#244#249#1#54#61#80;
+    flags   : [if_avx512]
+  ),
   (
     opcode  : A_VPERMPD;
     ops     : 3;
@@ -15196,5 +15252,12 @@
     optypes : (ot_memory,ot_none,ot_none,ot_none);
     code    : #2#15#13#130;
     flags   : [if_prefetchwt1]
+  ),
+  (
+    opcode  : A_KANDB;
+    ops     : 3;
+    optypes : (ot_kreg,ot_kreg,ot_kreg,ot_none);
+    code    : #241#242#244#248#1#65#61#80;
+    flags   : [if_avx512]
   )
 );

Niektoré súbory nie sú zobrazené, pretože je v týchto rozdielových dátach zmenené mnoho súborov