Browse Source

Add x86 SM4 and VP2INTERSECT instructions; retag the SM3 instructions as SM3NI,AVX.

Margers 3 months ago
parent
commit
c8cb48ed73

+ 5 - 1
compiler/i386/i386att.inc

@@ -1565,10 +1565,14 @@
 'vsm3rnds2',
 'vsm3msg1',
 'vsm3msg2',
+'vsm4key4',
+'vsm4rnds4',
 'vgf2p8affineinvqb',
 'vgf2p8affineqb',
 'vgf2p8mulb',
 'gf2p8affineinvqb',
 'gf2p8affineqb',
-'gf2p8mulb'
+'gf2p8mulb',
+'vp2intersectd',
+'vp2intersectq'
 );

+ 4 - 0
compiler/i386/i386atts.inc

@@ -1570,5 +1570,9 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 5 - 1
compiler/i386/i386int.inc

@@ -1565,10 +1565,14 @@
 'vsm3rnds2',
 'vsm3msg1',
 'vsm3msg2',
+'vsm4key4',
+'vsm4rnds4',
 'vgf2p8affineinvqb',
 'vgf2p8affineqb',
 'vgf2p8mulb',
 'gf2p8affineinvqb',
 'gf2p8affineqb',
-'gf2p8mulb'
+'gf2p8mulb',
+'vp2intersectd',
+'vp2intersectq'
 );

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-5093;
+5111;

+ 5 - 1
compiler/i386/i386op.inc

@@ -1565,10 +1565,14 @@ A_VSHA512MSG2,
 A_VSM3RNDS2,
 A_VSM3MSG1,
 A_VSM3MSG2,
+A_VSM4KEY4,
+A_VSM4RNDS4,
 A_VGF2P8AFFINEINVQB,
 A_VGF2P8AFFINEQB,
 A_VGF2P8MULB,
 A_GF2P8AFFINEINVQB,
 A_GF2P8AFFINEQB,
-A_GF2P8MULB
+A_GF2P8MULB,
+A_VP2INTERSECTD,
+A_VP2INTERSECTQ
 );

+ 5 - 1
compiler/i386/i386prop.inc

@@ -1565,10 +1565,14 @@
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
-(Ch: [Ch_Mop2, Ch_Rop1])
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1])
 );

+ 129 - 3
compiler/i386/i386tab.inc

@@ -35551,21 +35551,63 @@
     ops     : 4;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_immediate or ot_bits8);
     code    : #242#241#250#1#222#61#80#23;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
   ),
   (
     opcode  : A_VSM3MSG1;
     ops     : 3;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #242#249#1#218#61#80;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
   ),
   (
     opcode  : A_VSM3MSG2;
     ops     : 3;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #242#241#249#1#218#61#80;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#242#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#242#244#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#242#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#242#244#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
   ),
   (
     opcode  : A_VGF2P8AFFINEINVQB;
@@ -35650,5 +35692,89 @@
     optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
     code    : #241#3#15#56#207#72;
     flags   : [if_gfni]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_bmem32,ot_none);
+    code    : #232#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_bmem32,ot_none);
+    code    : #232#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_bmem32,ot_none);
+    code    : #232#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#234#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#234#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#234#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
   )
 );

+ 5 - 1
compiler/i8086/i8086att.inc

@@ -1579,10 +1579,14 @@
 'vsm3rnds2',
 'vsm3msg1',
 'vsm3msg2',
+'vsm4key4',
+'vsm4rnds4',
 'vgf2p8affineinvqb',
 'vgf2p8affineqb',
 'vgf2p8mulb',
 'gf2p8affineinvqb',
 'gf2p8affineqb',
-'gf2p8mulb'
+'gf2p8mulb',
+'vp2intersectd',
+'vp2intersectq'
 );

+ 4 - 0
compiler/i8086/i8086atts.inc

@@ -1584,5 +1584,9 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 5 - 1
compiler/i8086/i8086int.inc

@@ -1579,10 +1579,14 @@
 'vsm3rnds2',
 'vsm3msg1',
 'vsm3msg2',
+'vsm4key4',
+'vsm4rnds4',
 'vgf2p8affineinvqb',
 'vgf2p8affineqb',
 'vgf2p8mulb',
 'gf2p8affineinvqb',
 'gf2p8affineqb',
-'gf2p8mulb'
+'gf2p8mulb',
+'vp2intersectd',
+'vp2intersectq'
 );

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-5125;
+5143;

+ 5 - 1
compiler/i8086/i8086op.inc

@@ -1579,10 +1579,14 @@ A_VSHA512MSG2,
 A_VSM3RNDS2,
 A_VSM3MSG1,
 A_VSM3MSG2,
+A_VSM4KEY4,
+A_VSM4RNDS4,
 A_VGF2P8AFFINEINVQB,
 A_VGF2P8AFFINEQB,
 A_VGF2P8MULB,
 A_GF2P8AFFINEINVQB,
 A_GF2P8AFFINEQB,
-A_GF2P8MULB
+A_GF2P8MULB,
+A_VP2INTERSECTD,
+A_VP2INTERSECTQ
 );

+ 5 - 1
compiler/i8086/i8086prop.inc

@@ -1579,10 +1579,14 @@
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
-(Ch: [Ch_Mop2, Ch_Rop1])
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1])
 );

+ 129 - 3
compiler/i8086/i8086tab.inc

@@ -35775,21 +35775,63 @@
     ops     : 4;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_immediate or ot_bits8);
     code    : #242#241#250#1#222#61#80#23;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
   ),
   (
     opcode  : A_VSM3MSG1;
     ops     : 3;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #242#249#1#218#61#80;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
   ),
   (
     opcode  : A_VSM3MSG2;
     ops     : 3;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #242#241#249#1#218#61#80;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#242#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#242#244#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#242#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#242#244#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
   ),
   (
     opcode  : A_VGF2P8AFFINEINVQB;
@@ -35874,5 +35916,89 @@
     optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
     code    : #241#3#15#56#207#72;
     flags   : [if_gfni]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_bmem32,ot_none);
+    code    : #232#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_bmem32,ot_none);
+    code    : #232#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_bmem32,ot_none);
+    code    : #232#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#234#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#234#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#234#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
   )
 );

+ 44 - 3
compiler/x86/x86ins.dat

@@ -10443,15 +10443,31 @@ ymmreg,ymmreg                             \362\364\334\371\1\xCD\110         SHA
 
 [VSM3RNDS2]
 (Ch_Wop4, Ch_Rop3, Ch_Rop2)
-xmmreg,xmmreg,xmmrm,imm8                  \362\361\372\1\xDE\75\120\27       SM3_hash
+xmmreg,xmmreg,xmmrm,imm8                  \362\361\372\1\xDE\75\120\27       SM3NI,AVX
 
 [VSM3MSG1]
 (Ch_Wop3, Ch_Rop2, Ch_Rop1)
-xmmreg,xmmreg,xmmrm                       \362\371\1\xDA\75\120              SM3_hash
+xmmreg,xmmreg,xmmrm                       \362\371\1\xDA\75\120              SM3NI,AVX
 
 [VSM3MSG2]
 (Ch_Wop3, Ch_Rop2, Ch_Rop1)
-xmmreg,xmmreg,xmmrm                       \362\361\371\1\xDA\75\120          SM3_hash
+xmmreg,xmmreg,xmmrm                       \362\361\371\1\xDA\75\120          SM3NI,AVX
+
+;*******************************************************************************
+;********* SM4 *****************************************************************
+;*******************************************************************************
+
+[VSM4KEY4]
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                       \350\362\333\371\1\xDA\75\120      SM4NI,AVX512,TFVM ; AVX10.2
+ymmreg,ymmreg,ymmrm                       \350\362\364\333\371\1\xDA\75\120  SM4NI,AVX512,TFVM ; AVX10.2
+zmmreg,zmmreg,zmmrm                       \350\351\333\371\1\xDA\75\120      SM4NI,AVX512,TFVM ; AVX10.2
+
+[VSM4RNDS4]
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                       \350\362\334\371\1\xDA\75\120      SM4NI,AVX512,TFVM ; AVX10.2
+ymmreg,ymmreg,ymmrm                       \350\362\364\334\371\1\xDA\75\120  SM4NI,AVX512,TFVM ; AVX10.2
+zmmreg,zmmreg,zmmrm                       \350\351\334\371\1\xDA\75\120      SM4NI,AVX512,TFVM ; AVX10.2
 
 ;*******************************************************************************
 ;********* GFNI ****************************************************************
@@ -10486,3 +10502,28 @@ xmmreg,xmmrm,imm                          \361\3\x0F\x3A\xCE\110\26
 [GF2P8MULB]
 (Ch_Mop2, Ch_Rop1)
 xmmreg,xmmrm                              \361\3\x0F\x38\xCF\110                 GFNI
+
+;*******************************************************************************
+;********* VP2INTERSECT ********************************************************
+;*******************************************************************************
+
+[VP2INTERSECTD]
+;-- Writes a mask register pair: kreg with bit 0 of its number cleared, plus the next register (e.g. k3 -> k2 and k3)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+kreg,xmmreg,xmmrm            \350\334\371\1\x68\75\120               AVX512,TFV
+kreg,xmmreg,bmem32           \350\334\371\1\x68\75\120               AVX512,TFV
+kreg,ymmreg,ymmrm            \350\334\364\371\1\x68\75\120           AVX512,TFV
+kreg,ymmreg,bmem32           \350\334\364\371\1\x68\75\120           AVX512,TFV
+kreg,zmmreg,zmmrm            \350\334\351\371\1\x68\75\120           AVX512,TFV
+kreg,zmmreg,bmem32           \350\334\351\371\1\x68\75\120           AVX512,TFV
+
+[VP2INTERSECTQ]
+;-- Writes a mask register pair: kreg with bit 0 of its number cleared, plus the next register (e.g. k3 -> k2 and k3)
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+kreg,xmmreg,xmmrm            \350\352\334\371\1\x68\75\120           AVX512,TFV
+kreg,xmmreg,bmem64           \350\352\334\371\1\x68\75\120           AVX512,TFV
+kreg,ymmreg,ymmrm            \350\352\334\364\371\1\x68\75\120       AVX512,TFV
+kreg,ymmreg,bmem64           \350\352\334\364\371\1\x68\75\120       AVX512,TFV
+kreg,zmmreg,zmmrm            \350\352\334\351\371\1\x68\75\120       AVX512,TFV
+kreg,zmmreg,bmem64           \350\352\334\351\371\1\x68\75\120       AVX512,TFV
+

+ 4 - 0
compiler/x86_64/x8664ats.inc

@@ -1566,5 +1566,9 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 5 - 1
compiler/x86_64/x8664att.inc

@@ -1561,10 +1561,14 @@
 'vsm3rnds2',
 'vsm3msg1',
 'vsm3msg2',
+'vsm4key4',
+'vsm4rnds4',
 'vgf2p8affineinvqb',
 'vgf2p8affineqb',
 'vgf2p8mulb',
 'gf2p8affineinvqb',
 'gf2p8affineqb',
-'gf2p8mulb'
+'gf2p8mulb',
+'vp2intersectd',
+'vp2intersectq'
 );

+ 5 - 1
compiler/x86_64/x8664int.inc

@@ -1561,10 +1561,14 @@
 'vsm3rnds2',
 'vsm3msg1',
 'vsm3msg2',
+'vsm4key4',
+'vsm4rnds4',
 'vgf2p8affineinvqb',
 'vgf2p8affineqb',
 'vgf2p8mulb',
 'gf2p8affineinvqb',
 'gf2p8affineqb',
-'gf2p8mulb'
+'gf2p8mulb',
+'vp2intersectd',
+'vp2intersectq'
 );

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-5157;
+5175;

+ 5 - 1
compiler/x86_64/x8664op.inc

@@ -1561,10 +1561,14 @@ A_VSHA512MSG2,
 A_VSM3RNDS2,
 A_VSM3MSG1,
 A_VSM3MSG2,
+A_VSM4KEY4,
+A_VSM4RNDS4,
 A_VGF2P8AFFINEINVQB,
 A_VGF2P8AFFINEQB,
 A_VGF2P8MULB,
 A_GF2P8AFFINEINVQB,
 A_GF2P8AFFINEQB,
-A_GF2P8MULB
+A_GF2P8MULB,
+A_VP2INTERSECTD,
+A_VP2INTERSECTQ
 );

+ 5 - 1
compiler/x86_64/x8664pro.inc

@@ -1561,10 +1561,14 @@
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
-(Ch: [Ch_Mop2, Ch_Rop1])
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1])
 );

+ 129 - 3
compiler/x86_64/x8664tab.inc

@@ -35999,21 +35999,63 @@
     ops     : 4;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_immediate or ot_bits8);
     code    : #242#241#250#1#222#61#80#23;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
   ),
   (
     opcode  : A_VSM3MSG1;
     ops     : 3;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #242#249#1#218#61#80;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
   ),
   (
     opcode  : A_VSM3MSG2;
     ops     : 3;
     optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
     code    : #242#241#249#1#218#61#80;
-    flags   : [if_sm3_hash]
+    flags   : [if_sm3ni,if_avx]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#242#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#242#244#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4KEY4;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#219#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#242#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#242#244#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
+  ),
+  (
+    opcode  : A_VSM4RNDS4;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#220#249#1#218#61#80;
+    flags   : [if_sm4ni,if_avx512,if_tfvm]
   ),
   (
     opcode  : A_VGF2P8AFFINEINVQB;
@@ -36098,5 +36140,89 @@
     optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
     code    : #241#3#15#56#207#72;
     flags   : [if_gfni]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_bmem32,ot_none);
+    code    : #232#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_bmem32,ot_none);
+    code    : #232#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTD;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_bmem32,ot_none);
+    code    : #232#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#234#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_xmmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#234#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_ymmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#244#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#234#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VP2INTERSECTQ;
+    ops     : 3;
+    optypes : (ot_kreg,ot_zmmreg,ot_bmem64,ot_none);
+    code    : #232#234#220#233#249#1#104#61#80;
+    flags   : [if_avx512,if_tfv]
   )
 );