Browse Source

* patch by Marģers
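- Rename the 3DNow! instruction PMULHRWA --> PMULHRW (fixes a long-standing typo in the mnemonic).
- Add pclmullqlqdq, pclmulhqlqdq, pclmullqhqdq, pclmulhqhqdq and their V-prefixed forms vpclmullqlqdq, vpclmulhqlqdq, vpclmullqhqdq, vpclmulhqhqdq.
- Fix the opcode byte of SHA1MSG2 (0x0C --> 0xCA).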

florian 1 year ago
parent
commit
4eb8f8e565

+ 9 - 1
compiler/i386/i386att.inc

@@ -254,7 +254,7 @@
 'pmaddwd',
 'pmagw',
 'pmulhriw',
-'pmulhrwa',
+'pmulhrw',
 'pmulhrwc',
 'pmulhw',
 'pmullw',
@@ -686,6 +686,10 @@
 'rdtscp',
 'movbe',
 'pclmulqdq',
+'pclmullqlqdq',
+'pclmulhqlqdq',
+'pclmullqhqdq',
+'pclmulhqhqdq',
 'vaddpd',
 'vaddps',
 'vaddsd',
@@ -879,6 +883,10 @@
 'vpblendvb',
 'vpblendw',
 'vpclmulqdq',
+'vpclmullqlqdq',
+'vpclmulhqlqdq',
+'vpclmullqhqdq',
+'vpclmulhqhqdq',
 'vpcmpeqb',
 'vpcmpeqd',
 'vpcmpeqq',

+ 8 - 0
compiler/i386/i386atts.inc

@@ -781,6 +781,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufMMX,
 attsufMMX,
 attsufNONE,
@@ -1025,6 +1029,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,

+ 9 - 1
compiler/i386/i386int.inc

@@ -254,7 +254,7 @@
 'pmaddwd',
 'pmagw',
 'pmulhriw',
-'pmulhrwa',
+'pmulhrw',
 'pmulhrwc',
 'pmulhw',
 'pmullw',
@@ -686,6 +686,10 @@
 'rdtscp',
 'movbe',
 'pclmulqdq',
+'pclmullqlqdq',
+'pclmulhqlqdq',
+'pclmullqhqdq',
+'pclmulhqhqdq',
 'vaddpd',
 'vaddps',
 'vaddsd',
@@ -879,6 +883,10 @@
 'vpblendvb',
 'vpblendw',
 'vpclmulqdq',
+'vpclmullqlqdq',
+'vpclmulhqlqdq',
+'vpclmullqhqdq',
+'vpclmulhqhqdq',
 'vpcmpeqb',
 'vpcmpeqd',
 'vpcmpeqq',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-4171;
+4187;

+ 9 - 1
compiler/i386/i386op.inc

@@ -254,7 +254,7 @@ A_PMACHRIW,
 A_PMADDWD,
 A_PMAGW,
 A_PMULHRIW,
-A_PMULHRWA,
+A_PMULHRW,
 A_PMULHRWC,
 A_PMULHW,
 A_PMULLW,
@@ -686,6 +686,10 @@ A_AESKEYGENASSIST,
 A_RDTSCP,
 A_MOVBE,
 A_PCLMULQDQ,
+A_PCLMULLQLQDQ,
+A_PCLMULHQLQDQ,
+A_PCLMULLQHQDQ,
+A_PCLMULHQHQDQ,
 A_VADDPD,
 A_VADDPS,
 A_VADDSD,
@@ -879,6 +883,10 @@ A_VPAVGW,
 A_VPBLENDVB,
 A_VPBLENDW,
 A_VPCLMULQDQ,
+A_VPCLMULLQLQDQ,
+A_VPCLMULHQLQDQ,
+A_VPCLMULLQHQDQ,
+A_VPCLMULHQHQDQ,
 A_VPCMPEQB,
 A_VPCMPEQD,
 A_VPCMPEQQ,

+ 9 - 1
compiler/i386/i386prop.inc

@@ -685,7 +685,11 @@
 (Ch: [Ch_Wop3, Ch_Rop2]),
 (Ch: [Ch_WEAX, Ch_WEDX]),
 (Ch: [Ch_Rop1, Ch_Wop2]),
-(Ch: [Ch_Mop3, Ch_Rop2]),
+(Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
@@ -891,6 +895,10 @@
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),

+ 114 - 2
compiler/i386/i386tab.inc

@@ -3865,7 +3865,7 @@
     flags   : [if_pent,if_mmx,if_sm,if_cyrix]
   ),
   (
-    opcode  : A_PMULHRWA;
+    opcode  : A_PMULHRW;
     ops     : 2;
     optypes : (ot_mmxreg,ot_mmxrm,ot_none,ot_none);
     code    : #2#15#15#72#1#183;
@@ -8729,6 +8729,34 @@
     code    : #241#3#15#58#68#72#22;
     flags   : [if_clmul,if_sandybridge]
   ),
+  (
+    opcode  : A_PCLMULLQLQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#0;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULHQLQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#1;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULLQHQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#16;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULHQHQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#17;
+    flags   : [if_clmul,if_sandybridge]
+  ),
   (
     opcode  : A_VADDPD;
     ops     : 3;
@@ -13034,6 +13062,90 @@
     code    : #232#233#241#250#1#68#61#80#23;
     flags   : [if_avx512,if_tfv]
   ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#0;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#0;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#0;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#1;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#1;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#1;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#16;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#16;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#16;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#17;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#17;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#17;
+    flags   : [if_avx512,if_tfv]
+  ),
   (
     opcode  : A_VPCMPEQB;
     ops     : 3;
@@ -29173,7 +29285,7 @@
     opcode  : A_SHA1MSG2;
     ops     : 2;
     optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
-    code    : #3#15#56#12#72;
+    code    : #3#15#56#202#72;
     flags   : [if_sha]
   ),
   (

+ 9 - 1
compiler/i8086/i8086att.inc

@@ -254,7 +254,7 @@
 'pmaddwd',
 'pmagw',
 'pmulhriw',
-'pmulhrwa',
+'pmulhrw',
 'pmulhrwc',
 'pmulhw',
 'pmullw',
@@ -686,6 +686,10 @@
 'rdtscp',
 'movbe',
 'pclmulqdq',
+'pclmullqlqdq',
+'pclmulhqlqdq',
+'pclmullqhqdq',
+'pclmulhqhqdq',
 'vaddpd',
 'vaddps',
 'vaddsd',
@@ -879,6 +883,10 @@
 'vpblendvb',
 'vpblendw',
 'vpclmulqdq',
+'vpclmullqlqdq',
+'vpclmulhqlqdq',
+'vpclmullqhqdq',
+'vpclmulhqhqdq',
 'vpcmpeqb',
 'vpcmpeqd',
 'vpcmpeqq',

+ 8 - 0
compiler/i8086/i8086atts.inc

@@ -781,6 +781,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufMMX,
 attsufMMX,
 attsufNONE,
@@ -1025,6 +1029,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,

+ 9 - 1
compiler/i8086/i8086int.inc

@@ -254,7 +254,7 @@
 'pmaddwd',
 'pmagw',
 'pmulhriw',
-'pmulhrwa',
+'pmulhrw',
 'pmulhrwc',
 'pmulhw',
 'pmullw',
@@ -686,6 +686,10 @@
 'rdtscp',
 'movbe',
 'pclmulqdq',
+'pclmullqlqdq',
+'pclmulhqlqdq',
+'pclmullqhqdq',
+'pclmulhqhqdq',
 'vaddpd',
 'vaddps',
 'vaddsd',
@@ -879,6 +883,10 @@
 'vpblendvb',
 'vpblendw',
 'vpclmulqdq',
+'vpclmullqlqdq',
+'vpclmulhqlqdq',
+'vpclmullqhqdq',
+'vpclmulhqhqdq',
 'vpcmpeqb',
 'vpcmpeqd',
 'vpcmpeqq',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-4203;
+4219;

+ 9 - 1
compiler/i8086/i8086op.inc

@@ -254,7 +254,7 @@ A_PMACHRIW,
 A_PMADDWD,
 A_PMAGW,
 A_PMULHRIW,
-A_PMULHRWA,
+A_PMULHRW,
 A_PMULHRWC,
 A_PMULHW,
 A_PMULLW,
@@ -686,6 +686,10 @@ A_AESKEYGENASSIST,
 A_RDTSCP,
 A_MOVBE,
 A_PCLMULQDQ,
+A_PCLMULLQLQDQ,
+A_PCLMULHQLQDQ,
+A_PCLMULLQHQDQ,
+A_PCLMULHQHQDQ,
 A_VADDPD,
 A_VADDPS,
 A_VADDSD,
@@ -879,6 +883,10 @@ A_VPAVGW,
 A_VPBLENDVB,
 A_VPBLENDW,
 A_VPCLMULQDQ,
+A_VPCLMULLQLQDQ,
+A_VPCLMULHQLQDQ,
+A_VPCLMULLQHQDQ,
+A_VPCLMULHQHQDQ,
 A_VPCMPEQB,
 A_VPCMPEQD,
 A_VPCMPEQQ,

+ 9 - 1
compiler/i8086/i8086prop.inc

@@ -685,7 +685,11 @@
 (Ch: [Ch_Wop3, Ch_Rop2]),
 (Ch: [Ch_WEAX, Ch_WEDX]),
 (Ch: [Ch_Rop1, Ch_Wop2]),
-(Ch: [Ch_Mop3, Ch_Rop2]),
+(Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
@@ -891,6 +895,10 @@
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),

+ 114 - 2
compiler/i8086/i8086tab.inc

@@ -3879,7 +3879,7 @@
     flags   : [if_pent,if_mmx,if_sm,if_cyrix]
   ),
   (
-    opcode  : A_PMULHRWA;
+    opcode  : A_PMULHRW;
     ops     : 2;
     optypes : (ot_mmxreg,ot_mmxrm,ot_none,ot_none);
     code    : #2#15#15#72#1#183;
@@ -8757,6 +8757,34 @@
     code    : #241#3#15#58#68#72#22;
     flags   : [if_clmul,if_sandybridge]
   ),
+  (
+    opcode  : A_PCLMULLQLQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#0;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULHQLQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#1;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULLQHQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#16;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULHQHQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#17;
+    flags   : [if_clmul,if_sandybridge]
+  ),
   (
     opcode  : A_VADDPD;
     ops     : 3;
@@ -13062,6 +13090,90 @@
     code    : #232#233#241#250#1#68#61#80#23;
     flags   : [if_avx512,if_tfv]
   ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#0;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#0;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#0;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#1;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#1;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#1;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#16;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#16;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#16;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#17;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#17;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#17;
+    flags   : [if_avx512,if_tfv]
+  ),
   (
     opcode  : A_VPCMPEQB;
     ops     : 3;
@@ -29397,7 +29509,7 @@
     opcode  : A_SHA1MSG2;
     ops     : 2;
     optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
-    code    : #3#15#56#12#72;
+    code    : #3#15#56#202#72;
     flags   : [if_sha]
   ),
   (

+ 43 - 4
compiler/x86/x86ins.dat

@@ -1398,7 +1398,7 @@ mmxreg,mmxrm          \2\x0F\x52\110                  PENT,MMX,SM,CYRIX
 (Ch_All)
 mmxreg,mmxrm          \2\x0F\x5D\110                  PENT,MMX,SM,CYRIX
 
-[PMULHRWA]
+[PMULHRW]
 (Ch_All)
 mmxreg,mmxrm          \2\x0F\x0F\110\1\xB7            PENT,3DNOW,SM
 
@@ -3593,9 +3593,25 @@ mem16|32|64,reg16|32|64              \321\3\xf\x38\xf1\101           MOVBE,SM
 ;*******************************************************************************
 
 [PCLMULQDQ]
-(Ch_Mop3, Ch_Rop2)
+(Ch_Mop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmrm,imm8                    \361\3\xf\x3A\x44\110\26        CLMUL,SANDYBRIDGE
 
+[PCLMULLQLQDQ]
+(Ch_Mop2, Ch_Rop1)
+xmmreg,xmmrm                         \361\3\xf\x3A\x44\110\1\x00     CLMUL,SANDYBRIDGE  ;PCLMULQDQ pseudo-op: same opcode bytes, fixed trailing byte 0x00 instead of an imm8 operand
+
+[PCLMULHQLQDQ]
+(Ch_Mop2, Ch_Rop1)
+xmmreg,xmmrm                         \361\3\xf\x3A\x44\110\1\x01     CLMUL,SANDYBRIDGE  ;PCLMULQDQ pseudo-op, fixed imm8 = 0x01 (per the Intel SDM pseudo-op table)
+
+[PCLMULLQHQDQ]
+(Ch_Mop2, Ch_Rop1)
+xmmreg,xmmrm                         \361\3\xf\x3A\x44\110\1\x10     CLMUL,SANDYBRIDGE  ;PCLMULQDQ pseudo-op, fixed imm8 = 0x10 (per the Intel SDM pseudo-op table)
+
+[PCLMULHQHQDQ]
+(Ch_Mop2, Ch_Rop1)
+xmmreg,xmmrm                         \361\3\xf\x3A\x44\110\1\x11     CLMUL,SANDYBRIDGE  ;PCLMULQDQ pseudo-op: same opcode bytes, fixed trailing byte 0x11 instead of an imm8 operand
+
 ;*******************************************************************************
 ;****** AVX I ******************************************************************
 ;*******************************************************************************
@@ -4867,9 +4883,32 @@ ymmreg,ymmreg,ymmrm,imm8             \361\362\364\372\1\x0E\75\120\27     AVX2
 [VPCLMULQDQ]
 (Ch_Wop4, Ch_Rop3, Ch_Rop2, Ch_Rop1)
 xmmreg,xmmreg,xmmrm,imm8                  \350\361\362\372\1\x44\75\120\27          AVX,SANDYBRIDGE,TFV
-ymmreg,ymmreg,ymmrm,imm8                  \350\361\364\372\1\x44\75\120\27          AVX512,TFV
+ymmreg,ymmreg,ymmrm,imm8                  \350\361\364\372\1\x44\75\120\27          AVX512,TFV  ;Uses the AVX512 flag here, but the instruction has its own VPCLMULQDQ feature flag bit
 zmmreg,zmmreg,zmmrm,imm8                  \350\351\361\372\1\x44\75\120\27          AVX512,TFV
 
+[VPCLMULLQLQDQ]
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                       \350\361\362\372\1\x44\75\120\1\x00       AVX,SANDYBRIDGE,TFV
+ymmreg,ymmreg,ymmrm                       \350\361\364\372\1\x44\75\120\1\x00       AVX512,TFV  ;Uses the AVX512 flag here, but the instruction has its own VPCLMULQDQ feature flag bit
+zmmreg,zmmreg,zmmrm                       \350\351\361\372\1\x44\75\120\1\x00       AVX512,TFV
+
+[VPCLMULHQLQDQ]
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                       \350\361\362\372\1\x44\75\120\1\x01       AVX,SANDYBRIDGE,TFV  ;VPCLMULQDQ pseudo-op, fixed imm8 = 0x01 (per the Intel SDM pseudo-op table)
+ymmreg,ymmreg,ymmrm                       \350\361\364\372\1\x44\75\120\1\x01       AVX512,TFV  ;Uses the AVX512 flag here, but the instruction has its own VPCLMULQDQ feature flag bit
+zmmreg,zmmreg,zmmrm                       \350\351\361\372\1\x44\75\120\1\x01       AVX512,TFV
+
+[VPCLMULLQHQDQ]
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                       \350\361\362\372\1\x44\75\120\1\x10       AVX,SANDYBRIDGE,TFV  ;VPCLMULQDQ pseudo-op, fixed imm8 = 0x10 (per the Intel SDM pseudo-op table)
+ymmreg,ymmreg,ymmrm                       \350\361\364\372\1\x44\75\120\1\x10       AVX512,TFV  ;Uses the AVX512 flag here, but the instruction has its own VPCLMULQDQ feature flag bit
+zmmreg,zmmreg,zmmrm                       \350\351\361\372\1\x44\75\120\1\x10       AVX512,TFV
+
+[VPCLMULHQHQDQ]
+(Ch_Wop3, Ch_Rop2, Ch_Rop1)
+xmmreg,xmmreg,xmmrm                       \350\361\362\372\1\x44\75\120\1\x11       AVX,SANDYBRIDGE,TFV
+ymmreg,ymmreg,ymmrm                       \350\361\364\372\1\x44\75\120\1\x11       AVX512,TFV  ;Uses the AVX512 flag here, but the instruction has its own VPCLMULQDQ feature flag bit
+zmmreg,zmmreg,zmmrm                       \350\351\361\372\1\x44\75\120\1\x11       AVX512,TFV
 
 [VPCMPEQB]
 (Ch_Wop3, Ch_Rop2, Ch_Rop1)
@@ -9148,7 +9187,7 @@ xmmreg,xmmrm                              \3\x0F\x38\xC9\110
 
 [SHA1MSG2]
 (Ch_Mop2, Ch_Rop1)
-xmmreg,xmmrm                              \3\x0F\x38\xC\110                        SHA
+xmmreg,xmmrm                              \3\x0F\x38\xCA\110                        SHA  ;opcode bytes 0F 38 CA (third byte was previously 0x0C)
 
 [SHA256RNDS2]
 ; reads also xmm0 !

+ 8 - 0
compiler/x86_64/x8664ats.inc

@@ -777,6 +777,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufMMX,
 attsufMMX,
 attsufNONE,
@@ -1021,6 +1025,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufINT,
 attsufINT,
 attsufNONE,

+ 9 - 1
compiler/x86_64/x8664att.inc

@@ -245,7 +245,7 @@
 'pmaddwd',
 'pmagw',
 'pmulhriw',
-'pmulhrwa',
+'pmulhrw',
 'pmulhrwc',
 'pmulhw',
 'pmullw',
@@ -682,6 +682,10 @@
 'cmpsq',
 'movbe',
 'pclmulqdq',
+'pclmullqlqdq',
+'pclmulhqlqdq',
+'pclmullqhqdq',
+'pclmulhqhqdq',
 'vaddpd',
 'vaddps',
 'vaddsd',
@@ -875,6 +879,10 @@
 'vpblendvb',
 'vpblendw',
 'vpclmulqdq',
+'vpclmullqlqdq',
+'vpclmulhqlqdq',
+'vpclmullqhqdq',
+'vpclmulhqhqdq',
 'vpcmpeqb',
 'vpcmpeqd',
 'vpcmpeqq',

+ 9 - 1
compiler/x86_64/x8664int.inc

@@ -245,7 +245,7 @@
 'pmaddwd',
 'pmagw',
 'pmulhriw',
-'pmulhrwa',
+'pmulhrw',
 'pmulhrwc',
 'pmulhw',
 'pmullw',
@@ -682,6 +682,10 @@
 'cmpsq',
 'movbe',
 'pclmulqdq',
+'pclmullqlqdq',
+'pclmulhqlqdq',
+'pclmullqhqdq',
+'pclmulhqhqdq',
 'vaddpd',
 'vaddps',
 'vaddsd',
@@ -875,6 +879,10 @@
 'vpblendvb',
 'vpblendw',
 'vpclmulqdq',
+'vpclmullqlqdq',
+'vpclmulhqlqdq',
+'vpclmullqhqdq',
+'vpclmulhqhqdq',
 'vpcmpeqb',
 'vpcmpeqd',
 'vpcmpeqq',

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-4235;
+4251;

+ 9 - 1
compiler/x86_64/x8664op.inc

@@ -245,7 +245,7 @@ A_PMACHRIW,
 A_PMADDWD,
 A_PMAGW,
 A_PMULHRIW,
-A_PMULHRWA,
+A_PMULHRW,
 A_PMULHRWC,
 A_PMULHW,
 A_PMULLW,
@@ -682,6 +682,10 @@ A_LODSQ,
 A_CMPSQ,
 A_MOVBE,
 A_PCLMULQDQ,
+A_PCLMULLQLQDQ,
+A_PCLMULHQLQDQ,
+A_PCLMULLQHQDQ,
+A_PCLMULHQHQDQ,
 A_VADDPD,
 A_VADDPS,
 A_VADDSD,
@@ -875,6 +879,10 @@ A_VPAVGW,
 A_VPBLENDVB,
 A_VPBLENDW,
 A_VPCLMULQDQ,
+A_VPCLMULLQLQDQ,
+A_VPCLMULHQLQDQ,
+A_VPCLMULLQHQDQ,
+A_VPCLMULHQHQDQ,
 A_VPCMPEQB,
 A_VPCMPEQD,
 A_VPCMPEQQ,

+ 9 - 1
compiler/x86_64/x8664pro.inc

@@ -681,7 +681,11 @@
 (Ch: [Ch_WRAX, Ch_RWRSI, Ch_RDirFlag]),
 (Ch: [Ch_RWRSI, Ch_RMemEDI, Ch_RWRDI, Ch_RDirFlag, Ch_WOverflowFlag, Ch_WSignFlag, Ch_WZeroFlag, Ch_WAuxiliaryFlag, Ch_WCarryFlag, Ch_WParityFlag]),
 (Ch: [Ch_Rop1, Ch_Wop2]),
-(Ch: [Ch_Mop3, Ch_Rop2]),
+(Ch: [Ch_Mop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
+(Ch: [Ch_Mop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
@@ -887,6 +891,10 @@
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
+(Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop4, Ch_Rop3, Ch_Rop2]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),
 (Ch: [Ch_Wop3, Ch_Rop2, Ch_Rop1]),

+ 114 - 2
compiler/x86_64/x8664tab.inc

@@ -3879,7 +3879,7 @@
     flags   : [if_pent,if_mmx,if_sm,if_cyrix]
   ),
   (
-    opcode  : A_PMULHRWA;
+    opcode  : A_PMULHRW;
     ops     : 2;
     optypes : (ot_mmxreg,ot_mmxrm,ot_none,ot_none);
     code    : #2#15#15#72#1#183;
@@ -9030,6 +9030,34 @@
     code    : #241#3#15#58#68#72#22;
     flags   : [if_clmul,if_sandybridge]
   ),
+  (
+    opcode  : A_PCLMULLQLQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#0;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULHQLQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#1;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULLQHQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#16;
+    flags   : [if_clmul,if_sandybridge]
+  ),
+  (
+    opcode  : A_PCLMULHQHQDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
+    code    : #241#3#15#58#68#72#1#17;
+    flags   : [if_clmul,if_sandybridge]
+  ),
   (
     opcode  : A_VADDPD;
     ops     : 3;
@@ -13363,6 +13391,90 @@
     code    : #232#233#241#250#1#68#61#80#23;
     flags   : [if_avx512,if_tfv]
   ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#0;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#0;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQLQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#0;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#1;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#1;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQLQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#1;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#16;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#16;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULLQHQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#16;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_xmmrm,ot_none);
+    code    : #232#241#242#250#1#68#61#80#1#17;
+    flags   : [if_avx,if_sandybridge,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymmreg,ot_ymmrm,ot_none);
+    code    : #232#241#244#250#1#68#61#80#1#17;
+    flags   : [if_avx512,if_tfv]
+  ),
+  (
+    opcode  : A_VPCLMULHQHQDQ;
+    ops     : 3;
+    optypes : (ot_zmmreg,ot_zmmreg,ot_zmmrm,ot_none);
+    code    : #232#233#241#250#1#68#61#80#1#17;
+    flags   : [if_avx512,if_tfv]
+  ),
   (
     opcode  : A_VPCMPEQB;
     ops     : 3;
@@ -29621,7 +29733,7 @@
     opcode  : A_SHA1MSG2;
     ops     : 2;
     optypes : (ot_xmmreg,ot_xmmrm,ot_none,ot_none);
-    code    : #3#15#56#12#72;
+    code    : #3#15#56#202#72;
     flags   : [if_sha]
   ),
   (