瀏覽代碼

* support for the SSSE3, SSE4.1, SSE4.2 and AES instruction sets by Emelyanov Roman, resolves #18527
+ test for AES support

git-svn-id: trunk@17256 -

florian 14 年之前
父節點
當前提交
9279c6955e

+ 1 - 0
.gitattributes

@@ -9492,6 +9492,7 @@ tests/test/tabsvr3.pp svneol=native#text/plain
 tests/test/tabsvr4.pp svneol=native#text/plain
 tests/test/tabsvr4.pp svneol=native#text/plain
 tests/test/tabsvr5.pp svneol=native#text/plain
 tests/test/tabsvr5.pp svneol=native#text/plain
 tests/test/taddstr1.pp svneol=native#text/plain
 tests/test/taddstr1.pp svneol=native#text/plain
+tests/test/taes1.pp svneol=native#text/plain
 tests/test/talign.pp svneol=native#text/plain
 tests/test/talign.pp svneol=native#text/plain
 tests/test/talign1.pp svneol=native#text/plain
 tests/test/talign1.pp svneol=native#text/plain
 tests/test/talign2.pp svneol=native#text/plain
 tests/test/talign2.pp svneol=native#text/plain

+ 78 - 1
compiler/i386/i386att.inc

@@ -602,5 +602,82 @@
 'insertq',
 'insertq',
 'extrq',
 'extrq',
 'lzcnt',
 'lzcnt',
-'popcnt'
+'pabsb',
+'pabsw',
+'pabsd',
+'palignr',
+'phaddw',
+'phaddd',
+'phaddsw',
+'phsubw',
+'phsubd',
+'phsubsw',
+'pmaddubsw',
+'pmulhrsw',
+'pshufb',
+'psignb',
+'psignw',
+'psignd',
+'blendps',
+'blendpd',
+'blendvps',
+'blendvpd',
+'dpps',
+'dppd',
+'extractps',
+'insertps',
+'movntdqa',
+'mpsadbw',
+'packusdw',
+'pblendvb',
+'pblendw',
+'pcmpeqq',
+'pextrb',
+'pextrd',
+'pextrq',
+'phminposuw',
+'pinsrb',
+'pinsrd',
+'pinsrq',
+'pmaxsb',
+'pmaxsd',
+'pmaxud',
+'pmaxuw',
+'pminsb',
+'pminsd',
+'pminuw',
+'pminud',
+'pmovsxbw',
+'pmovsxbd',
+'pmovsxbq',
+'pmovsxwd',
+'pmovsxwq',
+'pmovsxdq',
+'pmovzxbw',
+'pmovzxbd',
+'pmovzxbq',
+'pmovzxwd',
+'pmovzxwq',
+'pmovzxdq',
+'pmuldq',
+'ptest',
+'roundps',
+'roundpd',
+'roundss',
+'roundsd',
+'pcmpestri',
+'pcmpestrm',
+'pcmpistri',
+'pcmpistrm',
+'pcmpgtq',
+'popcnt',
+'aesenc',
+'aesenclast',
+'aesdec',
+'aesdeclast',
+'aesimc',
+'aeskeygen',
+'stosq',
+'lodsq',
+'cmpsq'
 );
 );

+ 78 - 1
compiler/i386/i386atts.inc

@@ -602,5 +602,82 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
-attsufINT
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufINT,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE
 );
 );

+ 78 - 1
compiler/i386/i386int.inc

@@ -602,5 +602,82 @@
 'insertq',
 'insertq',
 'extrq',
 'extrq',
 'lzcnt',
 'lzcnt',
-'popcnt'
+'pabsb',
+'pabsw',
+'pabsd',
+'palignr',
+'phaddw',
+'phaddd',
+'phaddsw',
+'phsubw',
+'phsubd',
+'phsubsw',
+'pmaddubsw',
+'pmulhrsw',
+'pshufb',
+'psignb',
+'psignw',
+'psignd',
+'blendps',
+'blendpd',
+'blendvps',
+'blendvpd',
+'dpps',
+'dppd',
+'extractps',
+'insertps',
+'movntdqa',
+'mpsadbw',
+'packusdw',
+'pblendvb',
+'pblendw',
+'pcmpeqq',
+'pextrb',
+'pextrd',
+'pextrq',
+'phminposuw',
+'pinsrb',
+'pinsrd',
+'pinsrq',
+'pmaxsb',
+'pmaxsd',
+'pmaxud',
+'pmaxuw',
+'pminsb',
+'pminsd',
+'pminuw',
+'pminud',
+'pmovsxbw',
+'pmovsxbd',
+'pmovsxbq',
+'pmovsxwd',
+'pmovsxwq',
+'pmovsxdq',
+'pmovzxbw',
+'pmovzxbd',
+'pmovzxbq',
+'pmovzxwd',
+'pmovzxwq',
+'pmovzxdq',
+'pmuldq',
+'ptest',
+'roundps',
+'roundpd',
+'roundss',
+'roundsd',
+'pcmpestri',
+'pcmpestrm',
+'pcmpistri',
+'pcmpistrm',
+'pcmpgtq',
+'popcnt',
+'aesenc',
+'aesenclast',
+'aesdec',
+'aesdeclast',
+'aesimc',
+'aeskeygen',
+'stosq',
+'lodsq',
+'cmpsq'
 );
 );

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1380;
+1560;

+ 78 - 1
compiler/i386/i386op.inc

@@ -602,5 +602,82 @@ A_MOVNTSD,
 A_INSERTQ,
 A_INSERTQ,
 A_EXTRQ,
 A_EXTRQ,
 A_LZCNT,
 A_LZCNT,
-A_POPCNT
+A_PABSB,
+A_PABSW,
+A_PABSD,
+A_PALIGNR,
+A_PHADDW,
+A_PHADDD,
+A_PHADDSW,
+A_PHSUBW,
+A_PHSUBD,
+A_PHSUBSW,
+A_PMADDUBSW,
+A_PMULHRSW,
+A_PSHUFB,
+A_PSIGNB,
+A_PSIGNW,
+A_PSIGND,
+A_BLENDPS,
+A_BLENDPD,
+A_BLENDVPS,
+A_BLENDVPD,
+A_DPPS,
+A_DPPD,
+A_EXTRACTPS,
+A_INSERTPS,
+A_MOVNTDQA,
+A_MPSADBW,
+A_PACKUSDW,
+A_PBLENDVB,
+A_PBLENDW,
+A_PCMPEQQ,
+A_PEXTRB,
+A_PEXTRD,
+A_PEXTRQ,
+A_PHMINPOSUW,
+A_PINSRB,
+A_PINSRD,
+A_PINSRQ,
+A_PMAXSB,
+A_PMAXSD,
+A_PMAXUD,
+A_PMAXUW,
+A_PMINSB,
+A_PMINSD,
+A_PMINUW,
+A_PMINUD,
+A_PMOVSXBW,
+A_PMOVSXBD,
+A_PMOVSXBQ,
+A_PMOVSXWD,
+A_PMOVSXWQ,
+A_PMOVSXDQ,
+A_PMOVZXBW,
+A_PMOVZXBD,
+A_PMOVZXBQ,
+A_PMOVZXWD,
+A_PMOVZXWQ,
+A_PMOVZXDQ,
+A_PMULDQ,
+A_PTEST,
+A_ROUNDPS,
+A_ROUNDPD,
+A_ROUNDSS,
+A_ROUNDSD,
+A_PCMPESTRI,
+A_PCMPESTRM,
+A_PCMPISTRI,
+A_PCMPISTRM,
+A_PCMPGTQ,
+A_POPCNT,
+A_AESENC,
+A_AESENCLAST,
+A_AESDEC,
+A_AESDECLAST,
+A_AESIMC,
+A_AESKEYGEN,
+A_STOSQ,
+A_LODSQ,
+A_CMPSQ
 );
 );

+ 77 - 0
compiler/i386/i386prop.inc

@@ -602,5 +602,82 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_RRAX, Ch_WMemEDI, Ch_RWRDI)),
+(Ch: (Ch_WRAX, Ch_RWRSI, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None))
 (Ch: (Ch_All, Ch_None, Ch_None))
 );
 );

+ 1270 - 10
compiler/i386/i386tab.inc

@@ -7796,7 +7796,14 @@
     ops     : 3;
     ops     : 3;
     optypes : (ot_reg32,ot_xmmreg,ot_immediate);
     optypes : (ot_reg32,ot_xmmreg,ot_immediate);
     code    : #1#102#211#2#15#197#72#22;
     code    : #1#102#211#2#15#197#72#22;
-    flags   : if_willamette or if_sse2 or if_sb or if_ar2
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRW;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits32,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#211#3#15#58#21#65#22;
+    flags   : if_sse4
   ),
   ),
   (
   (
     opcode  : A_PINSRW;
     opcode  : A_PINSRW;
@@ -9608,7 +9615,7 @@
     opcode  : A_MOVNTSD;
     opcode  : A_MOVNTSD;
     ops     : 2;
     ops     : 2;
     optypes : (ot_memory,ot_xmmreg,ot_none);
     optypes : (ot_memory,ot_xmmreg,ot_none);
-    code    : #192#220#213#211#2#15#43#0#65;
+    code    : #192#220#213#211#2#15#43#65;
     flags   : if_sse4
     flags   : if_sse4
   ),
   ),
   (
   (
@@ -9647,17 +9654,1270 @@
     flags   : if_386 or if_sm or if_sse4
     flags   : if_386 or if_sm or if_sse4
   ),
   ),
   (
   (
-    opcode  : A_POPCNT;
+    opcode  : A_PABSB;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg16,ot_regmem,ot_none);
-    code    : #208#219#193#211#2#15#184#72;
-    flags   : if_386 or if_sm or if_sse4
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#28#72;
+    flags   : if_sse4
   ),
   ),
   (
   (
-    opcode  : A_POPCNT;
+    opcode  : A_PABSB;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32 or ot_bits64,ot_regmem,ot_none);
-    code    : #209#219#193#211#2#15#184#72;
-    flags   : if_386 or if_sm or if_sse4
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#28#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#28#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#28#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_immediate);
+    code    : #217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_mmxreg,ot_memory,ot_immediate);
+    code    : #193#217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg or ot_signed,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#12#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg or ot_signed,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#12#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#2#15#58#253#1#13#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#2#15#58#253#1#13#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPS;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#20#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPS;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#20#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#21#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#21#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#64#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#64#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#65#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#65#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_EXTRACTPS;
+    ops     : 3;
+    optypes : (ot_memory,ot_xmmreg,ot_immediate);
+    code    : #1#102#213#3#15#58#23#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_EXTRACTPS;
+    ops     : 3;
+    optypes : (ot_reg32 or ot_bits64,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#3#15#58#23#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_INSERTPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#33#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_INSERTPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#33#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_MOVNTDQA;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#42#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_MPSADBW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#66#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_MPSADBW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#66#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PACKUSDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#43#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PACKUSDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#43#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDVB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#16#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDVB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#16#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate or ot_signed);
+    code    : #1#102#217#3#15#58#14#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate or ot_signed);
+    code    : #1#102#193#217#3#15#58#14#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPEQQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#41#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPEQQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#41#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRB;
+    ops     : 3;
+    optypes : (ot_reg32,ot_xmmreg,ot_immediate);
+    code    : #1#102#211#3#15#58#20#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRB;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits8,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#211#3#15#58#20#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRD;
+    ops     : 3;
+    optypes : (ot_reg32,ot_xmmreg,ot_immediate);
+    code    : #1#102#211#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRD;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits32,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#211#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRQ;
+    ops     : 3;
+    optypes : (ot_reg64,ot_xmmreg,ot_immediate);
+    code    : #1#102#214#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRQ;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits64,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#214#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHMINPOSUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#65#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHMINPOSUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#65#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRB;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_reg32,ot_immediate);
+    code    : #1#102#217#3#15#58#32#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRB;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory or ot_bits8,ot_immediate);
+    code    : #1#102#193#217#3#15#58#32#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_reg32,ot_immediate);
+    code    : #1#102#217#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory or ot_bits32,ot_immediate);
+    code    : #1#102#193#217#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_reg64,ot_immediate);
+    code    : #1#102#214#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory or ot_bits64,ot_immediate);
+    code    : #1#102#193#214#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#60#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#60#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#61#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#61#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#63#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#63#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#62#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#62#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#56#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#56#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#57#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#57#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#58#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#58#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#59#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#59#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#32#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#32#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#33#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#33#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#34#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#34#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#35#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#35#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#36#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#36#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#37#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#37#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#48#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#48#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#49#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#49#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#50#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#50#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#51#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#51#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#52#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#52#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#53#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#53#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#40#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#40#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PTEST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#23#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#8#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#8#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#9#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#9#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#10#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#10#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#11#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#11#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#97#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#97#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#96#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#96#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#99#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#99#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#98#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#98#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPGTQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#55#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPGTQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#55#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg16,ot_regmem or ot_bits16,ot_none);
+    code    : #1#243#193#208#2#15#184#72;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg32,ot_regmem or ot_bits32,ot_none);
+    code    : #1#243#193#208#2#15#184#72;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg64,ot_regmem or ot_bits64,ot_none);
+    code    : #1#243#193#208#2#15#184#72;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_AESENC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#220#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESENC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#220#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESENCLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#221#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESENCLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#221#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDEC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#222#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDEC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#222#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDECLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#223#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDECLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#223#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESIMC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#219#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESIMC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#219#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESKEYGEN;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#223#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESKEYGEN;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#223#72#22;
+    flags   : if_sse4
   )
   )
 );
 );

+ 431 - 4
compiler/x86/x86ins.dat

@@ -2557,7 +2557,9 @@ xmmreg,mem            \1\x66\301\323\2\x0F\xE3\110          WILLAMETTE,SSE2,SM
 [PEXTRW]
 [PEXTRW]
 (Ch_All, Ch_None, Ch_None)
 (Ch_All, Ch_None, Ch_None)
 reg32,mmxreg,imm      \2\x0F\xC5\110\22               KATMAI,MMX,SB,AR2
 reg32,mmxreg,imm      \2\x0F\xC5\110\22               KATMAI,MMX,SB,AR2
-reg32,xmmreg,imm      \1\x66\323\2\x0F\xC5\110\26       WILLAMETTE,SSE2,SB,AR2
+reg32,xmmreg,imm      \1\x66\323\2\x0F\xC5\110\26                    SSE4
+mem32,xmmreg,imm      \1\x66\300\323\3\x0F\x3A\x15\101\26            SSE4
+
 
 
 [PINSRW]
 [PINSRW]
 (Ch_All, Ch_None, Ch_None)
 (Ch_All, Ch_None, Ch_None)
@@ -3269,7 +3271,7 @@ mem,xmmreg              \333\300\323\2\x0F\x2B\101                   SSE4,SD
 
 
 [MOVNTSD]
 [MOVNTSD]
 (Ch_All, Ch_None, Ch_None)
 (Ch_All, Ch_None, Ch_None)
-mem,xmmreg              \300\334\325\323\2\x0F\x2B\\101                SSE4 ;,SQ
+mem,xmmreg              \300\334\325\323\2\x0F\x2B\101                SSE4 ;,SQ
 
 
 [INSERTQ]
 [INSERTQ]
 (Ch_All, Ch_None, Ch_None)
 (Ch_All, Ch_None, Ch_None)
@@ -3287,7 +3289,432 @@ xmmreg,xmmreg           \336\323\2\x0F\x79\110                        SSE4
 reg16,regmem            \320\333\301\323\2\x0F\xBD\110                386,SM,SSE4
 reg16,regmem            \320\333\301\323\2\x0F\xBD\110                386,SM,SSE4
 reg32|64,regmem         \321\333\301\323\2\x0F\xBD\110                386,SM,SSE4
 reg32|64,regmem         \321\333\301\323\2\x0F\xBD\110                386,SM,SSE4
 
 
+;*******************************************************************************
+;**********SSSE3****************************************************************
+;*******************************************************************************
+;Uses the SSE4 flag, but a dedicated flag is needed for the SSSE3 instruction set
+[PABSB]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x1C\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x1C\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x1C\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x1C\110               SSE4
+
+[PABSW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x1D\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x1D\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x1D\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x1D\110               SSE4
+
+[PABSD]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x1E\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x1E\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x1E\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x1E\110               SSE4
+
+[PALIGNR]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg,imm     \331\3\x0F\x3A\x0F\110\26                      SSE4
+mmxreg,mem,imm        \301\331\3\x0F\x3A\x0F\110\26                  SSE4
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x0F\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x0F\110\26            SSE4
+
+[PHADDW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x01\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x01\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x01\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x01\110               SSE4
+
+[PHADDD]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x02\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x02\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x02\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x02\110               SSE4
+
+[PHADDSW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x03\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x03\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x03\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x03\110               SSE4
+
+[PHSUBW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x05\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x05\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x05\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x05\110               SSE4
+
+[PHSUBD]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x06\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x06\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x06\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x06\110               SSE4
+
+[PHSUBSW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x07\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x07\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x07\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x07\110               SSE4
+
+[PMADDUBSW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x04\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x04\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x04\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x04\110               SSE4
+
+[PMULHRSW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x0B\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x0B\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x0B\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x0B\110               SSE4
+
+[PSHUFB]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x00\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x00\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x00\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x00\110               SSE4
+
+[PSIGNB]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x08\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x08\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x08\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x08\110               SSE4
+
+[PSIGNW]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x09\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x09\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x09\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x09\110               SSE4
+
+[PSIGND]
+(Ch_All, Ch_None, Ch_None)
+mmxreg,mmxreg         \331\3\x0F\x38\x0A\110                         SSE4
+mmxreg,mem            \301\331\3\x0F\x38\x0A\110                     SSE4
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x0A\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x0A\110               SSE4
+;*******************************************************************************
+;**********SSE4.1***************************************************************
+;*******************************************************************************
+[BLENDPS] ;Manually delete 'or ot_signed' from i386tab.inc or x8664tab.inc
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x0C\110\26          SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x0C\110\26      SSE4
+
+[BLENDPD] ;Manually delete 'or ot_signed' from i386tab.inc or x8664tab.inc
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\2\x0F\x3A\375\1\x0D\110\26          SSE4
+xmmreg,mem,imm        \1\x66\301\331\2\x0F\x3A\375\1\x0D\110\26      SSE4
+
+[BLENDVPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x14\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x14\110               SSE4
+
+[BLENDVPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x15\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x15\110               SSE4
+
+[DPPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x40\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x40\110\26            SSE4
+
+[DPPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x41\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x41\110\26            SSE4
+
+[EXTRACTPS]
+(Ch_All, Ch_None, Ch_None)
+mem,xmmreg,imm       \1\x66\325\3\x0F\x3A\x17\101\26    SSE4
+reg32|64,xmmreg,imm  \1\x66\300\3\x0F\x3A\x17\101\26    SSE4
+
+[INSERTPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x21\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x21\110\26            SSE4
+
+[MOVNTDQA]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x2A\110               SSE4
+
+[MPSADBW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x42\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x42\110\26            SSE4
+
+[PACKUSDW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x2B\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x2B\110               SSE4
+
+[PBLENDVB]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x10\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x10\110               SSE4
+
+[PBLENDW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x0E\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x0E\110\26            SSE4
+
+[PCMPEQQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x29\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x29\110               SSE4
+
+[PEXTRB]
+(Ch_All, Ch_None, Ch_None)
+reg32,xmmreg,imm      \1\x66\323\3\x0F\x3A\x14\101\26                SSE4
+mem8,xmmreg,imm       \1\x66\300\323\3\x0F\x3A\x14\101\26            SSE4
+
+;PEXTRW - see the existing [PEXTRW] entry earlier in this file
+
+[PEXTRD]
+(Ch_All, Ch_None, Ch_None)
+reg32,xmmreg,imm      \1\x66\323\3\x0F\x3A\x16\101\26                SSE4
+mem32,xmmreg,imm      \1\x66\300\323\3\x0F\x3A\x16\101\26            SSE4
+
+[PEXTRQ]
+(Ch_All, Ch_None, Ch_None)
+reg64,xmmreg,imm      \1\x66\326\3\x0F\x3A\x16\101\26                SSE4
+mem64,xmmreg,imm      \1\x66\300\326\3\x0F\x3A\x16\101\26            SSE4
+
+[PHMINPOSUW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x41\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x41\110               SSE4
+
+[PINSRB]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,reg32,imm      \1\x66\331\3\x0F\x3A\x20\110\26                SSE4
+xmmreg,mem8,imm       \1\x66\301\331\3\x0F\x3A\x20\110\26            SSE4
+
+[PINSRD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,reg32,imm      \1\x66\331\3\x0F\x3A\x22\110\26                SSE4
+xmmreg,mem32,imm      \1\x66\301\331\3\x0F\x3A\x22\110\26            SSE4
+
+[PINSRQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,reg64,imm      \1\x66\326\3\x0F\x3A\x22\110\26                SSE4
+xmmreg,mem64,imm      \1\x66\301\326\3\x0F\x3A\x22\110\26            SSE4
+
+[PMAXSB]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x3C\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x3C\110               SSE4
+
+[PMAXSD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x3D\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x3D\110               SSE4
+
+[PMAXUD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x3F\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x3F\110               SSE4
+
+[PMAXUW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x3E\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x3E\110               SSE4
+
+[PMINSB]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x38\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x38\110               SSE4
+
+[PMINSD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x39\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x39\110               SSE4
+
+[PMINUW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x3A\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x3A\110               SSE4
+
+[PMINUD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x3B\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x3B\110               SSE4
+
+[PMOVSXBW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x20\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x20\110               SSE4
+
+[PMOVSXBD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x21\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x21\110               SSE4
+
+[PMOVSXBQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x22\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x22\110               SSE4
+
+[PMOVSXWD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x23\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x23\110               SSE4
+
+[PMOVSXWQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x24\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x24\110               SSE4
+
+[PMOVSXDQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x25\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x25\110               SSE4
+
+[PMOVZXBW]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x30\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x30\110               SSE4
+
+[PMOVZXBD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x31\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x31\110               SSE4
+
+[PMOVZXBQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x32\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x32\110               SSE4
+
+[PMOVZXWD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x33\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x33\110               SSE4
+
+[PMOVZXWQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x34\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x34\110               SSE4
+
+[PMOVZXDQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x35\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x35\110               SSE4
+
+[PMULDQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x28\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x28\110               SSE4
+
+[PTEST]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x17\110                   SSE4
+
+[ROUNDPS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x08\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x08\110\26            SSE4
+
+[ROUNDPD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x09\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x09\110\26            SSE4
+
+[ROUNDSS]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x0A\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x0A\110\26            SSE4
+
+[ROUNDSD]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\x0B\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\x0B\110\26            SSE4
+;*******************************************************************************
+;**********SSE4.2***************************************************************
+;*******************************************************************************
+[PCMPESTRI]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm      \1\x66\331\3\x0F\x3A\x61\110\26               SSE4
+xmmreg,mem,imm         \1\x66\301\331\3\x0F\x3A\x61\110\26           SSE4
+[PCMPESTRM]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm      \1\x66\331\3\x0F\x3A\x60\110\26               SSE4
+xmmreg,mem,imm         \1\x66\301\331\3\x0F\x3A\x60\110\26           SSE4
+[PCMPISTRI]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm      \1\x66\331\3\x0F\x3A\x63\110\26               SSE4
+xmmreg,mem,imm         \1\x66\301\331\3\x0F\x3A\x63\110\26           SSE4
+[PCMPISTRM]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm      \1\x66\331\3\x0F\x3A\x62\110\26               SSE4
+xmmreg,mem,imm         \1\x66\301\331\3\x0F\x3A\x62\110\26           SSE4
+[PCMPGTQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\x37\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\x37\110               SSE4
+;   CRC32
 [POPCNT,popcntX]
 [POPCNT,popcntX]
 (Ch_All, Ch_None, Ch_None)
 (Ch_All, Ch_None, Ch_None)
-reg16,regmem            \320\333\301\323\2\x0F\xB8\110                386,SM,SSE4
-reg32|64,regmem         \321\333\301\323\2\x0F\xB8\110                386,SM,SSE4
+reg16,rm16             \1\xF3\301\320\2\x0F\xB8\110                  386,SM,SSE4
+reg32,rm32             \1\xF3\301\320\2\x0F\xB8\110                  386,SM,SSE4
+reg64,rm64             \1\xF3\301\320\2\x0F\xB8\110                  386,SM,SSE4
+;*******************************************************************************
+;**********AES******************************************************************
+;*******************************************************************************
+;Uses the SSE4 flag, but a dedicated flag is needed for the AES instruction set
+
+[AESENC]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\xDC\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\xDC\110               SSE4
+
+[AESENCLAST]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\xDD\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\xDD\110               SSE4
+
+[AESDEC]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\xDE\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\xDE\110               SSE4
+
+[AESDECLAST]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\xDF\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\xDF\110               SSE4
+
+[AESIMC]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg         \1\x66\331\3\x0F\x38\xDB\110                   SSE4
+xmmreg,mem            \1\x66\301\331\3\x0F\x38\xDB\110               SSE4
+
+[AESKEYGEN] ;AESKEYGENASSIST
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm     \1\x66\331\3\x0F\x3A\xDF\110\26                SSE4
+xmmreg,mem,imm        \1\x66\301\331\3\x0F\x3A\xDF\110\26            SSE4
+;*******************************************************************************
+;*******************************************************************************
+;*******************************************************************************
+[STOSQ]
+(Ch_RRAX, Ch_WMemEDI, Ch_RWRDI)
+void                   \2\x48\xAB                                    X86_64  
+
+[LODSQ]
+(Ch_WRAX, Ch_RWRSI, Ch_None)
+void                   \2\x48\xAD                                    X86_64    
+
+[CMPSQ]
+(Ch_All, Ch_None, Ch_None)
+void                   \2\x48\xA7                                    X86_64

+ 78 - 1
compiler/x86_64/x8664ats.inc

@@ -602,5 +602,82 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufINT,
 attsufINT,
-attsufINT
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufINT,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE
 );
 );

+ 78 - 1
compiler/x86_64/x8664att.inc

@@ -602,5 +602,82 @@
 'insertq',
 'insertq',
 'extrq',
 'extrq',
 'lzcnt',
 'lzcnt',
-'popcnt'
+'pabsb',
+'pabsw',
+'pabsd',
+'palignr',
+'phaddw',
+'phaddd',
+'phaddsw',
+'phsubw',
+'phsubd',
+'phsubsw',
+'pmaddubsw',
+'pmulhrsw',
+'pshufb',
+'psignb',
+'psignw',
+'psignd',
+'blendps',
+'blendpd',
+'blendvps',
+'blendvpd',
+'dpps',
+'dppd',
+'extractps',
+'insertps',
+'movntdqa',
+'mpsadbw',
+'packusdw',
+'pblendvb',
+'pblendw',
+'pcmpeqq',
+'pextrb',
+'pextrd',
+'pextrq',
+'phminposuw',
+'pinsrb',
+'pinsrd',
+'pinsrq',
+'pmaxsb',
+'pmaxsd',
+'pmaxud',
+'pmaxuw',
+'pminsb',
+'pminsd',
+'pminuw',
+'pminud',
+'pmovsxbw',
+'pmovsxbd',
+'pmovsxbq',
+'pmovsxwd',
+'pmovsxwq',
+'pmovsxdq',
+'pmovzxbw',
+'pmovzxbd',
+'pmovzxbq',
+'pmovzxwd',
+'pmovzxwq',
+'pmovzxdq',
+'pmuldq',
+'ptest',
+'roundps',
+'roundpd',
+'roundss',
+'roundsd',
+'pcmpestri',
+'pcmpestrm',
+'pcmpistri',
+'pcmpistrm',
+'pcmpgtq',
+'popcnt',
+'aesenc',
+'aesenclast',
+'aesdec',
+'aesdeclast',
+'aesimc',
+'aeskeygen',
+'stosq',
+'lodsq',
+'cmpsq'
 );
 );

+ 78 - 1
compiler/x86_64/x8664int.inc

@@ -602,5 +602,82 @@
 'insertq',
 'insertq',
 'extrq',
 'extrq',
 'lzcnt',
 'lzcnt',
-'popcnt'
+'pabsb',
+'pabsw',
+'pabsd',
+'palignr',
+'phaddw',
+'phaddd',
+'phaddsw',
+'phsubw',
+'phsubd',
+'phsubsw',
+'pmaddubsw',
+'pmulhrsw',
+'pshufb',
+'psignb',
+'psignw',
+'psignd',
+'blendps',
+'blendpd',
+'blendvps',
+'blendvpd',
+'dpps',
+'dppd',
+'extractps',
+'insertps',
+'movntdqa',
+'mpsadbw',
+'packusdw',
+'pblendvb',
+'pblendw',
+'pcmpeqq',
+'pextrb',
+'pextrd',
+'pextrq',
+'phminposuw',
+'pinsrb',
+'pinsrd',
+'pinsrq',
+'pmaxsb',
+'pmaxsd',
+'pmaxud',
+'pmaxuw',
+'pminsb',
+'pminsd',
+'pminuw',
+'pminud',
+'pmovsxbw',
+'pmovsxbd',
+'pmovsxbq',
+'pmovsxwd',
+'pmovsxwq',
+'pmovsxdq',
+'pmovzxbw',
+'pmovzxbd',
+'pmovzxbq',
+'pmovzxwd',
+'pmovzxwq',
+'pmovzxdq',
+'pmuldq',
+'ptest',
+'roundps',
+'roundpd',
+'roundss',
+'roundsd',
+'pcmpestri',
+'pcmpestrm',
+'pcmpistri',
+'pcmpistrm',
+'pcmpgtq',
+'popcnt',
+'aesenc',
+'aesenclast',
+'aesdec',
+'aesdeclast',
+'aesimc',
+'aeskeygen',
+'stosq',
+'lodsq',
+'cmpsq'
 );
 );

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1372;
+1555;

+ 78 - 1
compiler/x86_64/x8664op.inc

@@ -602,5 +602,82 @@ A_MOVNTSD,
 A_INSERTQ,
 A_INSERTQ,
 A_EXTRQ,
 A_EXTRQ,
 A_LZCNT,
 A_LZCNT,
-A_POPCNT
+A_PABSB,
+A_PABSW,
+A_PABSD,
+A_PALIGNR,
+A_PHADDW,
+A_PHADDD,
+A_PHADDSW,
+A_PHSUBW,
+A_PHSUBD,
+A_PHSUBSW,
+A_PMADDUBSW,
+A_PMULHRSW,
+A_PSHUFB,
+A_PSIGNB,
+A_PSIGNW,
+A_PSIGND,
+A_BLENDPS,
+A_BLENDPD,
+A_BLENDVPS,
+A_BLENDVPD,
+A_DPPS,
+A_DPPD,
+A_EXTRACTPS,
+A_INSERTPS,
+A_MOVNTDQA,
+A_MPSADBW,
+A_PACKUSDW,
+A_PBLENDVB,
+A_PBLENDW,
+A_PCMPEQQ,
+A_PEXTRB,
+A_PEXTRD,
+A_PEXTRQ,
+A_PHMINPOSUW,
+A_PINSRB,
+A_PINSRD,
+A_PINSRQ,
+A_PMAXSB,
+A_PMAXSD,
+A_PMAXUD,
+A_PMAXUW,
+A_PMINSB,
+A_PMINSD,
+A_PMINUW,
+A_PMINUD,
+A_PMOVSXBW,
+A_PMOVSXBD,
+A_PMOVSXBQ,
+A_PMOVSXWD,
+A_PMOVSXWQ,
+A_PMOVSXDQ,
+A_PMOVZXBW,
+A_PMOVZXBD,
+A_PMOVZXBQ,
+A_PMOVZXWD,
+A_PMOVZXWQ,
+A_PMOVZXDQ,
+A_PMULDQ,
+A_PTEST,
+A_ROUNDPS,
+A_ROUNDPD,
+A_ROUNDSS,
+A_ROUNDSD,
+A_PCMPESTRI,
+A_PCMPESTRM,
+A_PCMPISTRI,
+A_PCMPISTRM,
+A_PCMPGTQ,
+A_POPCNT,
+A_AESENC,
+A_AESENCLAST,
+A_AESDEC,
+A_AESDECLAST,
+A_AESIMC,
+A_AESKEYGEN,
+A_STOSQ,
+A_LODSQ,
+A_CMPSQ
 );
 );

+ 77 - 0
compiler/x86_64/x8664pro.inc

@@ -602,5 +602,82 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_RRAX, Ch_WMemEDI, Ch_RWRDI)),
+(Ch: (Ch_WRAX, Ch_RWRSI, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None))
 (Ch: (Ch_All, Ch_None, Ch_None))
 );
 );

+ 1291 - 10
compiler/x86_64/x8664tab.inc

@@ -7705,7 +7705,14 @@
     ops     : 3;
     ops     : 3;
     optypes : (ot_reg32,ot_xmmreg,ot_immediate);
     optypes : (ot_reg32,ot_xmmreg,ot_immediate);
     code    : #1#102#211#2#15#197#72#22;
     code    : #1#102#211#2#15#197#72#22;
-    flags   : if_willamette or if_sse2 or if_sb or if_ar2
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRW;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits32,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#211#3#15#58#21#65#22;
+    flags   : if_sse4
   ),
   ),
   (
   (
     opcode  : A_PINSRW;
     opcode  : A_PINSRW;
@@ -9552,7 +9559,7 @@
     opcode  : A_MOVNTSD;
     opcode  : A_MOVNTSD;
     ops     : 2;
     ops     : 2;
     optypes : (ot_memory,ot_xmmreg,ot_none);
     optypes : (ot_memory,ot_xmmreg,ot_none);
-    code    : #192#220#213#211#2#15#43#0#65;
+    code    : #192#220#213#211#2#15#43#65;
     flags   : if_sse4
     flags   : if_sse4
   ),
   ),
   (
   (
@@ -9591,17 +9598,1291 @@
     flags   : if_386 or if_sm or if_sse4
     flags   : if_386 or if_sm or if_sse4
   ),
   ),
   (
   (
-    opcode  : A_POPCNT;
+    opcode  : A_PABSB;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg16,ot_regmem,ot_none);
-    code    : #208#219#193#211#2#15#184#72;
-    flags   : if_386 or if_sm or if_sse4
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#28#72;
+    flags   : if_sse4
   ),
   ),
   (
   (
-    opcode  : A_POPCNT;
+    opcode  : A_PABSB;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32 or ot_bits64,ot_regmem,ot_none);
-    code    : #209#219#193#211#2#15#184#72;
-    flags   : if_386 or if_sm or if_sse4
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#28#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#28#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#28#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#29#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PABSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#30#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_immediate);
+    code    : #217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_mmxreg,ot_memory,ot_immediate);
+    code    : #193#217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PALIGNR;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#15#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#1#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#2#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHADDSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#3#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#5#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#6#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHSUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#7#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMADDUBSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#4#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULHRSW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#11#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSHUFB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#0#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#8#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGNW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#9#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_mmxreg,ot_none);
+    code    : #217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_mmxreg,ot_memory,ot_none);
+    code    : #193#217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PSIGND;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#10#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg or ot_signed,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#12#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg or ot_signed,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#12#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#2#15#58#253#1#13#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#2#15#58#253#1#13#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPS;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#20#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPS;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#20#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#21#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_BLENDVPD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#21#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#64#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#64#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#65#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_DPPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#65#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_EXTRACTPS;
+    ops     : 3;
+    optypes : (ot_memory,ot_xmmreg,ot_immediate);
+    code    : #1#102#213#3#15#58#23#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_EXTRACTPS;
+    ops     : 3;
+    optypes : (ot_reg32 or ot_bits64,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#3#15#58#23#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_INSERTPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#33#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_INSERTPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#33#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_MOVNTDQA;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#42#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_MPSADBW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#66#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_MPSADBW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#66#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PACKUSDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#43#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PACKUSDW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#43#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDVB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#16#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDVB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#16#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate or ot_signed);
+    code    : #1#102#217#3#15#58#14#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PBLENDW;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate or ot_signed);
+    code    : #1#102#193#217#3#15#58#14#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPEQQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#41#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPEQQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#41#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRB;
+    ops     : 3;
+    optypes : (ot_reg32,ot_xmmreg,ot_immediate);
+    code    : #1#102#211#3#15#58#20#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRB;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits8,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#211#3#15#58#20#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRD;
+    ops     : 3;
+    optypes : (ot_reg32,ot_xmmreg,ot_immediate);
+    code    : #1#102#211#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRD;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits32,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#211#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRQ;
+    ops     : 3;
+    optypes : (ot_reg64,ot_xmmreg,ot_immediate);
+    code    : #1#102#214#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PEXTRQ;
+    ops     : 3;
+    optypes : (ot_memory or ot_bits64,ot_xmmreg,ot_immediate);
+    code    : #1#102#192#214#3#15#58#22#65#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHMINPOSUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#65#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PHMINPOSUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#65#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRB;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_reg32,ot_immediate);
+    code    : #1#102#217#3#15#58#32#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRB;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory or ot_bits8,ot_immediate);
+    code    : #1#102#193#217#3#15#58#32#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_reg32,ot_immediate);
+    code    : #1#102#217#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory or ot_bits32,ot_immediate);
+    code    : #1#102#193#217#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_reg64,ot_immediate);
+    code    : #1#102#214#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PINSRQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory or ot_bits64,ot_immediate);
+    code    : #1#102#193#214#3#15#58#34#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#60#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#60#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#61#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#61#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#63#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#63#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#62#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMAXUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#62#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#56#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSB;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#56#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#57#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINSD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#57#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#58#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#58#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#59#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMINUD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#59#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#32#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#32#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#33#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#33#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#34#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#34#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#35#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#35#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#36#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#36#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#37#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVSXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#37#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#48#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBW;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#48#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#49#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#49#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#50#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXBQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#50#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#51#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWD;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#51#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#52#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXWQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#52#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#53#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMOVZXDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#53#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#40#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PMULDQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#40#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PTEST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#23#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#8#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#8#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#9#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#9#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#10#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#10#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#11#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_ROUNDSD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#11#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#97#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#97#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#96#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPESTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#96#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#99#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRI;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#99#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#98#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPISTRM;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#98#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPGTQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#55#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_PCMPGTQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#55#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg16,ot_regmem or ot_bits16,ot_none);
+    code    : #1#243#193#208#2#15#184#72;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg32,ot_regmem or ot_bits32,ot_none);
+    code    : #1#243#193#208#2#15#184#72;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg64,ot_regmem or ot_bits64,ot_none);
+    code    : #1#243#193#208#2#15#184#72;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_AESENC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#220#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESENC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#220#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESENCLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#221#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESENCLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#221#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDEC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#222#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDEC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#222#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDECLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#223#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESDECLAST;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#223#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESIMC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #1#102#217#3#15#56#219#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESIMC;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_memory,ot_none);
+    code    : #1#102#193#217#3#15#56#219#72;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESKEYGEN;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #1#102#217#3#15#58#223#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_AESKEYGEN;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_memory,ot_immediate);
+    code    : #1#102#193#217#3#15#58#223#72#22;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_STOSQ;
+    ops     : 0;
+    optypes : (ot_none,ot_none,ot_none);
+    code    : #2#72#171;
+    flags   : if_x86_64
+  ),
+  (
+    opcode  : A_LODSQ;
+    ops     : 0;
+    optypes : (ot_none,ot_none,ot_none);
+    code    : #2#72#173;
+    flags   : if_x86_64
+  ),
+  (
+    opcode  : A_CMPSQ;
+    ops     : 0;
+    optypes : (ot_none,ot_none,ot_none);
+    code    : #2#72#167;
+    flags   : if_x86_64
   )
   )
 );
 );

+ 9 - 0
rtl/x86_64/cpu.pp

@@ -29,11 +29,14 @@ unit cpu;
       sysutils;
       sysutils;
 
 
     function InterlockedCompareExchange128Support : boolean;inline;
     function InterlockedCompareExchange128Support : boolean;inline;
+    function AESSupport : boolean;inline;
+
     function InterlockedCompareExchange128(var Target: Int128Rec; NewValue: Int128Rec; Comperand: Int128Rec): Int128Rec;
     function InterlockedCompareExchange128(var Target: Int128Rec; NewValue: Int128Rec; Comperand: Int128Rec): Int128Rec;
 
 
   implementation
   implementation
 
 
     var
     var
+      _AESSupport,
       _InterlockedCompareExchange128Support : boolean;
       _InterlockedCompareExchange128Support : boolean;
 
 
     function InterlockedCompareExchange128Support : boolean;inline;
     function InterlockedCompareExchange128Support : boolean;inline;
@@ -41,6 +44,11 @@ unit cpu;
         result:=_InterlockedCompareExchange128Support;
         result:=_InterlockedCompareExchange128Support;
       end;
       end;
 
 
+    { Returns the value of the unit-level flag _AESSupport. }
+    function AESSupport : boolean;inline;
+      begin
+        result:=_AESSupport;
+      end;
+
 
 
     function InterlockedCompareExchange128(var Target: Int128Rec; NewValue: Int128Rec; Comperand: Int128Rec): Int128Rec; assembler;
     function InterlockedCompareExchange128(var Target: Int128Rec; NewValue: Int128Rec; Comperand: Int128Rec): Int128Rec; assembler;
      {
      {
@@ -119,6 +127,7 @@ unit cpu;
            popq %rbx
            popq %rbx
         end;
         end;
         _InterlockedCompareExchange128Support:=(_ecx and $2000)<>0;
         _InterlockedCompareExchange128Support:=(_ecx and $2000)<>0;
+        _AESSupport:=(_ecx and $2000000)<>0;        
       end;
       end;
 
 
 
 

+ 190 - 0
tests/test/taes1.pp

@@ -0,0 +1,190 @@
+{ %CPU=x86_64 }
+Program AESTest;
+{$INLINE ON}
+{$ASMMODE INTEL}
+{$MODE DELPHI}
+
+Uses Cpu,SysUtils;
+
+Type
+  Word32 = LongWord;
+  Word64 = QWord;
+  // Type of the DataSize parameter of EnCrypt_AES and DeCrypt_AES.
+  WordPR = Word64;
+
+  // A 256-bit key stored as four 64-bit words.
+  PAESKey256 = ^TAESKey256; TAESKey256 = Array [0..3] of Word64;
+
+  // Expanded key material filled in by OpenKey_AES.
+  // The record is packed, so the byte offsets are:
+  //   $000  EnCryptRoundKeys  15 round keys * 16 bytes = 240 bytes
+  //   $0F0  Padding0          16 bytes
+  //   $100  DeCryptRoundKeys  15 round keys * 16 bytes = 240 bytes
+  //   $1F0  Padding1          16 bytes
+  // OpenKey_AES addresses the decryption keys with the literal offset $100
+  // and its last expansion step writes up to offset $FF, so the padding
+  // fields must stay in place.
+  PAESOpenedKey = ^TAESOpenedKey; TAESOpenedKey = Packed Record
+    EnCryptRoundKeys    : Array [0..14, 0..3] Of Word32;
+    Padding0            : Array [0..3] of Word32;
+    DeCryptRoundKeys    : Array [0..14, 0..3] Of Word32;
+    Padding1            : Array [0..3] of Word32;
+  End;
+
+// Known-answer vector: key, one 16-byte plaintext block and the expected
+// ciphertext block, each written as 64-bit words.
+// NOTE(review): read as little-endian bytes these look like the AES-256
+// example of FIPS-197 Appendix C.3 - confirm against the standard.
+Const
+  Test_Key     : Array[0..3] of Word64 = ($0706050403020100, $0f0e0d0c0b0a0908, $1716151413121110, $1f1e1d1c1b1a1918);
+  Test_Data    : Array[0..1] of Word64 = (Word64($7766554433221100), Word64($ffeeddccbbaa9988));
+  Test_Crypt   : Array[0..1] of Word64 = (Word64($bf456751cab7a28e), Word64($8960494b9049fcea));
+
+Var
+  OpenedKey   : TAESOpenedKey;   // round keys produced by OpenKey_AES
+  Data        : Array [0..1] of Word64;   // work buffer: ciphertext, then decrypted in place
+  Passed      : Boolean;   // accumulated result of both comparisons
+
+{ OpenKey_AES: expands the 256-bit key at Key into the round-key tables of
+  OpenedKey^.
+
+  Steps performed by the code below:
+    1. copy the 32 key bytes to offsets $00..$1F of OpenedKey^;
+    2. call key_expansion seven times; each call appends 32 bytes, starting
+       at offset $20 (the last call therefore writes up to offset $FF);
+    3. load the fifteen 16-byte round keys at offsets $00..$E0, run AESIMC on
+       keys 1..13, and store all fifteen at offsets $100..$1E0.
+
+  After the optional parameter fix-up the code expects Key in RCX and
+  OpenedKey in RDX. RBX is saved and restored; RAX, RCX, RDX, R8 and
+  XMM0..XMM14 are overwritten.
+  NOTE(review): XMM6..XMM15 are callee-saved in the Microsoft x64 calling
+  convention and are not preserved here - confirm this is acceptable for the
+  win64 build of the test. }
+Procedure OpenKey_AES(Key: PAESKey256; OpenedKey: PAESOpenedKey); Assembler; NoStackFrame;
+  { key_expansion: appends eight 32-bit words (32 bytes) at [RCX].
+    In : RCX  = write pointer
+         XMM1 = the four words written 32 bytes before RCX
+         XMM3 = the four words written 16 bytes before RCX
+         XMM2 = AESKEYGEN result for XMM3 with the round constant
+    Out: RCX advanced by 32; XMM1 / XMM3 reloaded with the two 16-byte halves
+         just written; RDX = address of the second half.
+    Clobbers EAX, EBX, XMM2, XMM4. EBX is safe to use only because the
+    enclosing routine pushes RBX before the first call. }
+  Procedure key_expansion; Assembler; NoStackFrame;
+  Asm
+   // remember where this step starts writing
+   MOV RDX, RCX
+   // word 0: broadcast dword 3 of the AESKEYGEN result and XOR it with XMM1;
+   // the low dword of the result is stored
+   PSHUFD XMM2, XMM2, 011111111b; PXOR XMM2, XMM1; MOVD EAX, XMM2; MOV [RCX], EAX; ADD RCX, 4
+   // words 1..3: previous new word XOR dword 1, 2, 3 of the old XMM1
+   // (each PSHUFD moves the wanted dword into lane 0 for MOVD)
+   PSHUFD XMM1, XMM1, 011100101b; MOVD EBX, XMM1; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   PSHUFD XMM1, XMM1, 011100110b; MOVD EBX, XMM1; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   PSHUFD XMM1, XMM1, 011100111b; MOVD EBX, XMM1; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   // second half: run AESKEYGEN with constant 0 over the four words just written
+   MOVDQU XMM4, [RDX]; AESKEYGEN XMM4, XMM4, 0
+   // word 4: dword 2 of that result XOR dword 0 of XMM3
+   PSHUFD XMM4, XMM4, 011100110b; MOVD EAX, XMM4; MOVD EBX, XMM3; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   // words 5..7: previous new word XOR dword 1, 2, 3 of the old XMM3
+   PSHUFD XMM3, XMM3, 011100101b; MOVD EBX, XMM3; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   PSHUFD XMM3, XMM3, 011100110b; MOVD EBX, XMM3; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   PSHUFD XMM3, XMM3, 011100111b; MOVD EBX, XMM3; XOR EAX, EBX; MOV [RCX], EAX; ADD RCX, 4
+   // reload both freshly written halves as input for the next step
+   MOVDQU XMM1, [RDX]; ADD RDX, $10; MOVDQU XMM3, [RDX]
+  End;
+Asm
+ // key_expansion uses EBX as scratch, so keep the caller's RBX
+ PUSH RBX
+{$ifndef win64}
+ // fix parameter locations
+ MOV RDX,RSI
+ MOV RCX,RDI 
+{$endif win64}
+ // R8 keeps the base of OpenedKey^ for the second phase
+ MOV R8, RDX
+ // the original key forms the first 32 bytes of the table
+ MOVDQU XMM1, [RCX]; MOVDQU XMM3, [RCX+16]
+ MOVDQU [RDX], XMM1; MOVDQU [RDX + $10], XMM3
+ // RCX becomes the write pointer for the expansion steps
+ LEA RCX, [RDX+$20]
+ // seven steps with round constants $1, $2, $4, ... $40
+ AESKEYGEN XMM2, XMM3, $1;  CALL key_expansion
+ AESKEYGEN XMM2, XMM3, $2;  CALL key_expansion
+ AESKEYGEN XMM2, XMM3, $4;  CALL key_expansion
+ AESKEYGEN XMM2, XMM3, $8;  CALL key_expansion
+ AESKEYGEN XMM2, XMM3, $10; CALL key_expansion
+ AESKEYGEN XMM2, XMM3, $20; CALL key_expansion
+ AESKEYGEN XMM2, XMM3, $40; CALL key_expansion
+ // load the fifteen round keys from offsets $00..$E0
+ MOVDQU XMM0,  [R8+$00]; MOVDQU XMM1,  [R8+$10]; MOVDQU XMM2,  [R8+$20]; MOVDQU XMM3,  [R8+$30]
+ MOVDQU XMM4,  [R8+$40]; MOVDQU XMM5,  [R8+$50]; MOVDQU XMM6,  [R8+$60]; MOVDQU XMM7,  [R8+$70]
+ MOVDQU XMM8,  [R8+$80]; MOVDQU XMM9,  [R8+$90]; MOVDQU XMM10, [R8+$A0]; MOVDQU XMM11, [R8+$B0]
+ MOVDQU XMM12, [R8+$C0]; MOVDQU XMM13, [R8+$D0]; MOVDQU XMM14, [R8+$E0]
+ // AESIMC is applied to keys 1..13 only; keys 0 and 14 are copied unchanged
+ AESIMC XMM1,  XMM1; AESIMC XMM2,  XMM2;  AESIMC XMM3,  XMM3;  AESIMC XMM4,  XMM4
+ AESIMC XMM5,  XMM5; AESIMC XMM6,  XMM6;  AESIMC XMM7,  XMM7;  AESIMC XMM8,  XMM8
+ AESIMC XMM9,  XMM9; AESIMC XMM10, XMM10; AESIMC XMM11, XMM11; AESIMC XMM12, XMM12
+ AESIMC XMM13, XMM13
+ // store the second table at offsets $100..$1E0
+ MOVDQU [R8+$100], XMM0;  MOVDQU [R8+$110], XMM1;  MOVDQU [R8+$120], XMM2;  MOVDQU [R8+$130], XMM3
+ MOVDQU [R8+$140], XMM4;  MOVDQU [R8+$150], XMM5;  MOVDQU [R8+$160], XMM6;  MOVDQU [R8+$170], XMM7
+ MOVDQU [R8+$180], XMM8;  MOVDQU [R8+$190], XMM9;  MOVDQU [R8+$1A0], XMM10; MOVDQU [R8+$1B0], XMM11
+ MOVDQU [R8+$1C0], XMM12; MOVDQU [R8+$1D0], XMM13; MOVDQU [R8+$1E0], XMM14
+ POP RBX
+End;
+
+{ EnCrypt_AES: encrypts DataSize div 16 blocks of 16 bytes from InData to
+  OutData, using the fifteen 16-byte round keys at EnCryptRoundKeys.
+
+  Each block is XORed with key 0, passed through AESENC with keys 1..13 and
+  finished with AESENCLAST and key 14. Bytes beyond the last complete block
+  (DataSize mod 16) are ignored. When DataSize is below 16 nothing is read
+  from InData and nothing is written to OutData.
+
+  After the optional parameter fix-up the code expects InData in RCX,
+  OutData in RDX, DataSize in R8 and EnCryptRoundKeys in R9.
+  Overwrites RCX, RDX, R8, (R9 on non-win64), XMM0..XMM15 and the flags.
+  NOTE(review): XMM6..XMM15 are callee-saved in the Microsoft x64 calling
+  convention and are not preserved here - confirm this is acceptable for the
+  win64 build of the test. }
+Procedure EnCrypt_AES(InData, OutData: Pointer; DataSize: WordPR; EnCryptRoundKeys: Pointer); Assembler; NoStackFrame;
+Asm
+{$ifndef win64}
+ // fix parameter locations
+ MOV R9,RCX
+ MOV R8,RDX
+ MOV RDX,RSI
+ MOV RCX,RDI
+{$endif win64}
+ // Loading encryption keys
+ MOVDQU XMM0, [R9+16*0]
+ MOVDQU XMM1, [R9+16*1]
+ MOVDQU XMM2, [R9+16*2]
+ MOVDQU XMM3, [R9+16*3]
+ MOVDQU XMM4, [R9+16*4]
+ MOVDQU XMM5, [R9+16*5]
+ MOVDQU XMM6, [R9+16*6]
+ MOVDQU XMM7, [R9+16*7]
+ MOVDQU XMM8, [R9+16*8]
+ MOVDQU XMM9, [R9+16*9]
+ MOVDQU XMM10, [R9+16*10]
+ MOVDQU XMM11, [R9+16*11]
+ MOVDQU XMM12, [R9+16*12]
+ MOVDQU XMM13, [R9+16*13]
+ MOVDQU XMM14, [R9+16*14]
+ // Setting the main loop: RCX = number of whole blocks, R8 = read pointer,
+ // RDX = write pointer
+ XCHG RCX, R8
+ SHR RCX, 4
+ // SHR leaves ZF set when no complete block remains; without this guard a
+ // zero count would wrap around and overrun both buffers
+ JZ @Done
+@NextBlock:
+       MOVDQU XMM15, [R8]; ADD R8, 16
+       PXOR XMM15, XMM0
+       AESENC XMM15, XMM1
+       AESENC XMM15, XMM2
+       AESENC XMM15, XMM3
+       AESENC XMM15, XMM4
+       AESENC XMM15, XMM5
+       AESENC XMM15, XMM6
+       AESENC XMM15, XMM7
+       AESENC XMM15, XMM8
+       AESENC XMM15, XMM9
+       AESENC XMM15, XMM10
+       AESENC XMM15, XMM11
+       AESENC XMM15, XMM12
+       AESENC XMM15, XMM13
+       AESENCLAST XMM15, XMM14
+       MOVDQU [RDX], XMM15; ADD RDX, 16
+       DEC RCX
+       JNZ @NextBlock
+@Done:
+End;
+
+{ DeCrypt_AES: decrypts DataSize div 16 blocks of 16 bytes from InData to
+  OutData, using the fifteen 16-byte round keys at DeCryptRoundKeys.
+
+  Each block is XORed with key 14, passed through AESDEC with keys 13..1 and
+  finished with AESDECLAST and key 0. Every block is loaded completely before
+  its result is stored, so InData and OutData may be the same buffer. Bytes
+  beyond the last complete block (DataSize mod 16) are ignored. When DataSize
+  is below 16 nothing is read from InData and nothing is written to OutData.
+
+  After the optional parameter fix-up the code expects InData in RCX,
+  OutData in RDX, DataSize in R8 and DeCryptRoundKeys in R9.
+  Overwrites RCX, RDX, R8, (R9 on non-win64), XMM0..XMM15 and the flags.
+  NOTE(review): XMM6..XMM15 are callee-saved in the Microsoft x64 calling
+  convention and are not preserved here - confirm this is acceptable for the
+  win64 build of the test. }
+Procedure DeCrypt_AES(InData, OutData: Pointer; DataSize: WordPR; DeCryptRoundKeys: Pointer); Assembler; NoStackFrame;
+Asm
+{$ifndef win64}
+ // fix parameter locations
+ MOV R9,RCX
+ MOV R8,RDX
+ MOV RDX,RSI
+ MOV RCX,RDI
+{$endif win64}
+ // Loading decryption keys
+ MOVDQU XMM0, [R9+16*0]
+ MOVDQU XMM1, [R9+16*1]
+ MOVDQU XMM2, [R9+16*2]
+ MOVDQU XMM3, [R9+16*3]
+ MOVDQU XMM4, [R9+16*4]
+ MOVDQU XMM5, [R9+16*5]
+ MOVDQU XMM6, [R9+16*6]
+ MOVDQU XMM7, [R9+16*7]
+ MOVDQU XMM8, [R9+16*8]
+ MOVDQU XMM9, [R9+16*9]
+ MOVDQU XMM10, [R9+16*10]
+ MOVDQU XMM11, [R9+16*11]
+ MOVDQU XMM12, [R9+16*12]
+ MOVDQU XMM13, [R9+16*13]
+ MOVDQU XMM14, [R9+16*14]
+ // Setting the main loop: RCX = number of whole blocks, R8 = read pointer,
+ // RDX = write pointer
+ XCHG RCX, R8
+ SHR RCX, 4
+ // SHR leaves ZF set when no complete block remains; without this guard a
+ // zero count would wrap around and overrun both buffers
+ JZ @Done
+@NextBlock:
+       MOVDQU XMM15, [R8]; ADD R8, 16
+       PXOR XMM15, XMM14
+       AESDEC XMM15, XMM13
+       AESDEC XMM15, XMM12
+       AESDEC XMM15, XMM11
+       AESDEC XMM15, XMM10
+       AESDEC XMM15, XMM9
+       AESDEC XMM15, XMM8
+       AESDEC XMM15, XMM7
+       AESDEC XMM15, XMM6
+       AESDEC XMM15, XMM5
+       AESDEC XMM15, XMM4
+       AESDEC XMM15, XMM3
+       AESDEC XMM15, XMM2
+       AESDEC XMM15, XMM1
+       AESDECLAST XMM15, XMM0
+       MOVDQU [RDX], XMM15; ADD RDX, 16
+       DEC RCX
+       JNZ @NextBlock
+@Done:
+End;
+
+BEGIN
+  { Without AES instructions there is nothing to exercise: report it and
+    finish with the normal exit code. }
+  if not AESSupport then
+    writeln('CPU has no AES instruction support')
+  else
+    begin
+      OpenKey_AES(@Test_Key, @OpenedKey);
+
+      { Encrypt the reference block and compare with the expected ciphertext. }
+      EnCrypt_AES(@Test_Data, @Data, 16, @OpenedKey.EnCryptRoundKeys);
+      Passed := SysUtils.CompareMem(@Data, @Test_Crypt, 16);
+
+      { Decrypt in place; the result must match the original plaintext. }
+      DeCrypt_AES(@Data, @Data, 16, @OpenedKey.DeCryptRoundKeys);
+      if not SysUtils.CompareMem(@Data, @Test_Data, 16) then
+        Passed := False;
+
+      if not Passed then
+        Halt(1);
+      writeln('ok');
+    end;
+END.