Browse Source

+ patch by J. Gareth Moreton to support BMI2 instructions
+ extended avx test generator with the newly added BMI2 instructions

git-svn-id: trunk@39875 -

florian 6 years ago
parent
commit
8943c0584e

+ 4 - 0
compiler/i386/i386att.inc

@@ -1010,6 +1010,10 @@
 'andn',
 'bextr',
 'tzcnt',
+'bzhi',
+'mulx',
+'pdep',
+'pext',
 'rorx',
 'sarx',
 'shlx',

+ 4 - 0
compiler/i386/i386atts.inc

@@ -1105,6 +1105,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufINT,
 attsufNONE,
 attsufNONE,

+ 4 - 0
compiler/i386/i386int.inc

@@ -1010,6 +1010,10 @@
 'andn',
 'bextr',
 'tzcnt',
+'bzhi',
+'mulx',
+'pdep',
+'pext',
 'rorx',
 'sarx',
 'shlx',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-2117;
+2121;

+ 4 - 0
compiler/i386/i386op.inc

@@ -1010,6 +1010,10 @@ A_VZEROUPPER,
 A_ANDN,
 A_BEXTR,
 A_TZCNT,
+A_BZHI,
+A_MULX,
+A_PDEP,
+A_PEXT,
 A_RORX,
 A_SARX,
 A_SHLX,

+ 4 - 0
compiler/i386/i386prop.inc

@@ -1010,6 +1010,10 @@
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Wop2, Ch_WFlags, Ch_Rop1]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
+(Ch: [Ch_REDX, Ch_Rop1, Ch_Wop2, Ch_Wop3]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Wop2]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),

+ 28 - 0
compiler/i386/i386tab.inc

@@ -13615,6 +13615,34 @@
     code    : #208#219#2#15#188#72;
     flags   : [if_bmi1,if_sm]
   ),
+  (
+    opcode  : A_BZHI;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #242#249#1#245#62#72;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_MULX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #220#242#249#1#246#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_PDEP;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #220#242#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_PEXT;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #219#242#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
   (
     opcode  : A_RORX;
     ops     : 3;

+ 4 - 0
compiler/i8086/i8086att.inc

@@ -1010,6 +1010,10 @@
 'andn',
 'bextr',
 'tzcnt',
+'bzhi',
+'mulx',
+'pdep',
+'pext',
 'rorx',
 'sarx',
 'shlx',

+ 4 - 0
compiler/i8086/i8086atts.inc

@@ -1119,6 +1119,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufINT,
 attsufNONE,
 attsufNONE,

+ 4 - 0
compiler/i8086/i8086int.inc

@@ -1010,6 +1010,10 @@
 'andn',
 'bextr',
 'tzcnt',
+'bzhi',
+'mulx',
+'pdep',
+'pext',
 'rorx',
 'sarx',
 'shlx',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-2149;
+2153;

+ 4 - 0
compiler/i8086/i8086op.inc

@@ -1010,6 +1010,10 @@ A_VZEROUPPER,
 A_ANDN,
 A_BEXTR,
 A_TZCNT,
+A_BZHI,
+A_MULX,
+A_PDEP,
+A_PEXT,
 A_RORX,
 A_SARX,
 A_SHLX,

+ 4 - 0
compiler/i8086/i8086prop.inc

@@ -1010,6 +1010,10 @@
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Wop2, Ch_WFlags, Ch_Rop1]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
+(Ch: [Ch_REDX, Ch_Rop1, Ch_Wop2, Ch_Wop3]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Wop2]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),

+ 28 - 0
compiler/i8086/i8086tab.inc

@@ -13643,6 +13643,34 @@
     code    : #208#219#2#15#188#72;
     flags   : [if_bmi1,if_sm]
   ),
+  (
+    opcode  : A_BZHI;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #242#249#1#245#62#72;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_MULX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #220#242#249#1#246#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_PDEP;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #220#242#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_PEXT;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #219#242#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
   (
     opcode  : A_RORX;
     ops     : 3;

+ 24 - 4
compiler/x86/x86ins.dat

@@ -5293,22 +5293,42 @@ reg16|32|64,regmem                    \320\333\2\x0F\xBC\110              BMI1,S
 ;********** BMI2 ***************************************************************
 ;*******************************************************************************
 
+[BZHI]                                ; VEX.NDS.LZ.0F38.W0 F5 /r (updates ZF/CF/SF, clears OF; AF/PF undefined)
+(Ch_Rop1, Ch_Rop2, Ch_Wop3, Ch_WFlags)
+reg32,rm32,reg32                      \362\371\1\xf5\76\110               BMI2,PROT
+reg64,rm64,reg64                      \362\363\371\1\xf5\76\110           BMI2,PROT,X86_64
+
+[MULX]                                ; VEX.NDD.LZ.F2.0F38.W0 F6 /r
+(Ch_REDX, Ch_Rop1, Ch_Wop2, Ch_Wop3)
+reg32,reg32,rm32                      \334\362\371\1\xf6\75\120           BMI2,PROT
+reg64,reg64,rm64                      \334\362\363\371\1\xf6\75\120       BMI2,PROT,X86_64
+
+[PDEP]                                ; VEX.NDS.LZ.F2.0F38.W0 F5 /r
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
+reg32,reg32,rm32                      \334\362\371\1\xf5\75\120           BMI2,PROT
+reg64,reg64,rm64                      \334\362\363\371\1\xf5\75\120       BMI2,PROT,X86_64
+
+[PEXT]                                ; VEX.NDS.LZ.F3.0F38.W0 F5 /r
+(Ch_Rop1, Ch_Rop2, Ch_Wop3)
+reg32,reg32,rm32                      \333\362\371\1\xf5\75\120           BMI2,PROT
+reg64,reg64,rm64                      \333\362\363\371\1\xf5\75\120       BMI2,PROT,X86_64
+
 [RORX]                                ; VEX.LZ.F2.0F3A.W0 F0 /r ib -- NOTE(review): imm8 appears to be op1 here, so the destination would be op3; confirm the Ch_ flags below
 (Ch_Rop1, Ch_Wop2)
-reg32,rm32,imm8                      \334\362\372\1\xf0\110\26            BMI2,PROT
-reg64,rm64,imm8                      \334\362\363\372\1\xf0\110\26        BMI2,PROT,X86_64
+reg32,rm32,imm8                       \334\362\372\1\xf0\110\26           BMI2,PROT
+reg64,rm64,imm8                       \334\362\363\372\1\xf0\110\26       BMI2,PROT,X86_64
 
 [SARX]                                ; VEX.NDS.LZ.F3.0F38.W0 F7 /r
 (Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \333\362\371\1\xf7\76\110           BMI2,PROT
 reg64,rm64,reg64                      \333\362\363\371\1\xf7\76\110       BMI2,PROT,X86_64
 
-[SHLX]
+[SHLX]                                ; VEX.NDS.LZ.66.0F38.W0 F7 /r
 (Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \361\362\371\1\xf7\76\110           BMI2,PROT
 reg64,rm64,reg64                      \361\362\363\371\1\xf7\76\110       BMI2,PROT,X86_64
 
-[SHRX]
+[SHRX]                                ; VEX.NDS.LZ.F2.0F38.W0 F7 /r
 (Ch_Rop1, Ch_Rop2, Ch_Wop3)
 reg32,rm32,reg32                      \334\362\371\1\xf7\76\110           BMI2,PROT
 reg64,rm64,reg64                      \334\362\363\371\1\xf7\76\110       BMI2,PROT,X86_64

+ 4 - 0
compiler/x86_64/x8664ats.inc

@@ -1101,6 +1101,10 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufINT,
 attsufNONE,
 attsufNONE,

+ 4 - 0
compiler/x86_64/x8664att.inc

@@ -1006,6 +1006,10 @@
 'andn',
 'bextr',
 'tzcnt',
+'bzhi',
+'mulx',
+'pdep',
+'pext',
 'rorx',
 'sarx',
 'shlx',

+ 4 - 0
compiler/x86_64/x8664int.inc

@@ -1006,6 +1006,10 @@
 'andn',
 'bextr',
 'tzcnt',
+'bzhi',
+'mulx',
+'pdep',
+'pext',
 'rorx',
 'sarx',
 'shlx',

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-2170;
+2178;

+ 4 - 0
compiler/x86_64/x8664op.inc

@@ -1006,6 +1006,10 @@ A_VZEROUPPER,
 A_ANDN,
 A_BEXTR,
 A_TZCNT,
+A_BZHI,
+A_MULX,
+A_PDEP,
+A_PEXT,
 A_RORX,
 A_SARX,
 A_SHLX,

+ 4 - 0
compiler/x86_64/x8664pro.inc

@@ -1006,6 +1006,10 @@
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Wop2, Ch_WFlags, Ch_Rop1]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
+(Ch: [Ch_REDX, Ch_Rop1, Ch_Wop2, Ch_Wop3]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
+(Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Wop2]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),
 (Ch: [Ch_Rop1, Ch_Rop2, Ch_Wop3]),

+ 56 - 0
compiler/x86_64/x8664tab.inc

@@ -13958,6 +13958,62 @@
     code    : #208#219#2#15#188#72;
     flags   : [if_bmi1,if_sm]
   ),
+  (
+    opcode  : A_BZHI;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #242#249#1#245#62#72;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_BZHI;
+    ops     : 3;
+    optypes : (ot_reg64,ot_rm_gpr or ot_bits64,ot_reg64,ot_none);
+    code    : #242#243#249#1#245#62#72;
+    flags   : [if_bmi2,if_prot,if_x86_64]
+  ),
+  (
+    opcode  : A_MULX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #220#242#249#1#246#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_MULX;
+    ops     : 3;
+    optypes : (ot_reg64,ot_reg64,ot_rm_gpr or ot_bits64,ot_none);
+    code    : #220#242#243#249#1#246#61#80;
+    flags   : [if_bmi2,if_prot,if_x86_64]
+  ),
+  (
+    opcode  : A_PDEP;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #220#242#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_PDEP;
+    ops     : 3;
+    optypes : (ot_reg64,ot_reg64,ot_rm_gpr or ot_bits64,ot_none);
+    code    : #220#242#243#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot,if_x86_64]
+  ),
+  (
+    opcode  : A_PEXT;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #219#242#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot]
+  ),
+  (
+    opcode  : A_PEXT;
+    ops     : 3;
+    optypes : (ot_reg64,ot_reg64,ot_rm_gpr or ot_bits64,ot_none);
+    code    : #219#242#243#249#1#245#61#80;
+    flags   : [if_bmi2,if_prot,if_x86_64]
+  ),
   (
     opcode  : A_RORX;
     ops     : 3;

+ 8 - 0
tests/utils/avx/avxopcodes.pas

@@ -51,6 +51,14 @@ end;
 
 procedure TAVXTestGenerator.Init;
 begin
+  FOpCodeList.Add('BZHI,1,1,REG32,RM32,REG32,,');
+  FOpCodeList.Add('BZHI,1,1,REG64,RM64,REG64,,');
+  FOpCodeList.Add('MULX,1,1,REG32,REG32,RM32,,');
+  FOpCodeList.Add('MULX,1,1,REG64,REG64,RM64,,');
+  FOpCodeList.Add('PDEP,1,1,REG32,REG32,RM32,,');
+  FOpCodeList.Add('PDEP,1,1,REG64,REG64,RM64,,');
+  FOpCodeList.Add('PEXT,1,1,REG32,REG32,RM32,,');
+  FOpCodeList.Add('PEXT,1,1,REG64,REG64,RM64,,');
   FOpCodeList.Add('VADDPD,1,1,XMMREG,XMMREG,XMMRM,');
   FOpCodeList.Add('VADDPD,1,1,YMMREG,YMMREG,YMMRM,');
   FOpCodeList.Add('VADDPS,1,1,XMMREG,XMMREG,XMMRM,');