소스 검색

+ some sse4 instructions supported, resolves #9046

git-svn-id: trunk@7613 -
florian 18 년 전
부모
커밋
0e96eda236

+ 7 - 1
compiler/i386/i386att.inc

@@ -594,5 +594,11 @@
 'movabs',
 'movslq',
 'cqto',
-'cmpxchg16b'
+'cmpxchg16b',
+'movntss',
+'movntsd',
+'insertq',
+'extrq',
+'lzcnt',
+'popcnt'
 );

+ 7 - 1
compiler/i386/i386atts.inc

@@ -594,5 +594,11 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufINT
+attsufINT,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE
 );

+ 7 - 1
compiler/i386/i386int.inc

@@ -594,5 +594,11 @@
 'movabs',
 'movsxd',
 'cqo',
-'cmpxchg16b'
+'cmpxchg16b',
+'movntss',
+'movntsd',
+'insertq',
+'extrq',
+'lzcnt',
+'popcnt'
 );

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1370;
+1380;

+ 7 - 1
compiler/i386/i386op.inc

@@ -594,5 +594,11 @@ A_RDM,
 A_MOVABS,
 A_MOVSXD,
 A_CQO,
-A_CMPXCHG16B
+A_CMPXCHG16B,
+A_MOVNTSS,
+A_MOVNTSD,
+A_INSERTQ,
+A_EXTRQ,
+A_LZCNT,
+A_POPCNT
 );

+ 6 - 0
compiler/i386/i386prop.inc

@@ -594,5 +594,11 @@
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_MRAX, Ch_WRDX, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None))
 );

+ 70 - 0
compiler/i386/i386tab.inc

@@ -9589,5 +9589,75 @@
     optypes : (ot_none,ot_none,ot_none);
     code    : #2#15#58;
     flags   : if_p6 or if_cyrix
+  ),
+  (
+    opcode  : A_MOVNTSS;
+    ops     : 2;
+    optypes : (ot_memory,ot_xmmreg,ot_none);
+    code    : #65#192#219#62#2#15#43#63;
+    flags   : if_sse4 or if_sd
+  ),
+  (
+    opcode  : A_MOVNTSD;
+    ops     : 2;
+    optypes : (ot_memory,ot_xmmreg,ot_none);
+    code    : #65#192#220#213#62#2#15#43#63;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_INSERTQ;
+    ops     : 1768187245;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #76#2#15#120#63#253#18#253#19;
+    flags   : if_sse4 or if_sb
+  ),
+  (
+    opcode  : A_INSERTQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #72#220#62#2#15#121#63;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_EXTRQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_immediate,ot_immediate);
+    code    : #128#222#62#2#15#120#63#253#17#253#18;
+    flags   : if_sse4 or if_sb
+  ),
+  (
+    opcode  : A_EXTRQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #72#222#62#2#15#121#63;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_LZCNT;
+    ops     : 2;
+    optypes : (ot_reg16,ot_regmem,ot_none);
+    code    : #72#208#193#219#62#2#15#189#63;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_LZCNT;
+    ops     : 2;
+    optypes : (ot_reg32 or ot_bits64,ot_regmem,ot_none);
+    code    : #72#209#193#219#62#2#15#189#63;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg16,ot_regmem,ot_none);
+    code    : #72#208#193#219#62#2#15#184#63;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg32 or ot_bits64,ot_regmem,ot_none);
+    code    : #72#209#193#219#62#2#15#184#63;
+    flags   : if_386 or if_sm or if_sse4
   )
 );

+ 2 - 0
compiler/x86/aasmcpu.pas

@@ -316,6 +316,8 @@ implementation
        {IF_PFMASK = longint($F001FF00);}
        { SVM instructions  }
        IF_SVM    = $00100000;
+       { SSE4 instructions  }
+       IF_SSE4   = $00200000;
 
        IF_8086   = $00000000;  { 8086 instruction  }
        IF_186    = $01000000;  { 186+ instruction  }

+ 35 - 0
compiler/x86/x86ins.dat

@@ -3244,3 +3244,38 @@ void                  \326\1\x99                      X86_64
 [CMPXCHG16B,cmpxchg16bX]
 (Ch_All, Ch_None, Ch_None)
 mem                   \320\323\2\x0F\xC7\201          X86_64
+
+;
+; SSE4
+;
+
+
+; note: \333=F3h, \334=F2h, \336=66h, \76=REX, \77=EA, \325=no REX.W=1 for qword, \375=unsigned
+
+[MOVNTSS]
+(Ch_All, Ch_None, Ch_None)
+mem,xmmreg              \101\300\333\76\2\x0F\x2B\77                    SSE4,SD
+
+[MOVNTSD]
+(Ch_All, Ch_None, Ch_None)
+mem,xmmreg              \101\300\334\325\76\2\x0F\x2B\77                SSE4 ;,SQ
+
+[INSERTQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,xmmreg,imm,imm   \110\334\76\2\x0F\x78\77\375\22\375\23          SSE4,SB
+xmmreg,xmmreg           \110\334\76\2\x0F\x79\77                        SSE4
+
+[EXTRQ]
+(Ch_All, Ch_None, Ch_None)
+xmmreg,imm,imm          \200\336\76\2\x0F\x78\77\375\21\375\22          SSE4,SB
+xmmreg,xmmreg           \110\336\76\2\x0F\x79\77                        SSE4
+
+[LZCNT]
+(Ch_All, Ch_None, Ch_None)
+reg16,regmem            \110\320\301\333\76\2\x0F\xBD\77                386,SM,SSE4
+reg32|64,regmem         \110\321\301\333\76\2\x0F\xBD\77                386,SM,SSE4
+
+[POPCNT]
+(Ch_All, Ch_None, Ch_None)
+reg16,regmem            \110\320\301\333\76\2\x0F\xB8\77                386,SM,SSE4
+reg32|64,regmem         \110\321\301\333\76\2\x0F\xB8\77                386,SM,SSE4

+ 7 - 1
compiler/x86_64/x8664ats.inc

@@ -594,5 +594,11 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
-attsufINT
+attsufINT,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE
 );

+ 7 - 1
compiler/x86_64/x8664att.inc

@@ -594,5 +594,11 @@
 'movabs',
 'movslq',
 'cqto',
-'cmpxchg16b'
+'cmpxchg16b',
+'movntss',
+'movntsd',
+'insertq',
+'extrq',
+'lzcnt',
+'popcnt'
 );

+ 7 - 1
compiler/x86_64/x8664int.inc

@@ -594,5 +594,11 @@
 'movabs',
 'movsxd',
 'cqo',
-'cmpxchg16b'
+'cmpxchg16b',
+'movntss',
+'movntsd',
+'insertq',
+'extrq',
+'lzcnt',
+'popcnt'
 );

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1369;
+1379;

+ 7 - 1
compiler/x86_64/x8664op.inc

@@ -594,5 +594,11 @@ A_RDM,
 A_MOVABS,
 A_MOVSXD,
 A_CQO,
-A_CMPXCHG16B
+A_CMPXCHG16B,
+A_MOVNTSS,
+A_MOVNTSD,
+A_INSERTQ,
+A_EXTRQ,
+A_LZCNT,
+A_POPCNT
 );

+ 6 - 0
compiler/x86_64/x8664pro.inc

@@ -594,5 +594,11 @@
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_MRAX, Ch_WRDX, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None))
 );

+ 70 - 0
compiler/x86_64/x8664tab.inc

@@ -9582,5 +9582,75 @@
     optypes : (ot_memory,ot_none,ot_none);
     code    : #208#211#2#15#199#129;
     flags   : if_x86_64
+  ),
+  (
+    opcode  : A_MOVNTSS;
+    ops     : 2;
+    optypes : (ot_memory,ot_xmmreg,ot_none);
+    code    : #65#192#219#62#2#15#43#63;
+    flags   : if_sse4 or if_sd
+  ),
+  (
+    opcode  : A_MOVNTSD;
+    ops     : 2;
+    optypes : (ot_memory,ot_xmmreg,ot_none);
+    code    : #65#192#220#213#62#2#15#43#63;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_INSERTQ;
+    ops     : 1768187245;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_immediate);
+    code    : #76#2#15#120#63#253#18#253#19;
+    flags   : if_sse4 or if_sb
+  ),
+  (
+    opcode  : A_INSERTQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #72#220#62#2#15#121#63;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_EXTRQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_immediate,ot_immediate);
+    code    : #128#222#62#2#15#120#63#253#17#253#18;
+    flags   : if_sse4 or if_sb
+  ),
+  (
+    opcode  : A_EXTRQ;
+    ops     : 2;
+    optypes : (ot_xmmreg,ot_xmmreg,ot_none);
+    code    : #72#222#62#2#15#121#63;
+    flags   : if_sse4
+  ),
+  (
+    opcode  : A_LZCNT;
+    ops     : 2;
+    optypes : (ot_reg16,ot_regmem,ot_none);
+    code    : #72#208#193#219#62#2#15#189#63;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_LZCNT;
+    ops     : 2;
+    optypes : (ot_reg32 or ot_bits64,ot_regmem,ot_none);
+    code    : #72#209#193#219#62#2#15#189#63;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg16,ot_regmem,ot_none);
+    code    : #72#208#193#219#62#2#15#184#63;
+    flags   : if_386 or if_sm or if_sse4
+  ),
+  (
+    opcode  : A_POPCNT;
+    ops     : 2;
+    optypes : (ot_reg32 or ot_bits64,ot_regmem,ot_none);
+    code    : #72#209#193#219#62#2#15#184#63;
+    flags   : if_386 or if_sm or if_sse4
   )
 );