Переглянути джерело

+ possible sse2 unit usage for double calculations
* some sse2 assembler issues fixed

florian 21 рік тому
батько
коміт
dd2bb53aa5

+ 9 - 7
compiler/i386/cpuinfo.pas

@@ -65,8 +65,7 @@ Type
       fpu_soft,
       fpu_x87,
       fpu_sse,
-      fpu_sse2,
-      fpu_sse3
+      fpu_sse2
      );
 
 
@@ -113,19 +112,22 @@ Const
      'SOFT',
      'X87',
      'SSE',
-     'SSE2',
-     'SSE3'
+     'SSE2'
    );
 
-   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2,fpu_sse3];
-   sse_doublescalar : set of tfputype = [];
+   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2];
+   sse_doublescalar : set of tfputype = [fpu_sse2];
 
 Implementation
 
 end.
 {
   $Log$
-  Revision 1.21  2003-12-25 01:07:09  florian
+  Revision 1.22  2003-12-25 12:01:35  florian
+    + possible sse2 unit usage for double calculations
+    * some sse2 assembler issues fixed
+
+  Revision 1.21  2003/12/25 01:07:09  florian
     + $fputype directive support
     + single data type operations with sse unit
     * fixed more x86-64 stuff

+ 2 - 0
compiler/i386/i386att.inc

@@ -197,6 +197,7 @@
 'movd',
 'movq',
 'movsb',
+'movsd',
 'movsl',
 'movsw',
 'movs',
@@ -493,6 +494,7 @@
 'punpckhqdq',
 'punpcklqdq',
 'addpd',
+'addsd',
 'andnpd',
 'andpd',
 'cmpeqpd',

+ 2 - 0
compiler/i386/i386atts.inc

@@ -199,6 +199,7 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufINT,
 attsufINT,
 attsufINT,
@@ -560,5 +561,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 2 - 0
compiler/i386/i386int.inc

@@ -198,6 +198,7 @@
 'movq',
 'movsb',
 'movsd',
+'movsl',
 'movsw',
 'movsx',
 'movzx',
@@ -493,6 +494,7 @@
 'punpckhqdq',
 'punpcklqdq',
 'addpd',
+'addsd',
 'andnpd',
 'andpd',
 'cmpeqpd',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1651;
+1652;

+ 2 - 0
compiler/i386/i386op.inc

@@ -198,6 +198,7 @@ A_MOVD,
 A_MOVQ,
 A_MOVSB,
 A_MOVSD,
+A_MOVSL,
 A_MOVSW,
 A_MOVSX,
 A_MOVZX,
@@ -493,6 +494,7 @@ A_PSUBQ,
 A_PUNPCKHQDQ,
 A_PUNPCKLQDQ,
 A_ADDPD,
+A_ADDSD,
 A_ANDNPD,
 A_ANDPD,
 A_CMPEQPD,

+ 2 - 0
compiler/i386/i386prop.inc

@@ -199,6 +199,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_RWEAX, Ch_WEDX, Ch_WFlags)),
@@ -560,5 +561,6 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None))
 );

+ 9 - 2
compiler/i386/i386tab.inc

@@ -4529,6 +4529,13 @@
     code    : #193#3#242#15#16#72;
     flags   : if_willamette or if_sse2
   ),
+  (
+    opcode  : A_MOVSL;
+    ops     : 0;
+    optypes : (ot_none,ot_none,ot_none);
+    code    : #209#1#165;
+    flags   : if_386
+  ),
   (
     opcode  : A_MOVSW;
     ops     : 0;
@@ -10564,14 +10571,14 @@
     flags   : if_willamette or if_sse2 or if_sm
   ),
   (
-    opcode  : A_ADDPD;
+    opcode  : A_ADDSD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_xmmreg,ot_none);
     code    : #217#3#242#15#88#72;
     flags   : if_willamette or if_sse2
   ),
   (
-    opcode  : A_ADDPD;
+    opcode  : A_ADDSD;
     ops     : 2;
     optypes : (ot_xmmreg,ot_memory,ot_none);
     code    : #193#217#3#242#15#88#72;

+ 14 - 2
compiler/x86/aasmcpu.pas

@@ -271,6 +271,8 @@ implementation
        IF_SSE2   = $00020000;
        { SSE3 instructions  }
        IF_SSE3   = $00040000;
+       { SSE64 instructions; uses its own bit because $00040000 is already IF_SSE3 }
+       IF_SSE64  = $00080000;
        { the mask for processor types  }
        {IF_PMASK  = longint($FF000000);}
        { the mask for disassembly "prefer"  }
@@ -287,6 +289,7 @@ implementation
        IF_WILLAMETTE = $08000000;
        { Prescott instructions }
        IF_PRESCOTT = $09000000;
+       IF_ATHLON64 = $0a000000;
        IF_CYRIX  = $10000000;  { Cyrix-specific instruction  }
        IF_AMD    = $20000000;  { AMD-specific instruction  }
        { added flags }
@@ -1532,7 +1535,7 @@ implementation
             209,
             210,
             217,218: ;
-            219 :
+            219,220 :
               inc(len);
             216 :
               begin
@@ -1837,6 +1840,11 @@ implementation
                 bytes[0]:=$f3;
                 sec.writebytes(bytes,1);
               end;
+            220 :
+              begin
+                bytes[0]:=$f2;
+                sec.writebytes(bytes,1);
+              end;
             31,
             48,49,50,
             224,225,226 :
@@ -2351,7 +2359,11 @@ implementation
 end.
 {
   $Log$
-  Revision 1.41  2003-12-25 01:07:09  florian
+  Revision 1.42  2003-12-25 12:01:35  florian
+    + possible sse2 unit usage for double calculations
+    * some sse2 assembler issues fixed
+
+  Revision 1.41  2003/12/25 01:07:09  florian
     + $fputype directive support
     + single data type operations with sse unit
     * fixed more x86-64 stuff

+ 8 - 13
compiler/x86/cgx86.pas

@@ -817,18 +817,9 @@ unit cgx86;
             ( { OS_F32 }
               A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
             ),
-          { Intel did again a "nice" job: they added packed double operations (*PD) to SSE2 but
-            no scalar ones (*SD)
-          }
-          {$ifdef x86_64}
             ( { OS_F64 }
-              A_NOP,{!!! A_ADDSD}A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
+              A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
             )
-          {$else x86_64}
-            ( { OS_F64 }
-              A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
-            )
-          {$endif x86_64}
           ),
           ( { vectorized/packed }
             ( { OS_F32 }
@@ -1457,7 +1448,7 @@ unit cgx86;
                     list.concat(Taicpu.op_none(A_REP,S_NO));
                   end;
                 if helpsize>0 then
-                  list.concat(Taicpu.op_none(A_MOVSD,S_NO));
+                  list.concat(Taicpu.op_none(A_MOVSL,S_NO));
                 if len>1 then
                   begin
                     dec(len,2);
@@ -1593,7 +1584,7 @@ unit cgx86;
         case opsize of
           S_B : list.concat(Taicpu.Op_none(A_MOVSB,S_NO));
           S_W : list.concat(Taicpu.Op_none(A_MOVSW,S_NO));
-          S_L : list.concat(Taicpu.Op_none(A_MOVSD,S_NO));
+          S_L : list.concat(Taicpu.Op_none(A_MOVSL,S_NO));
         end;
         ungetregister(list,NR_EDI);
         ungetregister(list,NR_ECX);
@@ -1919,7 +1910,11 @@ unit cgx86;
 end.
 {
   $Log$
-  Revision 1.96  2003-12-25 01:07:09  florian
+  Revision 1.97  2003-12-25 12:01:35  florian
+    + possible sse2 unit usage for double calculations
+    * some sse2 assembler issues fixed
+
+  Revision 1.96  2003/12/25 01:07:09  florian
     + $fputype directive support
     + single data type operations with sse unit
     * fixed more x86-64 stuff

+ 13 - 2
compiler/x86/x86ins.dat

@@ -1248,7 +1248,7 @@ xmmreg,mem            \301\333\2\x0F\x7E\110          WILLAMETTE,SSE2
 (Ch_All, Ch_None, Ch_None)
 void                  \1\xA4                          8086
 
-[MOVSD,movsl]
+[MOVSD]
 (Ch_All, Ch_None, Ch_None)
 void                  \321\1\xA5                      386
 xmmreg,xmmreg         \3\xF2\x0F\x10\110              WILLAMETTE,SSE2
@@ -1256,6 +1256,10 @@ xmmreg,xmmreg         \3\xF2\x0F\x11\110              WILLAMETTE,SSE2
 mem,xmmreg            \300\3\xF2\x0F\x11\101          WILLAMETTE,SSE2
 xmmreg,mem            \301\3\xF2\x0F\x10\110          WILLAMETTE,SSE2
 
+[MOVSL]
+(Ch_All, Ch_None, Ch_None)
+void                  \321\1\xA5                      386
+
 [MOVSW]
 (Ch_All, Ch_None, Ch_None)
 void                  \320\1\xA5                      8086
@@ -3040,6 +3044,9 @@ xmmreg,mem              \301\3\x66\x0F\x6C\110          WILLAMETTE,SSE2,SM
 (Ch_All, Ch_None, Ch_None)
 xmmreg,xmmreg           \331\3\x66\x0F\x58\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\x66\x0F\x58\110      WILLAMETTE,SSE2,SM
+
+[ADDSD]
+(Ch_All, Ch_None, Ch_None)
 xmmreg,xmmreg           \331\3\xF2\x0F\x58\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\xF2\x0F\x58\110      WILLAMETTE,SSE2
 
@@ -3394,7 +3401,11 @@ xmmreg,xmmreg           \3\xF3\x0F\x12\110              PRESCOTT,SSE3
 
 ;
 ; $Log$
-; Revision 1.5  2003-11-22 00:35:42  jonas
+; Revision 1.6  2003-12-25 12:01:35  florian
+;   + possible sse2 unit usage for double calculations
+;   * some sse2 assembler issues fixed
+;
+; Revision 1.5  2003/11/22 00:35:42  jonas
 ;   * fixed properties for MOVSB
 ;
 ; Revision 1.4  2003/11/13 18:54:22  jonas