22 rokov pred · dd2bb53aa5
--- a/compiler/i386/cpuinfo.pas
+++ b/compiler/i386/cpuinfo.pas
@@ -65,8 +65,7 @@ Type
 
				       fpu_soft,
			
 
				       fpu_x87,
			
 
				       fpu_sse,
			
 
				-      fpu_sse2,
			
 
				-      fpu_sse3
			
 
				+      fpu_sse2
			
 
				      );
			
 
				 
			
 
				 
			
@@ -113,19 +112,22 @@ Const
 
				      'SOFT',
			
 
				      'X87',
			
 
				      'SSE',
			
 
				-     'SSE2',
			
 
				-     'SSE3'
			
 
				+     'SSE2'
			
 
				    );
			
 
				 
			
 
				-   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2,fpu_sse3];
			
 
				-   sse_doublescalar : set of tfputype = [];
			
 
				+   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2];
			
 
				+   sse_doublescalar : set of tfputype = [fpu_sse2];
			
 
				 
			
 
				 Implementation
			
 
				 
			
 
				 end.
			
 
				 {
			
 
				   $Log$
			
 
				-  Revision 1.21  2003-12-25 01:07:09  florian
			
 
				+  Revision 1.22  2003-12-25 12:01:35  florian
			
 
				+    + possible sse2 unit usage for double calculations
			
 
				+    * some sse2 assembler issues fixed
			
 
				+
			
 
				+  Revision 1.21  2003/12/25 01:07:09  florian
			
 
				     + $fputype directive support
			
 
				     + single data type operations with sse unit
			
 
				     * fixed more x86-64 stuff
			
--- a/compiler/i386/i386att.inc
+++ b/compiler/i386/i386att.inc
@@ -197,6 +197,7 @@
 
				 'movd',
			
 
				 'movq',
			
 
				 'movsb',
			
 
				+'movsd',
			
 
				 'movsl',
			
 
				 'movsw',
			
 
				 'movs',
			
@@ -493,6 +494,7 @@
 
				 'punpckhqdq',
			
 
				 'punpcklqdq',
			
 
				 'addpd',
			
 
				+'addsd',
			
 
				 'andnpd',
			
 
				 'andpd',
			
 
				 'cmpeqpd',
			
--- a/compiler/i386/i386atts.inc
+++ b/compiler/i386/i386atts.inc
@@ -199,6 +199,7 @@ attsufNONE,
 
				 attsufNONE,
			
 
				 attsufNONE,
			
 
				 attsufNONE,
			
 
				+attsufNONE,
			
 
				 attsufINT,
			
 
				 attsufINT,
			
 
				 attsufINT,
			
@@ -560,5 +561,6 @@ attsufNONE,
 
				 attsufNONE,
			
 
				 attsufNONE,
			
 
				 attsufNONE,
			
 
				+attsufNONE,
			
 
				 attsufNONE
			
 
				 );
			
--- a/compiler/i386/i386int.inc
+++ b/compiler/i386/i386int.inc
@@ -198,6 +198,7 @@
 
				 'movq',
			
 
				 'movsb',
			
 
				 'movsd',
			
 
				+'movsl',
			
 
				 'movsw',
			
 
				 'movsx',
			
 
				 'movzx',
			
@@ -493,6 +494,7 @@
 
				 'punpckhqdq',
			
 
				 'punpcklqdq',
			
 
				 'addpd',
			
 
				+'addsd',
			
 
				 'andnpd',
			
 
				 'andpd',
			
 
				 'cmpeqpd',
			
--- a/compiler/i386/i386nop.inc
+++ b/compiler/i386/i386nop.inc
@@ -1,2 +1,2 @@
 
				 { don't edit, this file is generated from x86ins.dat }
			
 
				-1651;
			
 
				+1652;
			
--- a/compiler/i386/i386op.inc
+++ b/compiler/i386/i386op.inc
@@ -198,6 +198,7 @@ A_MOVD,
 
				 A_MOVQ,
			
 
				 A_MOVSB,
			
 
				 A_MOVSD,
			
 
				+A_MOVSL,
			
 
				 A_MOVSW,
			
 
				 A_MOVSX,
			
 
				 A_MOVZX,
			
@@ -493,6 +494,7 @@ A_PSUBQ,
 
				 A_PUNPCKHQDQ,
			
 
				 A_PUNPCKLQDQ,
			
 
				 A_ADDPD,
			
 
				+A_ADDSD,
			
 
				 A_ANDNPD,
			
 
				 A_ANDPD,
			
 
				 A_CMPEQPD,
			
--- a/compiler/i386/i386prop.inc
+++ b/compiler/i386/i386prop.inc
@@ -199,6 +199,7 @@
 
				 (Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				 (Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				 (Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				+(Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
			
 
				 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
			
 
				 (Ch: (Ch_RWEAX, Ch_WEDX, Ch_WFlags)),
			
@@ -560,5 +561,6 @@
 
				 (Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				 (Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				 (Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				+(Ch: (Ch_All, Ch_None, Ch_None)),
			
 
				 (Ch: (Ch_All, Ch_None, Ch_None))
			
 
				 );
			
--- a/compiler/i386/i386tab.inc
+++ b/compiler/i386/i386tab.inc
@@ -4529,6 +4529,13 @@
 
				     code    : #193#3#242#15#16#72;
			
 
				     flags   : if_willamette or if_sse2
			
 
				   ),
			
 
				+  (
			
 
				+    opcode  : A_MOVSL;
			
 
				+    ops     : 0;
			
 
				+    optypes : (ot_none,ot_none,ot_none);
			
 
				+    code    : #209#1#165;
			
 
				+    flags   : if_386
			
 
				+  ),
			
 
				   (
			
 
				     opcode  : A_MOVSW;
			
 
				     ops     : 0;
			
@@ -10564,14 +10571,14 @@
 
				     flags   : if_willamette or if_sse2 or if_sm
			
 
				   ),
			
 
				   (
			
 
				-    opcode  : A_ADDPD;
			
 
				+    opcode  : A_ADDSD;
			
 
				     ops     : 2;
			
 
				     optypes : (ot_xmmreg,ot_xmmreg,ot_none);
			
 
				     code    : #217#3#242#15#88#72;
			
 
				     flags   : if_willamette or if_sse2
			
 
				   ),
			
 
				   (
			
 
				-    opcode  : A_ADDPD;
			
 
				+    opcode  : A_ADDSD;
			
 
				     ops     : 2;
			
 
				     optypes : (ot_xmmreg,ot_memory,ot_none);
			
 
				     code    : #193#217#3#242#15#88#72;
			
--- a/compiler/x86/aasmcpu.pas
+++ b/compiler/x86/aasmcpu.pas
@@ -271,6 +271,8 @@ implementation
 
				        IF_SSE2   = $00020000;
			
 
				        { SSE3 instructions  }
			
 
				        IF_SSE3   = $00040000;
			
 
				+       { SSE64 instructions  }
			
 
				+       IF_SSE64   = $00080000;  { own bit: must not share IF_SSE3's $00040000 }
			
 
				        { the mask for processor types  }
			
 
				        {IF_PMASK  = longint($FF000000);}
			
 
				        { the mask for disassembly "prefer"  }
			
@@ -287,6 +289,7 @@ implementation
 
				        IF_WILLAMETTE = $08000000;
			
 
				        { Prescott instructions }
			
 
				        IF_PRESCOTT = $09000000;
			
 
				+       IF_ATHLON64 = $0a000000;
			
 
				        IF_CYRIX  = $10000000;  { Cyrix-specific instruction  }
			
 
				        IF_AMD    = $20000000;  { AMD-specific instruction  }
			
 
				        { added flags }
			
@@ -1532,7 +1535,7 @@ implementation
 
				             209,
			
 
				             210,
			
 
				             217,218: ;
			
 
				-            219 :
			
 
				+            219,220 :
			
 
				               inc(len);
			
 
				             216 :
			
 
				               begin
			
@@ -1837,6 +1840,11 @@ implementation
 
				                 bytes[0]:=$f3;
			
 
				                 sec.writebytes(bytes,1);
			
 
				               end;
			
 
				+            220 :
			
 
				+              begin
			
 
				+                bytes[0]:=$f2;
			
 
				+                sec.writebytes(bytes,1);
			
 
				+              end;
			
 
				             31,
			
 
				             48,49,50,
			
 
				             224,225,226 :
			
@@ -2351,7 +2359,11 @@ implementation
 
				 end.
			
 
				 {
			
 
				   $Log$
			
 
				-  Revision 1.41  2003-12-25 01:07:09  florian
			
 
				+  Revision 1.42  2003-12-25 12:01:35  florian
			
 
				+    + possible sse2 unit usage for double calculations
			
 
				+    * some sse2 assembler issues fixed
			
 
				+
			
 
				+  Revision 1.41  2003/12/25 01:07:09  florian
			
 
				     + $fputype directive support
			
 
				     + single data type operations with sse unit
			
 
				     * fixed more x86-64 stuff
			
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@@ -817,18 +817,9 @@ unit cgx86;
 
				             ( { OS_F32 }
			
 
				               A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
			
 
				             ),
			
 
				-          { Intel did again a "nice" job: they added packed double operations (*PD) to SSE2 but
			
 
				-            no scalar ones (*SD)
			
 
				-          }
			
 
				-          {$ifdef x86_64}
			
 
				             ( { OS_F64 }
			
 
				-              A_NOP,{!!! A_ADDSD}A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
			
 
				+              A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
			
 
				             )
			
 
				-          {$else x86_64}
			
 
				-            ( { OS_F64 }
			
 
				-              A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
			
 
				-            )
			
 
				-          {$endif x86_64}
			
 
				           ),
			
 
				           ( { vectorized/packed }
			
 
				             ( { OS_F32 }
			
@@ -1457,7 +1448,7 @@ unit cgx86;
 
				                     list.concat(Taicpu.op_none(A_REP,S_NO));
			
 
				                   end;
			
 
				                 if helpsize>0 then
			
 
				-                  list.concat(Taicpu.op_none(A_MOVSD,S_NO));
			
 
				+                  list.concat(Taicpu.op_none(A_MOVSL,S_NO));
			
 
				                 if len>1 then
			
 
				                   begin
			
 
				                     dec(len,2);
			
@@ -1593,7 +1584,7 @@ unit cgx86;
 
				         case opsize of
			
 
				           S_B : list.concat(Taicpu.Op_none(A_MOVSB,S_NO));
			
 
				           S_W : list.concat(Taicpu.Op_none(A_MOVSW,S_NO));
			
 
				-          S_L : list.concat(Taicpu.Op_none(A_MOVSD,S_NO));
			
 
				+          S_L : list.concat(Taicpu.Op_none(A_MOVSL,S_NO));
			
 
				         end;
			
 
				         ungetregister(list,NR_EDI);
			
 
				         ungetregister(list,NR_ECX);
			
@@ -1919,7 +1910,11 @@ unit cgx86;
 
				 end.
			
 
				 {
			
 
				   $Log$
			
 
				-  Revision 1.96  2003-12-25 01:07:09  florian
			
 
				+  Revision 1.97  2003-12-25 12:01:35  florian
			
 
				+    + possible sse2 unit usage for double calculations
			
 
				+    * some sse2 assembler issues fixed
			
 
				+
			
 
				+  Revision 1.96  2003/12/25 01:07:09  florian
			
 
				     + $fputype directive support
			
 
				     + single data type operations with sse unit
			
 
				     * fixed more x86-64 stuff
			
--- a/compiler/x86/x86ins.dat
+++ b/compiler/x86/x86ins.dat
@@ -1248,7 +1248,7 @@ xmmreg,mem            \301\333\2\x0F\x7E\110          WILLAMETTE,SSE2
 
				 (Ch_All, Ch_None, Ch_None)
			
 
				 void                  \1\xA4                          8086
			
 
				 
			
 
				-[MOVSD,movsl]
			
 
				+[MOVSD]
			
 
				 (Ch_All, Ch_None, Ch_None)
			
 
				 void                  \321\1\xA5                      386
			
 
				 xmmreg,xmmreg         \3\xF2\x0F\x10\110              WILLAMETTE,SSE2
			
@@ -1256,6 +1256,10 @@ xmmreg,xmmreg         \3\xF2\x0F\x11\110              WILLAMETTE,SSE2
 
				 mem,xmmreg            \300\3\xF2\x0F\x11\101          WILLAMETTE,SSE2
			
 
				 xmmreg,mem            \301\3\xF2\x0F\x10\110          WILLAMETTE,SSE2
			
 
				 
			
 
				+[MOVSL]
			
 
				+(Ch_All, Ch_None, Ch_None)
			
 
				+void                  \321\1\xA5                      386
			
 
				+
			
 
				 [MOVSW]
			
 
				 (Ch_All, Ch_None, Ch_None)
			
 
				 void                  \320\1\xA5                      8086
			
@@ -3040,6 +3044,9 @@ xmmreg,mem              \301\3\x66\x0F\x6C\110          WILLAMETTE,SSE2,SM
 
				 (Ch_All, Ch_None, Ch_None)
			
 
				 xmmreg,xmmreg           \331\3\x66\x0F\x58\110          WILLAMETTE,SSE2
			
 
				 xmmreg,mem              \301\331\3\x66\x0F\x58\110      WILLAMETTE,SSE2,SM
			
 
				+
			
 
				+[ADDSD]
			
 
				+(Ch_All, Ch_None, Ch_None)
			
 
				 xmmreg,xmmreg           \331\3\xF2\x0F\x58\110          WILLAMETTE,SSE2
			
 
				 xmmreg,mem              \301\331\3\xF2\x0F\x58\110      WILLAMETTE,SSE2
			
 
				 
			
@@ -3394,7 +3401,11 @@ xmmreg,xmmreg           \3\xF3\x0F\x12\110              PRESCOTT,SSE3
 
				 
			
 
				 ;
			
 
				 ; $Log$
			
 
				-; Revision 1.5  2003-11-22 00:35:42  jonas
			
 
				+; Revision 1.6  2003-12-25 12:01:35  florian
			
 
				+;   + possible sse2 unit usage for double calculations
			
 
				+;   * some sse2 assembler issues fixed
			
 
				+;
			
 
				+; Revision 1.5  2003/11/22 00:35:42  jonas
			
 
				 ;   * fixed properties for MOVSB
			
 
				 ;
			
 
				 ; Revision 1.4  2003/11/13 18:54:22  jonas