пре 19 година · fb5e396881
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@@ -157,7 +157,8 @@ interface
 
				         R_SUBFD,   { = 7; Float that allocates 2 FPU registers }
			
 
				         R_SUBFQ,   { = 8; Float that allocates 4 FPU registers }
			
 
				         R_SUBMMS,  { = 9; single scalar in multi media register }
			
 
				-        R_SUBMMD   { = 10; double scalar in multi media register }
			
 
				+        R_SUBMMD,  { = 10; double scalar in multi media register }
			
 
				+        R_SUBMMWHOLE  { = 11; complete MM register, size depends on CPU }
			
 
				       );
			
 
				 
			
 
				       TSuperRegister = type word;
			
@@ -563,6 +564,8 @@ implementation
 
				             result:=result+'md';
			
 
				           R_SUBMMS:
			
 
				             result:=result+'ms';
			
 
				+          R_SUBMMWHOLE:
			
 
				+            result:=result+'ma';
			
 
				           else
			
 
				             internalerror(200308252);
			
 
				         end;
			
--- a/compiler/defutil.pas
+++ b/compiler/defutil.pas
@@ -208,12 +208,18 @@ interface
 
				     }
			
 
				     procedure getrange(def : tdef;var l : TConstExprInt;var h : TConstExprInt);
			
 
				 
			
 
				+    { returns true if the passed type is a vector: a regular array of single or double floats }
			
 
				+    function is_vector(p : tdef) : boolean;
			
 
				+
			
 
				     { some type helper routines for MMX support }
			
 
				     function is_mmx_able_array(p : tdef) : boolean;
			
 
				 
			
 
				     {# returns the mmx type }
			
 
				     function mmx_type(p : tdef) : tmmxtype;
			
 
				 
			
 
				+    { returns true if the passed (array) type fits into an mm register }
			
 
				+    function fits_in_mm_register(p : tdef) : boolean;
			
 
				+
			
 
				     {# From a definition return the abstract code generator size enum. It is
			
 
				        to note that the value returned can be @var(OS_NO) }
			
 
				     function def_cgsize(def: tdef): tcgsize;
			
@@ -808,6 +814,42 @@ implementation
 
				       end;
			
 
				 
			
 
				 
			
 
				+    { Returns true if p is an array for which is_special_array is false and
				+      whose element type is a float of kind s32real or s64real }
				+    function is_vector(p : tdef) : boolean;
				+      var
				+        elemdef : tdef;
				+      begin
				+        result:=false;
				+        { anything that is not a regular array can be rejected right away }
				+        if (p.deftype<>arraydef) or is_special_array(p) then
				+          exit;
				+        elemdef:=tarraydef(p).elementtype.def;
				+        { the tfloatdef cast is only reached after the floatdef check succeeded }
				+        result:=(elemdef.deftype=floatdef) and
				+                (tfloatdef(elemdef).typ in [s32real,s64real]);
				+      end;
			
 
				+
			
 
				+
			
 
				+    { Returns true if the passed type is a vector (see is_vector) that fits
				+      into one mm register. On x86 this is accepted for
				+        - array[0..3] of s32real
				+        - array[0..1] of s64real
				+      On all other targets the result is always false.
				+
				+      Both alternatives are grouped below the is_vector guard: "and" binds
				+      tighter than "or", so without the extra parentheses the s64real
				+      alternative would be evaluated for any def, including non-array defs,
				+      through unchecked tarraydef/tfloatdef typecasts. }
				+    function fits_in_mm_register(p : tdef) : boolean;
				+      begin
				+{$ifdef x86}
				+        { is_vector already guarantees an arraydef with a floatdef element
				+          type, so the typecasts below are only reached when they are valid }
				+        result:= is_vector(p) and
				+                 (tarraydef(p).lowrange=0) and
				+                 (
				+                  (
				+                   (tarraydef(p).highrange=3) and
				+                   (tfloatdef(tarraydef(p).elementtype.def).typ=s32real)
				+                  ) or
				+                  (
				+                   (tarraydef(p).highrange=1) and
				+                   (tfloatdef(tarraydef(p).elementtype.def).typ=s64real)
				+                  )
				+                 );
				+{$else x86}
				+        result:=false;
				+{$endif x86}
				+      end;
			
 
				+
			
 
				+
			
 
				     function is_mmx_able_array(p : tdef) : boolean;
			
 
				       begin
			
 
				 {$ifdef SUPPORT_MMX}
			
--- a/compiler/globtype.pas
+++ b/compiler/globtype.pas
@@ -127,6 +127,7 @@ than 255 characters. That's why using Ansi Strings}
 
				          cs_load_objpas_unit,
			
 
				          cs_load_gpc_unit,
			
 
				          cs_load_fpcylix_unit,
			
 
				+         cs_support_vectors,
			
 
				          { browser }
			
 
				          cs_browser_log,
			
 
				          { debuginfo }
			
--- a/compiler/htypechk.pas
+++ b/compiler/htypechk.pas
@@ -229,9 +229,11 @@ implementation
 
				               end;
			
 
				             arraydef :
			
 
				               begin
			
 
				-                { not mmx }
			
 
				-                if (cs_mmx in aktlocalswitches) and
			
 
				-                   is_mmx_able_array(ld) then
			
 
				+                { not vector/mmx }
			
 
				+                if ((cs_mmx in aktlocalswitches) and
			
 
				+                   is_mmx_able_array(ld)) or
			
 
				+                   ((cs_support_vectors in aktglobalswitches) and
			
 
				+                   is_vector(ld)) then
			
 
				                  begin
			
 
				                    allowed:=false;
			
 
				                    exit;
			
--- a/compiler/i386/cpubase.inc
+++ b/compiler/i386/cpubase.inc
@@ -100,7 +100,7 @@
 
				       {# the maximum float size for a processor,           }
			
 
				       OS_FLOAT = OS_F80;
			
 
				       {# the size of a vector register for a processor     }
			
 
				-      OS_VECTOR = OS_M64;
			
 
				+      OS_VECTOR = OS_M128;
			
 
				 
			
 
				 {*****************************************************************************
			
 
				                           Generic Register names
			
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -760,7 +760,7 @@ implementation
 
				         lt:=left.nodetype;
			
 
				 
			
 
				          { but an int/int gives real/real! }
			
 
				-         if nodetype=slashn then
			
 
				+         if (nodetype=slashn) and not(is_vector(left.resulttype.def)) and not(is_vector(right.resulttype.def)) then
			
 
				           begin
			
 
				             if is_currency(left.resulttype.def) and
			
 
				                is_currency(right.resulttype.def) then
			
@@ -1402,6 +1402,18 @@ implementation
 
				               end;
			
 
				             end
			
 
				 {$endif SUPPORT_MMX}
			
 
				+         { vector support, this must be before the zero based array
			
 
				+           check }
			
 
				+         else if (cs_support_vectors in aktglobalswitches) and
			
 
				+                 is_vector(ld) and
			
 
				+                 is_vector(rd) and
			
 
				+                 equal_defs(ld,rd) then
			
 
				+            begin
			
 
				+              if not(nodetype in [addn,subn,xorn,orn,andn,muln,slashn]) then
			
 
				+                CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),ld.typename,rd.typename);
			
 
				+              { both defs must be equal, so taking left or right as resulttype doesn't matter }
			
 
				+              resulttype:=left.resulttype;
			
 
				+            end
			
 
				 
			
 
				          { this is a little bit dangerous, also the left type }
			
 
				          { pointer to should be checked! This broke the mmx support      }
			
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@@ -56,6 +56,7 @@ interface
 
				           procedure second_opmmx;virtual;abstract;
			
 
				 {$endif SUPPORT_MMX}
			
 
				 {$endif x86}
			
 
				+          procedure second_opvector;virtual;abstract;
			
 
				           procedure second_add64bit;virtual;
			
 
				           procedure second_addordinal;virtual;
			
 
				           procedure second_cmpfloat;virtual;abstract;
			
@@ -777,6 +778,10 @@ interface
 
				               { support dynarr=nil }
			
 
				               if is_dynamic_array(left.resulttype.def) then
			
 
				                 second_opordinal
			
 
				+              else
			
 
				+                if (cs_support_vectors in aktglobalswitches) and
			
 
				+                   is_vector(left.resulttype.def) then
			
 
				+                  second_opvector
			
 
				 {$ifdef SUPPORT_MMX}
			
 
				               else
			
 
				                 if is_mmx_able_array(left.resulttype.def) then
			
--- a/compiler/ncgutil.pas
+++ b/compiler/ncgutil.pas
@@ -64,6 +64,7 @@ interface
 
				     procedure location_force_fpureg(list:TAsmList;var l: tlocation;maybeconst:boolean);
			
 
				     procedure location_force_mem(list:TAsmList;var l:tlocation);
			
 
				     procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
			
 
				+    procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
			
 
				 
			
 
				     { Retrieve the location of the data pointed to in location l, when the location is
			
 
				       a register it is expected to contain the address of the data }
			
@@ -727,6 +728,23 @@ implementation
 
				       end;
			
 
				 
			
 
				 
			
 
				+    { Forces the location l into a multimedia register.
				+
				+      Nothing is done if l is already LOC_MMREGISTER, or if it is
				+      LOC_CMMREGISTER and maybeconst is true. Otherwise a new mm register of
				+      size OS_VECTOR is allocated, l is loaded into it, any temp held by l is
				+      released and l is reset to LOC_MMREGISTER/OS_VECTOR pointing at the new
				+      register.
				+
				+      list       : assembler list the load is appended to
				+      l          : location to convert, updated in place
				+      maybeconst : if true, a LOC_CMMREGISTER location is left untouched }
				+    procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
				+      var
				+        reg : tregister;
				+      begin
				+        if (l.loc<>LOC_MMREGISTER)  and
				+           ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
				+          begin
				+            reg:=cg.getmmregister(list,OS_VECTOR);
				+            cg.a_loadmm_loc_reg(list,OS_VECTOR,l,reg,nil);
				+            { the old location must be released before it is overwritten }
				+            location_freetemp(list,l);
				+            location_reset(l,LOC_MMREGISTER,OS_VECTOR);
				+            l.register:=reg;
				+          end;
				+      end;
			
 
				+
			
 
				+
			
 
				     procedure location_force_mem(list:TAsmList;var l:tlocation);
			
 
				       var
			
 
				         r : treference;
			
--- a/compiler/options.pas
+++ b/compiler/options.pas
@@ -1052,6 +1052,8 @@ begin
 
				                          include(initglobalswitches,cs_constructor_name);
			
 
				                        't' :
			
 
				                          include(initmoduleswitches,cs_static_keyword);
			
 
				+                       'v' :
			
 
				+                         include(initglobalswitches,cs_support_vectors);
			
 
				                        '-' :
			
 
				                          begin
			
 
				                            exclude(initglobalswitches,cs_constructor_name);
			
--- a/compiler/x86/aasmcpu.pas
+++ b/compiler/x86/aasmcpu.pas
@@ -2435,6 +2435,8 @@ implementation
 
				                 result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
			
 
				               R_SUBMMS:
			
 
				                 result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
			
 
				+              R_SUBMMWHOLE:
			
 
				+                result:=taicpu.op_ref_reg(A_MOVQ,S_NO,ref,r);
			
 
				               else
			
 
				                 internalerror(200506043);
			
 
				             end;
			
@@ -2455,6 +2457,8 @@ implementation
 
				                 result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
			
 
				               R_SUBMMS:
			
 
				                 result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
			
 
				+              R_SUBMMWHOLE:
			
 
				+                result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,ref);
			
 
				               else
			
 
				                 internalerror(200506042);
			
 
				             end;
			
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@@ -195,6 +195,8 @@ unit cgx86;
 
				             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD);
			
 
				           OS_F32:
			
 
				             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
			
 
				+          OS_M128:
			
 
				+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMWHOLE);
			
 
				           else
			
 
				             internalerror(200506041);
			
 
				         end;
			
@@ -1003,10 +1005,10 @@ unit cgx86;
 
				               these
			
 
				             }
			
 
				             ( { OS_F32 }
			
 
				-              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
			
 
				+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS
			
 
				             ),
			
 
				             ( { OS_F64 }
			
 
				-              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
			
 
				+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD
			
 
				             )
			
 
				           )
			
 
				         );
			
@@ -1041,7 +1043,7 @@ unit cgx86;
 
				         else
			
 
				           internalerror(200312211);
			
 
				         if asmop=A_NOP then
			
 
				-          internalerror(200312215);
			
 
				+          internalerror(200312216);
			
 
				         case loc.loc of
			
 
				           LOC_CREFERENCE,LOC_REFERENCE:
			
 
				             begin
			
--- a/compiler/x86/cpubase.pas
+++ b/compiler/x86/cpubase.pas
@@ -319,7 +319,7 @@ implementation
 
				 
			
 
				     function reg_cgsize(const reg: tregister): tcgsize;
			
 
				       const subreg2cgsize:array[Tsubregister] of Tcgsize =
			
 
				-            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64);
			
 
				+            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64,OS_M128);
			
 
				       begin
			
 
				         case getregtype(reg) of
			
 
				           R_INTREGISTER :
			
@@ -346,7 +346,7 @@ implementation
 
				     function reg2opsize(r:Tregister):topsize;
			
 
				       const
			
 
				         subreg2opsize : array[tsubregister] of topsize =
			
 
				-          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO);
			
 
				+          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
			
 
				       begin
			
 
				         reg2opsize:=S_L;
			
 
				         case getregtype(r) of
			
@@ -429,7 +429,7 @@ implementation
 
				         { for the name the sub reg doesn't matter }
			
 
				         hr:=r;
			
 
				         case getsubreg(hr) of
			
 
				-          R_SUBMMS,R_SUBMMD:
			
 
				+          R_SUBMMS,R_SUBMMD,R_SUBMMWHOLE:
			
 
				             setsubreg(hr,R_SUBNONE);
			
 
				         end;
			
 
				         result:=findreg_by_number_table(hr,regnumber_index);
			
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@@ -55,6 +55,7 @@ unit nx86add;
 
				         procedure second_opmmxset;override;
			
 
				         procedure second_opmmx;override;
			
 
				 {$endif SUPPORT_MMX}
			
 
				+        procedure second_opvector;override;
			
 
				       end;
			
 
				 
			
 
				 
			
@@ -810,6 +811,52 @@ unit nx86add;
 
				       end;
			
 
				 
			
 
				 
			
 
				+    { Second pass for an add node whose operands are vectors.
				+
				+      The node type is mapped to a generic opcode (addn, muln, subn and
				+      slashn are handled) and the operation is emitted through
				+      cg.a_opmm_loc_reg; the result is left in an mm register.
				+
				+      Internal errors:
				+        200610071 - node type other than addn/muln/subn/slashn
				+        200610072 - operand type does not fit into an mm register
				+
				+      NOTE(review): the type check hunk in nadd.pas also accepts
				+      xorn/orn/andn for vectors; those node types end up in internalerror
				+      200610071 here. }
				+    procedure tx86addnode.second_opvector;
				+      var
				+        op : topcg;
				+      begin
				+        pass_left_right;
				+        if (nf_swaped in flags) then
				+          swapleftright;
				+
				+        case nodetype of
				+          addn :
				+            op:=OP_ADD;
				+          muln :
				+            op:=OP_MUL;
				+          subn :
				+            op:=OP_SUB;
				+          slashn :
				+            op:=OP_DIV;
				+          else
				+            internalerror(200610071);
				+        end;
				+
				+        if fits_in_mm_register(left.resulttype.def) then
				+          begin
				+            location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
				+            { we can use only right as left operand if the operation is commutative }
				+            if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
				+              begin
				+                location.register:=right.location.register;
				+                { left.resulttype.def is the array def, so the float kind must be
				+                  taken from its element type, not from a direct tfloatdef cast }
				+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
				+                  tfloat2tcgsize[tfloatdef(tarraydef(left.resulttype.def).elementtype.def).typ],left.location,location.register,nil);
				+              end
				+            else
				+              begin
				+                { left becomes the destination, so it has to live in a writable mm register }
				+                location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
				+                location.register:=left.location.register;
				+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
				+                  tfloat2tcgsize[tfloatdef(tarraydef(left.resulttype.def).elementtype.def).typ],right.location,location.register,nil);
				+              end;
				+          end
				+        else
				+          begin
				+            { not yet supported }
				+            internalerror(200610072);
				+          end
				+      end;
			
 
				+
			
 
				+
			
 
				     procedure tx86addnode.second_addfloat;
			
 
				       var
			
 
				         op : TAsmOp;
			
--- a/compiler/x86_64/cpubase.inc
+++ b/compiler/x86_64/cpubase.inc
@@ -82,7 +82,7 @@ const
 
				       { the maximum float size for a processor,           }
			
 
				       OS_FLOAT = OS_F80;
			
 
				       { the size of a vector register for a processor     }
			
 
				-      OS_VECTOR = OS_M64;
			
 
				+      OS_VECTOR = OS_M128;
			
 
				 
			
 
				 {*****************************************************************************
			
 
				                           Generic Register names