Bläddra i källkod

+ experimental -Sv option to support vector arithmetics

git-svn-id: trunk@4825 -
florian 19 år sedan
förälder
incheckning
fb5e396881

+ 4 - 1
compiler/cgbase.pas

@@ -157,7 +157,8 @@ interface
         R_SUBFD,   { = 7; Float that allocates 2 FPU registers }
         R_SUBFQ,   { = 8; Float that allocates 4 FPU registers }
         R_SUBMMS,  { = 9; single scalar in multi media register }
-        R_SUBMMD   { = 10; double scalar in multi media register }
+        R_SUBMMD,  { = 10; double scalar in multi media register }
+        R_SUBMMWHOLE  { = 11; complete MM register, size depends on CPU }
       );
 
       TSuperRegister = type word;
@@ -563,6 +564,8 @@ implementation
             result:=result+'md';
           R_SUBMMS:
             result:=result+'ms';
+          R_SUBMMWHOLE:
+            result:=result+'ma';
           else
             internalerror(200308252);
         end;

+ 42 - 0
compiler/defutil.pas

@@ -208,12 +208,18 @@ interface
     }
     procedure getrange(def : tdef;var l : TConstExprInt;var h : TConstExprInt);
 
+    { type being a vector? }
+    function is_vector(p : tdef) : boolean;
+
     { some type helper routines for MMX support }
     function is_mmx_able_array(p : tdef) : boolean;
 
     {# returns the mmx type }
     function mmx_type(p : tdef) : tmmxtype;
 
+    { returns if the passed type (array) fits into an mm register }
+    function fits_in_mm_register(p : tdef) : boolean;
+
     {# From a definition return the abstract code generator size enum. It is
        to note that the value returned can be @var(OS_NO) }
     function def_cgsize(def: tdef): tcgsize;
@@ -808,6 +814,42 @@ implementation
       end;
 
 
+    { Returns true if p is an array type that is not one of the special
+      array kinds and whose elements are 32 or 64 bit floats. }
+    function is_vector(p : tdef) : boolean;
+      var
+        elemdef : tdef;
+      begin
+        result:=false;
+        { only plain arrays qualify }
+        if (p.deftype<>arraydef) or is_special_array(p) then
+          exit;
+        elemdef:=tarraydef(p).elementtype.def;
+        result:=(elemdef.deftype=floatdef) and
+                (tfloatdef(elemdef).typ in [s32real,s64real]);
+      end;
+
+
+    { Returns true if the passed type is a vector (see is_vector) whose
+      layout fills one mm register. On x86 the accepted layouts are
+      array[0..3] of a 32 bit float and array[0..1] of a 64 bit float;
+      on all other targets the result is always false. }
+    function fits_in_mm_register(p : tdef) : boolean;
+      begin
+{$ifdef x86}
+        { "and" binds tighter than "or", so both layout alternatives have to be
+          grouped explicitly under the is_vector guard. Without the grouping the
+          second alternative is evaluated, including its tarraydef typecasts,
+          even when p is not a vector at all.
+          is_vector already guarantees a float element type, so only the range
+          and the float kind need to be checked here. }
+        result:= is_vector(p) and
+                 (
+                  (
+                   (tarraydef(p).lowrange=0) and
+                   (tarraydef(p).highrange=3) and
+                   (tfloatdef(tarraydef(p).elementtype.def).typ=s32real)
+                  ) or
+                  (
+                   (tarraydef(p).lowrange=0) and
+                   (tarraydef(p).highrange=1) and
+                   (tfloatdef(tarraydef(p).elementtype.def).typ=s64real)
+                  )
+                 );
+{$else x86}
+        result:=false;
+{$endif x86}
+      end;
+
+
     function is_mmx_able_array(p : tdef) : boolean;
       begin
 {$ifdef SUPPORT_MMX}

+ 1 - 0
compiler/globtype.pas

@@ -127,6 +127,7 @@ than 255 characters. That's why using Ansi Strings}
          cs_load_objpas_unit,
          cs_load_gpc_unit,
          cs_load_fpcylix_unit,
+         cs_support_vectors,
          { browser }
          cs_browser_log,
          { debuginfo }

+ 5 - 3
compiler/htypechk.pas

@@ -229,9 +229,11 @@ implementation
               end;
             arraydef :
               begin
-                { not mmx }
-                if (cs_mmx in aktlocalswitches) and
-                   is_mmx_able_array(ld) then
+                { not vector/mmx }
+                if ((cs_mmx in aktlocalswitches) and
+                   is_mmx_able_array(ld)) or
+                   ((cs_support_vectors in aktglobalswitches) and
+                   is_vector(ld)) then
                  begin
                    allowed:=false;
                    exit;

+ 1 - 1
compiler/i386/cpubase.inc

@@ -100,7 +100,7 @@
       {# the maximum float size for a processor,           }
       OS_FLOAT = OS_F80;
       {# the size of a vector register for a processor     }
-      OS_VECTOR = OS_M64;
+      OS_VECTOR = OS_M128;
 
 {*****************************************************************************
                           Generic Register names

+ 13 - 1
compiler/nadd.pas

@@ -760,7 +760,7 @@ implementation
         lt:=left.nodetype;
 
          { but an int/int gives real/real! }
-         if nodetype=slashn then
+         if (nodetype=slashn) and not(is_vector(left.resulttype.def)) and not(is_vector(right.resulttype.def)) then
           begin
             if is_currency(left.resulttype.def) and
                is_currency(right.resulttype.def) then
@@ -1402,6 +1402,18 @@ implementation
               end;
             end
 {$endif SUPPORT_MMX}
+         { vector support, this must be before the zero based array
+           check }
+         else if (cs_support_vectors in aktglobalswitches) and
+                 is_vector(ld) and
+                 is_vector(rd) and
+                 equal_defs(ld,rd) then
+            begin
+              if not(nodetype in [addn,subn,xorn,orn,andn,muln,slashn]) then
+                CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),ld.typename,rd.typename);
+              { both defs must be equal, so taking left or right as resulttype doesn't matter }
+              resulttype:=left.resulttype;
+            end
 
          { this is a little bit dangerous, also the left type }
          { pointer to should be checked! This broke the mmx support      }

+ 5 - 0
compiler/ncgadd.pas

@@ -56,6 +56,7 @@ interface
           procedure second_opmmx;virtual;abstract;
 {$endif SUPPORT_MMX}
 {$endif x86}
+          procedure second_opvector;virtual;abstract;
           procedure second_add64bit;virtual;
           procedure second_addordinal;virtual;
           procedure second_cmpfloat;virtual;abstract;
@@ -777,6 +778,10 @@ interface
               { support dynarr=nil }
               if is_dynamic_array(left.resulttype.def) then
                 second_opordinal
+              else
+                if (cs_support_vectors in aktglobalswitches) and
+                   is_vector(left.resulttype.def) then
+                  second_opvector
 {$ifdef SUPPORT_MMX}
               else
                 if is_mmx_able_array(left.resulttype.def) then

+ 18 - 0
compiler/ncgutil.pas

@@ -64,6 +64,7 @@ interface
     procedure location_force_fpureg(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mem(list:TAsmList;var l:tlocation);
     procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
+    procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
 
     { Retrieve the location of the data pointed to in location l, when the location is
       a register it is expected to contain the address of the data }
@@ -727,6 +728,23 @@ implementation
       end;
 
 
+    { Makes sure that l is located in a multi media register, treating it as
+      a value of size OS_VECTOR.
+
+      Nothing is done if l is already a LOC_MMREGISTER, or if it is a
+      LOC_CMMREGISTER and maybeconst is true. In every other case a new mm
+      register is requested from the code generator, l is loaded into it,
+      location_freetemp is called on the old location and l is reset to
+      describe the new register. }
+    procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
+      var
+        reg : tregister;
+      begin
+        if (l.loc<>LOC_MMREGISTER)  and
+           ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
+          begin
+            reg:=cg.getmmregister(list,OS_VECTOR);
+            cg.a_loadmm_loc_reg(list,OS_VECTOR,l,reg,nil);
+            { the old location is no longer needed once the value is loaded }
+            location_freetemp(list,l);
+            location_reset(l,LOC_MMREGISTER,OS_VECTOR);
+            l.register:=reg;
+          end;
+      end;
+
+
     procedure location_force_mem(list:TAsmList;var l:tlocation);
       var
         r : treference;

+ 2 - 0
compiler/options.pas

@@ -1052,6 +1052,8 @@ begin
                          include(initglobalswitches,cs_constructor_name);
                        't' :
                          include(initmoduleswitches,cs_static_keyword);
+                       'v' :
+                         include(initglobalswitches,cs_support_vectors);
                        '-' :
                          begin
                            exclude(initglobalswitches,cs_constructor_name);

+ 4 - 0
compiler/x86/aasmcpu.pas

@@ -2435,6 +2435,8 @@ implementation
                 result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
               R_SUBMMS:
                 result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
+              R_SUBMMWHOLE:
+                result:=taicpu.op_ref_reg(A_MOVQ,S_NO,ref,r);
               else
                 internalerror(200506043);
             end;
@@ -2455,6 +2457,8 @@ implementation
                 result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
               R_SUBMMS:
                 result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
+              R_SUBMMWHOLE:
+                result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,ref);
               else
                 internalerror(200506042);
             end;

+ 5 - 3
compiler/x86/cgx86.pas

@@ -195,6 +195,8 @@ unit cgx86;
             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD);
           OS_F32:
             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_M128:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMWHOLE);
           else
             internalerror(200506041);
         end;
@@ -1003,10 +1005,10 @@ unit cgx86;
               these
             }
             ( { OS_F32 }
-              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS
             ),
             ( { OS_F64 }
-              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD
             )
           )
         );
@@ -1041,7 +1043,7 @@ unit cgx86;
         else
           internalerror(200312211);
         if asmop=A_NOP then
-          internalerror(200312215);
+          internalerror(200312216);
         case loc.loc of
           LOC_CREFERENCE,LOC_REFERENCE:
             begin

+ 3 - 3
compiler/x86/cpubase.pas

@@ -319,7 +319,7 @@ implementation
 
     function reg_cgsize(const reg: tregister): tcgsize;
       const subreg2cgsize:array[Tsubregister] of Tcgsize =
-            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64);
+            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64,OS_M128);
       begin
         case getregtype(reg) of
           R_INTREGISTER :
@@ -346,7 +346,7 @@ implementation
     function reg2opsize(r:Tregister):topsize;
       const
         subreg2opsize : array[tsubregister] of topsize =
-          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO);
+          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
       begin
         reg2opsize:=S_L;
         case getregtype(r) of
@@ -429,7 +429,7 @@ implementation
         { for the name the sub reg doesn't matter }
         hr:=r;
         case getsubreg(hr) of
-          R_SUBMMS,R_SUBMMD:
+          R_SUBMMS,R_SUBMMD,R_SUBMMWHOLE:
             setsubreg(hr,R_SUBNONE);
         end;
         result:=findreg_by_number_table(hr,regnumber_index);

+ 47 - 0
compiler/x86/nx86add.pas

@@ -55,6 +55,7 @@ unit nx86add;
         procedure second_opmmxset;override;
         procedure second_opmmx;override;
 {$endif SUPPORT_MMX}
+        procedure second_opvector;override;
       end;
 
 
@@ -810,6 +811,52 @@ unit nx86add;
       end;
 
 
+    { Generates code for an arithmetic operation on two vector operands.
+
+      Only addn, subn, muln and slashn are translated; any other node type
+      raises internalerror 200610071. Only operand types accepted by
+      fits_in_mm_register are handled so far, everything else raises
+      internalerror 200610072. The result is left in an mm register. }
+    procedure tx86addnode.second_opvector;
+      var
+        op : topcg;
+      begin
+        pass_left_right;
+        if (nf_swaped in flags) then
+          swapleftright;
+
+        case nodetype of
+          addn :
+            op:=OP_ADD;
+          muln :
+            op:=OP_MUL;
+          subn :
+            op:=OP_SUB;
+          slashn :
+            op:=OP_DIV;
+          else
+            internalerror(200610071);
+        end;
+
+        if fits_in_mm_register(left.resulttype.def) then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
+            { we can use only right as left operand if the operation is commutative }
+            if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
+              begin
+                location.register:=right.location.register;
+                { the operation size comes from the element type of the vector;
+                  left.resulttype.def itself is an array def and must not be
+                  typecasted to tfloatdef }
+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
+                  tfloat2tcgsize[tfloatdef(tarraydef(left.resulttype.def).elementtype.def).typ],left.location,location.register,nil);
+              end
+            else
+              begin
+                { the result is accumulated in the register holding left }
+                location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
+                location.register:=left.location.register;
+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
+                  tfloat2tcgsize[tfloatdef(tarraydef(left.resulttype.def).elementtype.def).typ],right.location,location.register,nil);
+              end;
+          end
+        else
+          begin
+            { not yet supported }
+            internalerror(200610072);
+          end
+      end;
+
+
     procedure tx86addnode.second_addfloat;
       var
         op : TAsmOp;

+ 1 - 1
compiler/x86_64/cpubase.inc

@@ -82,7 +82,7 @@ const
       { the maximum float size for a processor,           }
       OS_FLOAT = OS_F80;
       { the size of a vector register for a processor     }
-      OS_VECTOR = OS_M64;
+      OS_VECTOR = OS_M128;
 
 {*****************************************************************************
                           Generic Register names