Procházet zdrojové kódy

* rtl and compiler compile with -Cfsse2

florian před 21 roky
rodič
revize
ace2d682b0

+ 36 - 1
compiler/cgobj.pas

@@ -1400,12 +1400,44 @@ implementation
 
 
     { Performs an mm register operation whose first operand lives in memory,
       expressed in terms of a_loadmm_ref_reg and a_opmm_reg_reg: the value at
       ref is loaded into a temporary mm register, which is then combined
       with reg.
         list    - assembler output list, handed through to every helper call
         Op      - operation, handed through to a_opmm_reg_reg
         size    - operand size; used as both source and destination size of the load
         ref     - memory operand
         reg     - register operand
         shuffle - shuffle descriptor used for the load and, when
                   realshuffle(shuffle) is false, also for the operation
       NOTE(review): presumably reg is the destination of a_opmm_reg_reg (the
       sibling a_opmm_reg_ref stores its second register argument back to
       memory after the same call) - confirm against a_opmm_reg_reg. }
     procedure tcg.a_opmm_ref_reg(list: taasmoutput; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
+      var
+         hr : tregister;
+         hs : tmmshuffle;
       begin
          { temporary mm register that receives the memory operand }
+         hr:=getmmregister(list,size);
+         a_loadmm_ref_reg(list,size,size,ref,hr,shuffle);
+         if realshuffle(shuffle) then
+           begin
              { work on a local copy (hs) that has been passed through
                removeshuffles; presumably the load above already applied the
                shuffle, so it must not be applied a second time - TODO confirm }
+             hs:=shuffle^;
+             removeshuffles(hs);
+             a_opmm_reg_reg(list,op,size,hr,reg,@hs);
+           end
+         else
+           a_opmm_reg_reg(list,op,size,hr,reg,shuffle);
          { the temporary is no longer needed once the operation has been emitted }
+         ungetregister(list,hr);
       end;
 
 
     { Performs an mm register operation on a memory operand as a
       load / operate / store sequence: the value at ref is loaded into a
       temporary mm register, a_opmm_reg_reg is called with reg and the
       temporary, and the temporary is written back to ref.
         list    - assembler output list, handed through to every helper call
         Op      - operation, handed through to a_opmm_reg_reg
         size    - operand size; used as both source and destination size of
                   the load and of the store
         reg     - register operand
         ref     - memory operand; read first, then overwritten with the temporary
         shuffle - shuffle descriptor used for the load and, when
                   realshuffle(shuffle) is false, also for the operation and
                   the store }
     procedure tcg.a_opmm_reg_ref(list: taasmoutput; Op: TOpCG; size : tcgsize;reg: tregister; const ref: treference; shuffle : pmmshuffle);
+      var
+         hr : tregister;
+         hs : tmmshuffle;
       begin
          { temporary mm register holding the current contents of ref }
+         hr:=getmmregister(list,size);
+         a_loadmm_ref_reg(list,size,size,ref,hr,shuffle);
+         if realshuffle(shuffle) then
+           begin
              { operate and store with a local copy (hs) that has been passed
                through removeshuffles; presumably the load above already
                applied the shuffle - TODO confirm }
+             hs:=shuffle^;
+             removeshuffles(hs);
+             a_opmm_reg_reg(list,op,size,reg,hr,@hs);
+             a_loadmm_reg_ref(list,size,size,hr,ref,@hs);
+           end
+         else
+           begin
+             a_opmm_reg_reg(list,op,size,reg,hr,shuffle);
              { write the temporary back to the memory operand }
+             a_loadmm_reg_ref(list,size,size,hr,ref,shuffle);
+           end;
          { the temporary is no longer needed once the store has been emitted }
+         ungetregister(list,hr);
       end;
 
 
@@ -2000,7 +2032,10 @@ finalization
 end.
 {
   $Log$
-  Revision 1.144  2003-12-24 00:10:02  florian
+  Revision 1.145  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.144  2003/12/24 00:10:02  florian
     - delete parameter in cg64 methods removed
 
   Revision 1.143  2003/12/23 14:38:07  florian

+ 7 - 136
compiler/i386/n386mat.pas

@@ -27,7 +27,7 @@ unit n386mat;
 interface
 
     uses
-      node,nmat,ncgmat;
+      node,nmat,ncgmat,nx86mat;
 
     type
       ti386moddivnode = class(tmoddivnode)
@@ -40,12 +40,7 @@ interface
          function first_shlshr64bitint: tnode; override;
       end;
 
-      ti386unaryminusnode = class(tcgunaryminusnode)
-{$ifdef SUPPORT_MMX}
-         procedure second_mmx;override;
-{$endif SUPPORT_MMX}
-         procedure second_float;override;
-         function pass_1:tnode;override;
+      ti386unaryminusnode = class(tx86unaryminusnode)
       end;
 
       ti386notnode = class(tcgnotnode)
@@ -355,133 +350,6 @@ implementation
     end;
 
 
-{*****************************************************************************
-                          TI386UNARYMINUSNODE
-*****************************************************************************}
-
-    function ti386unaryminusnode.pass_1 : tnode;
-      begin
-         result:=nil;
-         firstpass(left);
-         if codegenerror then
-           exit;
-
-         if (left.resulttype.def.deftype=floatdef) then
-           begin
-             if (registersfpu < 1) then
-               registersfpu := 1;
-             expectloc:=LOC_FPUREGISTER;
-           end
-{$ifdef SUPPORT_MMX}
-         else
-           if (cs_mmx in aktlocalswitches) and
-              is_mmx_able_array(left.resulttype.def) then
-             begin
-               registers32:=left.registers32;
-               registersfpu:=left.registersfpu;
-               registersmmx:=left.registersmmx;
-               if (left.location.loc<>LOC_MMXREGISTER) and
-                  (registersmmx<1) then
-                 registersmmx:=1;
-             end
-{$endif SUPPORT_MMX}
-         else
-           inherited pass_1;
-      end;
-
-
-{$ifdef SUPPORT_MMX}
-    procedure ti386unaryminusnode.second_mmx;
-      var
-        op : tasmop;
-        hreg : tregister;
-      begin
-        secondpass(left);
-        location_reset(location,LOC_MMXREGISTER,OS_NO);
-        hreg:=cg.getmmxregister(exprasmlist,OS_M64);
-        emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
-        case left.location.loc of
-          LOC_MMXREGISTER:
-            begin
-               location.register:=left.location.register;
-            end;
-          LOC_CMMXREGISTER:
-            begin
-               location.register:=cg.getmmxregister(exprasmlist,OS_M64);
-               emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
-            end;
-          LOC_REFERENCE,
-          LOC_CREFERENCE:
-            begin
-               reference_release(exprasmlist,left.location.reference);
-               location.register:=cg.getmmxregister(exprasmlist,OS_M64);
-               emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
-            end;
-          else
-            internalerror(200203225);
-        end;
-        if cs_mmx_saturation in aktlocalswitches then
-          case mmx_type(resulttype.def) of
-             mmxs8bit:
-               op:=A_PSUBSB;
-             mmxu8bit:
-               op:=A_PSUBUSB;
-             mmxs16bit,mmxfixed16:
-               op:=A_PSUBSW;
-             mmxu16bit:
-               op:=A_PSUBUSW;
-          end
-        else
-          case mmx_type(resulttype.def) of
-             mmxs8bit,mmxu8bit:
-               op:=A_PSUBB;
-             mmxs16bit,mmxu16bit,mmxfixed16:
-               op:=A_PSUBW;
-             mmxs32bit,mmxu32bit:
-               op:=A_PSUBD;
-          end;
-        emit_reg_reg(op,S_NO,location.register,hreg);
-        cg.ungetregister(exprasmlist,hreg);
-        emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
-      end;
-{$endif SUPPORT_MMX}
-
-
-    procedure ti386unaryminusnode.second_float;
-      begin
-        secondpass(left);
-        location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
-        case left.location.loc of
-          LOC_REFERENCE,
-          LOC_CREFERENCE:
-            begin
-              reference_release(exprasmlist,left.location.reference);
-              location.register:=NR_ST;
-              cg.a_loadfpu_ref_reg(exprasmlist,
-                 def_cgsize(left.resulttype.def),
-                 left.location.reference,location.register);
-              emit_none(A_FCHS,S_NO);
-            end;
-          LOC_FPUREGISTER,
-          LOC_CFPUREGISTER:
-            begin
-               { "load st,st" is ignored by the code generator }
-               cg.a_loadfpu_reg_reg(exprasmlist,left.location.size,left.location.register,NR_ST);
-               location.register:=NR_ST;
-               emit_none(A_FCHS,S_NO);
-            end;
-          {
-          LOC_MMREGISTER,
-          LOC_CMMREGISTER:
-            begin
-            end;
-          }
-          else
-            internalerror(200312241);
-        end;
-      end;
-
-
 {*****************************************************************************
                                TI386NOTNODE
 *****************************************************************************}
@@ -580,14 +448,17 @@ implementation
 {$endif SUPPORT_MMX}
 
 begin
+   cunaryminusnode:=ti386unaryminusnode;
    cmoddivnode:=ti386moddivnode;
    cshlshrnode:=ti386shlshrnode;
-   cunaryminusnode:=ti386unaryminusnode;
    cnotnode:=ti386notnode;
 end.
 {
   $Log$
-  Revision 1.67  2003-12-25 01:07:09  florian
+  Revision 1.68  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.67  2003/12/25 01:07:09  florian
     + $fputype directive support
     + single data type operations with sse unit
     * fixed more x86-64 stuff

+ 18 - 1
compiler/ncgcal.pas

@@ -156,6 +156,20 @@ implementation
                    reference_reset_base(href,tempparaloc.reference.index,tempparaloc.reference.offset);
                  cg.a_loadfpu_reg_ref(exprasmlist,def_cgsize(left.resulttype.def),left.location.register,href);
                end;
+             LOC_MMREGISTER,
+             LOC_CMMREGISTER:
+               begin
+                 size:=align(tfloatdef(left.resulttype.def).size,tempparaloc.alignment);
+                 inc(tcgcallnode(aktcallnode).pushedparasize,size);
+                 if tempparaloc.reference.index=NR_STACK_POINTER_REG then
+                   begin
+                     cg.g_stackpointer_alloc(exprasmlist,size);
+                     reference_reset_base(href,NR_STACK_POINTER_REG,0);
+                   end
+                 else
+                   reference_reset_base(href,tempparaloc.reference.index,tempparaloc.reference.offset);
+                 cg.a_loadmm_reg_ref(exprasmlist,def_cgsize(left.resulttype.def),def_cgsize(left.resulttype.def),left.location.register,href,mms_movescalar);
+               end;
              LOC_REFERENCE,
              LOC_CREFERENCE :
                begin
@@ -1131,7 +1145,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.147  2003-12-21 19:42:42  florian
+  Revision 1.148  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.147  2003/12/21 19:42:42  florian
     * fixed ppc inlining stuff
     * fixed wrong unit writing
     + added some sse stuff

+ 13 - 1
compiler/ncgutil.pas

@@ -670,6 +670,15 @@ implementation
               location_reset(l,LOC_REFERENCE,l.size);
               l.reference:=r;
             end;
+          LOC_MMREGISTER,
+          LOC_CMMREGISTER:
+            begin
+              tg.GetTemp(list,TCGSize2Size[l.size],tt_normal,r);
+              cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,r,mms_movescalar);
+              location_release(list,l);
+              location_reset(l,LOC_REFERENCE,l.size);
+              l.reference:=r;
+            end;
           LOC_CONSTANT,
           LOC_REGISTER,
           LOC_CREGISTER :
@@ -2039,7 +2048,10 @@ implementation
 end.
 {
   $Log$
-  Revision 1.178  2003-12-26 00:32:21  florian
+  Revision 1.179  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.178  2003/12/26 00:32:21  florian
     + fpu<->mm register conversion
 
   Revision 1.177  2003/12/24 00:10:02  florian

+ 9 - 3
compiler/x86/cgx86.pas

@@ -830,11 +830,14 @@ unit cgx86;
             )
           ),
           ( { vectorized/packed }
+            { because the packed single logical instructions have shorter op codes, we always
+              use them, including in the OS_F64 entry below
+            }
             ( { OS_F32 }
-              A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
+              A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
             ),
             ( { OS_F64 }
-              A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
+              A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
             )
           )
         );
@@ -1918,7 +1921,10 @@ unit cgx86;
 end.
 {
   $Log$
-  Revision 1.98  2003-12-26 00:32:22  florian
+  Revision 1.99  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.98  2003/12/26 00:32:22  florian
     + fpu<->mm register conversion
 
   Revision 1.97  2003/12/25 12:01:35  florian

+ 6 - 2
compiler/x86/cpubase.pas

@@ -421,7 +421,8 @@ implementation
             cgsize2subreg:=R_SUBQ;
           OS_M64:
             cgsize2subreg:=R_SUBNONE;
-          OS_F32,OS_F64:
+          OS_F32,OS_F64,
+          OS_M128,OS_MS128:
             cgsize2subreg:=R_SUBWHOLE;
           else
             internalerror(200301231);
@@ -534,7 +535,10 @@ implementation
 end.
 {
   $Log$
-  Revision 1.33  2003-12-25 01:07:09  florian
+  Revision 1.34  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.33  2003/12/25 01:07:09  florian
     + $fputype directive support
     + single data type operations with sse unit
     * fixed more x86-64 stuff

+ 18 - 1
compiler/x86/nx86add.pas

@@ -240,6 +240,13 @@ unit nx86add;
         if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
           begin
             location.register:=right.location.register;
+            { force a floating point register location to be written to memory;
+              we don't force it into an mm register because writing to memory
+              probably allows shorter code, since there is no direct fpu->mm register
+              copy instruction
+            }
+            if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
+              location_force_mem(exprasmlist,left.location);
             cg.a_opmm_loc_reg(exprasmlist,op,location.size,left.location,location.register,mms_movescalar);
             location_release(exprasmlist,left.location);
           end
@@ -247,6 +254,13 @@ unit nx86add;
           begin
             location_force_mmregscalar(exprasmlist,left.location,false);
             location.register:=left.location.register;
+            { force a floating point register location to be written to memory;
+              we don't force it into an mm register because writing to memory
+              probably allows shorter code, since there is no direct fpu->mm register
+              copy instruction
+            }
+            if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
+              location_force_mem(exprasmlist,right.location);
             cg.a_opmm_loc_reg(exprasmlist,op,location.size,right.location,location.register,mms_movescalar);
             location_release(exprasmlist,right.location);
           end;
@@ -255,7 +269,10 @@ unit nx86add;
 end.
 {
   $Log$
-  Revision 1.4  2003-12-26 00:32:22  florian
+  Revision 1.5  2003-12-26 13:19:16  florian
+    * rtl and compiler compile with -Cfsse2
+
+  Revision 1.4  2003/12/26 00:32:22  florian
     + fpu<->mm register conversion
 
   Revision 1.3  2003/12/25 01:07:09  florian