瀏覽代碼

* Some work to restore the MMX capabilities

daniel 21 年之前
父節點
當前提交
d84b7d0743
共有 6 個文件被更改,包括 226 次插入93 次删除
  1. 8 1
      compiler/cgbase.pas
  2. 32 4
      compiler/i386/cgcpu.pas
  3. 6 1
      compiler/psub.pas
  4. 7 2
      compiler/symconst.pas
  5. 162 83
      compiler/x86/cgx86.pas
  6. 11 2
      compiler/x86/cpubase.pas

+ 8 - 1
compiler/cgbase.pas

@@ -165,6 +165,10 @@ interface
       end;
 {$endif cpu64bit}
 
+      Tregistermmxset = packed record
+        reg0,reg1,reg2,reg3:Tregister
+      end;
+
       { Set type definition for registers }
       tcpuregisterset = set of byte;
       tsuperregisterset = array[byte] of set of byte;
@@ -574,7 +578,10 @@ finalization
 end.
 {
   $Log$
-  Revision 1.79  2003-12-15 21:25:48  peter
+  Revision 1.80  2003-12-19 22:08:44  daniel
+    * Some work to restore the MMX capabilities
+
+  Revision 1.79  2003/12/15 21:25:48  peter
     * reg allocations for imaginary register are now inserted just
       before reg allocation
     * tregister changed to enum to allow compile time check

+ 32 - 4
compiler/i386/cgcpu.pas

@@ -61,16 +61,41 @@ unit cgcpu;
 
 
     class function tcg386.reg_cgsize(const reg: tregister): tcgsize;
-      const
+
+    const subreg2cgsize:array[Tsubregister] of Tcgsize =
+          (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO);
+
+    begin
+      case getregtype(reg) of
+        R_INTREGISTER :
+          reg_cgsize:=subreg2cgsize[getsubreg(reg)];
+        R_FPUREGISTER :
+          reg_cgsize:=OS_F80;
+        R_MMXREGISTER,
+        R_MMREGISTER :
+          reg_cgsize:=OS_M64;
+        R_SPECIALREGISTER :
+          case reg of
+            NR_CS,NR_DS,NR_ES,NR_SS,NR_FS,NR_GS:
+              reg_cgsize:=OS_16
+            else
+              reg_cgsize:=OS_32
+          end
+        else
+            internalerror(200303181);
+        end;
+      end;
+
+{      const
         opsize_2_cgsize: array[topsize] of tcgsize = (OS_NO,
           OS_8,OS_16,OS_32,OS_NO,OS_NO,OS_NO,
           OS_32,OS_64,OS_64,
-          OS_F32,OS_F64,OS_F80,OS_F32,OS_F64,OS_NO,OS_NO,
+          OS_F32,OS_F64,OS_F80,OS_F32,OS_F64,OS_M64,OS_NO,
           OS_NO,OS_NO,OS_NO
         );
       begin
         result := opsize_2_cgsize[reg2opsize(reg)];
-      end;
+      end;}
 
 
 { ************* 64bit operations ************ }
@@ -207,7 +232,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.40  2003-10-10 17:48:14  peter
+  Revision 1.41  2003-12-19 22:08:44  daniel
+    * Some work to restore the MMX capabilities
+
+  Revision 1.40  2003/10/10 17:48:14  peter
     * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
     * tregisteralloctor renamed to trgobj
     * removed rgobj from a lot of units

+ 6 - 1
compiler/psub.pas

@@ -632,6 +632,8 @@ implementation
         { firstpass everything }
         flowcontrol:=[];
         do_firstpass(code);
+        if code.registersfpu>0 then
+          include(current_procinfo.flags,pi_uses_fpu);
 
         { only do secondpass if there are no errors }
         if ErrorCount=0 then
@@ -1334,7 +1336,10 @@ implementation
 end.
 {
   $Log$
-  Revision 1.179  2003-12-16 22:36:19  florian
+  Revision 1.180  2003-12-19 22:08:44  daniel
+    * Some work to restore the MMX capabilities
+
+  Revision 1.179  2003/12/16 22:36:19  florian
     * forgot a commit
 
   Revision 1.178  2003/12/16 21:29:24  florian

+ 7 - 2
compiler/symconst.pas

@@ -329,7 +329,9 @@ type
     {# procedure is declared as @var(assembler), don't optimize}
     pi_is_assembler,
     {# procedure contains data which needs to be finalized }
-    pi_needs_implicit_finally
+    pi_needs_implicit_finally,
+    {# procedure uses fpu}
+    pi_uses_fpu
   );
   tprocinfoflags=set of tprocinfoflag;
 
@@ -393,7 +395,10 @@ initialization
 end.
 {
   $Log$
-  Revision 1.72  2003-12-16 21:29:24  florian
+  Revision 1.73  2003-12-19 22:08:44  daniel
+    * Some work to restore the MMX capabilities
+
+  Revision 1.72  2003/12/16 21:29:24  florian
     + inlined procedures inherit procinfo flags
 
   Revision 1.71  2003/11/23 17:05:16  peter

+ 162 - 83
compiler/x86/cgx86.pas

@@ -40,7 +40,9 @@ unit cgx86;
         procedure init_register_allocators;override;
         procedure done_register_allocators;override;
 
-        function  getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
+        function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
+        function getmmxregister(list:Taasmoutput):Tregister;
+
         procedure getexplicitregister(list:Taasmoutput;r:Tregister);override;
         procedure ungetregister(list:Taasmoutput;r:Tregister);override;
         procedure allocexplicitregisters(list:Taasmoutput;rt:Tregistertype;r:Tcpuregisterset);override;
@@ -172,6 +174,7 @@ unit cgx86;
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP,RS_EBX])
         else
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP]);
+        rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]);
         rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]);
         rgfpu:=Trgx86fpu.create;
       end;
@@ -183,6 +186,8 @@ unit cgx86;
         rg[R_INTREGISTER]:=nil;
         rg[R_MMREGISTER].free;
         rg[R_MMREGISTER]:=nil;
+        rg[R_MMXREGISTER].free;
+        rg[R_MMXREGISTER]:=nil;
         rgfpu.free;
       end;
 
@@ -192,6 +197,13 @@ unit cgx86;
         result:=rgfpu.getregisterfpu(list);
       end;
 
+    function Tcgx86.getmmxregister(list:Taasmoutput):Tregister;
+
+    begin
+      if not assigned(rg[R_MMXREGISTER]) then
+        internalerror(200312124);
+      result:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
+    end;
 
     procedure Tcgx86.getexplicitregister(list:Taasmoutput;r:Tregister);
       begin
@@ -1142,99 +1154,161 @@ unit cgx86;
 
     procedure Tcgx86.g_concatcopy(list:Taasmoutput;const source,dest:Treference;
                                   len:aword;delsource,loadref:boolean);
+
+    type  copymode=(copy_move,copy_mmx,copy_string);
+
     var srcref,dstref:Treference;
-        r:Tregister;
+        r,r0,r1,r2,r3:Tregister;
         helpsize:aword;
         copysize:byte;
         cgsize:Tcgsize;
+        cm:copymode;
 
     begin
+      cm:=copy_move;
       helpsize:=12;
       if cs_littlesize in aktglobalswitches then
         helpsize:=8;
-      if not loadref and (len<=helpsize) then
-        begin
-          dstref:=dest;
-          srcref:=source;
-          copysize:=4;
-          cgsize:=OS_32;
-          while len<>0 do
-            begin
-              if len<2 then
-                begin
-                  copysize:=1;
-                  cgsize:=OS_8;
-                end
-              else if len<4 then
-                begin
-                  copysize:=2;
-                  cgsize:=OS_16;
-                end;
-              dec(len,copysize);
-              if (len=0) and delsource then
-                reference_release(list,source);
-              r:=getintregister(list,cgsize);
-              a_load_ref_reg(list,cgsize,cgsize,srcref,r);
-              ungetregister(list,r);
-              a_load_reg_ref(list,cgsize,cgsize,r,dstref);
-              inc(srcref.offset,copysize);
-              inc(dstref.offset,copysize);
-            end;
-        end
-      else
-        begin
-          getexplicitregister(list,NR_EDI);
-          a_loadaddr_ref_reg(list,dest,NR_EDI);
-          getexplicitregister(list,NR_ESI);
-          if loadref then
-            a_load_ref_reg(list,OS_ADDR,OS_ADDR,source,NR_ESI)
-          else
-            begin
-              a_loadaddr_ref_reg(list,source,NR_ESI);
-              if delsource then
-                begin
-                  srcref:=source;
-                  { Don't release ESI register yet, it's needed
-                    by the movsl }
-                  if (srcref.base=NR_ESI) then
-                    srcref.base:=NR_NO
-                  else if (srcref.index=NR_ESI) then
-                    srcref.index:=NR_NO;
-                  reference_release(list,srcref);
-                end;
-            end;
+      if (cs_mmx in aktlocalswitches) and
+         not(pi_uses_fpu in current_procinfo.flags) and
+         ((len=8) or (len=16) or (len=24) or (len=32)) then
+        cm:=copy_mmx;
+      if (cs_littlesize in aktglobalswitches) and 
+         (len>helpsize) and
+         not((len<=16) and (cm=copy_mmx)) then
+        cm:=copy_string;
+      if loadref then
+        cm:=copy_string;
+      case cm of
+        copy_move:
+          begin
+            dstref:=dest;
+            srcref:=source;
+            copysize:=4;
+            cgsize:=OS_32;
+            while len<>0 do
+              begin
+                if len<2 then
+                  begin
+                    copysize:=1;
+                    cgsize:=OS_8;
+                  end
+                else if len<4 then
+                  begin
+                    copysize:=2;
+                    cgsize:=OS_16;
+                  end;
+                dec(len,copysize);
+                if (len=0) and delsource then
+                  reference_release(list,source);
+                r:=getintregister(list,cgsize);
+                a_load_ref_reg(list,cgsize,cgsize,srcref,r);
+                ungetregister(list,r);
+                a_load_reg_ref(list,cgsize,cgsize,r,dstref);
+                inc(srcref.offset,copysize);
+                inc(dstref.offset,copysize);
+              end;
+          end;
+        copy_mmx:
+          begin
+            dstref:=dest;
+            srcref:=source;
+            r0:=getmmxregister(list);
+            a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r0,nil);
+            if len>=16 then
+              begin
+                inc(srcref.offset,8);
+                r1:=getmmxregister(list);
+                a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r1,nil);
+              end;
+            if len>=24 then
+              begin
+                inc(srcref.offset,8);
+                r2:=getmmxregister(list);
+                a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r2,nil);
+              end;
+            if len>=32 then
+              begin
+                inc(srcref.offset,8);
+                r3:=getmmxregister(list);
+                a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r3,nil);
+              end;
+            a_loadmm_reg_ref(list,OS_M64,OS_M64,r0,dstref,nil);
+            ungetregister(list,r0);
+            if len>=16 then
+              begin
+                inc(dstref.offset,8);
+                a_loadmm_reg_ref(list,OS_M64,OS_M64,r1,dstref,nil);
+                ungetregister(list,r1);
+              end;
+            if len>=24 then
+              begin
+                inc(dstref.offset,8);
+                a_loadmm_reg_ref(list,OS_M64,OS_M64,r2,dstref,nil);
+                ungetregister(list,r2);
+              end;
+            if len>=32 then
+              begin
+                inc(dstref.offset,8);
+                a_loadmm_reg_ref(list,OS_M64,OS_M64,r3,dstref,nil);
+                ungetregister(list,r3);
+              end;
+          end
+        else {copy_string, should be a good fallback in case of unhandled}
+          begin
+            getexplicitregister(list,NR_EDI);
+            a_loadaddr_ref_reg(list,dest,NR_EDI);
+            getexplicitregister(list,NR_ESI);
+            if loadref then
+              a_load_ref_reg(list,OS_ADDR,OS_ADDR,source,NR_ESI)
+            else
+              begin
+                a_loadaddr_ref_reg(list,source,NR_ESI);
+                if delsource then
+                  begin
+                    srcref:=source;
+                    { Don't release ESI register yet, it's needed
+                      by the movsl }
+                    if (srcref.base=NR_ESI) then
+                      srcref.base:=NR_NO
+                    else if (srcref.index=NR_ESI) then
+                      srcref.index:=NR_NO;
+                    reference_release(list,srcref);
+                  end;
+              end;
 
-          getexplicitregister(list,NR_ECX);
+            getexplicitregister(list,NR_ECX);
 
-          list.concat(Taicpu.op_none(A_CLD,S_NO));
-          if cs_littlesize in aktglobalswitches  then
-            begin
-              a_load_const_reg(list,OS_INT,len,NR_ECX);
-              list.concat(Taicpu.op_none(A_REP,S_NO));
-              list.concat(Taicpu.op_none(A_MOVSB,S_NO));
-            end
-          else
-            begin
-              helpsize:=len shr 2;
-              len:=len and 3;
-              if helpsize>1 then
-                begin
-                  a_load_const_reg(list,OS_INT,helpsize,NR_ECX);
-                  list.concat(Taicpu.op_none(A_REP,S_NO));
-                end;
-              if helpsize>0 then
-                list.concat(Taicpu.op_none(A_MOVSD,S_NO));
-              if len>1 then
-                begin
-                  dec(len,2);
-                  list.concat(Taicpu.op_none(A_MOVSW,S_NO));
-                end;
-              if len=1 then
+            list.concat(Taicpu.op_none(A_CLD,S_NO));
+            if cs_littlesize in aktglobalswitches  then
+              begin
+                a_load_const_reg(list,OS_INT,len,NR_ECX);
+                list.concat(Taicpu.op_none(A_REP,S_NO));
                 list.concat(Taicpu.op_none(A_MOVSB,S_NO));
-              end;
-          ungetregister(list,NR_ECX);
-          ungetregister(list,NR_ESI);
-          ungetregister(list,NR_EDI);
+              end
+            else
+              begin
+                helpsize:=len shr 2;
+                len:=len and 3;
+                if helpsize>1 then
+                  begin
+                    a_load_const_reg(list,OS_INT,helpsize,NR_ECX);
+                    list.concat(Taicpu.op_none(A_REP,S_NO));
+                  end;
+                if helpsize>0 then
+                  list.concat(Taicpu.op_none(A_MOVSD,S_NO));
+                if len>1 then
+                  begin
+                    dec(len,2);
+                    list.concat(Taicpu.op_none(A_MOVSW,S_NO));
+                  end;
+                if len=1 then
+                  list.concat(Taicpu.op_none(A_MOVSB,S_NO));
+                end;
+            ungetregister(list,NR_ECX);
+            ungetregister(list,NR_ESI);
+            ungetregister(list,NR_EDI);
+          end;
         end;
       if delsource then
         tg.ungetiftemp(list,source);
@@ -1528,6 +1602,8 @@ unit cgx86;
         list.concat(tai_regalloc.dealloc(NR_EBX));
       list.concat(tai_regalloc.dealloc(NR_EBP));
       list.concat(Taicpu.op_none(A_LEAVE,S_NO));
+      if assigned(rg[R_MMXREGISTER]) and (rg[R_MMXREGISTER].uses_registers) then
+        list.concat(Taicpu.op_none(A_EMMS,S_NO));
     end;
 
 
@@ -1682,7 +1758,10 @@ unit cgx86;
 end.
 {
   $Log$
-  Revision 1.91  2003-12-15 21:25:49  peter
+  Revision 1.92  2003-12-19 22:08:44  daniel
+    * Some work to restore the MMX capabilities
+
+  Revision 1.91  2003/12/15 21:25:49  peter
     * reg allocations for imaginary register are now inserted just
       before reg allocation
     * tregister changed to enum to allow compile time check

+ 11 - 2
compiler/x86/cpubase.pas

@@ -270,7 +270,11 @@ uses
                 4 : (register64 : tregister64);
               );
             { it's only for better handling }
-            LOC_MMXREGISTER,LOC_CMMXREGISTER : (mmxreg : tregister);
+            LOC_MMXREGISTER,LOC_CMMXREGISTER : (
+              case longint of
+              0: (mmxreg : tregister);
+              1: (mmxregset : Tregistermmxset);
+            );
       end;
 
       tlocation = packed record
@@ -415,6 +419,8 @@ implementation
             cgsize2subreg:=R_SUBD;
           OS_64,OS_S64:
             cgsize2subreg:=R_SUBQ;
+          OS_M64:
+            cgsize2subreg:=R_SUBNONE;
           else
             internalerror(200301231);
         end;
@@ -526,7 +532,10 @@ implementation
 end.
 {
   $Log$
-  Revision 1.31  2003-12-15 21:25:49  peter
+  Revision 1.32  2003-12-19 22:08:44  daniel
+    * Some work to restore the MMX capabilities
+
+  Revision 1.31  2003/12/15 21:25:49  peter
     * reg allocations for imaginary register are now inserted just
       before reg allocation
     * tregister changed to enum to allow compile time check