Sfoglia il codice sorgente

* r199 is almost merged now

git-svn-id: branches/fixes_2_0@3996 -
florian 19 anni fa
parent
commit
949d7b3a8c

+ 19 - 3
compiler/ncgld.pas

@@ -569,6 +569,13 @@ implementation
                         else
                         else
                           cg.g_concatcopy(exprasmlist,right.location.reference,left.location.reference,len);
                           cg.g_concatcopy(exprasmlist,right.location.reference,left.location.reference,len);
                       end;
                       end;
+                    LOC_MMREGISTER,
+                    LOC_CMMREGISTER:
+                      cg.a_loadmm_ref_reg(exprasmlist,
+                        right.location.size,
+                        left.location.size,
+                        right.location.reference,
+                        left.location.register,mms_movescalar);
                     else
                     else
                       internalerror(200203284);
                       internalerror(200203284);
                   end;
                   end;
@@ -622,9 +629,18 @@ implementation
                     fputyp:=tfloatdef(ttypeconvnode(right).left.resulttype.def).typ
                     fputyp:=tfloatdef(ttypeconvnode(right).left.resulttype.def).typ
                   else
                   else
                     fputyp:=s32real;
                     fputyp:=s32real;
-                  cg.a_loadfpu_reg_loc(exprasmlist,
-                      tfloat2tcgsize[fputyp],
-                      right.location.register,left.location);
+                  { we can't do direct moves between fpu and mm registers }
+                  if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
+                    begin
+                      location_force_mmregscalar(exprasmlist,right.location,false);
+                      cg.a_loadmm_reg_reg(exprasmlist,
+                          tfloat2tcgsize[fputyp],tfloat2tcgsize[fputyp],
+                          right.location.register,left.location.register,mms_movescalar);
+                    end
+                  else
+                    cg.a_loadfpu_reg_loc(exprasmlist,
+                        tfloat2tcgsize[fputyp],
+                        right.location.register,left.location);
                 end;
                 end;
               LOC_JUMP :
               LOC_JUMP :
                 begin
                 begin

+ 10 - 0
compiler/ncgutil.pas

@@ -837,6 +837,10 @@ implementation
                cg.a_load_const_reg(taasmoutput(arg),reg_cgsize(tglobalvarsym(p).localloc.register),0,
                cg.a_load_const_reg(taasmoutput(arg),reg_cgsize(tglobalvarsym(p).localloc.register),0,
                    tglobalvarsym(p).localloc.register);
                    tglobalvarsym(p).localloc.register);
              LOC_REFERENCE : ;
              LOC_REFERENCE : ;
+             LOC_CMMREGISTER :
+               ;
+             LOC_CFPUREGISTER :
+               ;
              else
              else
                internalerror(200410124);
                internalerror(200410124);
            end;
            end;
@@ -1206,6 +1210,9 @@ implementation
                       cg.getcpuregister(list,funcretloc.register);
                       cg.getcpuregister(list,funcretloc.register);
                       cg.ungetcpuregister(list,funcretloc.register);
                       cg.ungetcpuregister(list,funcretloc.register);
                     end;
                     end;
+                  { we can't do direct moves between fpu and mm registers }
+                  if restmploc.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
+                    location_force_fpureg(list,restmploc,false);
                   cg.a_loadfpu_loc_reg(list,restmploc,funcretloc.register);
                   cg.a_loadfpu_loc_reg(list,restmploc,funcretloc.register);
                 end;
                 end;
               LOC_MMREGISTER:
               LOC_MMREGISTER:
@@ -1484,8 +1491,11 @@ implementation
                 begin
                 begin
                   unget_para(paraloc^);
                   unget_para(paraloc^);
                   gen_load_reg(paraloc^,currpara.localloc.register);
                   gen_load_reg(paraloc^,currpara.localloc.register);
+                  { data could come in two memory locations, for now
+                    we simply ignore the sanity check (FK)
                   if assigned(paraloc^.next) then
                   if assigned(paraloc^.next) then
                     internalerror(200410108);
                     internalerror(200410108);
+                  }
                 end;
                 end;
             end;
             end;
           end;
           end;

+ 12 - 4
compiler/symsym.pas

@@ -1470,13 +1470,21 @@ implementation
             if tstoreddef(vartype.def).is_intregable then
             if tstoreddef(vartype.def).is_intregable then
               varregable:=vr_intreg
               varregable:=vr_intreg
             else
             else
-{$warning TODO: no fpu regvar in staticsymtable yet, need initialization with 0}
-              if (
+{ $warning TODO: no fpu regvar in staticsymtable yet, need initialization with 0 }
+              if {(
                   not assigned(owner) or
                   not assigned(owner) or
                   (owner.symtabletype<>staticsymtable)
                   (owner.symtabletype<>staticsymtable)
-                 ) and
+                 ) and }
                  tstoreddef(vartype.def).is_fpuregable then
                  tstoreddef(vartype.def).is_fpuregable then
-                varregable:=vr_fpureg;
+                 begin
+{$ifdef x86}
+                   if use_sse(vartype.def) then
+                     varregable:=vr_mmreg
+                   else
+{$else x86}
+                     varregable:=vr_fpureg;
+{$endif x86}
+                 end;
           end;
           end;
       end;
       end;
 
 

+ 1 - 1
compiler/x86/cgx86.pas

@@ -188,7 +188,7 @@ unit cgx86;
 
 
     function Tcgx86.getmmregister(list:Taasmoutput;size:Tcgsize):Tregister;
     function Tcgx86.getmmregister(list:Taasmoutput;size:Tcgsize):Tregister;
       begin
       begin
-        if not assigned(rg[R_MMXREGISTER]) then
+        if not assigned(rg[R_MMREGISTER]) then
           internalerror(200312124);
           internalerror(200312124);
         case size of
         case size of
           OS_F64:
           OS_F64:

+ 4 - 3
compiler/x86/nx86mat.pas

@@ -50,7 +50,8 @@ interface
       globtype,
       globtype,
       systems,
       systems,
       cutils,verbose,globals,
       cutils,verbose,globals,
-      symconst,symdef,aasmbase,aasmtai,defutil,
+      symconst,symdef,
+      aasmbase,aasmtai,defutil,
       cgbase,pass_1,pass_2,
       cgbase,pass_1,pass_2,
       ncon,
       ncon,
       cpubase,
       cpubase,
@@ -170,10 +171,10 @@ interface
             location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
             location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
 
 
             { make life of register allocator easier }
             { make life of register allocator easier }
-            location.register:=cg.getmmregister(exprasmlist,OS_M128);
+            location.register:=cg.getmmregister(exprasmlist,def_cgsize(resulttype.def));
             cg.a_loadmm_reg_reg(exprasmlist,def_cgsize(resulttype.def),def_cgsize(resulttype.def),left.location.register,location.register,mms_movescalar);
             cg.a_loadmm_reg_reg(exprasmlist,def_cgsize(resulttype.def),def_cgsize(resulttype.def),left.location.register,location.register,mms_movescalar);
 
 
-            reg:=cg.getmmregister(exprasmlist,OS_M128);
+            reg:=cg.getmmregister(exprasmlist,def_cgsize(resulttype.def));
 
 
             objectlibrary.getdatalabel(l1);
             objectlibrary.getdatalabel(l1);
             consts.concat(Tai_label.Create(l1));
             consts.concat(Tai_label.Create(l1));

+ 1 - 1
compiler/x86_64/cgcpu.pas

@@ -53,7 +53,7 @@ unit cgcpu;
         inherited init_register_allocators;
         inherited init_register_allocators;
         rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RBX,RS_RSI,RS_RDI,
         rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RBX,RS_RSI,RS_RDI,
           RS_R8,RS_R9,RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[RS_RBP]);
           RS_R8,RS_R9,RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[RS_RBP]);
-        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
           RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
           RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
         rgfpu:=Trgx86fpu.create;
         rgfpu:=Trgx86fpu.create;
       end;
       end;