Browse Source

* fixed a lot of stuff for fpu/mm register variables

git-svn-id: trunk@199 -
florian 20 years ago
parent
commit
ebcb69478f

+ 1 - 1
.gitattributes

@@ -125,7 +125,6 @@ compiler/i386/i386tab.inc svneol=native#text/plain
 compiler/i386/n386add.pas svneol=native#text/plain
 compiler/i386/n386add.pas svneol=native#text/plain
 compiler/i386/n386cal.pas svneol=native#text/plain
 compiler/i386/n386cal.pas svneol=native#text/plain
 compiler/i386/n386cnv.pas svneol=native#text/plain
 compiler/i386/n386cnv.pas svneol=native#text/plain
-compiler/i386/n386con.pas svneol=native#text/plain
 compiler/i386/n386inl.pas svneol=native#text/plain
 compiler/i386/n386inl.pas svneol=native#text/plain
 compiler/i386/n386mat.pas svneol=native#text/plain
 compiler/i386/n386mat.pas svneol=native#text/plain
 compiler/i386/n386mem.pas svneol=native#text/plain
 compiler/i386/n386mem.pas svneol=native#text/plain
@@ -467,6 +466,7 @@ compiler/x86/itcpugas.pas svneol=native#text/plain
 compiler/x86/itx86int.pas svneol=native#text/plain
 compiler/x86/itx86int.pas svneol=native#text/plain
 compiler/x86/nx86add.pas svneol=native#text/plain
 compiler/x86/nx86add.pas svneol=native#text/plain
 compiler/x86/nx86cnv.pas svneol=native#text/plain
 compiler/x86/nx86cnv.pas svneol=native#text/plain
+compiler/x86/nx86con.pas svneol=native#text/plain
 compiler/x86/nx86inl.pas svneol=native#text/plain
 compiler/x86/nx86inl.pas svneol=native#text/plain
 compiler/x86/nx86mat.pas svneol=native#text/plain
 compiler/x86/nx86mat.pas svneol=native#text/plain
 compiler/x86/nx86set.pas svneol=native#text/plain
 compiler/x86/nx86set.pas svneol=native#text/plain

+ 7 - 1
compiler/cgbase.pas

@@ -129,7 +129,9 @@ interface
         { For Sparc floats that use F0:F1 to store doubles }
         { For Sparc floats that use F0:F1 to store doubles }
         R_SUBFS,   { = 6; Float that allocates 1 FPU register }
         R_SUBFS,   { = 6; Float that allocates 1 FPU register }
         R_SUBFD,   { = 7; Float that allocates 2 FPU registers }
         R_SUBFD,   { = 7; Float that allocates 2 FPU registers }
-        R_SUBFQ    { = 8; Float that allocates 4 FPU registers }
+        R_SUBFQ,   { = 8; Float that allocates 4 FPU registers }
+        R_SUBMMS,  { = 9; single scalar in multi media register }
+        R_SUBMMD   { = 10; double scalar in multi media register }
       );
       );
 
 
       TSuperRegister = type word;
       TSuperRegister = type word;
@@ -513,6 +515,10 @@ implementation
             result:=result+'fs';
             result:=result+'fs';
           R_SUBFD:
           R_SUBFD:
             result:=result+'fd';
             result:=result+'fd';
+          R_SUBMMD:
+            result:=result+'md';
+          R_SUBMMS:
+            result:=result+'ms';
           else
           else
             internalerror(200308252);
             internalerror(200308252);
         end;
         end;

+ 4 - 0
compiler/cgobj.pas

@@ -1376,7 +1376,9 @@ implementation
         paramanager.freeparaloc(list,cgpara1);
         paramanager.freeparaloc(list,cgpara1);
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
+        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
         a_call_name(list,'FPC_SHORTSTR_ASSIGN');
         a_call_name(list,'FPC_SHORTSTR_ASSIGN');
+        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
         dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         dealloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
         dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
         cgpara3.done;
         cgpara3.done;
@@ -1426,6 +1428,8 @@ implementation
             a_param_ref(list,OS_ADDR,ref,cgpara1);
             a_param_ref(list,OS_ADDR,ref,cgpara1);
             paramanager.freeparaloc(list,cgpara1);
             paramanager.freeparaloc(list,cgpara1);
             alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
             alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+            alloccpuregisters(list,R_FPUREGISTER,paramanager.get_volatile_registers_fpu(pocall_default));
+            alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_int(pocall_default));
             a_call_name(list,incrfunc);
             a_call_name(list,incrfunc);
             dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
             dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
           end
           end

+ 0 - 2
compiler/cgutils.pas

@@ -113,8 +113,6 @@ unit cgutils;
     procedure location_copy(var destloc:tlocation; const sourceloc : tlocation);
     procedure location_copy(var destloc:tlocation; const sourceloc : tlocation);
     procedure location_swap(var destloc,sourceloc : tlocation);
     procedure location_swap(var destloc,sourceloc : tlocation);
 
 
-
-
 implementation
 implementation
 
 
 {****************************************************************************
 {****************************************************************************

+ 2 - 2
compiler/i386/cgcpu.pas

@@ -83,7 +83,7 @@ unit cgcpu;
         else
         else
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP]);
           rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP]);
         rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
         rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
-        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
         rgfpu:=Trgx86fpu.create;
         rgfpu:=Trgx86fpu.create;
       end;
       end;
 
 
@@ -164,7 +164,7 @@ unit cgcpu;
         if use_push(cgpara) then
         if use_push(cgpara) then
           begin
           begin
             { Record copy? }
             { Record copy? }
-            if (cgpara.size=OS_NO) or (size=OS_NO) then
+            if (cgpara.size in [OS_NO,OS_F64,OS_F80]) or (size=OS_NO) then
               begin
               begin
                 cgpara.check_simple_location;
                 cgpara.check_simple_location;
                 len:=align(cgpara.intsize,cgpara.alignment);
                 len:=align(cgpara.intsize,cgpara.alignment);

+ 1 - 1
compiler/i386/cpunode.pas

@@ -45,10 +45,10 @@ unit cpunode;
          after the generic one (FK)
          after the generic one (FK)
        }
        }
        nx86set,
        nx86set,
+       nx86con,
 
 
        n386add,
        n386add,
        n386cal,
        n386cal,
-       n386con,
        n386mem,
        n386mem,
        n386set,
        n386set,
        n386inl,
        n386inl,

+ 30 - 12
compiler/i386/cpupara.pas

@@ -408,14 +408,23 @@ unit cpupara;
                   internalerror(200501163);
                   internalerror(200501163);
                 while (paralen>0) do
                 while (paralen>0) do
                   begin
                   begin
-                    { We can allocate at maximum 32 bits per location }
-                    if paralen>sizeof(aint) then
-                      l:=sizeof(aint)
-                    else
-                      l:=paralen;
                     paraloc:=hp.paraloc[side].add_location;
                     paraloc:=hp.paraloc[side].add_location;
                     paraloc^.loc:=LOC_REFERENCE;
                     paraloc^.loc:=LOC_REFERENCE;
-                    paraloc^.size:=int_cgsize(l);
+                    { Extended and double need a single location }
+                    if (paracgsize in [OS_F80,OS_F64,OS_F32]) then
+                      begin
+                        paraloc^.size:=paracgsize;
+                        l:=paralen;
+                      end
+                    else
+                      begin
+                        { We can allocate at maximum 32 bits per location }
+                        if paralen>sizeof(aint) then
+                          l:=sizeof(aint)
+                        else
+                          l:=paralen;
+                        paraloc^.size:=int_cgsize(l);
+                      end;
                     if side=callerside then
                     if side=callerside then
                       paraloc^.reference.index:=NR_STACK_POINTER_REG
                       paraloc^.reference.index:=NR_STACK_POINTER_REG
                     else
                     else
@@ -512,14 +521,23 @@ unit cpupara;
                       internalerror(200501163);
                       internalerror(200501163);
                     while (paralen>0) do
                     while (paralen>0) do
                       begin
                       begin
-                        { We can allocate at maximum 32 bits per location }
-                        if paralen>sizeof(aint) then
-                          l:=sizeof(aint)
-                        else
-                          l:=paralen;
                         paraloc:=hp.paraloc[side].add_location;
                         paraloc:=hp.paraloc[side].add_location;
                         paraloc^.loc:=LOC_REFERENCE;
                         paraloc^.loc:=LOC_REFERENCE;
-                        paraloc^.size:=int_cgsize(l);
+                        { Extended and double need a single location }
+                        if (paracgsize in [OS_F80,OS_F64,OS_F32]) then
+                          begin
+                            paraloc^.size:=paracgsize;
+                            l:=paralen;
+                          end
+                        else
+                          begin
+                            { We can allocate at maximum 32 bits per location }
+                            if paralen>sizeof(aint) then
+                              l:=sizeof(aint)
+                            else
+                              l:=paralen;
+                            paraloc^.size:=int_cgsize(l);
+                          end;
                         if side=callerside then
                         if side=callerside then
                           paraloc^.reference.index:=NR_STACK_POINTER_REG
                           paraloc^.reference.index:=NR_STACK_POINTER_REG
                         else
                         else

+ 3 - 1
compiler/i386/i386att.inc

@@ -563,5 +563,7 @@
 'movddup',
 'movddup',
 'movshdup',
 'movshdup',
 'movsldup',
 'movsldup',
-'movabs'
+'movabs',
+'movslq',
+'cqto'
 );
 );

+ 2 - 0
compiler/i386/i386atts.inc

@@ -563,5 +563,7 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 3 - 1
compiler/i386/i386int.inc

@@ -563,5 +563,7 @@
 'movddup',
 'movddup',
 'movshdup',
 'movshdup',
 'movsldup',
 'movsldup',
-'movabs'
+'movabs',
+'movsxd',
+'cdo'
 );
 );

+ 3 - 1
compiler/i386/i386op.inc

@@ -563,5 +563,7 @@ A_LDDQU,
 A_MOVDDUP,
 A_MOVDDUP,
 A_MOVSHDUP,
 A_MOVSHDUP,
 A_MOVSLDUP,
 A_MOVSLDUP,
-A_MOVABS
+A_MOVABS,
+A_MOVSXD,
+A_CDO
 );
 );

+ 35 - 33
compiler/i386/i386prop.inc

@@ -215,11 +215,11 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -383,6 +383,10 @@
 (Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
 (Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -408,6 +412,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -420,13 +426,21 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -479,6 +493,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -512,6 +530,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -521,7 +541,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -536,32 +563,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None))
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_MRAX, Ch_WRDX, Ch_None))
 );
 );

+ 19 - 3
compiler/ncgld.pas

@@ -591,6 +591,13 @@ implementation
                         else
                         else
                           cg.g_concatcopy(exprasmlist,right.location.reference,left.location.reference,len);
                           cg.g_concatcopy(exprasmlist,right.location.reference,left.location.reference,len);
                       end;
                       end;
+                    LOC_MMREGISTER,
+                    LOC_CMMREGISTER:
+                      cg.a_loadmm_ref_reg(exprasmlist,
+                        right.location.size,
+                        left.location.size,
+                        right.location.reference,
+                        left.location.register,mms_movescalar);
                     else
                     else
                       internalerror(200203284);
                       internalerror(200203284);
                   end;
                   end;
@@ -644,9 +651,18 @@ implementation
                     fputyp:=tfloatdef(ttypeconvnode(right).left.resulttype.def).typ
                     fputyp:=tfloatdef(ttypeconvnode(right).left.resulttype.def).typ
                   else
                   else
                     fputyp:=s32real;
                     fputyp:=s32real;
-                  cg.a_loadfpu_reg_loc(exprasmlist,
-                      tfloat2tcgsize[fputyp],
-                      right.location.register,left.location);
+                  { we can't do direct moves between fpu and mm registers }
+                  if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
+                    begin
+                      location_force_mmregscalar(exprasmlist,right.location,false);
+                      cg.a_loadmm_reg_reg(exprasmlist,
+                          tfloat2tcgsize[fputyp],tfloat2tcgsize[fputyp],
+                          right.location.register,left.location.register,mms_movescalar);
+                    end
+                  else
+                    cg.a_loadfpu_reg_loc(exprasmlist,
+                        tfloat2tcgsize[fputyp],
+                        right.location.register,left.location);
                 end;
                 end;
               LOC_JUMP :
               LOC_JUMP :
                 begin
                 begin

+ 10 - 0
compiler/ncgutil.pas

@@ -836,6 +836,10 @@ implementation
                cg.a_load_const_reg(taasmoutput(arg),reg_cgsize(tglobalvarsym(p).localloc.register),0,
                cg.a_load_const_reg(taasmoutput(arg),reg_cgsize(tglobalvarsym(p).localloc.register),0,
                    tglobalvarsym(p).localloc.register);
                    tglobalvarsym(p).localloc.register);
              LOC_REFERENCE : ;
              LOC_REFERENCE : ;
+             LOC_CMMREGISTER :
+               ;
+             LOC_CFPUREGISTER :
+               ;
              else
              else
                internalerror(200410124);
                internalerror(200410124);
            end;
            end;
@@ -1206,6 +1210,9 @@ implementation
                       cg.getcpuregister(list,funcretloc.register);
                       cg.getcpuregister(list,funcretloc.register);
                       cg.ungetcpuregister(list,funcretloc.register);
                       cg.ungetcpuregister(list,funcretloc.register);
                     end;
                     end;
+                  { we can't do direct moves between fpu and mm registers }
+                  if restmploc.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
+                    location_force_fpureg(list,restmploc,false);
                   cg.a_loadfpu_loc_reg(list,restmploc,funcretloc.register);
                   cg.a_loadfpu_loc_reg(list,restmploc,funcretloc.register);
                 end;
                 end;
               LOC_MMREGISTER:
               LOC_MMREGISTER:
@@ -1463,8 +1470,11 @@ implementation
                 begin
                 begin
                   unget_para(paraloc^);
                   unget_para(paraloc^);
                   gen_load_reg(paraloc^,currpara.localloc.register);
                   gen_load_reg(paraloc^,currpara.localloc.register);
+                  { data could come in two memory locations, for now
+                    we simply ignore the sanity check (FK)
                   if assigned(paraloc^.next) then
                   if assigned(paraloc^.next) then
                     internalerror(200410108);
                     internalerror(200410108);
+                  }
                 end;
                 end;
             end;
             end;
           end;
           end;

+ 1 - 0
compiler/options.pas

@@ -2123,6 +2123,7 @@ begin
      initalignment.jumpalign:=1;
      initalignment.jumpalign:=1;
      initalignment.loopalign:=1;
      initalignment.loopalign:=1;
    end;
    end;
+
   UpdateAlignment(initalignment,option.paraalignment);
   UpdateAlignment(initalignment,option.paraalignment);
 
 
   set_system_macro('FPC_VERSION',version_nr);
   set_system_macro('FPC_VERSION',version_nr);

+ 14 - 1
compiler/symdef.pas

@@ -840,6 +840,10 @@ interface
     function is_class_or_interface(def: tdef): boolean;
     function is_class_or_interface(def: tdef): boolean;
 
 
 
 
+{$ifdef x86}
+    function use_sse(def : tdef) : boolean;
+{$endif x86}
+
 implementation
 implementation
 
 
     uses
     uses
@@ -1286,7 +1290,7 @@ implementation
    function tstoreddef.is_fpuregable : boolean;
    function tstoreddef.is_fpuregable : boolean;
      begin
      begin
 {$ifdef x86}
 {$ifdef x86}
-       result:=false;
+       result:=use_sse(self);
 {$else x86}
 {$else x86}
        result:=(deftype=floatdef);
        result:=(deftype=floatdef);
 {$endif x86}
 {$endif x86}
@@ -6405,4 +6409,13 @@ implementation
           (tobjectdef(def).objecttype in [odt_class,odt_interfacecom,odt_interfacecorba]);
           (tobjectdef(def).objecttype in [odt_class,odt_interfacecom,odt_interfacecorba]);
       end;
       end;
 
 
+
+{$ifdef x86}
+    { Returns true when def is a single and aktfputype is in sse_singlescalar,
+      or when def is a double and aktfputype is in sse_doublescalar;
+      false otherwise. Only compiled when x86 is defined. }
+    function use_sse(def : tdef) : boolean;
+      begin
+        use_sse:=(is_single(def) and (aktfputype in sse_singlescalar)) or
+          (is_double(def) and (aktfputype in sse_doublescalar));
+      end;
+{$endif x86}
+
 end.
 end.

+ 12 - 4
compiler/symsym.pas

@@ -1429,13 +1429,21 @@ implementation
             if tstoreddef(vartype.def).is_intregable then
             if tstoreddef(vartype.def).is_intregable then
               varregable:=vr_intreg
               varregable:=vr_intreg
             else
             else
-{$warning TODO: no fpu regvar in staticsymtable yet, need initialization with 0}
-              if (
+{ $warning TODO: no fpu regvar in staticsymtable yet, need initialization with 0 }
+              if {(
                   not assigned(owner) or
                   not assigned(owner) or
                   (owner.symtabletype<>staticsymtable)
                   (owner.symtabletype<>staticsymtable)
-                 ) and
+                 ) and }
                  tstoreddef(vartype.def).is_fpuregable then
                  tstoreddef(vartype.def).is_fpuregable then
-                varregable:=vr_fpureg;
+                 begin
+{$ifdef x86}
+                   if use_sse(vartype.def) then
+                     varregable:=vr_mmreg
+                   else
+{$else x86}
+                     varregable:=vr_fpureg;
+{$endif x86}
+                 end;
           end;
           end;
       end;
       end;
 
 

+ 32 - 3
compiler/x86/aasmcpu.pas

@@ -2031,7 +2031,22 @@ implementation
 
 
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
       begin
-        result:=operation_type_table^[opcode,opnr];
+        { the information in the instruction table is made for the string copy
+          operation MOVSD so hack here (FK)
+        }
+        if (opcode=A_MOVSD) and (ops=2) then
+          begin
+            case opnr of
+              0:
+                result:=operand_read;
+              1:
+                result:=operand_write;
+              else
+                internalerror(200506055);
+            end
+          end
+        else
+          result:=operation_type_table^[opcode,opnr];
       end;
       end;
 
 
 
 
@@ -2041,7 +2056,14 @@ implementation
           R_INTREGISTER :
           R_INTREGISTER :
             result:=taicpu.op_ref_reg(A_MOV,reg2opsize(r),ref,r);
             result:=taicpu.op_ref_reg(A_MOV,reg2opsize(r),ref,r);
           R_MMREGISTER :
           R_MMREGISTER :
-            result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
+            case getsubreg(r) of
+              R_SUBMMD:
+                result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
+              R_SUBMMS:
+                result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
+              else
+                internalerror(200506043);
+            end;
           else
           else
             internalerror(200401041);
             internalerror(200401041);
         end;
         end;
@@ -2054,7 +2076,14 @@ implementation
           R_INTREGISTER :
           R_INTREGISTER :
             result:=taicpu.op_reg_ref(A_MOV,reg2opsize(r),r,ref);
             result:=taicpu.op_reg_ref(A_MOV,reg2opsize(r),r,ref);
           R_MMREGISTER :
           R_MMREGISTER :
-            result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
+            case getsubreg(r) of
+              R_SUBMMD:
+                result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
+              R_SUBMMS:
+                result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
+              else
+                internalerror(200506042);
+            end;
           else
           else
             internalerror(200401041);
             internalerror(200401041);
         end;
         end;

+ 18 - 9
compiler/x86/cgx86.pas

@@ -41,6 +41,7 @@ unit cgx86;
 
 
         function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
         function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
         function getmmxregister(list:Taasmoutput):Tregister;
         function getmmxregister(list:Taasmoutput):Tregister;
+        function getmmregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
 
 
         procedure getcpuregister(list:Taasmoutput;r:Tregister);override;
         procedure getcpuregister(list:Taasmoutput;r:Tregister);override;
         procedure ungetcpuregister(list:Taasmoutput;r:Tregister);override;
         procedure ungetcpuregister(list:Taasmoutput;r:Tregister);override;
@@ -126,8 +127,6 @@ unit cgx86;
         procedure floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
         procedure floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
       end;
       end;
 
 
-    function use_sse(def : tdef) : boolean;
-
    const
    const
 {$ifdef x86_64}
 {$ifdef x86_64}
       TCGSize2OpSize: Array[tcgsize] of topsize =
       TCGSize2OpSize: Array[tcgsize] of topsize =
@@ -163,13 +162,6 @@ unit cgx86;
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
           C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A);
           C_E,C_G,C_L,C_GE,C_LE,C_NE,C_BE,C_B,C_AE,C_A);
 
 
-    function use_sse(def : tdef) : boolean;
-      begin
-        use_sse:=(is_single(def) and (aktfputype in sse_singlescalar)) or
-          (is_double(def) and (aktfputype in sse_doublescalar));
-      end;
-
-
     procedure Tcgx86.done_register_allocators;
     procedure Tcgx86.done_register_allocators;
       begin
       begin
         rg[R_INTREGISTER].free;
         rg[R_INTREGISTER].free;
@@ -185,6 +177,7 @@ unit cgx86;
         result:=rgfpu.getregisterfpu(list);
         result:=rgfpu.getregisterfpu(list);
       end;
       end;
 
 
+
     function Tcgx86.getmmxregister(list:Taasmoutput):Tregister;
     function Tcgx86.getmmxregister(list:Taasmoutput):Tregister;
       begin
       begin
         if not assigned(rg[R_MMXREGISTER]) then
         if not assigned(rg[R_MMXREGISTER]) then
@@ -192,6 +185,22 @@ unit cgx86;
         result:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
         result:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
       end;
       end;
 
 
+
+    { Allocates a scalar multimedia register from rg[R_MMREGISTER] for a
+      float of the given size:
+        OS_F64 -> subregister R_SUBMMD
+        OS_F32 -> subregister R_SUBMMS
+      Any other size raises internalerror(200506041).
+      Raises internalerror(200312124) if the R_MMREGISTER allocator has not
+      been created. }
+    function Tcgx86.getmmregister(list:Taasmoutput;size:Tcgsize):Tregister;
+      begin
+        { guard the allocator that is actually used below; the MMX allocator
+          lives in a different rg[] slot and says nothing about this one }
+        if not assigned(rg[R_MMREGISTER]) then
+          internalerror(200312124);
+        case size of
+          OS_F64:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD);
+          OS_F32:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          else
+            internalerror(200506041);
+        end;
+      end;
+
+
     procedure Tcgx86.getcpuregister(list:Taasmoutput;r:Tregister);
     procedure Tcgx86.getcpuregister(list:Taasmoutput;r:Tregister);
       begin
       begin
         if getregtype(r)=R_FPUREGISTER then
         if getregtype(r)=R_FPUREGISTER then

+ 12 - 4
compiler/x86/cpubase.pas

@@ -314,7 +314,7 @@ implementation
 
 
     function reg_cgsize(const reg: tregister): tcgsize;
     function reg_cgsize(const reg: tregister): tcgsize;
       const subreg2cgsize:array[Tsubregister] of Tcgsize =
       const subreg2cgsize:array[Tsubregister] of Tcgsize =
-            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO);
+            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64);
       begin
       begin
         case getregtype(reg) of
         case getregtype(reg) of
           R_INTREGISTER :
           R_INTREGISTER :
@@ -324,7 +324,7 @@ implementation
           R_MMXREGISTER:
           R_MMXREGISTER:
             reg_cgsize:=OS_M64;
             reg_cgsize:=OS_M64;
           R_MMREGISTER:
           R_MMREGISTER:
-            reg_cgsize:=OS_M128;
+            reg_cgsize:=subreg2cgsize[getsubreg(reg)];
           R_SPECIALREGISTER :
           R_SPECIALREGISTER :
             case reg of
             case reg of
               NR_CS,NR_DS,NR_ES,NR_SS,NR_FS,NR_GS:
               NR_CS,NR_DS,NR_ES,NR_SS,NR_FS,NR_GS:
@@ -341,7 +341,7 @@ implementation
     function reg2opsize(r:Tregister):topsize;
     function reg2opsize(r:Tregister):topsize;
       const
       const
         subreg2opsize : array[tsubregister] of topsize =
         subreg2opsize : array[tsubregister] of topsize =
-          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO);
+          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO);
       begin
       begin
         reg2opsize:=S_L;
         reg2opsize:=S_L;
         case getregtype(r) of
         case getregtype(r) of
@@ -418,8 +418,16 @@ implementation
 
 
 
 
     function findreg_by_number(r:Tregister):tregisterindex;
     function findreg_by_number(r:Tregister):tregisterindex;
+      var
+        hr : tregister;
       begin
       begin
-        result:=findreg_by_number_table(r,regnumber_index);
+        { The table lookup is done on a copy of r whose subregister is reset
+          to R_SUBNONE when it is R_SUBMMS or R_SUBMMD: for the name the
+          sub reg doesn't matter. Other subregisters are passed through unchanged. }
+        hr:=r;
+        case getsubreg(hr) of
+          R_SUBMMS,R_SUBMMD:
+            setsubreg(hr,R_SUBNONE);
+        end;
+        result:=findreg_by_number_table(hr,regnumber_index);
       end;
       end;
 
 
 
 

+ 1 - 1
compiler/x86/nx86add.pas

@@ -61,7 +61,7 @@ unit nx86add;
       verbose,cutils,
       verbose,cutils,
       cpuinfo,
       cpuinfo,
       aasmbase,aasmtai,aasmcpu,
       aasmbase,aasmtai,aasmcpu,
-      symconst,
+      symconst,symdef,
       cgobj,cgx86,cga,cgutils,
       cgobj,cgx86,cga,cgutils,
       paramgr,tgobj,ncgutil,
       paramgr,tgobj,ncgutil,
       ncon,nset,
       ncon,nset,

+ 9 - 8
compiler/i386/n386con.pas → compiler/x86/nx86con.pas

@@ -19,7 +19,7 @@
 
 
  ****************************************************************************
  ****************************************************************************
 }
 }
-unit n386con;
+unit nx86con;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
@@ -29,7 +29,7 @@ interface
        node,ncon,ncgcon;
        node,ncon,ncgcon;
 
 
     type
     type
-       ti386realconstnode = class(tcgrealconstnode)
+       tx86realconstnode = class(tcgrealconstnode)
           function pass_1 : tnode;override;
           function pass_1 : tnode;override;
           procedure pass_2;override;
           procedure pass_2;override;
        end;
        end;
@@ -38,6 +38,7 @@ implementation
 
 
     uses
     uses
       systems,globals,
       systems,globals,
+      symdef,
       defutil,
       defutil,
       cpubase,
       cpubase,
       cga,cgx86,cgobj,cgbase,cgutils;
       cga,cgx86,cgobj,cgbase,cgutils;
@@ -46,10 +47,10 @@ implementation
                            TI386REALCONSTNODE
                            TI386REALCONSTNODE
 *****************************************************************************}
 *****************************************************************************}
 
 
-    function ti386realconstnode.pass_1 : tnode;
+    function tx86realconstnode.pass_1 : tnode;
       begin
       begin
          result:=nil;
          result:=nil;
-         if is_number_float(value_real) and (value_real=1.0) or (value_real=0.0) then
+         if is_number_float(value_real) and not(use_sse(resulttype.def)) and (value_real=1.0) or (value_real=0.0) then
            begin
            begin
               expectloc:=LOC_FPUREGISTER;
               expectloc:=LOC_FPUREGISTER;
               registersfpu:=1;
               registersfpu:=1;
@@ -58,19 +59,19 @@ implementation
            expectloc:=LOC_CREFERENCE;
            expectloc:=LOC_CREFERENCE;
       end;
       end;
 
 
-    procedure ti386realconstnode.pass_2;
+    procedure tx86realconstnode.pass_2;
 
 
       begin
       begin
          if is_number_float(value_real) then
          if is_number_float(value_real) then
            begin
            begin
-             if (value_real=1.0) then
+             if (value_real=1.0) and not(use_sse(resulttype.def)) then
                begin
                begin
                   emit_none(A_FLD1,S_NO);
                   emit_none(A_FLD1,S_NO);
                   location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
                   location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
                   location.register:=NR_ST;
                   location.register:=NR_ST;
                   tcgx86(cg).inc_fpu_stack;
                   tcgx86(cg).inc_fpu_stack;
                end
                end
-             else if (value_real=0.0) then
+             else if (value_real=0.0) and not(use_sse(resulttype.def)) then
                begin
                begin
                   emit_none(A_FLDZ,S_NO);
                   emit_none(A_FLDZ,S_NO);
                   location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
                   location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
@@ -86,5 +87,5 @@ implementation
 
 
 
 
 begin
 begin
-   crealconstnode:=ti386realconstnode;
+   crealconstnode:=tx86realconstnode;
 end.
 end.

+ 4 - 3
compiler/x86/nx86mat.pas

@@ -49,7 +49,8 @@ interface
     uses
     uses
       systems,
       systems,
       cutils,verbose,globals,
       cutils,verbose,globals,
-      symconst,aasmbase,aasmtai,defutil,
+      symconst,symdef,
+      aasmbase,aasmtai,defutil,
       cgbase,pass_1,pass_2,
       cgbase,pass_1,pass_2,
       ncon,
       ncon,
       cpubase,
       cpubase,
@@ -171,10 +172,10 @@ interface
             location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
             location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
 
 
             { make life of register allocator easier }
             { make life of register allocator easier }
-            location.register:=cg.getmmregister(exprasmlist,OS_M128);
+            location.register:=cg.getmmregister(exprasmlist,def_cgsize(resulttype.def));
             cg.a_loadmm_reg_reg(exprasmlist,def_cgsize(resulttype.def),def_cgsize(resulttype.def),left.location.register,location.register,mms_movescalar);
             cg.a_loadmm_reg_reg(exprasmlist,def_cgsize(resulttype.def),def_cgsize(resulttype.def),left.location.register,location.register,mms_movescalar);
 
 
-            reg:=cg.getmmregister(exprasmlist,OS_M128);
+            reg:=cg.getmmregister(exprasmlist,def_cgsize(resulttype.def));
 
 
             objectlibrary.getdatalabel(l1);
             objectlibrary.getdatalabel(l1);
             consts.concat(Tai_label.Create(l1));
             consts.concat(Tai_label.Create(l1));

+ 33 - 32
compiler/x86/x86ins.dat

@@ -1253,6 +1253,7 @@ xmmreg,mem            \301\333\2\x0F\x7E\110          WILLAMETTE,SSE2
 void                  \1\xA4                          8086
 void                  \1\xA4                          8086
 
 
 [MOVSD,movsl]
 [MOVSD,movsl]
+; Ch_All isn't correct for the SSE move forms of MOVSD, but how can that be expressed here? (FK)
 (Ch_All, Ch_None, Ch_None)
 (Ch_All, Ch_None, Ch_None)
 void                  \321\1\xA5                      386
 void                  \321\1\xA5                      386
 xmmreg,xmmreg         \3\xF2\x0F\x10\110              WILLAMETTE,SSE2
 xmmreg,xmmreg         \3\xF2\x0F\x10\110              WILLAMETTE,SSE2
@@ -1380,33 +1381,33 @@ xmmreg,xmmreg         \3\x66\x0F\x67\110              WILLAMETTE,SSE2
 xmmreg,mem            \301\3\x66\x0F\x67\110          WILLAMETTE,SSE2,SM
 xmmreg,mem            \301\3\x66\x0F\x67\110          WILLAMETTE,SSE2,SM
 
 
 [PADDB]
 [PADDB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mem            \301\2\x0F\xFC\110              PENT,MMX,SM
 mmxreg,mem            \301\2\x0F\xFC\110              PENT,MMX,SM
 mmxreg,mmxreg         \2\x0F\xFC\110                  PENT,MMX
 mmxreg,mmxreg         \2\x0F\xFC\110                  PENT,MMX
 xmmreg,xmmreg         \3\x66\x0F\xFC\110              WILLAMETTE,SSE2
 xmmreg,xmmreg         \3\x66\x0F\xFC\110              WILLAMETTE,SSE2
 xmmreg,mem            \301\3\x66\x0F\xFC\110          WILLAMETTE,SSE2,SM
 xmmreg,mem            \301\3\x66\x0F\xFC\110          WILLAMETTE,SSE2,SM
 
 
 [PADDD]
 [PADDD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mem            \301\2\x0F\xFE\110              PENT,MMX,SM
 mmxreg,mem            \301\2\x0F\xFE\110              PENT,MMX,SM
 mmxreg,mmxreg         \2\x0F\xFE\110                  PENT,MMX
 mmxreg,mmxreg         \2\x0F\xFE\110                  PENT,MMX
 xmmreg,xmmreg         \3\x66\x0F\xFE\110              WILLAMETTE,SSE2
 xmmreg,xmmreg         \3\x66\x0F\xFE\110              WILLAMETTE,SSE2
 xmmreg,mem            \301\3\x66\x0F\xFE\110          WILLAMETTE,SSE2,SM
 xmmreg,mem            \301\3\x66\x0F\xFE\110          WILLAMETTE,SSE2,SM
 
 
 [PADDSB]
 [PADDSB]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mem            \301\2\x0F\xEC\110              PENT,MMX,SM
 mmxreg,mem            \301\2\x0F\xEC\110              PENT,MMX,SM
 mmxreg,mmxreg         \2\x0F\xEC\110                  PENT,MMX
 mmxreg,mmxreg         \2\x0F\xEC\110                  PENT,MMX
 xmmreg,mem            \301\3\x66\x0F\xEC\110          WILLAMETTE,SSE2,SM
 xmmreg,mem            \301\3\x66\x0F\xEC\110          WILLAMETTE,SSE2,SM
 xmmreg,xmmreg         \3\x66\x0F\xEC\110              WILLAMETTE,SSE2
 xmmreg,xmmreg         \3\x66\x0F\xEC\110              WILLAMETTE,SSE2
 
 
 [PADDSIW]
 [PADDSIW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mem            \301\2\x0F\x51\110              PENT,MMX,SM,CYRIX
 mmxreg,mem            \301\2\x0F\x51\110              PENT,MMX,SM,CYRIX
 mmxreg,mmxreg         \2\x0F\x51\110                  PENT,MMX,CYRIX
 mmxreg,mmxreg         \2\x0F\x51\110                  PENT,MMX,CYRIX
 
 
 [PADDSW]
 [PADDSW]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 mmxreg,mem            \301\2\x0F\xED\110              PENT,MMX,SM
 mmxreg,mem            \301\2\x0F\xED\110              PENT,MMX,SM
 mmxreg,mmxreg         \2\x0F\xED\110                  PENT,MMX
 mmxreg,mmxreg         \2\x0F\xED\110                  PENT,MMX
 xmmreg,mem            \301\3\x66\x0F\xED\110          WILLAMETTE,SSE2,SM
 xmmreg,mem            \301\3\x66\x0F\xED\110          WILLAMETTE,SSE2,SM
@@ -2453,22 +2454,22 @@ reg8                  \300\1\x0F\330\x90\200          386
 ;
 ;
 
 
 [ADDPS]
 [ADDPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\331\2\x0F\x58\110          KATMAI,SSE
 xmmreg,mem            \301\331\2\x0F\x58\110          KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x58\110              KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x58\110              KATMAI,SSE
 
 
 [ADDSS]
 [ADDSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\333\2\x0F\x58\110          KATMAI,SSE
 xmmreg,mem            \301\333\2\x0F\x58\110          KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x58\110              KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x58\110              KATMAI,SSE
 
 
 [ANDNPS]
 [ANDNPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\2\x0F\x55\110              KATMAI,SSE
 xmmreg,mem            \301\2\x0F\x55\110              KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x55\110                  KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x55\110                  KATMAI,SSE
 
 
 [ANDPS]
 [ANDPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\2\x0F\x54\110              KATMAI,SSE
 xmmreg,mem            \301\2\x0F\x54\110              KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x54\110                  KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x54\110                  KATMAI,SSE
 
 
@@ -2603,12 +2604,12 @@ reg32,mem             \301\333\2\x0F\x2C\110          KATMAI,SSE
 reg32,xmmreg          \333\2\x0F\x2C\110              KATMAI,SSE
 reg32,xmmreg          \333\2\x0F\x2C\110              KATMAI,SSE
 
 
 [DIVPS]
 [DIVPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\331\2\x0F\x5E\110          KATMAI,SSE
 xmmreg,mem            \301\331\2\x0F\x5E\110          KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x5E\110              KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x5E\110              KATMAI,SSE
 
 
 [DIVSS]
 [DIVSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\333\2\x0F\x5E\110          KATMAI,SSE
 xmmreg,mem            \301\333\2\x0F\x5E\110          KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x5E\110              KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x5E\110              KATMAI,SSE
 
 
@@ -2670,7 +2671,7 @@ reg32,xmmreg          \2\x0F\x50\110                  KATMAI,SSE
 mem,xmmreg            \2\x0F\x2B\101                  KATMAI,SSE
 mem,xmmreg            \2\x0F\x2B\101                  KATMAI,SSE
 
 
 [MOVSS]
 [MOVSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Wop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\333\2\x0F\x10\110          KATMAI,SSE
 xmmreg,mem            \301\333\2\x0F\x10\110          KATMAI,SSE
 mem,xmmreg            \300\333\2\x0F\x11\101          KATMAI,SSE
 mem,xmmreg            \300\333\2\x0F\x11\101          KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x10\110              KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x10\110              KATMAI,SSE
@@ -2684,17 +2685,17 @@ xmmreg,xmmreg         \331\2\x0F\x10\110              KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x11\101              KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x11\101              KATMAI,SSE
 
 
 [MULPS]
 [MULPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\2\x0F\x59\110              KATMAI,SSE
 xmmreg,mem            \301\2\x0F\x59\110              KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x59\110                  KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x59\110                  KATMAI,SSE
 
 
 [MULSS]
 [MULSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\333\2\x0F\x59\110          KATMAI,SSE
 xmmreg,mem            \301\333\2\x0F\x59\110          KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x59\110              KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x59\110              KATMAI,SSE
 
 
 [ORPS]
 [ORPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\2\x0F\x56\110              KATMAI,SSE
 xmmreg,mem            \301\2\x0F\x56\110              KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x56\110                  KATMAI,SSE
 xmmreg,xmmreg         \2\x0F\x56\110                  KATMAI,SSE
 
 
@@ -2724,12 +2725,12 @@ xmmreg,mem,imm        \301\2\x0F\xC6\110\22           KATMAI,SSE,SB,AR2
 xmmreg,xmmreg,imm     \2\x0F\xC6\110\22               KATMAI,SSE,SB,AR2
 xmmreg,xmmreg,imm     \2\x0F\xC6\110\22               KATMAI,SSE,SB,AR2
 
 
 [SQRTPS]
 [SQRTPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\331\2\x0F\x51\110          KATMAI,SSE
 xmmreg,mem            \301\331\2\x0F\x51\110          KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x51\110              KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x51\110              KATMAI,SSE
 
 
 [SQRTSS]
 [SQRTSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\333\2\x0F\x51\110          KATMAI,SSE
 xmmreg,mem            \301\333\2\x0F\x51\110          KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x51\110              KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x51\110              KATMAI,SSE
 
 
@@ -2738,12 +2739,12 @@ xmmreg,xmmreg         \333\2\x0F\x51\110              KATMAI,SSE
 mem                   \300\2\x0F\xAE\203              KATMAI,SSE,SD
 mem                   \300\2\x0F\xAE\203              KATMAI,SSE,SD
 
 
 [SUBPS]
 [SUBPS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\331\2\x0F\x5C\110          KATMAI,SSE
 xmmreg,mem            \301\331\2\x0F\x5C\110          KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x5C\110              KATMAI,SSE
 xmmreg,xmmreg         \331\2\x0F\x5C\110              KATMAI,SSE
 
 
 [SUBSS]
 [SUBSS]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem            \301\333\2\x0F\x5C\110          KATMAI,SSE
 xmmreg,mem            \301\333\2\x0F\x5C\110          KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x5C\110              KATMAI,SSE
 xmmreg,xmmreg         \333\2\x0F\x5C\110              KATMAI,SSE
 
 
@@ -3046,22 +3047,22 @@ xmmreg,mem              \301\3\x66\x0F\x6C\110          WILLAMETTE,SSE2,SM
 ; Willamette Streaming SIMD instructions (SSE2)
 ; Willamette Streaming SIMD instructions (SSE2)
 ;
 ;
 [ADDPD]
 [ADDPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \331\3\x66\x0F\x58\110          WILLAMETTE,SSE2
 xmmreg,xmmreg           \331\3\x66\x0F\x58\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\x66\x0F\x58\110      WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\331\3\x66\x0F\x58\110      WILLAMETTE,SSE2,SM
 
 
 [ADDSD]
 [ADDSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \331\3\xF2\x0F\x58\110          WILLAMETTE,SSE2
 xmmreg,xmmreg           \331\3\xF2\x0F\x58\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\xF2\x0F\x58\110      WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\xF2\x0F\x58\110      WILLAMETTE,SSE2
 
 
 [ANDNPD]
 [ANDNPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \331\3\x66\x0F\x55\110          WILLAMETTE,SSE2
 xmmreg,xmmreg           \331\3\x66\x0F\x55\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\x66\x0F\x55\110      WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\331\3\x66\x0F\x55\110      WILLAMETTE,SSE2,SM
 
 
 [ANDPD]
 [ANDPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \331\3\x66\x0F\x54\110          WILLAMETTE,SSE2
 xmmreg,xmmreg           \331\3\x66\x0F\x54\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\331\3\x66\x0F\x54\110      WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\331\3\x66\x0F\x54\110      WILLAMETTE,SSE2,SM
 
 
@@ -3235,12 +3236,12 @@ reg32,xmmreg            \3\xF2\x0F\x2C\110              WILLAMETTE,SSE2
 reg32,mem               \301\3\xF2\x0F\x2C\110          WILLAMETTE,SSE2
 reg32,mem               \301\3\xF2\x0F\x2C\110          WILLAMETTE,SSE2
 
 
 [DIVPD]
 [DIVPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\x66\x0F\x5E\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\x66\x0F\x5E\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\x66\x0F\x5E\110          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x5E\110          WILLAMETTE,SSE2,SM
 
 
 [DIVSD]
 [DIVSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\xF2\x0F\x5E\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\xF2\x0F\x5E\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x5E\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x5E\110          WILLAMETTE,SSE2
 
 
@@ -3293,17 +3294,17 @@ mem,xmmreg              \300\3\x66\x0F\x11\101          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x10\110          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x10\110          WILLAMETTE,SSE2,SM
 
 
 [MULPD]
 [MULPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\x66\x0F\x59\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\x66\x0F\x59\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\x66\x0F\x59\110          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x59\110          WILLAMETTE,SSE2,SM
 
 
 [MULSD]
 [MULSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\xF2\x0F\x59\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\xF2\x0F\x59\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x59\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x59\110          WILLAMETTE,SSE2
 
 
 [ORPD]
 [ORPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,mem              \301\3\x66\x0F\x56\110          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x56\110          WILLAMETTE,SSE2,SM
 xmmreg,xmmreg           \3\x66\x0F\x56\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\x66\x0F\x56\110              WILLAMETTE,SSE2
 
 
@@ -3313,22 +3314,22 @@ xmmreg,xmmreg,imm       \3\x66\x0F\xC6\110\26           WILLAMETTE,SSE2,SB,AR2
 xmmreg,mem,imm          \301\3\x66\x0F\xC6\110\26       WILLAMETTE,SSE2,SM,SB,AR2
 xmmreg,mem,imm          \301\3\x66\x0F\xC6\110\26       WILLAMETTE,SSE2,SM,SB,AR2
 
 
 [SQRTPD]
 [SQRTPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\x66\x0F\x51\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\x66\x0F\x51\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\x66\x0F\x51\110          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x51\110          WILLAMETTE,SSE2,SM
 
 
 [SQRTSD]
 [SQRTSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\xF2\x0F\x51\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\xF2\x0F\x51\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x51\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x51\110          WILLAMETTE,SSE2
 
 
 [SUBPD]
 [SUBPD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\x66\x0F\x5C\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\x66\x0F\x5C\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\x66\x0F\x5C\110          WILLAMETTE,SSE2,SM
 xmmreg,mem              \301\3\x66\x0F\x5C\110          WILLAMETTE,SSE2,SM
 
 
 [SUBSD]
 [SUBSD]
-(Ch_All, Ch_None, Ch_None)
+(Ch_Mop2, Ch_Rop1, Ch_None)
 xmmreg,xmmreg           \3\xF2\x0F\x5C\110              WILLAMETTE,SSE2
 xmmreg,xmmreg           \3\xF2\x0F\x5C\110              WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x5C\110          WILLAMETTE,SSE2
 xmmreg,mem              \301\3\xF2\x0F\x5C\110          WILLAMETTE,SSE2
 
 

+ 0 - 40
compiler/x86/x86reg.dat

@@ -140,43 +140,3 @@ NR_XMM12,$0400000c,xmm12,%xmm12,xmm12,xmm12,-1,-1,29,OT_XMMREG,4,64
 NR_XMM13,$0400000d,xmm13,%xmm13,xmm13,xmm13,-1,-1,30,OT_XMMREG,5,64
 NR_XMM13,$0400000d,xmm13,%xmm13,xmm13,xmm13,-1,-1,30,OT_XMMREG,5,64
 NR_XMM14,$0400000e,xmm14,%xmm14,xmm14,xmm14,-1,-1,31,OT_XMMREG,6,64
 NR_XMM14,$0400000e,xmm14,%xmm14,xmm14,xmm14,-1,-1,31,OT_XMMREG,6,64
 NR_XMM15,$0400000f,xmm15,%xmm15,xmm15,xmm15,-1,-1,32,OT_XMMREG,7,64
 NR_XMM15,$0400000f,xmm15,%xmm15,xmm15,xmm15,-1,-1,32,OT_XMMREG,7,64
-
-;
-; $Log: x86reg.dat,v $
-; Revision 1.6  2005/02/06 00:05:56  florian
-;   + x86_64 pic draft
-;
-; Revision 1.5  2004/06/16 20:07:11  florian
-;   * dwarf branch merged
-;
-; Revision 1.4.2.2  2004/04/20 16:35:58  peter
-;   * generate dwarf for stackframe entry
-;
-; Revision 1.4.2.1  2004/04/12 19:34:46  peter
-;   * basic framework for dwarf CFI
-;
-; Revision 1.4  2003/09/25 15:00:12  peter
-;   * %st is st0 in nasm
-;
-; Revision 1.3  2003/09/24 17:12:36  florian
-;   * x86-64 adaptions
-;
-; Revision 1.2  2003/09/03 15:55:02  peter
-;   * NEWRA branch merged
-;
-; Revision 1.1.2.5  2003/08/31 16:44:48  peter
-;   * OT fixed for DX
-;
-; Revision 1.1.2.4  2003/08/31 16:18:05  peter
-;   * more fixes
-;
-; Revision 1.1.2.3  2003/08/29 09:41:25  daniel
-;   * Further mkx86reg development
-;
-; Revision 1.1.2.2  2003/08/27 20:31:35  peter
-;   * make NR_ST unique value
-;
-; Revision 1.1.2.1  2003/08/27 19:55:54  peter
-;   * first tregister patch
-;
-;

+ 1 - 1
compiler/x86_64/cgcpu.pas

@@ -53,7 +53,7 @@ unit cgcpu;
         inherited init_register_allocators;
         inherited init_register_allocators;
         rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RBX,RS_RSI,RS_RDI,
         rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RBX,RS_RSI,RS_RDI,
           RS_R8,RS_R9,RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[RS_RBP]);
           RS_R8,RS_R9,RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[RS_RBP]);
-        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
           RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
           RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
         rgfpu:=Trgx86fpu.create;
         rgfpu:=Trgx86fpu.create;
       end;
       end;

+ 1 - 0
compiler/x86_64/cpunode.pas

@@ -47,6 +47,7 @@ unit cpunode;
        { the cpu specific node units must be used after the generic ones to
        { the cpu specific node units must be used after the generic ones to
          get the correct class pointer }
          get the correct class pointer }
        nx86set,
        nx86set,
+       nx86con,
        nx64add,
        nx64add,
        nx64cal,
        nx64cal,
        nx64cnv,
        nx64cnv,

+ 1 - 1
compiler/x86_64/x8664nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1640;
+1641;

+ 32 - 32
compiler/x86_64/x8664pro.inc

@@ -215,11 +215,11 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -383,6 +383,10 @@
 (Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
 (Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_None, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -408,6 +412,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -420,13 +426,21 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -479,6 +493,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -512,6 +530,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -521,34 +541,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),

+ 7 - 0
compiler/x86_64/x8664tab.inc

@@ -4529,6 +4529,13 @@
     code    : #209#193#2#15#191#72;
     code    : #209#193#2#15#191#72;
     flags   : if_386
     flags   : if_386
   ),
   ),
+  (
+    opcode  : A_MOVSX;
+    ops     : 2;
+    optypes : (ot_reg64,ot_regmem or ot_bits16,ot_none);
+    code    : #209#193#2#15#191#72;
+    flags   : if_x86_64
+  ),
   (
   (
     opcode  : A_MOVZX;
     opcode  : A_MOVZX;
     ops     : 2;
     ops     : 2;