Browse Source

+ support for LOC_(C)MMREGISTER in hlcg
o migrated location_force_mmregscalar from ncgutil to hlcgobj

git-svn-id: trunk@24661 -

Jonas Maebe 12 years ago
parent
commit
5051453806

+ 8 - 8
compiler/arm/narmadd.pas

@@ -170,8 +170,8 @@ interface
             begin
             begin
               { force mmreg as location, left right doesn't matter
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
                 as both will be in a fpureg }
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
 
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               if left.location.loc<>LOC_CMMREGISTER then
               if left.location.loc<>LOC_CMMREGISTER then
@@ -214,8 +214,8 @@ interface
             begin
             begin
               { force mmreg as location, left right doesn't matter
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
                 as both will be in a fpureg }
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
 
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               if left.location.loc<>LOC_CMMREGISTER then
               if left.location.loc<>LOC_CMMREGISTER then
@@ -284,8 +284,8 @@ interface
           fpu_vfpv3,
           fpu_vfpv3,
           fpu_vfpv3_d16:
           fpu_vfpv3_d16:
             begin
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
 
               if (tfloatdef(left.resultdef).floattype=s32real) then
               if (tfloatdef(left.resultdef).floattype=s32real) then
                 if nodetype in [equaln,unequaln] then
                 if nodetype in [equaln,unequaln] then
@@ -303,8 +303,8 @@ interface
             end;
             end;
           fpu_fpv4_s16:
           fpu_fpv4_s16:
             begin
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
 
               if nodetype in [equaln,unequaln] then
               if nodetype in [equaln,unequaln] then
                 op:=A_VCMP
                 op:=A_VCMP

+ 2 - 2
compiler/arm/narmcnv.pas

@@ -246,7 +246,7 @@ implementation
             begin
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
               signed:=left.location.size=OS_S32;
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
               if (left.location.size<>OS_F32) then
               if (left.location.size<>OS_F32) then
                 internalerror(2009112703);
                 internalerror(2009112703);
               if left.location.size<>location.size then
               if left.location.size<>location.size then
@@ -260,7 +260,7 @@ implementation
             begin
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
               signed:=left.location.size=OS_S32;
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
               if (left.location.size<>OS_F32) then
               if (left.location.size<>OS_F32) then
                 internalerror(2009112703);
                 internalerror(2009112703);
               if left.location.size<>location.size then
               if left.location.size<>location.size then

+ 1 - 1
compiler/arm/narminl.pas

@@ -88,7 +88,7 @@ implementation
           fpu_vfpv3_d16,
           fpu_vfpv3_d16,
           fpu_fpv4_s16:
           fpu_fpv4_s16:
             begin
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location_copy(location,left.location);
               location_copy(location,left.location);
               if left.location.loc=LOC_CMMREGISTER then
               if left.location.loc=LOC_CMMREGISTER then
                 begin
                 begin

+ 2 - 2
compiler/arm/narmmat.pas

@@ -390,7 +390,7 @@ implementation
           fpu_vfpv3,
           fpu_vfpv3,
           fpu_vfpv3_d16:
           fpu_vfpv3_d16:
             begin
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
               location:=left.location;
               if (left.location.loc=LOC_CMMREGISTER) then
               if (left.location.loc=LOC_CMMREGISTER) then
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
@@ -403,7 +403,7 @@ implementation
             end;
             end;
           fpu_fpv4_s16:
           fpu_fpv4_s16:
             begin
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
               location:=left.location;
               if (left.location.loc=LOC_CMMREGISTER) then
               if (left.location.loc=LOC_CMMREGISTER) then
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

+ 199 - 43
compiler/hlcg2ll.pas

@@ -67,8 +67,8 @@ unit hlcg2ll;
           {# Gets a register suitable to do integer operations on.}
           {# Gets a register suitable to do integer operations on.}
           function getaddressregister(list:TAsmList;size:tdef):Tregister;override;
           function getaddressregister(list:TAsmList;size:tdef):Tregister;override;
           function getfpuregister(list:TAsmList;size:tdef):Tregister;override;
           function getfpuregister(list:TAsmList;size:tdef):Tregister;override;
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          function getmmregister(list:TAsmList;size:tdef):Tregister;override;
+          { warning: only works correctly for fpu types currently }
+          function getmmregister(list:TAsmList;size:tdef):Tregister;override;
           function getflagregister(list:TAsmList;size:tdef):Tregister;override;
           function getflagregister(list:TAsmList;size:tdef):Tregister;override;
           {Does the generic cg need SIMD registers, like getmmxregister? Or should
           {Does the generic cg need SIMD registers, like getmmxregister? Or should
            the cpu specific child cg object have such a method?}
            the cpu specific child cg object have such a method?}
@@ -188,14 +188,10 @@ unit hlcg2ll;
           procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);override;
           procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);override;
 
 
           { vector register move instructions }
           { vector register move instructions }
-//        we don't have high level defs yet that translate into all mm cgsizes
-{
           procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); override;
           procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); override;
           procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
           procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
-}
-          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);override;
-{
+          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);override;
           procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);override;
           procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);override;
           procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); override;
           procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); override;
           procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); override;
           procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); override;
@@ -204,10 +200,8 @@ unit hlcg2ll;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); override;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); override;
-}
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
-//          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); override;
+          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); override;
 
 
           { basic arithmetic operations }
           { basic arithmetic operations }
           { note: for operators which require only one argument (not, neg), use }
           { note: for operators which require only one argument (not, neg), use }
@@ -322,7 +316,7 @@ unit hlcg2ll;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);override;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);override;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);override;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);override;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);override;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);override;
-//          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
+          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
 
 
           procedure maketojumpbool(list:TAsmList; p : tnode);override;
           procedure maketojumpbool(list:TAsmList; p : tnode);override;
@@ -338,6 +332,7 @@ unit hlcg2ll;
 
 
          protected
          protected
           procedure initialize_regvars(p: TObject; arg: pointer); override;
           procedure initialize_regvars(p: TObject; arg: pointer); override;
+          function getmmcgsize(reg: tregister; size: tcgsize): tcgsize; virtual;
        end;
        end;
 
 
 
 
@@ -385,6 +380,12 @@ implementation
     begin
     begin
       result:=cg.getfpuregister(list,def_cgsize(size));
       result:=cg.getfpuregister(list,def_cgsize(size));
     end;
     end;
+
+  function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister;
+    begin
+      result:=cg.getmmregister(list,def_cgsize(size));
+    end;
+
 (*
 (*
   function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister;
   function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister;
     begin
     begin
@@ -659,93 +660,178 @@ implementation
       cg.a_loadfpu_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara);
       cg.a_loadfpu_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara);
     end;
     end;
 
 
-  procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
+  procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);
     var
     var
       tmpreg: tregister;
       tmpreg: tregister;
+      tocgsize: tcgsize;
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071225);
+      { sanity check }
+      if def_cgsize(fromsize)<>loc.size then
+        internalerror(2012071226);
+      tocgsize:=getmmcgsize(reg,def_cgsize(tosize));
       case loc.loc of
       case loc.loc of
         LOC_SUBSETREG,LOC_CSUBSETREG,
         LOC_SUBSETREG,LOC_CSUBSETREG,
         LOC_SUBSETREF,LOC_CSUBSETREF:
         LOC_SUBSETREF,LOC_CSUBSETREF:
           begin
           begin
             tmpreg:=cg.getintregister(list,loc.size);
             tmpreg:=cg.getintregister(list,loc.size);
-            a_load_loc_reg(list,tcgsize2orddef(fromsize),tcgsize2orddef(fromsize),loc,tmpreg);
-            cg.a_loadmm_intreg_reg(list,loc.size,tosize,tmpreg,reg,shuffle);
+            a_load_loc_reg(list,fromsize,fromsize,loc,tmpreg);
+            cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),tocgsize,tmpreg,reg,shuffle);
           end
           end
         else
         else
-          cg.a_loadmm_loc_reg(list,tosize,loc,reg,shuffle);
+          cg.a_loadmm_loc_reg(list,tocgsize,loc,reg,shuffle);
       end;
       end;
     end;
     end;
 
 
-(*
   procedure thlcg2ll.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
+      tocgsize: tcgsize;
     begin
     begin
-      cg.a_loadmm_reg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),reg1,reg2,shuffle);
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062305);
+      fromcgsize:=getmmcgsize(reg1,def_cgsize(fromsize));
+      tocgsize:=getmmcgsize(reg2,def_cgsize(tosize));
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      cg.a_loadmm_reg_reg(list,fromcgsize,tocgsize,reg1,reg2,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    var
+      tocgsize: tcgsize;
     begin
     begin
-      cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),def_cgsize(tosize),ref,reg,shuffle);
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062306);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      tocgsize:=getmmcgsize(reg,def_cgsize(tosize));
+      cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),tocgsize,ref,reg,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
     begin
-      cg.a_loadmm_reg_ref(list,def_cgsize(fromsize),def_cgsize(tosize),reg,ref,shuffle);
-    end;
-
-  procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
-    begin
-{$ifdef extdebug}
-      if def_cgsize(fromsize)<>loc.size then
-        internalerror(2010112103);
-{$endif}
-      cg.a_loadmm_loc_reg(list,def_cgsize(tosize),loc,reg,shuffle);
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062307);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getmmcgsize(reg,def_cgsize(fromsize));
+      cg.a_loadmm_reg_ref(list,fromcgsize,def_cgsize(tosize),reg,ref,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
     begin
-{$ifdef extdebug}
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071215);
+      { sanity check }
       if def_cgsize(tosize)<>loc.size then
       if def_cgsize(tosize)<>loc.size then
-        internalerror(2010112104);
-{$endif}
-      cg.a_loadmm_reg_loc(list,def_cgsize(fromsize),reg,loc,shuffle);
+        internalerror(2012071216);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getmmcgsize(reg,def_cgsize(fromsize));
+      cg.a_loadmm_reg_loc(list,fromcgsize,reg,loc,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister; const cgpara: TCGPara; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister; const cgpara: TCGPara; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
     begin
-      cg.a_loadmm_reg_cgpara(list,def_cgsize(fromsize),reg,cgpara,shuffle);
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071217);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getmmcgsize(reg,def_cgsize(fromsize));
+      cg.a_loadmm_reg_cgpara(list,fromcgsize,reg,cgpara,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference; const cgpara: TCGPara; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference; const cgpara: TCGPara; shuffle: pmmshuffle);
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071218);
       cg.a_loadmm_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara,shuffle);
       cg.a_loadmm_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara: TCGPara; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara: TCGPara; shuffle: pmmshuffle);
     begin
     begin
-{$ifdef extdebug}
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071219);
+      { sanity check }
       if def_cgsize(fromsize)<>loc.size then
       if def_cgsize(fromsize)<>loc.size then
-        internalerror(2010112105);
-{$endif}
+        internalerror(2012071220);
       cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle);
       cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle);
     end;
     end;
 
 
+  procedure thlcg2ll.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle);
+    begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071221);
+      cg.a_opmm_reg_reg(list,op,def_cgsize(size),src,dst,shuffle);
+    end;
+
+  procedure thlcg2ll.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071222);
+      cg.a_opmm_ref_reg(list,op,def_cgsize(size),ref,reg,shuffle);
+    end;
+
   procedure thlcg2ll.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle);
   procedure thlcg2ll.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle);
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071223);
       cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle);
       cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle);
     end;
     end;
-*)
 
 
-(*
+  procedure thlcg2ll.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071224);
+      cg.a_opmm_reg_ref(list,op,def_cgsize(size),reg,ref,shuffle);
+    end;
+
   procedure thlcg2ll.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
+    var
+      tocgsize: tcgsize;
     begin
     begin
-      cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),intreg,mmreg,shuffle);
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071227);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      tocgsize:=getmmcgsize(mmreg,def_cgsize(tosize));
+      cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),tocgsize,intreg,mmreg,shuffle);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tdef; mmreg, intreg: tregister; shuffle: pmmshuffle);
   procedure thlcg2ll.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tdef; mmreg, intreg: tregister; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
     begin
-      cg.a_loadmm_reg_intreg(list,def_cgsize(fromsize),def_cgsize(tosize),mmreg,intreg,shuffle);
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012071228);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getmmcgsize(mmreg,def_cgsize(fromsize));
+      cg.a_loadmm_reg_intreg(list,fromcgsize,def_cgsize(tosize),mmreg,intreg,shuffle);
     end;
     end;
-*)
+
   procedure thlcg2ll.a_op_const_reg(list: TAsmList; Op: TOpCG; size: tdef; a: tcgint; reg: TRegister);
   procedure thlcg2ll.a_op_const_reg(list: TAsmList; Op: TOpCG; size: tdef; a: tcgint; reg: TRegister);
     begin
     begin
       cg.a_op_const_reg(list,op,def_cgsize(size),a,reg);
       cg.a_op_const_reg(list,op,def_cgsize(size),a,reg);
@@ -1222,6 +1308,61 @@ implementation
           inherited;
           inherited;
       end;
       end;
     end;
     end;
+
+  procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
+    var
+      reg : tregister;
+      href : treference;
+      newsize : tdef;
+    begin
+      if (l.loc<>LOC_MMREGISTER)  and
+         ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
+        begin
+          { if it's in an fpu register, store to memory first }
+          if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+            begin
+              tg.GetTemp(list,tcgsize2size[l.size],tcgsize2size[l.size],tt_normal,href);
+              cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
+              location_reset_ref(l,LOC_REFERENCE,l.size,0);
+              l.reference:=href;
+            end;
+{$ifndef cpu64bitalu}
+          if (l.loc in [LOC_REGISTER,LOC_CREGISTER]) and
+             (l.size in [OS_64,OS_S64]) then
+            begin
+              reg:=cg.getmmregister(list,OS_F64);
+              cg64.a_loadmm_intreg64_reg(list,OS_F64,l.register64,reg);
+              l.size:=OS_F64;
+              size:=s64floattype;
+            end
+          else
+{$endif not cpu64bitalu}
+            begin
+               { on ARM, CFP values may be located in integer registers,
+                 and its second_int_to_real() also uses this routine to
+                 force integer (memory) values in an mmregister }
+               if (l.size in [OS_32,OS_S32]) then
+                 begin
+                   size:=tcgsize2orddef(l.size);
+                   newsize:=s32floattype;
+                 end
+               else if (l.size in [OS_64,OS_S64]) then
+                 begin
+                   size:=tcgsize2orddef(l.size);
+                   newsize:=s64floattype;
+                 end
+               else
+                 newsize:=size;
+               reg:=getmmregister(list,newsize);
+               a_loadmm_loc_reg(list,size,newsize,l,reg,mms_movescalar);
+               l.size:=def_cgsize(newsize);
+             end;
+          location_freetemp(list,l);
+          location_reset(l,LOC_MMREGISTER,l.size);
+          l.register:=reg;
+        end;
+    end;
+
 (*
 (*
   procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
   procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
     begin
     begin
@@ -1282,7 +1423,7 @@ implementation
             LOC_CMMREGISTER:
             LOC_CMMREGISTER:
               begin
               begin
                 tmploc:=l;
                 tmploc:=l;
-                location_force_mmregscalar(list,tmploc,false);
+                location_force_mmregscalar(list,tmploc,size,false);
                 cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar);
                 cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar);
               end;
               end;
             { Some targets pass floats in normal registers }
             { Some targets pass floats in normal registers }
@@ -1440,4 +1581,19 @@ implementation
         inherited initialize_regvars(p, arg);
         inherited initialize_regvars(p, arg);
     end;
     end;
 
 
+  function thlcg2ll.getmmcgsize(reg: tregister; size: tcgsize): tcgsize;
+    begin
+      result:=size;
+      if getregtype(reg)=R_MMREGISTER then
+        begin
+          case size of
+            OS_32:
+              result:=OS_F32;
+            OS_64:
+              result:=OS_F64;
+          end;
+        end;
+    end;
+
+
 end.
 end.

+ 175 - 82
compiler/hlcgobj.pas

@@ -69,8 +69,8 @@ unit hlcgobj;
           {# Gets a register suitable to do integer operations on.}
           {# Gets a register suitable to do integer operations on.}
           function getaddressregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getaddressregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getfpuregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getfpuregister(list:TAsmList;size:tdef):Tregister;virtual;
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          function getmmregister(list:TAsmList;size:tdef):Tregister;virtual;
+          { warning: only works correctly for fpu types currently }
+          function getmmregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getflagregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getflagregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getregisterfordef(list: TAsmList;size:tdef):Tregister;virtual;
           function getregisterfordef(list: TAsmList;size:tdef):Tregister;virtual;
           {Does the generic cg need SIMD registers, like getmmxregister? Or should
           {Does the generic cg need SIMD registers, like getmmxregister? Or should
@@ -292,27 +292,26 @@ unit hlcgobj;
           procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);virtual;
           procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);virtual;
 
 
           { vector register move instructions }
           { vector register move instructions }
-//        we don't have high level defs yet that translate into all mm cgsizes
-{
-          procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); virtual;
-          procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
-          procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); virtual;
-}
+          procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); virtual; abstract;
+          procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual; abstract;
+          procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); virtual; abstract;
+          procedure a_loadmm_ref_ref(list: TAsmList; fromsize, tosize: tdef; const fromref, toref: treference; shuffle: pmmshuffle); virtual;
           { required for subsetreg/ref; still tcgsize rather than tdef because of reason mentioned above }
           { required for subsetreg/ref; still tcgsize rather than tdef because of reason mentioned above }
-          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);virtual; abstract;
-{
+          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle : pmmshuffle);virtual;
           procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);virtual;
           procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);virtual;
           procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
-          procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tdef;src,dst: tregister;shuffle : pmmshuffle); virtual;
+          procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tdef;src,dst: tregister;shuffle : pmmshuffle); virtual; abstract;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); virtual;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); virtual;
-}
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual;
-//          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual;
+          { requires a temp that is interpreted in two different ways, and we
+            don't have a way (yet) to tag a treference with tdef information so
+            targets like LLVM can insert the necessary bitcast
+          }
+          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual; abstract;
+          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual; abstract;
 
 
           { basic arithmetic operations }
           { basic arithmetic operations }
           { note: for operators which require only one argument (not, neg), use }
           { note: for operators which require only one argument (not, neg), use }
@@ -473,7 +472,7 @@ unit hlcgobj;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);virtual;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);virtual;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);virtual;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);virtual;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);virtual;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);virtual;
-//          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract;
+          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract;
 
 
           { Retrieve the location of the data pointed to in location l, when the location is
           { Retrieve the location of the data pointed to in location l, when the location is
@@ -607,6 +606,12 @@ implementation
     begin
     begin
       result:=cg.getfpuregister(list,def_cgsize(size));
       result:=cg.getfpuregister(list,def_cgsize(size));
     end;
     end;
+
+  { Allocates an mm register for a value of def "size" by converting the def
+    to a cgsize with def_cgsize and handing the request to cg.getmmregister. }
+  function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister;
+    var
+      cgsize: tcgsize;
+    begin
+      cgsize:=def_cgsize(size);
+      result:=cg.getmmregister(list,cgsize);
+    end;
+
 (*
 (*
   function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister;
   function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister;
     begin
     begin
@@ -771,10 +776,8 @@ implementation
               reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
               reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
               a_load_reg_ref(list,size,cgpara.def,r,ref);
               a_load_reg_ref(list,size,cgpara.def,r,ref);
            end;
            end;
-(*
          LOC_MMREGISTER,LOC_CMMREGISTER:
          LOC_MMREGISTER,LOC_CMMREGISTER:
            a_loadmm_intreg_reg(list,size,cgpara.def,r,cgpara.location^.register,mms_movescalar);
            a_loadmm_intreg_reg(list,size,cgpara.def,r,cgpara.location^.register,mms_movescalar);
-*)
          LOC_FPUREGISTER,LOC_CFPUREGISTER:
          LOC_FPUREGISTER,LOC_CFPUREGISTER:
            begin
            begin
              tg.gethltemp(list,size,size.size,tt_normal,ref);
              tg.gethltemp(list,size,size.size,tt_normal,ref);
@@ -942,10 +945,8 @@ implementation
           a_load_reg_subsetreg(list,fromsize,tosize,reg,loc.sreg);
           a_load_reg_subsetreg(list,fromsize,tosize,reg,loc.sreg);
         LOC_SUBSETREF,LOC_CSUBSETREF:
         LOC_SUBSETREF,LOC_CSUBSETREF:
           a_load_reg_subsetref(list,fromsize,tosize,reg,loc.sref);
           a_load_reg_subsetref(list,fromsize,tosize,reg,loc.sref);
-        { we don't have enough type information to handle these here
         LOC_MMREGISTER,LOC_CMMREGISTER:
         LOC_MMREGISTER,LOC_CMMREGISTER:
-          a_loadmm_intreg_reg(list,fromsize,loc.size,reg,loc.register,mms_movescalar);
-        }
+          a_loadmm_intreg_reg(list,fromsize,tosize,reg,loc.register,mms_movescalar);
         else
         else
           internalerror(2010120402);
           internalerror(2010120402);
       end;
       end;
@@ -2309,24 +2310,23 @@ implementation
           internalerror(2010120423);
           internalerror(2010120423);
       end;
       end;
     end;
     end;
-(*
-  procedure thlcgobj.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle);
-    begin
-      cg.a_loadmm_reg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),reg1,reg2,shuffle);
-    end;
-
-  procedure thlcgobj.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
-    begin
-      cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),def_cgsize(tosize),ref,reg,shuffle);
-    end;
 
 
-  procedure thlcgobj.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+  procedure thlcgobj.a_loadmm_ref_ref(list: TAsmList; fromsize, tosize: tdef; const fromref, toref: treference; shuffle: pmmshuffle);
+    var
+      reg: tregister;
     begin
     begin
-      cg.a_loadmm_reg_ref(list,def_cgsize(fromsize),def_cgsize(tosize),reg,ref,shuffle);
+      reg:=getmmregister(list,tosize);
+      a_loadmm_ref_reg(list,fromsize,tosize,fromref,reg,shuffle);
+      a_loadmm_reg_ref(list,tosize,tosize,reg,toref,shuffle);
     end;
     end;
 
 
   procedure thlcgobj.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
   procedure thlcgobj.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
+    var
+      tmpreg: tregister;
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062302);
       case loc.loc of
       case loc.loc of
         LOC_MMREGISTER,LOC_CMMREGISTER:
         LOC_MMREGISTER,LOC_CMMREGISTER:
           a_loadmm_reg_reg(list,fromsize,tosize,loc.register,reg,shuffle);
           a_loadmm_reg_reg(list,fromsize,tosize,loc.register,reg,shuffle);
@@ -2334,6 +2334,13 @@ implementation
           a_loadmm_ref_reg(list,fromsize,tosize,loc.reference,reg,shuffle);
           a_loadmm_ref_reg(list,fromsize,tosize,loc.reference,reg,shuffle);
         LOC_REGISTER,LOC_CREGISTER:
         LOC_REGISTER,LOC_CREGISTER:
           a_loadmm_intreg_reg(list,fromsize,tosize,loc.register,reg,shuffle);
           a_loadmm_intreg_reg(list,fromsize,tosize,loc.register,reg,shuffle);
+        LOC_SUBSETREG,LOC_CSUBSETREG,
+        LOC_SUBSETREF,LOC_CSUBSETREF:
+          begin
+            tmpreg:=getintregister(list,fromsize);
+            a_load_loc_reg(list,fromsize,fromsize,loc,tmpreg);
+            a_loadmm_intreg_reg(list,fromsize,tosize,tmpreg,reg,shuffle);
+          end
         else
         else
           internalerror(2010120414);
           internalerror(2010120414);
       end;
       end;
@@ -2341,6 +2348,9 @@ implementation
 
 
   procedure thlcgobj.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle);
   procedure thlcgobj.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle);
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062303);
       case loc.loc of
       case loc.loc of
         LOC_MMREGISTER,LOC_CMMREGISTER:
         LOC_MMREGISTER,LOC_CMMREGISTER:
           a_loadmm_reg_reg(list,fromsize,tosize,reg,loc.register,shuffle);
           a_loadmm_reg_reg(list,fromsize,tosize,reg,loc.register,shuffle);
@@ -2355,6 +2365,9 @@ implementation
     var
     var
       href  : treference;
       href  : treference;
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062304);
        cgpara.check_simple_location;
        cgpara.check_simple_location;
        paramanager.alloccgpara(list,cgpara);
        paramanager.alloccgpara(list,cgpara);
        case cgpara.location^.loc of
        case cgpara.location^.loc of
@@ -2369,11 +2382,11 @@ implementation
           begin
           begin
             if assigned(shuffle) and
             if assigned(shuffle) and
                not shufflescalar(shuffle) then
                not shufflescalar(shuffle) then
-              internalerror(2009112510);
-             a_loadmm_reg_intreg(list,deomsize,cgpara.def,reg,cgpara.location^.register,mms_movescalar);
+              internalerror(2012071205);
+             a_loadmm_reg_intreg(list,fromsize,cgpara.def,reg,cgpara.location^.register,mms_movescalar);
           end
           end
         else
         else
-          internalerror(2010120427);
+          internalerror(2012071204);
       end;
       end;
     end;
     end;
 
 
@@ -2382,9 +2395,12 @@ implementation
        hr : tregister;
        hr : tregister;
        hs : tmmshuffle;
        hs : tmmshuffle;
     begin
     begin
+      { no vector support yet }
+      if shuffle<>mms_movescalar then
+        internalerror(2012062308);
        cgpara.check_simple_location;
        cgpara.check_simple_location;
-       hr:=cg.getmmregister(list,cgpara.size);
-       a_loadmm_ref_reg(list,deomsize,cgpara.def,ref,hr,shuffle);
+       hr:=getmmregister(list,cgpara.def);
+       a_loadmm_ref_reg(list,fromsize,cgpara.def,ref,hr,shuffle);
        if realshuffle(shuffle) then
        if realshuffle(shuffle) then
          begin
          begin
            hs:=shuffle^;
            hs:=shuffle^;
@@ -2399,31 +2415,68 @@ implementation
     begin
     begin
 {$ifdef extdebug}
 {$ifdef extdebug}
       if def_cgsize(fromsize)<>loc.size then
       if def_cgsize(fromsize)<>loc.size then
-        internalerror(2010112105);
+        internalerror(2012071203);
 {$endif}
 {$endif}
-      cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle);
-    end;
-
-  procedure thlcgobj.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle);
-    begin
-      cg.a_opmm_reg_reg(list,op,def_cgsize(size),src,dst,shuffle);
+      case loc.loc of
+        LOC_MMREGISTER,LOC_CMMREGISTER:
+          a_loadmm_reg_cgpara(list,fromsize,loc.register,cgpara,shuffle);
+        LOC_REFERENCE,LOC_CREFERENCE:
+          a_loadmm_ref_cgpara(list,fromsize,loc.reference,cgpara,shuffle);
+        else
+          internalerror(2012071202);
+      end;
     end;
     end;
 
 
   procedure thlcgobj.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
   procedure thlcgobj.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    var
+       hr : tregister;
+       hs : tmmshuffle;
     begin
     begin
-      cg.a_opmm_ref_reg(list,op,def_cgsize(size),ref,reg,shuffle)
+       hr:=getmmregister(list,size);
+       a_loadmm_ref_reg(list,size,size,ref,hr,shuffle);
+       if realshuffle(shuffle) then
+         begin
+           hs:=shuffle^;
+           removeshuffles(hs);
+           a_opmm_reg_reg(list,op,size,hr,reg,@hs);
+         end
+       else
+         a_opmm_reg_reg(list,op,size,hr,reg,shuffle);
     end;
     end;
 
 
   procedure thlcgobj.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle);
   procedure thlcgobj.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle);
     begin
     begin
-      cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle);
+      case loc.loc of
+        LOC_CMMREGISTER,LOC_MMREGISTER:
+          a_opmm_reg_reg(list,op,size,loc.register,reg,shuffle);
+        LOC_CREFERENCE,LOC_REFERENCE:
+          a_opmm_ref_reg(list,op,size,loc.reference,reg,shuffle);
+        else
+          internalerror(2012071201);
+      end;
     end;
     end;
 
 
   procedure thlcgobj.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
   procedure thlcgobj.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    var
+       hr : tregister;
+       hs : tmmshuffle;
     begin
     begin
-      cg.a_opmm_reg_ref(list,op,def_cgsize(size),reg,ref,shuffle);
+       hr:=getmmregister(list,size);
+       a_loadmm_ref_reg(list,size,size,ref,hr,shuffle);
+       if realshuffle(shuffle) then
+         begin
+           hs:=shuffle^;
+           removeshuffles(hs);
+           a_opmm_reg_reg(list,op,size,reg,hr,@hs);
+           a_loadmm_reg_ref(list,size,size,hr,ref,@hs);
+         end
+       else
+         begin
+           a_opmm_reg_reg(list,op,size,reg,hr,shuffle);
+           a_loadmm_reg_ref(list,size,size,hr,ref,shuffle);
+         end;
     end;
     end;
-*)
+
 (*
 (*
   procedure thlcgobj.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
   procedure thlcgobj.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
     begin
     begin
@@ -2801,12 +2854,9 @@ implementation
 
 
   procedure thlcgobj.g_concatcopy(list: TAsmList; size: tdef; const source, dest: treference);
   procedure thlcgobj.g_concatcopy(list: TAsmList; size: tdef; const source, dest: treference);
     begin
     begin
-{
       if use_vectorfpu(size) then
       if use_vectorfpu(size) then
-        a_loadmm_ref_ref()
-      else
- }
-      if size.typ<>floatdef then
+        a_loadmm_ref_ref(list,size,size,source,dest,mms_movescalar)
+      else if size.typ<>floatdef then
         a_load_ref_ref(list,size,size,source,dest)
         a_load_ref_ref(list,size,size,source,dest)
       else
       else
         a_loadfpu_ref_ref(list,size,size,source,dest);
         a_loadfpu_ref_ref(list,size,size,source,dest);
@@ -3560,16 +3610,17 @@ implementation
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             l.reference:=r;
             l.reference:=r;
           end;
           end;
-(*
         LOC_MMREGISTER,
         LOC_MMREGISTER,
         LOC_CMMREGISTER:
         LOC_CMMREGISTER:
           begin
           begin
+            { vectors can't be represented yet using tdef }
+            if size.typ<>floatdef then
+              internalerror(2012062301);
             tg.gethltemp(list,size,size.size,tt_normal,r);
             tg.gethltemp(list,size,size.size,tt_normal,r);
-            cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,r,mms_movescalar);
+            a_loadmm_reg_ref(list,size,size,l.register,r,mms_movescalar);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             l.reference:=r;
             l.reference:=r;
           end;
           end;
-*)
         LOC_CONSTANT,
         LOC_CONSTANT,
         LOC_REGISTER,
         LOC_REGISTER,
         LOC_CREGISTER,
         LOC_CREGISTER,
@@ -3582,7 +3633,7 @@ implementation
                not is_open_array(size) then
                not is_open_array(size) then
               forcesize:=size.size
               forcesize:=size.size
             else
             else
-              forcesize:=voidpointertype.size;
+              forcesize:=sizeof(pint);
             tg.gethltemp(list,size,forcesize,tt_normal,r);
             tg.gethltemp(list,size,forcesize,tt_normal,r);
             a_load_loc_ref(list,size,size,l,r);
             a_load_loc_ref(list,size,size,l,r);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
@@ -3595,6 +3646,55 @@ implementation
       end;
       end;
     end;
     end;
 
 
+  { Makes sure the scalar value described by l is held in an mm register.
+    list receives the generated code, size is the def of the value in l and
+    maybeconst tells whether a LOC_CMMREGISTER location may be kept as is.
+    If l does not already qualify, the value is loaded into a newly
+    allocated mm register and l is reset to LOC_MMREGISTER. }
+  procedure thlcgobj.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
+    var
+      mmreg : tregister;
+      tmpref : treference;
+      mmdef : tdef;
+    begin
+      { nothing to do when the value already sits in an acceptable mm register }
+      if (l.loc=LOC_MMREGISTER) or
+         ((l.loc=LOC_CMMREGISTER) and maybeconst) then
+        exit;
+      { if it's in an fpu register, store to memory first }
+      if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+        begin
+          tg.gethltemp(list,size,-1,tt_normal,tmpref);
+          hlcg.a_loadfpu_reg_ref(list,size,size,l.register,tmpref);
+          location_reset_ref(l,LOC_REFERENCE,l.size,0);
+          l.reference:=tmpref;
+        end;
+      { on ARM, CFP values may be located in integer registers,
+        and its second_int_to_real() also uses this routine to
+        force integer (memory) values in an mmregister }
+      if (l.size in [OS_32,OS_S32,OS_64,OS_S64]) then
+        size:=tcgsize2orddef(l.size);
+      { the destination def is selected purely by the byte size of the
+        (possibly replaced) source def }
+      case size.size of
+        4:
+          mmdef:=s32floattype;
+        8:
+          mmdef:=s64floattype;
+        else
+          mmdef:=size;
+      end;
+      mmreg:=hlcg.getmmregister(list,mmdef);
+      hlcg.a_loadmm_loc_reg(list,size,mmdef,l,mmreg,mms_movescalar);
+      { release a temp backing the old location before l is repointed
+        at the new register }
+      l.size:=def_cgsize(mmdef);
+      location_freetemp(list,l);
+      location_reset(l,LOC_MMREGISTER,l.size);
+      l.register:=mmreg;
+    end;
+
     procedure thlcgobj.location_get_data_ref(list: TAsmList; def: tdef; const l: tlocation; var ref: treference; loadref: boolean; alignment: longint);
     procedure thlcgobj.location_get_data_ref(list: TAsmList; def: tdef; const l: tlocation; var ref: treference; loadref: boolean; alignment: longint);
       begin
       begin
         case l.loc of
         case l.loc of
@@ -3972,14 +4072,12 @@ implementation
                  a_load_const_reg(TAsmList(arg),tstaticvarsym(p).vardef,0,
                  a_load_const_reg(TAsmList(arg),tstaticvarsym(p).vardef,0,
                      tstaticvarsym(p).initialloc.register);
                      tstaticvarsym(p).initialloc.register);
              end;
              end;
-(*
            LOC_CMMREGISTER :
            LOC_CMMREGISTER :
              { clear the whole register }
              { clear the whole register }
-             cg.a_opmm_reg_reg(TAsmList(arg),OP_XOR,reg_cgsize(tstaticvarsym(p).initialloc.register),
+             a_opmm_reg_reg(TAsmList(arg),OP_XOR,tstaticvarsym(p).vardef,
                tstaticvarsym(p).initialloc.register,
                tstaticvarsym(p).initialloc.register,
                tstaticvarsym(p).initialloc.register,
                tstaticvarsym(p).initialloc.register,
                nil);
                nil);
-*)
            LOC_CFPUREGISTER :
            LOC_CFPUREGISTER :
              begin
              begin
                { initialize fpu regvar by loading from memory }
                { initialize fpu regvar by loading from memory }
@@ -4312,9 +4410,10 @@ implementation
     end;
     end;
 
 
   procedure thlcgobj.gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation; const cgpara: tcgpara; locintsize: longint);
   procedure thlcgobj.gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation; const cgpara: tcgpara; locintsize: longint);
+    var
+      tmploc: tlocation;
     begin
     begin
       case l.loc of
       case l.loc of
-(*
         LOC_MMREGISTER,
         LOC_MMREGISTER,
         LOC_CMMREGISTER:
         LOC_CMMREGISTER:
           case cgpara.location^.loc of
           case cgpara.location^.loc of
@@ -4324,30 +4423,27 @@ implementation
             LOC_CMMREGISTER,
             LOC_CMMREGISTER,
             LOC_REGISTER,
             LOC_REGISTER,
             LOC_CREGISTER :
             LOC_CREGISTER :
-              cg.a_loadmm_reg_cgpara(list,locsize,l.register,cgpara,mms_movescalar);
+              a_loadmm_reg_cgpara(list,size,l.register,cgpara,mms_movescalar);
             LOC_FPUREGISTER,
             LOC_FPUREGISTER,
             LOC_CFPUREGISTER:
             LOC_CFPUREGISTER:
               begin
               begin
                 tmploc:=l;
                 tmploc:=l;
-                location_force_fpureg(list,tmploc,false);
-                cg.a_loadfpu_reg_cgpara(list,tmploc.size,tmploc.register,cgpara);
+                location_force_fpureg(list,tmploc,size,false);
+                a_loadfpu_reg_cgpara(list,size,tmploc.register,cgpara);
               end;
               end;
             else
             else
               internalerror(200204249);
               internalerror(200204249);
           end;
           end;
-*)
         LOC_FPUREGISTER,
         LOC_FPUREGISTER,
         LOC_CFPUREGISTER:
         LOC_CFPUREGISTER:
           case cgpara.location^.loc of
           case cgpara.location^.loc of
-(*
             LOC_MMREGISTER,
             LOC_MMREGISTER,
             LOC_CMMREGISTER:
             LOC_CMMREGISTER:
               begin
               begin
                 tmploc:=l;
                 tmploc:=l;
-                location_force_mmregscalar(list,tmploc,false);
-                cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar);
+                location_force_mmregscalar(list,tmploc,size,false);
+                a_loadmm_reg_cgpara(list,size,tmploc.register,cgpara,mms_movescalar);
               end;
               end;
-*)
             { Some targets pass floats in normal registers }
             { Some targets pass floats in normal registers }
             LOC_REGISTER,
             LOC_REGISTER,
             LOC_CREGISTER,
             LOC_CREGISTER,
@@ -4362,11 +4458,9 @@ implementation
         LOC_REFERENCE,
         LOC_REFERENCE,
         LOC_CREFERENCE:
         LOC_CREFERENCE:
           case cgpara.location^.loc of
           case cgpara.location^.loc of
-(*
             LOC_MMREGISTER,
             LOC_MMREGISTER,
             LOC_CMMREGISTER:
             LOC_CMMREGISTER:
-              cg.a_loadmm_ref_cgpara(list,locsize,l.reference,cgpara,mms_movescalar);
-*)
+              a_loadmm_ref_cgpara(list,size,l.reference,cgpara,mms_movescalar);
             { Some targets pass floats in normal registers }
             { Some targets pass floats in normal registers }
             LOC_REGISTER,
             LOC_REGISTER,
             LOC_CREGISTER,
             LOC_CREGISTER,
@@ -4416,19 +4510,18 @@ implementation
           begin
           begin
             a_load_loc_cgpara(list,vardef,l,cgpara);
             a_load_loc_cgpara(list,vardef,l,cgpara);
           end;
           end;
-(*
         LOC_MMREGISTER,
         LOC_MMREGISTER,
         LOC_CMMREGISTER:
         LOC_CMMREGISTER:
           begin
           begin
-            case l.size of
-              OS_F32,
-              OS_F64:
-                cg.a_loadmm_loc_cgpara(list,l,cgpara,mms_movescalar);
-              else
-                cg.a_loadmm_loc_cgpara(list,l,cgpara,nil);
-            end;
+            if use_vectorfpu(vardef) then
+              a_loadmm_loc_cgpara(list,vardef,l,cgpara,mms_movescalar)
+            else
+              { no vector support yet }
+              internalerror(2012071212);
+              {
+              cg.a_loadmm_loc_cgpara(list,l,cgpara,nil);
+              }
           end;
           end;
-*)
         else
         else
           internalerror(2011010213);
           internalerror(2011010213);
       end;
       end;

+ 1 - 1
compiler/ncgadd.pas

@@ -153,7 +153,7 @@ interface
             if use_vectorfpu(left.resultdef) then
             if use_vectorfpu(left.resultdef) then
               begin
               begin
                 tmpreg := cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
                 tmpreg := cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
-                hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.location.size,left.location.size,left.location,tmpreg,mms_movescalar);
+                hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.resultdef,left.resultdef,left.location,tmpreg,mms_movescalar);
                 location_freetemp(current_asmdata.CurrAsmList,left.location);
                 location_freetemp(current_asmdata.CurrAsmList,left.location);
                 location_reset(left.location,LOC_MMREGISTER,left.location.size);
                 location_reset(left.location,LOC_MMREGISTER,left.location.size);
                 left.location.register:=tmpreg;
                 left.location.register:=tmpreg;

+ 4 - 3
compiler/ncgcnv.pas

@@ -407,11 +407,12 @@ interface
              cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,tr);
              cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,tr);
              location_reset_ref(left.location,LOC_REFERENCE,location.size,tr.alignment);
              location_reset_ref(left.location,LOC_REFERENCE,location.size,tr.alignment);
              left.location.reference:=tr;
              left.location.reference:=tr;
+             left.resultdef:=resultdef;
            end;
            end;
 {$endif x86}
 {$endif x86}
          { ARM VFP values are in integer registers when they are function results }
          { ARM VFP values are in integer registers when they are function results }
          if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
          if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
-           location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+           hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          case left.location.loc of
          case left.location.loc of
             LOC_FPUREGISTER,
             LOC_FPUREGISTER,
             LOC_CFPUREGISTER:
             LOC_CFPUREGISTER:
@@ -427,7 +428,7 @@ interface
                     end;
                     end;
                   LOC_MMREGISTER:
                   LOC_MMREGISTER:
                     begin
                     begin
-                      location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+                      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
                       location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                       location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                       cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,location.register,mms_movescalar);
                       cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,location.register,mms_movescalar);
                     end
                     end
@@ -442,7 +443,7 @@ interface
                  if expectloc=LOC_MMREGISTER then
                  if expectloc=LOC_MMREGISTER then
                    begin
                    begin
                      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
-                     hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location,location.register,mms_movescalar)
+                     hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,left.location,location.register,mms_movescalar)
                    end
                    end
                   else
                   else
                     begin
                     begin

+ 12 - 10
compiler/ncgld.pas

@@ -818,11 +818,12 @@ implementation
                             releaseright:=true;
                             releaseright:=true;
                             location_reset_ref(right.location,LOC_REFERENCE,left.location.size,0);
                             location_reset_ref(right.location,LOC_REFERENCE,left.location.size,0);
                             right.location.reference:=href;
                             right.location.reference:=href;
+                            right.resultdef:=left.resultdef;
                           end;
                           end;
 {$endif}
 {$endif}
-                        cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,
-                          right.location.size,
-                          left.location.size,
+                        hlcg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,
+                          right.resultdef,
+                          left.resultdef,
                           right.location.reference,
                           right.location.reference,
                           left.location.register,mms_movescalar);
                           left.location.register,mms_movescalar);
                       end;
                       end;
@@ -862,10 +863,10 @@ implementation
                       case left.location.loc of
                       case left.location.loc of
                         LOC_CMMREGISTER,
                         LOC_CMMREGISTER,
                         LOC_MMREGISTER:
                         LOC_MMREGISTER:
-                          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,left.location.register,mms_movescalar);
+                          hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.register,mms_movescalar);
                         LOC_REFERENCE,
                         LOC_REFERENCE,
                         LOC_CREFERENCE:
                         LOC_CREFERENCE:
-                          cg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,left.location.reference,mms_movescalar);
+                          hlcg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.reference,mms_movescalar);
                         else
                         else
                           internalerror(2009112601);
                           internalerror(2009112601);
                       end;
                       end;
@@ -899,15 +900,16 @@ implementation
                         begin
                         begin
                           { perform size conversion if needed (the mm-code cannot convert an   }
                           { perform size conversion if needed (the mm-code cannot convert an   }
                           { extended into a double/single, since sse doesn't support extended) }
                           { extended into a double/single, since sse doesn't support extended) }
-                          tg.gethltemp(current_asmdata.CurrAsmList,left.resultdef, left.resultdef.size,tt_normal,href);
+                          tg.gethltemp(current_asmdata.CurrAsmList,left.resultdef,left.resultdef.size,tt_normal,href);
                           cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,href);
                           cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,href);
                           location_reset_ref(right.location,LOC_REFERENCE,left.location.size,0);
                           location_reset_ref(right.location,LOC_REFERENCE,left.location.size,0);
                           right.location.reference:=href;
                           right.location.reference:=href;
+                          right.resultdef:=left.resultdef;
                         end;
                         end;
 {$endif}
 {$endif}
-                      location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,false);
-                      cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,
-                          right.location.size,left.location.size,
+                      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
+                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,
+                          right.resultdef,left.resultdef,
                           right.location.register,left.location.register,mms_movescalar);
                           right.location.register,left.location.register,mms_movescalar);
                     end
                     end
                   else
                   else
@@ -1295,7 +1297,7 @@ implementation
                  case hp.left.location.loc of
                  case hp.left.location.loc of
                    LOC_MMREGISTER,
                    LOC_MMREGISTER,
                    LOC_CMMREGISTER:
                    LOC_CMMREGISTER:
-                     cg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,hp.left.location.size,hp.left.location.size,
+                     hlcg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,hp.left.resultdef,hp.left.resultdef,
                        hp.left.location.register,href,mms_movescalar);
                        hp.left.location.register,href,mms_movescalar);
                    LOC_FPUREGISTER,
                    LOC_FPUREGISTER,
                    LOC_CFPUREGISTER :
                    LOC_CFPUREGISTER :

+ 1 - 50
compiler/ncgutil.pas

@@ -61,7 +61,6 @@ interface
 //    procedure remove_non_regvars_from_loc(const t: tlocation; var regs:Tsuperregisterset);
 //    procedure remove_non_regvars_from_loc(const t: tlocation; var regs:Tsuperregisterset);
 
 
     procedure location_force_fpureg(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_fpureg(list:TAsmList;var l: tlocation;maybeconst:boolean);
-    procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
     procedure location_allocate_register(list:TAsmList;out l: tlocation;def: tdef;constant: boolean);
     procedure location_allocate_register(list:TAsmList;out l: tlocation;def: tdef;constant: boolean);
 
 
@@ -512,54 +511,6 @@ implementation
       end;
       end;
 
 
 
 
-    procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
-      var
-        reg : tregister;
-        href : treference;
-        newsize : tcgsize;
-      begin
-        if (l.loc<>LOC_MMREGISTER)  and
-           ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
-          begin
-            { if it's in an fpu register, store to memory first }
-            if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
-              begin
-                tg.GetTemp(list,tcgsize2size[l.size],tcgsize2size[l.size],tt_normal,href);
-                cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
-                location_reset_ref(l,LOC_REFERENCE,l.size,0);
-                l.reference:=href;
-              end;
-{$ifndef cpu64bitalu}
-            if (l.loc in [LOC_REGISTER,LOC_CREGISTER]) and
-               (l.size in [OS_64,OS_S64]) then
-              begin
-                reg:=cg.getmmregister(list,OS_F64);
-                cg64.a_loadmm_intreg64_reg(list,OS_F64,l.register64,reg);
-                l.size:=OS_F64
-              end
-            else
-{$endif not cpu64bitalu}
-              begin
-                 { on ARM, CFP values may be located in integer registers,
-                   and its second_int_to_real() also uses this routine to
-                   force integer (memory) values in an mmregister }
-                 if (l.size in [OS_32,OS_S32]) then
-                   newsize:=OS_F32
-                 else if (l.size in [OS_64,OS_S64]) then
-                   newsize:=OS_F64
-                 else
-                   newsize:=l.size;
-                 reg:=cg.getmmregister(list,newsize);
-                 hlcg.a_loadmm_loc_reg(list,l.size,newsize,l,reg,mms_movescalar);
-                 l.size:=newsize;
-               end;
-            location_freetemp(list,l);
-            location_reset(l,LOC_MMREGISTER,l.size);
-            l.register:=reg;
-          end;
-      end;
-
-
     procedure register_maybe_adjust_setbase(list: TAsmList; var l: tlocation; setbase: aint);
     procedure register_maybe_adjust_setbase(list: TAsmList; var l: tlocation; setbase: aint);
       var
       var
         tmpreg: tregister;
         tmpreg: tregister;
@@ -594,7 +545,7 @@ implementation
            ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
            ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
           begin
           begin
             reg:=cg.getmmregister(list,OS_VECTOR);
             reg:=cg.getmmregister(list,OS_VECTOR);
-            hlcg.a_loadmm_loc_reg(list,l.size,OS_VECTOR,l,reg,nil);
+            cg.a_loadmm_loc_reg(list,OS_VECTOR,l,reg,nil);
             location_freetemp(list,l);
             location_freetemp(list,l);
             location_reset(l,LOC_MMREGISTER,OS_VECTOR);
             location_reset(l,LOC_MMREGISTER,OS_VECTOR);
             l.register:=reg;
             l.register:=reg;

+ 4 - 4
compiler/x86/nx86add.pas

@@ -726,8 +726,8 @@ unit nx86add;
             if nf_swapped in flags then
             if nf_swapped in flags then
               swapleftright;
               swapleftright;
 
 
-            location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
-            location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
+            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
             location:=left.location;
             location:=left.location;
             if is_double(resultdef) then
             if is_double(resultdef) then
               begin
               begin
@@ -781,7 +781,7 @@ unit nx86add;
             if (nf_swapped in flags) then
             if (nf_swapped in flags) then
               swapleftright;
               swapleftright;
 
 
-            location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
             location.register:=left.location.register;
             location.register:=left.location.register;
             { force floating point reg. location to be written to memory,
             { force floating point reg. location to be written to memory,
               we don't force it to mm register because writing to memory
               we don't force it to mm register because writing to memory
@@ -836,7 +836,7 @@ unit nx86add;
           end
           end
         else
         else
           begin
           begin
-            location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
             { force floating point reg. location to be written to memory,
             { force floating point reg. location to be written to memory,
               we don't force it to mm register because writing to memory
               we don't force it to mm register because writing to memory
               allows probably shorter code because there is no direct fpu->mm register
               allows probably shorter code because there is no direct fpu->mm register

+ 5 - 5
compiler/x86/nx86inl.pas

@@ -259,7 +259,7 @@ implementation
          if use_vectorfpu(resultdef) then
          if use_vectorfpu(resultdef) then
            begin
            begin
              secondpass(left);
              secondpass(left);
-             location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
              location:=left.location;
              location:=left.location;
              case tfloatdef(resultdef).floattype of
              case tfloatdef(resultdef).floattype of
                s32real:
                s32real:
@@ -286,7 +286,7 @@ implementation
          if use_vectorfpu(left.resultdef) then
          if use_vectorfpu(left.resultdef) then
            begin
            begin
              secondpass(left);
              secondpass(left);
-             location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
              location_reset(location,LOC_REGISTER,OS_S64);
              location_reset(location,LOC_REGISTER,OS_S64);
              location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
              location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
              case left.location.size of
              case left.location.size of
@@ -320,7 +320,7 @@ implementation
            not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
            not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
            begin
            begin
              secondpass(left);
              secondpass(left);
-             location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
              location_reset(location,LOC_REGISTER,OS_S64);
              location_reset(location,LOC_REGISTER,OS_S64);
              location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
              location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
              case left.location.size of
              case left.location.size of
@@ -371,7 +371,7 @@ implementation
          if use_vectorfpu(resultdef) then
          if use_vectorfpu(resultdef) then
            begin
            begin
              secondpass(left);
              secondpass(left);
-             location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
              location:=left.location;
              location:=left.location;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar);
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar);
            end
            end
@@ -388,7 +388,7 @@ implementation
          if use_vectorfpu(resultdef) then
          if use_vectorfpu(resultdef) then
            begin
            begin
              secondpass(left);
              secondpass(left);
-             location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+             hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
              location:=left.location;
              location:=left.location;
              case tfloatdef(resultdef).floattype of
              case tfloatdef(resultdef).floattype of
                s32real:
                s32real:

+ 1 - 1
compiler/x86/nx86mat.pas

@@ -154,7 +154,7 @@ interface
 
 
         if expectloc=LOC_MMREGISTER then
         if expectloc=LOC_MMREGISTER then
           begin
           begin
-            location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
             location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
             location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
 
 
             { make life of register allocator easier }
             { make life of register allocator easier }