Преглед изворни кода

* Merging recent m68k FPU fixes from trunk: r42829, r48413, r48414, r48522, r48523, r48524

git-svn-id: branches/fixes_3_2@48666 -
Károly Balogh пре 4 година
родитељ
комит
ab4f34e3ff
5 измењених фајлова са 77 додато и 35 уклоњено
  1. 10 6
      compiler/m68k/ag68kgas.pas
  2. 26 22
      compiler/m68k/aoptcpu.pas
  3. 37 3
      compiler/m68k/cgcpu.pas
  4. 1 1
      compiler/m68k/n68kadd.pas
  5. 3 3
      compiler/m68k/n68kinl.pas

+ 10 - 6
compiler/m68k/ag68kgas.pas

@@ -169,7 +169,7 @@ interface
       end;
       end;
 
 
 
 
-    function getopstr(var o:toper) : string;
+    function getopstr(size: topsize; var o:toper) : string;
       var
       var
         i : tsuperregister;
         i : tsuperregister;
       begin
       begin
@@ -220,10 +220,14 @@ interface
             getopstr:='#'+tostr(longint(o.val));
             getopstr:='#'+tostr(longint(o.val));
           top_realconst:
           top_realconst:
             begin
             begin
-              str(o.val_real,getopstr);
-              if getopstr[1]=' ' then
-                getopstr[1]:='+';
-              getopstr:='#0d'+getopstr;
+              case size of
+                S_FS:
+                  getopstr:='#0x'+hexstr(longint(single(o.val_real)),sizeof(single)*2);
+                S_FD:
+                  getopstr:='#0x'+hexstr(BestRealRec(o.val_real).Data,sizeof(bestreal)*2);
+              else
+                internalerror(2021020801);
+              end;
             end;
             end;
           else internalerror(200405021);
           else internalerror(200405021);
         end;
         end;
@@ -337,7 +341,7 @@ interface
                         sep:=':'
                         sep:=':'
                       else
                       else
                         sep:=',';
                         sep:=',';
-                      s:=s+sep+getopstr(taicpu(hp).oper[i]^);
+                      s:=s+sep+getopstr(taicpu(hp).opsize,taicpu(hp).oper[i]^);
                     end;
                     end;
                 end;
                 end;
            end;
            end;

+ 26 - 22
compiler/m68k/aoptcpu.pas

@@ -154,26 +154,30 @@ unit aoptcpu;
               opstr:=opname(p);
               opstr:=opname(p);
               case taicpu(p).oper[0]^.typ of
               case taicpu(p).oper[0]^.typ of
                 top_reg:
                 top_reg:
-                  begin
-                    {  move %reg0, %tmpreg; move %tmpreg, <ea> -> move %reg0, <ea> }
-                    taicpu(p).loadOper(1,taicpu(next).oper[1]^);
-                    asml.remove(next);
-                    next.free;
-                    result:=true;
-                    { also remove leftover move %reg0, %reg0, which can occur as the result
-                      of the previous optimization, if %reg0 and %tmpreg was different types
-                      (addr vs. data), so these moves were left in by the cg }
-                    if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
-                      begin
-                        DebugMsg('Optimizer: '+opstr+' + '+opstr+' removed',p);
-			GetNextInstruction(p,next);
-                        asml.remove(p);
-                        p.free;
-			p:=next;
-                      end
-                    else
-                      DebugMsg('Optimizer: '+opstr+' + '+opstr+' to '+opstr+' #1',p)
-                  end;
+                  { do not optimize away FPU to INT to FPU reg moves. These are used for 
+                    to-single-rounding on FPUs which have no FSMOVE/FDMOVE. (KB) }
+                  if not ((taicpu(p).opcode = A_FMOVE) and
+                    (getregtype(taicpu(p).oper[0]^.reg) <> getregtype(taicpu(p).oper[1]^.reg))) then
+                    begin
+                      {  move %reg0, %tmpreg; move %tmpreg, <ea> -> move %reg0, <ea> }
+                      taicpu(p).loadOper(1,taicpu(next).oper[1]^);
+                      asml.remove(next);
+                      next.free;
+                      result:=true;
+                      { also remove leftover move %reg0, %reg0, which can occur as the result
+                        of the previous optimization, if %reg0 and %tmpreg were of different types
+                        (addr vs. data), so these moves were left in by the cg }
+                      if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
+                        begin
+                          DebugMsg('Optimizer: '+opstr+' + '+opstr+' removed',p);
+                          GetNextInstruction(p,next);
+                          asml.remove(p);
+                          p.free;
+                          p:=next;
+                        end
+                      else
+                        DebugMsg('Optimizer: '+opstr+' + '+opstr+' to '+opstr+' #1',p)
+                    end;
                 top_const:
                 top_const:
                   begin
                   begin
                     // DebugMsg('Optimizer: '+opstr+' + '+opstr+' to '+opstr+' #2',p);
                     // DebugMsg('Optimizer: '+opstr+' + '+opstr+' to '+opstr+' #2',p);
@@ -271,10 +275,10 @@ unit aoptcpu;
                    (taicpu(p).oper[0]^.ref^.offset = 0) then
                    (taicpu(p).oper[0]^.ref^.offset = 0) then
                   begin
                   begin
                     DebugMsg('Optimizer: LEA 0(Ax),Ax removed',p);
                     DebugMsg('Optimizer: LEA 0(Ax),Ax removed',p);
-		    GetNextInstruction(p,next);
+                    GetNextInstruction(p,next);
                     asml.remove(p);
                     asml.remove(p);
                     p.free;
                     p.free;
-		    p:=next;
+                    p:=next;
                     result:=true;
                     result:=true;
                   end;
                   end;
               { Address register sub/add can be replaced with ADDQ/SUBQ or LEA if the value is in the
               { Address register sub/add can be replaced with ADDQ/SUBQ or LEA if the value is in the

+ 37 - 3
compiler/m68k/cgcpu.pas

@@ -1051,10 +1051,42 @@ unit cgcpu;
     procedure tcg68k.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
     procedure tcg68k.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
       var
       var
         instr : taicpu;
         instr : taicpu;
+        op: tasmop;
+        href: treference;
+        hreg: tregister;
       begin
       begin
-        instr:=taicpu.op_reg_reg(A_FMOVE,fpuregopsize,reg1,reg2);
-        add_move_instruction(instr);
-        list.concat(instr);
+        if fromsize > tosize then
+          begin
+            { we have to do a load-store through an intregister or the stack in this case,
+              which is probably the fastest way, and simpler than messing around with FPU control
+              words for one-off custom rounding (KB) }
+            case tosize of
+              OS_F32:
+                  begin
+                    //list.concat(tai_comment.create(strpnew('a_loadfpu_reg_reg rounding via intreg')));
+                    hreg := getintregister(list,OS_32);
+                    list.concat(taicpu.op_reg_reg(A_FMOVE, tcgsize2opsize[tosize], reg1, hreg));
+                    list.concat(taicpu.op_reg_reg(A_FMOVE, tcgsize2opsize[tosize], hreg, reg2));
+                  end;
+              OS_F64:
+                  begin
+                    //list.concat(tai_comment.create(strpnew('a_loadfpu_reg_reg rounding via stack')));
+                    reference_reset_base(href, NR_STACK_POINTER_REG, 0, ctempposinvalid, 0, []);
+                    href.direction:=dir_dec;
+                    list.concat(taicpu.op_reg_ref(A_FMOVE, tcgsize2opsize[tosize], reg1, href));
+                    href.direction:=dir_inc;
+                    list.concat(taicpu.op_ref_reg(A_FMOVE, tcgsize2opsize[tosize], href, reg2));
+                  end;
+            else
+              internalerror(2021020802);
+            end;
+          end
+        else
+          begin
+            instr:=taicpu.op_reg_reg(A_FMOVE,fpuregopsize,reg1,reg2);
+            add_move_instruction(instr);
+            list.concat(instr);
+          end;
       end;
       end;
 
 
 
 
@@ -1067,6 +1099,8 @@ unit cgcpu;
         href := ref;
         href := ref;
         fixref(list,href,current_settings.fputype = fpu_coldfire);
         fixref(list,href,current_settings.fputype = fpu_coldfire);
         list.concat(taicpu.op_ref_reg(A_FMOVE,opsize,href,reg));
         list.concat(taicpu.op_ref_reg(A_FMOVE,opsize,href,reg));
+        if fromsize > tosize then
+          a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
       end;
       end;
 
 
     procedure tcg68k.a_loadfpu_reg_ref(list: TAsmList; fromsize,tosize: tcgsize; reg: tregister; const ref: treference);
     procedure tcg68k.a_loadfpu_reg_ref(list: TAsmList; fromsize,tosize: tcgsize; reg: tregister; const ref: treference);

+ 1 - 1
compiler/m68k/n68kadd.pas

@@ -202,7 +202,7 @@ implementation
                   hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
                   hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
 
 
                   location.register := cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                   location.register := cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
-                  cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+                  cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,left.location.size,location.size,left.location.register,location.register);
                 end;
                 end;
 
 
               { emit the actual operation }
               { emit the actual operation }

+ 3 - 3
compiler/m68k/n68kinl.pas

@@ -210,7 +210,7 @@ implementation
                   //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_srq_real called!: left was cfpuregister!')));
                   //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_srq_real called!: left was cfpuregister!')));
                   location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                   location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                   location.loc := LOC_FPUREGISTER;
                   location.loc := LOC_FPUREGISTER;
-                  cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+                  cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,left.location.size,location.size,left.location.register,location.register);
                 end;
                 end;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FMUL,fpuregopsize,left.location.register,location.register));
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FMUL,fpuregopsize,left.location.register,location.register));
             end;
             end;
@@ -296,7 +296,7 @@ implementation
                   begin
                   begin
                     hreg:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                     hreg:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                     location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                     location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
-                    cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+                    cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,left.location.size,location.size,left.location.register,location.register);
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FINTRZ,fpuregopsize,left.location.register,hreg));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FINTRZ,fpuregopsize,left.location.register,hreg));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSUB,fpuregopsize,hreg,location.register));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSUB,fpuregopsize,hreg,location.register));
                   end;
                   end;
@@ -306,7 +306,7 @@ implementation
                     location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                     location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                     href:=left.location.reference;
                     href:=left.location.reference;
                     tcg68k(cg).fixref(current_asmdata.CurrAsmList,href,current_settings.fputype = fpu_coldfire);
                     tcg68k(cg).fixref(current_asmdata.CurrAsmList,href,current_settings.fputype = fpu_coldfire);
-                    cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmlist,left.location.size,OS_NO,href,location.register);
+                    cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmlist,left.location.size,location.size,href,location.register);
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FINTRZ,fpuregopsize,location.register,hreg));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FINTRZ,fpuregopsize,location.register,hreg));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSUB,fpuregopsize,hreg,location.register));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSUB,fpuregopsize,hreg,location.register));
                   end;
                   end;