Jelajahi Sumber

+ (V)Cvtss2CvtSd(V)Cvtsd2ss2Nop optimization, resolves #39360

florian 3 tahun lalu
induk
melakukan
ec40db3da7
3 mengubah file dengan 39 tambahan dan 0 penghapusan
  1. 3 0
      compiler/i386/aoptcpu.pas
  2. 33 0
      compiler/x86/aoptx86.pas
  3. 3 0
      compiler/x86_64/aoptcpu.pas

+ 3 - 0
compiler/i386/aoptcpu.pas

@@ -228,6 +228,9 @@ unit aoptcpu;
                 A_SHRX,
                 A_SHRX,
                 A_SHLX:
                 A_SHLX:
                   Result:=OptPass1SHXX(p);
                   Result:=OptPass1SHXX(p);
+                A_VCVTSS2SD,
+                A_CVTSS2SD:
+                  Result:=OptPass1_V_Cvtss2sd(p);
                 else
                 else
                   ;
                   ;
               end;
               end;

+ 33 - 0
compiler/x86/aoptx86.pas

@@ -158,6 +158,7 @@ unit aoptx86;
         function OptPass1Imul(var p : tai) : boolean;
         function OptPass1Imul(var p : tai) : boolean;
         function OptPass1Jcc(var p : tai) : boolean;
         function OptPass1Jcc(var p : tai) : boolean;
         function OptPass1SHXX(var p: tai): boolean;
         function OptPass1SHXX(var p: tai): boolean;
+        function OptPass1_V_Cvtss2sd(var p: tai): boolean;
 
 
         function OptPass2Movx(var p : tai): Boolean;
         function OptPass2Movx(var p : tai): Boolean;
         function OptPass2MOV(var p : tai) : boolean;
         function OptPass2MOV(var p : tai) : boolean;
@@ -5855,6 +5856,38 @@ unit aoptx86;
      end;
      end;
 
 
 
 
+   function TX86AsmOptimizer.OptPass1_V_Cvtss2sd(var p: tai): boolean;
+     var
+       hp1: tai;
+     begin
+       Result:=false;
+       { Get rid of a conversion that is immediately undone by the next
+         instruction, i.e. the pair
+
+         (v)cvtss2sd reg0,<reg1,>reg2
+         (v)cvtsd2ss reg2,<reg2,>reg0
+
+         Two-operand form: the operands of the second instruction must be
+         the operands of the first one in swapped order (MatchOperand).
+
+         Three-operand (v...) form: all operands of both instructions must
+         be registers, the first two operands of each instruction must be
+         the same register (so reg1 has to equal reg0), and the registers
+         of the second instruction must mirror those of the first one.
+         Registers are compared via getsupreg.
+
+         On a match both instructions are removed and Result is set to
+         true; otherwise nothing is changed and Result stays false.
+
+         NOTE(review): nothing in this routine checks whether reg2 is still
+         read after the second instruction; removing the pair leaves reg2
+         without the converted value. Confirm that this is safe or add a
+         liveness check.
+       }
+       if GetNextInstruction(p,hp1) and
+         (((taicpu(p).opcode=A_CVTSS2SD) and MatchInstruction(hp1,A_CVTSD2SS,[taicpu(p).opsize]) and
+           MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^)) or
+          ((taicpu(p).opcode=A_VCVTSS2SD) and MatchInstruction(hp1,A_VCVTSD2SS,[taicpu(p).opsize]) and
+           MatchOpType(taicpu(p),top_reg,top_reg,top_reg) and
+           MatchOpType(taicpu(hp1),top_reg,top_reg,top_reg) and
+           (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
+           (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
+           (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[2]^.reg)) and
+           (getsupreg(taicpu(p).oper[2]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg))
+          )
+         ) then
+         begin
+           DebugMsg(SPeepholeOptimization + '(V)Cvtss2CvtSd(V)Cvtsd2ss2Nop done',p);
+           RemoveCurrentP(p);
+           RemoveInstruction(hp1);
+           Result:=true;
+           Exit;
+         end;
+     end;
+
+
    function TX86AsmOptimizer.OptPass1Jcc(var p : tai) : boolean;
    function TX86AsmOptimizer.OptPass1Jcc(var p : tai) : boolean;
      var
      var
        hp1, hp2, hp3, hp4, hp5: tai;
        hp1, hp2, hp3, hp4, hp5: tai;

+ 3 - 0
compiler/x86_64/aoptcpu.pas

@@ -152,6 +152,9 @@ uses
                 A_SHRX,
                 A_SHRX,
                 A_SHLX:
                 A_SHLX:
                   Result:=OptPass1SHXX(p);
                   Result:=OptPass1SHXX(p);
+                A_VCVTSS2SD,
+                A_CVTSS2SD:
+                  Result:=OptPass1_V_Cvtss2sd(p);
                 else
                 else
                   ;
                   ;
               end;
               end;