Browse Source

* x86: Extended FuncMov2Func optimisation to handle 1, 3 and 4-operand functions

J. Gareth "Curious Kit" Moreton 2 years ago
parent
commit
d35be8a9ea
1 changed file with 35 additions and 8 deletions
  1. 35 8
      compiler/x86/aoptx86.pas

+ 35 - 8
compiler/x86/aoptx86.pas

@@ -4817,6 +4817,9 @@ unit aoptx86;
 
           Change to:
             func.  %reg1,%reg3 (see comment below for what a valid func. is)
+
+          Perform similar optimisations with 1, 3 and 4-operand instructions
+          that only have one output.
         }
         if MatchOpType(taicpu(p), top_reg, top_reg) then
           begin
@@ -4827,12 +4830,29 @@ unit aoptx86;
               GetLastInstruction(p, hp2) and
               (hp2.typ = ait_instruction) and
               { Have to make sure it's an instruction that only reads from
-                operand 1 and only writes (not reads or modifies) from operand 2;
-                in essence, a one-operand pure function such as BSR or POPCNT }
-              (taicpu(hp2).ops = 2) and
-              (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and
-              (taicpu(hp2).oper[1]^.typ = top_reg) and
-              (taicpu(hp2).oper[1]^.reg = p_SourceReg) then
+                every operand except the last (if any) and only writes (not
+                reads or modifies) to the last one; in essence, a pure function
+                such as BSR, POPCNT or ANDN }
+              (
+                (
+                  (taicpu(hp2).ops = 1) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Wop1] = [Ch_Wop1])
+                ) or
+                (
+                  (taicpu(hp2).ops = 2) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2])
+                ) or
+                (
+                  (taicpu(hp2).ops = 3) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Rop2, Ch_Wop3] = [Ch_Rop1, Ch_Rop2, Ch_Wop3])
+                ) or
+                (
+                  (taicpu(hp2).ops = 4) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Wop4] = [Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Wop4])
+                )
+              ) and
+              (taicpu(hp2).oper[taicpu(hp2).ops-1]^.typ = top_reg) and
+              (taicpu(hp2).oper[taicpu(hp2).ops-1]^.reg = p_SourceReg) then
               begin
                 case taicpu(hp2).opcode of
                   A_FSTSW, A_FNSTSW,
@@ -4845,8 +4865,15 @@ unit aoptx86;
                   else
                     begin
                       DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
-                      taicpu(hp2).oper[1]^.reg := p_TargetReg;
-                      AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
+                      taicpu(hp2).oper[taicpu(hp2).ops-1]^.reg := p_TargetReg;
+
+                      if not RegInInstruction(p_TargetReg, hp2) then
+                        begin
+                          { Since we're allocating from an earlier point, we
+                            need to remove the register from the tracking }
+                          ExcludeRegFromUsedRegs(p_TargetReg, TmpUsedRegs);
+                          AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
+                        end;
                       RemoveCurrentp(p, hp1);
                       Result := True;
                       Exit;