Browse Source

* x86: New "shr %cl; shr x" swap optimisation

J. Gareth "Curious Kit" Moreton 3 years ago
parent
commit
b872322678
1 changed file with 26 additions and 2 deletions
  1. 26 2
      compiler/x86/aoptx86.pas

+ 26 - 2
compiler/x86/aoptx86.pas

@@ -13090,7 +13090,7 @@ unit aoptx86;
         hp1 := p;
         repeat
           if not GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[1]^.reg) or (hp1.typ <> ait_instruction) then
-            Exit;
+            Break;
 
           { Detect:
               shr x, %reg
@@ -13235,7 +13235,7 @@ unit aoptx86;
                     end;
 
                   if Shift < topsize2memsize[taicpu(p).opsize] - topsize2memsize[LimitSize] then
-                    Exit;
+                    Break;
 
                   { Since we've established that the combined shift is within
                     limits, we can actually combine the adjacent SHR
@@ -13259,6 +13259,30 @@ unit aoptx86;
           Break;
         until False;
 
+        { Detect the following (looking backwards):
+            shr %cl,%reg
+            shr x,  %reg
+
+          Swap the two SHR instructions to minimise a pipeline stall.
+        }
+        if GetLastInstruction(p, hp1) and
+          MatchInstruction(hp1, A_SHR, [taicpu(p).opsize]) and
+          MatchOpType(taicpu(hp1), top_reg, top_reg) and
+          { First operand will be %cl }
+          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
+          { Just to be sure }
+          (getsupreg(taicpu(hp1).oper[1]^.reg) <> RS_ECX) then
+          begin
+            DebugMsg(SPeepholeOptimization + 'Swapped variable and constant SHR instructions to minimise pipeline stall (ShrShr2ShrShr)', hp1);
+
+            { Moving the entries this way ensures the register tracking remains correct }
+            Asml.Remove(p);
+            Asml.InsertBefore(p, hp1);
+            p := hp1;
+            { Don't set Result to True because the current instruction is now
+              "shr %cl,%reg" and there's nothing more we can do with it }
+          end;
+
       end;