瀏覽代碼

* x86: Added new post-peephole optimisations to convert BMI2 shift/rotate instructions to their smaller traditional versions if possible

J. Gareth "Curious Kit" Moreton 1 周之前
父節點
當前提交
23217f4ac9
共有 3 個文件被更改,包括 59 次插入和 0 次删除
  1. 6 0
      compiler/i386/aoptcpu.pas
  2. 47 0
      compiler/x86/aoptx86.pas
  3. 6 0
      compiler/x86_64/aoptcpu.pas

+ 6 - 0
compiler/i386/aoptcpu.pas

@@ -455,6 +455,12 @@ unit aoptcpu;
                   Result:=PostPeepholeOptRET(p);
                 A_VPXOR:
                   Result:=PostPeepholeOptVPXOR(p);
+                A_SARX,
+                A_SHLX,
+                A_SHRX:
+                  Result:=PostPeepholeOptSARXSHLXSHRX(p);
+                A_RORX:
+                  Result:=PostPeepholeOptRORX(p);
                 else
                   ;
               end;

+ 47 - 0
compiler/x86/aoptx86.pas

@@ -235,6 +235,8 @@ unit aoptx86;
         function PostPeepholeOptADDSUB(var p : tai) : Boolean;
         function PostPeepholeOptVPXOR(var p: tai): Boolean;
         function PostPeepholeOptRET(var p: tai): Boolean;
+        function PostPeepholeOptRORX(var p: tai): Boolean;
+        function PostPeepholeOptSARXSHLXSHRX(var p: tai): Boolean;
 
         procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
 
@@ -18140,6 +18142,51 @@ unit aoptx86;
       end;
 
 
+    function TX86AsmOptimizer.PostPeepholeOptRORX(var p: tai): Boolean;
+      begin
+        Result := False;
+        { Change:                 To:
+            rorx #x,%reg,%reg       ror #x,%reg
+
+          (Smaller instruction size)
+
+          Post-peephole step for an instruction p that the caller has already
+          identified as A_RORX.  The rewrite is only performed when operand 1
+          matches the register held in operand 2 and NR_DEFAULTFLAGS is not
+          present in UsedRegs.  Presumably the flags test is required because
+          ROR modifies the flags whereas RORX leaves them untouched - TODO
+          confirm against the instruction set reference.
+
+          The instruction is modified in place: the opcode becomes A_ROR, the
+          operand count drops to 2 and operand slot 2 is cleared.
+
+          NOTE(review): Result is left as False even when the instruction has
+          been rewritten - confirm that the caller does not rely on a True
+          result to learn about the change.
+        }
+        if MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^.reg) and
+          not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
+          begin
+            taicpu(p).opcode:=A_ROR;
+            taicpu(p).ops:=2;
+            taicpu(p).clearop(2);
+          end;
+      end;
+
+
+    function TX86AsmOptimizer.PostPeepholeOptSARXSHLXSHRX(var p: tai): Boolean;
+      begin
+        Result := False;
+        { Change:                 To:
+            shlx %ecx,%reg,%reg     shl %cl,%reg
+
+          (Smaller instruction size)
+          Same with SARX and SHRX (and when using %rcx for 64-bit)
+
+          Post-peephole step for an instruction p that the caller has already
+          identified as A_SARX, A_SHLX or A_SHRX.  The rewrite is only
+          performed when the super-register of operand 0 (the shift count) is
+          RS_ECX, operand 1 matches the register held in operand 2, and
+          NR_DEFAULTFLAGS is not present in UsedRegs.  Presumably the flags
+          test is required because SAR/SHL/SHR modify the flags whereas the
+          BMI2 forms leave them untouched - TODO confirm against the
+          instruction set reference.
+
+          The instruction is modified in place: the opcode is mapped to its
+          two-operand counterpart, the count register is narrowed to its
+          R_SUBL sub-register (the %cl shown above), the operand count drops
+          to 2 and operand slot 2 is cleared.  Any other opcode reaching the
+          case statement raises InternalError 2025090501.
+
+          NOTE(review): operand 0 is read as a register without checking its
+          operand type first - presumably guaranteed by the instruction
+          encoding; verify.
+          NOTE(review): Result is left as False even when the instruction has
+          been rewritten - confirm that the caller does not rely on a True
+          result to learn about the change.
+        }
+        if (getsupreg(taicpu(p).oper[0]^.reg)=RS_ECX) and
+          MatchOperand(taicpu(p).oper[1]^,taicpu(p).oper[2]^.reg) and
+          not RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) then
+          begin
+            case taicpu(p).opcode of
+              A_SARX: taicpu(p).opcode:=A_SAR;
+              A_SHLX: taicpu(p).opcode:=A_SHL;
+              A_SHRX: taicpu(p).opcode:=A_SHR;
+              else
+                InternalError(2025090501);
+            end;
+            setsubreg(taicpu(p).oper[0]^.reg, R_SUBL);
+            taicpu(p).ops:=2;
+            taicpu(p).clearop(2);
+          end;
+      end;
+
+
     class procedure TX86AsmOptimizer.OptimizeRefs(var p: taicpu);
       var
         OperIdx: Integer;

+ 6 - 0
compiler/x86_64/aoptcpu.pas

@@ -325,6 +325,12 @@ uses
                   Result:=PostPeepholeOptRET(p);
                 A_VPXOR:
                   Result:=PostPeepholeOptVPXOR(p);
+                A_SARX,
+                A_SHLX,
+                A_SHRX:
+                  Result:=PostPeepholeOptSARXSHLXSHRX(p);
+                A_RORX:
+                  Result:=PostPeepholeOptRORX(p);
                 else
                   ;
               end;