Browse Source

+ Aarch64: AndCmpB.E/NE2Tbnz/Tbz optimization

florian 3 years ago
parent
commit
b5c7997c88
2 changed files with 52 additions and 0 deletions
  1. 11 0
      compiler/aarch64/aasmcpu.pas
  2. 41 0
      compiler/aarch64/aoptcpu.pas

+ 11 - 0
compiler/aarch64/aasmcpu.pas

@@ -187,6 +187,7 @@ uses
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
+         constructor op_reg_const_ref(op: tasmop; _op1: tregister; _op2: aint; _op3: treference); { 3 operands: register, constant, reference }
 
          constructor op_const_ref(op:tasmop; _op1: aint; _op2: treference);
 
@@ -607,6 +608,16 @@ implementation
       end;
 
 
+    { Builds an instruction with three operands, in this order:
+      a register, an integer constant and a reference. }
+    constructor taicpu.op_reg_const_ref(op: tasmop; _op1: tregister; _op2: aint; _op3: treference);
+      begin
+        inherited Create(op);
+        ops := 3;
+        LoadReg(0, _op1);    { operand 0: register }
+        LoadConst(1, _op2);  { operand 1: constant }
+        LoadRef(2, _op3);    { operand 2: reference }
+      end;
+
+
     function taicpu.is_same_reg_move(regtype: Tregistertype):boolean;
       begin
         { allow the register allocator to remove unnecessary moves }

+ 41 - 0
compiler/aarch64/aoptcpu.pas

@@ -52,6 +52,7 @@ Interface
         function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
         function OptPass1Shift(var p: tai): boolean;
         function OptPostCMP(var p: tai): boolean;
+        function OptPostAnd(var p: tai): Boolean; { tries to turn AND + CMP #0 + B.EQ/B.NE into TBZ/TBNZ }
         function OptPass1Data(var p: tai): boolean;
         function OptPass1FData(var p: tai): Boolean;
         function OptPass1STP(var p: tai): boolean;
@@ -787,6 +788,44 @@ Implementation
     end;
 
 
+  { Tries to replace the sequence
+
+        and      reg1,reg0,#<mask with exactly one bit set>
+        cmp      reg1,#0
+        b.eq/b.ne label          (reg1 reaches its end of life at the cmp)
+
+    by the single instruction
+
+        tbz/tbnz reg0,#<index of that bit>,label
+
+    p is the AND instruction on entry. Returns true if the replacement was
+    carried out; in that case the AND, CMP and B instructions have been
+    removed and p has been updated by RemoveCurrentP. Returns false and
+    leaves the code untouched otherwise. }
+  function TCpuAsmOptimizer.OptPostAnd(var p: tai): Boolean;
+    var
+      hp1, hp2: tai;
+      hp3: taicpu;
+      bitidx: cardinal;
+    begin
+      Result:=false;
+      if MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+        { only a single-bit mask can be expressed as a bit test }
+        (PopCnt(QWord(taicpu(p).oper[2]^.val))=1) and
+        GetNextInstruction(p,hp1) and
+        MatchInstruction(hp1,A_CMP,[PF_None]) and
+        MatchOpType(taicpu(hp1),top_reg,top_const) and
+        (taicpu(hp1).oper[1]^.val=0) and
+        { the cmp must test the result of the and ... }
+        MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[0]^) and
+        { ... and that result must not be needed afterwards }
+        RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+        GetNextInstruction(hp1,hp2) and
+        MatchInstruction(hp2,A_B,[PF_None]) and
+        (taicpu(hp2).condition in [C_EQ,C_NE]) then
+        begin
+          { NOTE(review): the removed CMP no longer sets the flags; this
+            assumes nothing after the branch still reads them - confirm. }
+
+          { Cast explicitly to QWord, as done for PopCnt above: a mask with
+            only bit 63 set is negative as aint, and the implicit signed to
+            unsigned conversion could trigger a range check error if the
+            compiler itself is built with range checking enabled. }
+          bitidx:=BsfQWord(QWord(taicpu(p).oper[2]^.val));
+          case taicpu(hp2).condition of
+            C_NE:
+              { branch taken if the masked value is non-zero, i.e. bit set }
+              hp3:=taicpu.op_reg_const_ref(A_TBNZ,taicpu(p).oper[1]^.reg,bitidx,taicpu(hp2).oper[0]^.ref^);
+            C_EQ:
+              { branch taken if the masked value is zero, i.e. bit clear }
+              hp3:=taicpu.op_reg_const_ref(A_TBZ,taicpu(p).oper[1]^.reg,bitidx,taicpu(hp2).oper[0]^.ref^);
+            else
+              Internalerror(2021100201);
+          end;
+          hp3.fileinfo:=taicpu(hp1).fileinfo;
+          { insert the new instruction before the old ones are removed }
+          asml.insertbefore(hp3, hp1);
+
+          RemoveInstruction(hp1);
+          RemoveInstruction(hp2);
+          RemoveCurrentP(p);
+          DebugMsg(SPeepholeOptimization + 'AndCmpB.E/NE2Tbnz/Tbz done', p);
+          Result:=true;
+        end;
+    end;
+
+
   function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean;
     var
      hp1,hp2: tai;
@@ -907,6 +946,8 @@ Implementation
           case taicpu(p).opcode of
             A_CMP:
               Result:=OptPostCMP(p);
+            A_AND:
+              Result:=OptPostAnd(p);
             else
               ;
           end;