
+ AArch64: FoldShiftProcess optimization

git-svn-id: trunk@42924 -
florian 5 years ago
parent commit
0c6f7321bf
4 changed files with 211 additions and 4 deletions
  1. compiler/aarch64/agcpugas.pas (+1 -1)
  2. compiler/aarch64/aoptcpu.pas (+208 -1)
  3. compiler/aarch64/cpubase.pas (+1 -1)
  4. compiler/aarch64/racpugas.pas (+1 -1)

+ 1 - 1
compiler/aarch64/agcpugas.pas

@@ -50,7 +50,7 @@ unit agcpugas;
 
     const
       gas_shiftmode2str : array[tshiftmode] of string[4] = (
-        '','lsl','lsr','asr',
+        '','lsl','lsr','asr','ror',
         'uxtb','uxth','uxtw','uxtx',
         'sxtb','sxth','sxtw','sxtx');
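
For illustration (an assumed example, not part of the commit): with 'ror' added to gas_shiftmode2str, the assembler writer can render SM_ROR shifter operands. Architecturally, only the logical instructions (AND, ORR, EOR, BIC, ORN, EON, TST) accept a rotate as shifter operand, which is why the optimizer below still treats ROR with care:

    // a rotate-right shifter operand, printable once 'ror' is in the table
    orr x0, x1, x2, ror #8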
 

+ 208 - 1
compiler/aarch64/aoptcpu.pas

@@ -39,15 +39,21 @@ Interface
         { uses the same constructor as TAopObj }
         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
         procedure PeepHoleOptPass2;override;
+        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
         function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
         function LookForPostindexedPattern(p : taicpu) : boolean;
         procedure DebugMsg(const s : string; p : tai);
+      private
+        function OptPass1Shift(var p: tai): boolean;
       End;
 
 Implementation
 
   uses
-    aasmbase;
+    aasmbase,
+    aoptutils,
+    cgutils,
+    verbose;
 
 {$ifdef DEBUG_AOPTCPU}
   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
@@ -66,6 +72,22 @@ Implementation
     end;
 
 
+  function RefsEqual(const r1, r2: treference): boolean;
+    begin
+      result :=
+        (r1.offset = r2.offset) and
+        (r1.base = r2.base) and
+        (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
+        (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
+        (r1.relsymbol = r2.relsymbol) and
+        (r1.shiftimm = r2.shiftimm) and
+        (r1.addressmode = r2.addressmode) and
+        (r1.shiftmode = r2.shiftmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
+    end;
+
+
   function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
     begin
       result :=
@@ -84,6 +106,33 @@ Implementation
     end;
 
 
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+    begin
+      result := (oper.typ = top_reg) and (oper.reg = reg);
+    end;
+
+
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+    begin
+      result := oper1.typ = oper2.typ;
+
+      if result then
+        case oper1.typ of
+          top_const:
+            Result:=oper1.val = oper2.val;
+          top_reg:
+            Result:=oper1.reg = oper2.reg;
+          top_conditioncode:
+            Result:=oper1.cc = oper2.cc;
+          top_realconst:
+            Result:=oper1.val_real = oper2.val_real;
+          top_ref:
+            Result:=RefsEqual(oper1.ref^, oper2.ref^);
+          else Result:=false;
+        end
+    end;
+
+
   function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
     Out Next: tai; reg: TRegister): Boolean;
     begin
@@ -97,6 +146,54 @@ Implementation
             is_calljmp(taicpu(Next).opcode);
     end;
 
+
+  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      p: taicpu;
+    begin
+      p := taicpu(hp);
+      Result := false;
+      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
+        exit;
+
+      case p.opcode of
+        { These instructions do not write to a register at all }
+        A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
+          exit;
+        { Take care of post-/preindexed stores and loads; they will change their base register }
+        A_STR, A_LDR:
+          begin
+            Result := false;
+            { actually, this does not apply here because post-/preindexing does not mean that a register
+              is loaded with a new value; it is only modified:
+              (taicpu(p).oper[1]^.typ=top_ref) and
+              (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+              (taicpu(p).oper[1]^.ref^.base = reg);
+            }
+            { STR does not load into its first register }
+            if p.opcode = A_STR then
+              exit;
+          end;
+        else
+          ;
+      end;
+
+      if Result then
+        exit;
+
+      case p.oper[0]^.typ of
+        top_reg:
+          Result := (p.oper[0]^.reg = reg);
+        top_ref:
+          Result :=
+            (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+            (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
+      end;
+    end;
+
+
   {
     optimize
       ldr/str regX,[reg1]
@@ -145,6 +242,111 @@ Implementation
     end;
 
 
+  function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
+    var
+      hp1,hp2: tai;
+      I2, I: Integer;
+      shifterop: tshifterop;
+    begin
+      Result:=false;
+      { This folds shifterops into following instructions
+        <shiftop> r0, r1, #imm
+        <op> r2, r3, r0
+
+        to
+
+        <op> r2, r3, r1, <shiftop> #imm
+      }
+      { do not handle ROR yet; only some of the instructions below support ROR as a shifter operand }
+      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
+         MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
+                                A_EON, A_EOR, A_MOV, A_NEG, A_ORN, A_ORR,
+                                A_SUB, A_TST], [PF_None]) and
+         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+         (taicpu(hp1).ops >= 2) and
+         { Currently we can't fold into another shifterop }
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
+         { SP does not fully work with shifted registers; as I didn't find the exact rules,
+           we do not operate on SP }
+         (taicpu(hp1).oper[0]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[1]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
+         { reg1 must not be modified in between }
+         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+         (
+           { Only ONE of the two src operands is allowed to match }
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
+         ) and
+         { for SUB, the last operand must match; there is no RSB on AArch64 }
+         ((taicpu(hp1).opcode<>A_SUB) or
+          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
+        begin
+          if taicpu(hp1).opcode in [A_TST, A_CMP, A_CMN, A_MOV] then
+            I2:=0
+          else
+            I2:=1;
+          for I:=I2 to taicpu(hp1).ops-1 do
+            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
+              begin
+                { If the operand matched on the second op from the RIGHT,
+                  we have to switch the operands; this will not happen for CMP,
+                  where we're only evaluating the rightmost operand
+                }
+                shifterop_reset(shifterop);
+                case taicpu(p).opcode of
+                  A_LSL:
+                    shifterop.shiftmode:=SM_LSL;
+                  A_ROR:
+                    shifterop.shiftmode:=SM_ROR;
+                  A_LSR:
+                    shifterop.shiftmode:=SM_LSR;
+                  A_ASR:
+                    shifterop.shiftmode:=SM_ASR;
+                  else
+                    InternalError(2019090401);
+                end;
+                shifterop.shiftimm:=taicpu(p).oper[2]^.val;
+
+                if I <> taicpu(hp1).ops-1 then
+                  begin
+                    if taicpu(hp1).ops = 3 then
+                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
+                           taicpu(p).oper[1]^.reg, shifterop)
+                    else
+                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                           shifterop);
+                  end
+                else
+                  if taicpu(hp1).ops = 3 then
+                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                         taicpu(p).oper[1]^.reg,shifterop)
+                  else
+                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                         shifterop);
+
+                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+                asml.insertbefore(hp2, hp1);
+                GetNextInstruction(p, hp2);
+                asml.remove(p);
+                asml.remove(hp1);
+                p.free;
+                hp1.free;
+                p:=hp2;
+                DebugMsg('Peephole FoldShiftProcess done', p);
+                Result:=true;
+                break;
+              end;
+        end;
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     begin
       result := false;
@@ -159,6 +361,11 @@ Implementation
               begin
                 Result:=LookForPostindexedPattern(taicpu(p));
               end;
+            A_LSR,
+            A_ROR,
+            A_ASR,
+            A_LSL:
+              Result:=OptPass1Shift(p);
             else
               ;
           end;
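
To make the fold concrete, here is a hand-written before/after pair (an illustrative sketch, not taken from the commit or its test suite), assuming x2 is not used after the ADD:

    // before: the shift result x2 is consumed only by the ADD
    lsr x2, x3, #4
    add x0, x1, x2

    // after FoldShiftProcess: the shift becomes the ADD's shifter operand
    add x0, x1, x3, lsr #4

The RegEndOfLife guard establishes that x2 is dead afterwards; the RegLoadedWithNewValue override added above appears to exist to support exactly this liveness check. The SUB restriction follows from operand order: a shifted register can only appear as the last source operand, and since AArch64 has no RSB, a shift feeding the first source of a SUB cannot be folded.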

+ 1 - 1
compiler/aarch64/cpubase.pas

@@ -199,7 +199,7 @@ unit cpubase;
       tshiftmode = (SM_None,
                     { shifted register instructions. LSL can also be used for
                       the index register of certain loads/stores }
-                    SM_LSL,SM_LSR,SM_ASR,
+                    SM_LSL,SM_LSR,SM_ASR,SM_ROR,
                     { extended register instructions: zero/sign extension +
                         optional shift (interpreted as LSL after extension)
                        -- the index register of certain loads/stores can be
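
For context (an illustrative note, not part of the diff): SM_ROR extends the shifted-register group, which is distinct from the extended-register group declared after it; the two attach to different instruction forms:

    and x0, x1, x2, ror #12   // shifted-register form (SM_ROR)
    add x0, x1, w2, sxtw #2   // extended-register form (SM_SXTW), unchanged by this commit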

+ 1 - 1
compiler/aarch64/racpugas.pas

@@ -461,7 +461,7 @@ Unit racpugas;
 
       const
         shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
-          ('LSL','LSR','ASR',
+          ('LSL','LSR','ASR','ROR',
            'UXTB','UXTH','UXTW','UXTX',
            'SXTB','SXTH','SXTW','SXTX');
       var
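
A closing note (assumption, not shown in the diff): shiftmode2str is a typed constant indexed by the SM_LSL..SM_SXTX subrange, so after SM_ROR was inserted into tshiftmode in cpubase.pas, the 'ROR' entry here is needed for the unit to compile at all; FPC rejects an array constant whose element count no longer matches its index range. With it in place, the inline-assembler reader can recognize a rotate shifter operand such as:

    orr x0, x1, x2, ror #3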