Explorar o código

+ AArch64: profiler support for aarch64-linux

git-svn-id: trunk@44216 -
florian hai 5 anos
pai
achega
95e8992e2f
Modificáronse 3 ficheiros con 147 adicións e 38 borrados
  1. 13 0
      compiler/aarch64/cgcpu.pas
  2. 3 1
      compiler/options.pas
  3. 131 37
      compiler/x86/aoptx86.pas

+ 13 - 0
compiler/aarch64/cgcpu.pas

@@ -101,6 +101,7 @@ interface
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
         procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
+        procedure g_profilecode(list: TAsmList);override;
        private
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
@@ -2301,6 +2302,18 @@ implementation
       end;
 
 
+    { Emits the profiling hook for the current target.
+
+      On aarch64-linux this appends "MOV X0,X30" to list and then emits a
+      call to '_mcount'.  Every other target system is rejected with
+      internal error 2020021901. }
+    procedure tcgaarch64.g_profilecode(list : TAsmList);
+      begin
+        case target_info.system of
+          system_aarch64_linux:
+            begin
+              { hand the current X30 value to _mcount in X0 before the call is emitted
+                (NOTE(review): presumably the caller's return address - confirm) }
+              list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
+              a_call_name(list,'_mcount',false);
+            end;
+          else
+            internalerror(2020021901);
+        end;
+      end;
+
+
     procedure create_codegen;
       begin
         cg:=tcgaarch64.Create;

+ 3 - 1
compiler/options.pas

@@ -136,7 +136,9 @@ const
                         + [system_i386_freebsd]
                         + [system_i386_netbsd]
                         + [system_i386_wdosx]
-                        + [system_riscv32_linux,system_riscv64_linux];
+                        + [system_riscv32_linux,system_riscv64_linux]
+                        + [system_aarch64_linux];
+
 
   suppported_targets_x_smallr = systems_linux + systems_solaris + systems_android
                              + [system_i386_haiku,system_x86_64_haiku]

+ 131 - 37
compiler/x86/aoptx86.pas

@@ -1914,29 +1914,142 @@ unit aoptx86;
         { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
           overwrites the original destination register.  e.g.
 
-          movl   %reg1d,%reg2d
-          movslq %reg1d,%reg2q
+          movl   ###,%reg2d
+          movslq ###,%reg2q (### doesn't have to be the same as the first one)
 
-          In this case, we can remove the MOV
+          In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
         }
         if (taicpu(p).oper[1]^.typ = top_reg) and
           MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
-          { The RegInOp check makes sure that movb r/m,%reg1b; movzbl %reg1b,%reg1l"
-            and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
-            optimised }
           (taicpu(hp1).oper[1]^.typ = top_reg) and
-          not RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) and
           Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
-          begin
-            DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
-            { take care of the register (de)allocs following p }
-            UpdateUsedRegs(tai(p.next));
-            asml.remove(p);
-            p.free;
-            p:=hp1;
-            Result := True;
-            Exit;
-          end;
+            begin
+              if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
+                begin
+                  if (taicpu(hp1).oper[0]^.typ = top_reg) then
+                    case taicpu(p).oper[0]^.typ of
+                      top_const:
+                        { We have something like:
+
+                          movb   $x,   %regb
+                          movzbl %regb,%regd
+
+                          Change to:
+
+                          movl   $x,   %regd
+                        }
+                        begin
+                          case taicpu(hp1).opsize of
+                            S_BW:
+                              begin
+                                if (taicpu(hp1).opcode = A_MOVSX) and
+                                  (taicpu(p).oper[0]^.val > $7F) then
+                                  taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100; { Convert to signed }
+
+                                setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
+                                taicpu(p).opsize := S_W;
+                              end;
+                            S_BL:
+                              begin
+                                if (taicpu(hp1).opcode = A_MOVSX) and
+                                  (taicpu(p).oper[0]^.val > $7F) then
+                                  taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100; { Convert to signed }
+
+                                setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
+                                taicpu(p).opsize := S_L;
+                              end;
+                            S_WL:
+                              begin
+                                if (taicpu(hp1).opcode = A_MOVSX) and
+                                  (taicpu(p).oper[0]^.val > $7FFF) then
+                                  taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $10000; { Convert to signed }
+
+                                setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
+                                taicpu(p).opsize := S_L;
+                              end;
+{$ifdef x86_64}
+                            S_BQ:
+                              begin
+                                if (taicpu(hp1).opcode = A_MOVSX) and
+                                  (taicpu(p).oper[0]^.val > $7F) then
+                                  taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100; { Convert to signed }
+
+                                setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
+                                taicpu(p).opsize := S_Q;
+                              end;
+                            S_WQ:
+                              begin
+                                if (taicpu(hp1).opcode = A_MOVSX) and
+                                  (taicpu(p).oper[0]^.val > $7FFF) then
+                                  taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $10000; { Convert to signed }
+
+                                setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
+                                taicpu(p).opsize := S_Q;
+                              end;
+                            S_LQ:
+                              begin
+                                if (taicpu(hp1).opcode = A_MOVSXD) and { Note it's MOVSXD, not MOVSX }
+                                  (taicpu(p).oper[0]^.val > $7FFFFFFF) then
+                                  taicpu(p).oper[0]^.val := taicpu(p).oper[0]^.val - $100000000; { Convert to signed }
+
+                                setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
+                                taicpu(p).opsize := S_Q;
+                              end;
+{$endif x86_64}
+                            else
+                              { If hp1 was a MOV instruction, it should have been
+                                optimised already }
+                              InternalError(2020021001);
+                          end;
+                          DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
+                          asml.Remove(hp1);
+                          hp1.Free;
+                          Result := True;
+                          Exit;
+                        end;
+                      top_ref:
+                        { We have something like:
+
+                          movb   mem,  %regb
+                          movzbl %regb,%regd
+
+                          Change to:
+
+                          movzbl mem,  %regd
+                        }
+                        if IsMOVZXAcceptable or (taicpu(hp1).opcode <> A_MOVZX) then
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
+                            taicpu(hp1).loadref(0, taicpu(p).oper[0]^.ref^);
+                            { take care of the register (de)allocs following p }
+                            UpdateUsedRegs(tai(p.next));
+                            asml.remove(p);
+                            p.free;
+                            p:=hp1;
+                            Result := True;
+                            Exit;
+                          end;
+                      else
+                        if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
+                          { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
+                          Exit;
+                  end;
+                end
+             { The RegInOp check makes sure that "movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
+               and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
+               optimised }
+              else
+                begin
+                  DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
+                  { take care of the register (de)allocs following p }
+                  UpdateUsedRegs(tai(p.next));
+                  asml.remove(p);
+                  p.free;
+                  p:=hp1;
+                  Result := True;
+                  Exit;
+                end;
+            end;
 
         if (taicpu(hp1).opcode = A_AND) and
           (taicpu(p).oper[1]^.typ = top_reg) and
@@ -2339,27 +2452,8 @@ unit aoptx86;
                 Result:=true;
                 exit;
               end;
-            {
-              mov*  x,reg1
-              mov*  y,reg1
 
-              to
-
-              mov*  y,reg1
-            }
-            if (taicpu(p).oper[1]^.typ=top_reg) and
-              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
-              not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
-              begin
-                DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
-                { take care of the register (de)allocs following p }
-                UpdateUsedRegs(tai(p.next));
-                asml.remove(p);
-                p.free;
-                p:=hp1;
-                Result:=true;
-                exit;
-              end;
+              { mov x,reg1; mov y,reg1 -> mov y,reg1 is handled by the Mov2Nop 5 optimisation }
           end;
 
         { search further than the next instruction for a mov }