Browse Source

* patch by J. Gareth Moreton: EAX -> EDX:EAX sign extension shortcuts, and MOVSX shortcuts for AX register, part 1 of #36551

git-svn-id: trunk@43917 -
florian 5 years ago
parent
commit
236c11ef71
1 changed files with 289 additions and 3 deletions
  1. 289 3
      compiler/x86/aoptx86.pas

+ 289 - 3
compiler/x86/aoptx86.pas

@@ -3363,9 +3363,10 @@ unit aoptx86;
        end;
 
       var
-       hp1,hp2: tai;
-{$ifdef x86_64}
-       hp3: tai;
+       hp1,hp2,hp3: tai;
+{$ifndef x86_64}
+       hp4: tai;
+       OperIdx: Integer;
 {$endif x86_64}
       begin
         Result:=false;
@@ -3510,6 +3511,291 @@ unit aoptx86;
             Result:=true;
             exit;
           end
+        else if MatchOpType(taicpu(p),top_reg,top_reg) and
+          MatchInstruction(hp1, A_SAR, []) then { p has two register operands and hp1 is a SAR }
+          begin
+            if MatchOperand(taicpu(hp1).oper[0]^, 31) then { shift count 31: every bit of a 32-bit register becomes its sign bit }
+              begin
+                { the use of %edx also covers the opsize being S_L }
+                if MatchOperand(taicpu(hp1).oper[1]^, NR_EDX) then
+                  begin
+                    { Note it has to be specifically "movl %eax,%edx", and those specific sub-registers }
+                    if (taicpu(p).oper[0]^.reg = NR_EAX) and
+                      (taicpu(p).oper[1]^.reg = NR_EDX) then
+                      begin
+                        { Change:
+                            movl %eax,%edx
+                            sarl $31,%edx
+                          To:
+                            cltd
+
+                          Both forms leave %eax untouched and fill %edx with copies of
+                            the sign bit of %eax.  hp1 is removed from the list and freed,
+                            p itself is turned into the operand-less CDQ, and Result is
+                            set to True.
+                        }
+                        DebugMsg(SPeepholeOptimization + 'MovSar2Cltd', p);
+                        Asml.Remove(hp1);
+                        hp1.Free;
+                        taicpu(p).opcode := A_CDQ;
+                        taicpu(p).opsize := S_NO;
+                        taicpu(p).clearop(1);
+                        taicpu(p).clearop(0);
+                        taicpu(p).ops:=0;
+                        Result := True;
+                      end
+                    else if (cs_opt_size in current_settings.optimizerswitches) and
+                      (taicpu(p).oper[0]^.reg = NR_EDX) and
+                      (taicpu(p).oper[1]^.reg = NR_EAX) then
+                      begin
+                        { Change:
+                            movl %edx,%eax
+                            sarl $31,%edx
+                          To:
+                            movl %edx,%eax
+                            cltd
+
+                          Note that this creates a dependency between the two instructions,
+                            so only perform if optimising for size.
+
+                          p is left exactly as it is; only hp1 is rewritten into CDQ, and
+                            Result is not changed by this branch.
+                        }
+                        DebugMsg(SPeepholeOptimization + 'MovSar2MovCltd', p);
+                        taicpu(hp1).opcode := A_CDQ;
+                        taicpu(hp1).opsize := S_NO;
+                        taicpu(hp1).clearop(1);
+                        taicpu(hp1).clearop(0);
+                        taicpu(hp1).ops:=0;
+                      end;
+{$ifndef x86_64}
+                  end { of the "sar $31,%edx" case when not compiling for x86_64 }
+                { Don't bother if CMOV is supported, because a more optimal
+                  sequence would have been generated for the Abs() intrinsic }
+                else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
+                  { the use of %eax also covers the opsize being S_L }
+                  MatchOperand(taicpu(hp1).oper[1]^, NR_EAX) and
+                  (taicpu(p).oper[0]^.reg = NR_EAX) and
+                  (taicpu(p).oper[1]^.reg = NR_EDX) and
+                  GetNextInstruction(hp1, hp2) and
+                  MatchInstruction(hp2, A_XOR, [S_L]) and
+                  MatchOperand(taicpu(hp2).oper[0]^, NR_EAX) and
+                  MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) and
+
+                  GetNextInstruction(hp2, hp3) and
+                  MatchInstruction(hp3, A_SUB, [S_L]) and
+                  MatchOperand(taicpu(hp3).oper[0]^, NR_EAX) and
+                  MatchOperand(taicpu(hp3).oper[1]^, NR_EDX) then
+                  begin
+                    { Change:
+                        movl %eax,%edx
+                        sarl $31,%eax
+                        xorl %eax,%edx
+                        subl %eax,%edx
+                        (Instruction that uses %edx)
+                        (%eax deallocated)
+                        (%edx deallocated)
+                      To:
+                        cltd
+                        xorl %edx,%eax  <-- Note the registers have swapped
+                        subl %edx,%eax
+                        (Instruction that uses %eax) <-- %eax rather than %edx
+
+                      The rewrite is only performed when %eax is not used after hp3,
+                        and when the instruction after hp3 (hp4) neither modifies %edx
+                        nor is followed by a further use of %edx.
+
+                      NOTE(review): unlike the other CDQ conversions in this block, the
+                        opsize of p is not reset to S_NO and Result is not set to True
+                        here - confirm that both are intentional.
+                      NOTE(review): only operands of hp4 that are exactly NR_EDX (as a
+                        register operand, or as base/index of a reference) are renamed;
+                        sub-registers of %edx and implicit uses are not visibly handled,
+                        and hp4 is cast to taicpu without a visible type check - confirm
+                        that these cases cannot occur.
+                    }
+
+                    TransferUsedRegs(TmpUsedRegs);
+                    UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
+                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
+                    UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
+
+                    if not RegUsedAfterInstruction(NR_EAX, hp3, TmpUsedRegs) then
+                      begin
+                        if GetNextInstruction(hp3, hp4) and
+                          not RegModifiedByInstruction(NR_EDX, hp4) and
+                          not RegUsedAfterInstruction(NR_EDX, hp4, TmpUsedRegs) then
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'abs() intrinsic optimisation', p);
+
+                            taicpu(p).opcode := A_CDQ;
+                            taicpu(p).clearop(1);
+                            taicpu(p).clearop(0);
+                            taicpu(p).ops:=0;
+
+                            AsmL.Remove(hp1);
+                            hp1.Free;
+
+                            taicpu(hp2).loadreg(0, NR_EDX);
+                            taicpu(hp2).loadreg(1, NR_EAX);
+
+                            taicpu(hp3).loadreg(0, NR_EDX);
+                            taicpu(hp3).loadreg(1, NR_EAX);
+
+                            AllocRegBetween(NR_EAX, hp3, hp4, TmpUsedRegs);
+                            { Convert references in the following instruction (hp4) from %edx to %eax;
+                              register operands and the base/index of memory references are checked }
+                            for OperIdx := 0 to taicpu(hp4).ops - 1 do
+                              with taicpu(hp4).oper[OperIdx]^ do
+                                case typ of
+                                  top_reg:
+                                    if reg = NR_EDX then
+                                      reg := NR_EAX;
+                                  top_ref:
+                                    begin
+                                      if ref^.base = NR_EDX then
+                                        ref^.base := NR_EAX;
+                                      if ref^.index = NR_EDX then
+                                        ref^.index := NR_EAX;
+                                    end;
+                                  else
+                                    ; { any other operand type is left unchanged }
+                                end;
+                          end;
+                      end;
+{$else x86_64}
+                  end; { of the "sar $31,%edx" case when compiling for x86_64 }
+              end
+            else if MatchOperand(taicpu(hp1).oper[0]^, 63) and
+              { the use of %rdx also covers the opsize being S_Q }
+              MatchOperand(taicpu(hp1).oper[1]^, NR_RDX) then
+              begin
+                { Note it has to be specifically "movq %rax,%rdx", and those specific sub-registers }
+                if (taicpu(p).oper[0]^.reg = NR_RAX) and
+                  (taicpu(p).oper[1]^.reg = NR_RDX) then
+                  begin
+                    { Change:
+                        movq %rax,%rdx
+                        sarq $63,%rdx
+                      To:
+                        cqto
+
+                      64-bit counterpart of MovSar2Cltd: hp1 is removed and freed, p
+                        becomes the operand-less CQO, and Result is set to True.
+                    }
+                    DebugMsg(SPeepholeOptimization + 'MovSar2Cqto', p);
+                    Asml.Remove(hp1);
+                    hp1.Free;
+                    taicpu(p).opcode := A_CQO;
+                    taicpu(p).opsize := S_NO;
+                    taicpu(p).clearop(1);
+                    taicpu(p).clearop(0);
+                    taicpu(p).ops:=0;
+                    Result := True;
+                  end
+                else if (cs_opt_size in current_settings.optimizerswitches) and
+                  (taicpu(p).oper[0]^.reg = NR_RDX) and
+                  (taicpu(p).oper[1]^.reg = NR_RAX) then
+                  begin
+                    { Change:
+                        movq %rdx,%rax
+                        sarq $63,%rdx
+                      To:
+                        movq %rdx,%rax
+                        cqto
+
+                      Note that this creates a dependency between the two instructions,
+                        so only perform if optimising for size.
+
+                      p is left exactly as it is; only hp1 is rewritten into CQO, and
+                        Result is not changed by this branch.
+                    }
+                    DebugMsg(SPeepholeOptimization + 'MovSar2MovCqto', p);
+                    taicpu(hp1).opcode := A_CQO;
+                    taicpu(hp1).opsize := S_NO;
+                    taicpu(hp1).clearop(1);
+                    taicpu(hp1).clearop(0);
+                    taicpu(hp1).ops:=0;
+{$endif x86_64}
+                  end; { shared: closes the abs() block (non-x86_64) or the MovSar2MovCqto block (x86_64) }
+              end;
+          end
+        else if MatchInstruction(hp1, A_MOV, []) and
+          (taicpu(hp1).oper[1]^.typ = top_reg) then
+          { Though "GetNextInstruction" could be factored out, along with
+            the instructions that depend on hp2, it is an expensive call that
+            should be delayed for as long as possible, hence we do cheaper
+            checks first that are likely to be False. [Kit] }
+          begin
+
+            { p and hp1 must, between them, load the same value into both %eax
+              and %edx before %edx is shifted.  The destination of p therefore
+              has to be the *other* register of the pair from the destination
+              of hp1:
+
+                - p writes %edx, hp1 writes %eax from the same source or from %edx
+                - p writes %eax, hp1 writes %edx from the same source or from %eax
+
+              Requiring p to write %edx in both cases would also accept e.g.
+              "movl r/m,%edx; movl %eax,%edx; sarl $31,%edx", where %eax is never
+              written and %edx depends on the old %eax; rewriting that sequence to
+              "movl r/m,%eax; cltd" would clobber %eax and sign-extend the wrong
+              value.
+
+              NOTE(review): when the shared source is a memory reference whose
+              base or index is the register written by p, the second load reads a
+              different address than the first - confirm that such a sequence
+              cannot reach this point. }
+            if (
+                (
+                  MatchOperand(taicpu(p).oper[1]^, NR_EDX) and
+                  (taicpu(hp1).oper[1]^.reg = NR_EAX) and
+                  (
+                    MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+                    MatchOperand(taicpu(hp1).oper[0]^, NR_EDX)
+                  )
+                ) or
+                (
+                  MatchOperand(taicpu(p).oper[1]^, NR_EAX) and
+                  (taicpu(hp1).oper[1]^.reg = NR_EDX) and
+                  (
+                    MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+                    MatchOperand(taicpu(hp1).oper[0]^, NR_EAX)
+                  )
+                )
+              ) and
+              GetNextInstruction(hp1, hp2) and
+              MatchInstruction(hp2, A_SAR, []) and
+              MatchOperand(taicpu(hp2).oper[0]^, 31) then
+              begin
+                { the use of %edx also covers the opsize being S_L }
+                if MatchOperand(taicpu(hp2).oper[1]^, NR_EDX) then
+                  begin
+                    { Change:
+                        movl r/m,%edx         movl r/m,%eax         movl r/m,%edx         movl r/m,%eax
+                        movl %edx,%eax   or   movl %eax,%edx   or   movl r/m,%eax    or   movl r/m,%edx
+                        sarl $31,%edx         sarl $31,%edx         sarl $31,%edx         sarl $31,%edx
+                      To:
+                        movl r/m,%eax    <- Note the change in register
+                        cltd
+
+                      p keeps its source operand and always ends up writing %eax,
+                        hp1 becomes the operand-less CDQ and hp2 (the SAR) is removed.
+                    }
+                    DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCltd', p);
+
+                    AllocRegBetween(NR_EAX, p, hp1, UsedRegs);
+                    taicpu(p).loadreg(1, NR_EAX);
+
+                    taicpu(hp1).opcode := A_CDQ;
+                    { CDQ has no operands; reset the size as the other CDQ conversions do }
+                    taicpu(hp1).opsize := S_NO;
+                    taicpu(hp1).clearop(1);
+                    taicpu(hp1).clearop(0);
+                    taicpu(hp1).ops:=0;
+
+                    AsmL.Remove(hp2);
+                    hp2.Free;
+(*
+{$ifdef x86_64}
+                  end
+                else if MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) and
+                  { This code sequence does not get generated - however it might become useful
+                    if and when 128-bit signed integer types make an appearance, so the code
+                    is kept here for when it is eventually needed. [Kit]
+
+                    If this is ever revived, the destination of p has to be paired with
+                    the destination of hp1 (%rdx with %rax and vice versa) in the same
+                    way as in the 32-bit condition, and the enclosing test for a shift
+                    count of 31 has to be relaxed so that a count of 63 can reach it. }
+                  (
+                    (
+                      (taicpu(hp1).oper[1]^.reg = NR_RAX) and
+                      (
+                        MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+                        MatchOperand(taicpu(hp1).oper[0]^, NR_RDX)
+                      )
+                    ) or
+                    (
+                      (taicpu(hp1).oper[1]^.reg = NR_RDX) and
+                      (
+                        MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) or
+                        MatchOperand(taicpu(hp1).oper[0]^, NR_RAX)
+                      )
+                    )
+                  ) and
+                  GetNextInstruction(hp1, hp2) and
+                  MatchInstruction(hp2, A_SAR, [S_Q]) and
+                  MatchOperand(taicpu(hp2).oper[0]^, 63) and
+                  MatchOperand(taicpu(hp2).oper[1]^, NR_RDX) then
+                  begin
+                    { Change:
+                        movq r/m,%rdx         movq r/m,%rax         movq r/m,%rdx         movq r/m,%rax
+                        movq %rdx,%rax   or   movq %rax,%rdx   or   movq r/m,%rax    or   movq r/m,%rdx
+                        sarq $63,%rdx         sarq $63,%rdx         sarq $63,%rdx         sarq $63,%rdx
+                      To:
+                        movq r/m,%rax    <- Note the change in register
+                        cqto
+                    }
+                    DebugMsg(SPeepholeOptimization + 'MovMovSar2MovCqto', p);
+
+                    AllocRegBetween(NR_RAX, p, hp1, UsedRegs);
+                    taicpu(p).loadreg(1, NR_RAX);
+
+                    taicpu(hp1).opcode := A_CQO;
+                    taicpu(hp1).clearop(1);
+                    taicpu(hp1).clearop(0);
+                    taicpu(hp1).ops:=0;
+
+                    AsmL.Remove(hp2);
+                    hp2.Free;
+{$endif x86_64}
+*)
+                  end;
+              end;
+          end
         else if (taicpu(p).oper[0]^.typ = top_ref) and
           (hp1.typ = ait_instruction) and
           { while the GetNextInstruction(hp1,hp2) call could be factored out,