Browse Source

* patch by J. Gareth Moreton: processor-aware MOVZX optimisation cleanup, part one of #36687

git-svn-id: trunk@44153 -
florian 5 years ago
parent
commit
c6350e2903
1 changed files with 150 additions and 123 deletions
  1. 150 123
      compiler/x86/aoptx86.pas

+ 150 - 123
compiler/x86/aoptx86.pas

@@ -43,6 +43,8 @@ unit aoptx86;
         function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
       protected
+        class function IsMOVZXAcceptable: Boolean; static; inline;
+
         { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
         function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
         { checks whether reading the value in reg1 depends on the value of reg2. This
@@ -851,6 +853,25 @@ unit aoptx86;
       end;
 {$endif DEBUG_AOPTCPU}
 
+    class function TX86AsmOptimizer.IsMOVZXAcceptable: Boolean; inline;
+      begin { Returns True when the current target/optimiser settings make MOVZX an acceptable instruction choice }
+{$ifdef x86_64}
+        { Always fine on x86-64: no CPU-type or size/speed check is performed }
+        Result := True;
+{$else x86_64}
+        Result :=
+{$ifdef i8086}
+          (current_settings.cputype >= cpu_386) and { MOVZX was only introduced on the 386 }
+{$endif i8086}
+          (
+            { Always accept if optimising for size }
+            (cs_opt_size in current_settings.optimizerswitches) or
+            { From the Pentium II onwards, MOVZX only takes 1 cycle. [Kit] }
+            (current_settings.optimizecputype >= cpu_Pentium2)
+          );
+{$endif x86_64}
+      end;
+
     function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
       begin
         if not SuperRegistersEqual(reg1,reg2) then
@@ -1813,17 +1834,13 @@ unit aoptx86;
                       change it to a MOVZX instruction when optimising for speed.
                     }
                     if not (cs_opt_size in current_settings.optimizerswitches) and
-{$ifdef i8086}
-                      { MOVZX was only introduced on the 386 }
-                      (current_settings.cputype >= cpu_386) and
-{$endif i8086}
-                      (
-                        (taicpu(hp1).opsize < taicpu(p).opsize)
+                      IsMOVZXAcceptable and
+                      (taicpu(hp1).opsize < taicpu(p).opsize)
 {$ifdef x86_64}
-                        { operations already implicitly set the upper 64 bits to zero }
-                        and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
+                      { operations already implicitly set the upper 64 bits to zero }
+                      and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
 {$endif x86_64}
-                      ) then
+                      then
                       begin
                         CurrentReg := taicpu(hp1).oper[1]^.reg;
 
@@ -1933,7 +1950,8 @@ unit aoptx86;
                     ;
                 end;
               end
-            else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
+            else if IsMOVZXAcceptable and
+              (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
               (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
               then
@@ -5124,103 +5142,133 @@ unit aoptx86;
                     ;
                 end;
               end;
-            { changes some movzx constructs to faster synonims (all examples
+            { changes some movzx constructs to faster synonyms (all examples
               are given with eax/ax, but are also valid for other registers)}
             if (taicpu(p).oper[1]^.typ = top_reg) then
               if (taicpu(p).oper[0]^.typ = top_reg) then
-                case taicpu(p).opsize of
-                  S_BW:
-                    begin
-                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
-                         not(cs_opt_size in current_settings.optimizerswitches) then
-                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
-                        begin
-                          taicpu(p).opcode := A_AND;
-                          taicpu(p).changeopsize(S_W);
-                          taicpu(p).loadConst(0,$ff);
-                          DebugMsg(SPeepholeOptimization + 'var7',p);
-                        end
-                      else if GetNextInstruction(p, hp1) and
-                        (tai(hp1).typ = ait_instruction) and
-                        (taicpu(hp1).opcode = A_AND) and
-                        (taicpu(hp1).oper[0]^.typ = top_const) and
-                        (taicpu(hp1).oper[1]^.typ = top_reg) and
-                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-                      { Change "movzbw %reg1, %reg2; andw $const, %reg2"
-                        to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
-                        begin
-                          DebugMsg(SPeepholeOptimization + 'var8',p);
-                          taicpu(p).opcode := A_MOV;
-                          taicpu(p).changeopsize(S_W);
-                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
-                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
-                        end;
-                    end;
-                  S_BL:
-                    begin
-                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
-                         not(cs_opt_size in current_settings.optimizerswitches) then
-                        { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
-                        begin
-                          taicpu(p).opcode := A_AND;
-                          taicpu(p).changeopsize(S_L);
-                          taicpu(p).loadConst(0,$ff)
-                        end
-                      else if GetNextInstruction(p, hp1) and
-                        (tai(hp1).typ = ait_instruction) and
-                        (taicpu(hp1).opcode = A_AND) and
-                        (taicpu(hp1).oper[0]^.typ = top_const) and
-                        (taicpu(hp1).oper[1]^.typ = top_reg) and
-                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-                        { Change "movzbl %reg1, %reg2; andl $const, %reg2"
-                          to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
-                        begin
-                          DebugMsg(SPeepholeOptimization + 'var10',p);
-                          taicpu(p).opcode := A_MOV;
-                          taicpu(p).changeopsize(S_L);
-                          { do not use R_SUBWHOLE
-                            as movl %rdx,%eax
-                            is invalid in assembler PM }
-                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
-                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
-                        end
-                    end;
-{$ifndef i8086}
-                  S_WL:
-                    begin
-                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
-                        not(cs_opt_size in current_settings.optimizerswitches) then
-                        { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
-                        begin
-                          DebugMsg(SPeepholeOptimization + 'var11',p);
-                          taicpu(p).opcode := A_AND;
-                          taicpu(p).changeopsize(S_L);
-                          taicpu(p).loadConst(0,$ffff);
-                        end
-                      else if GetNextInstruction(p, hp1) and
-                        (tai(hp1).typ = ait_instruction) and
-                        (taicpu(hp1).opcode = A_AND) and
-                        (taicpu(hp1).oper[0]^.typ = top_const) and
-                        (taicpu(hp1).oper[1]^.typ = top_reg) and
-                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-                        { Change "movzwl %reg1, %reg2; andl $const, %reg2"
-                          to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
+                begin
+                  case taicpu(p).opsize of
+                    { Technically, movzbw %al,%ax cannot be encoded in 32/64-bit mode
+                      (the machine code is equivalent to movzbl %al,%eax), but the
+                      code generator still generates that assembler instruction and
+                      it is silently converted.  This should probably be checked.
+                      [Kit] }
+                    S_BW:
+                      begin
+                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
+                          (
+                            not IsMOVZXAcceptable
+                            { and $0xff,%ax has a smaller encoding but risks a partial write penalty }
+                            or (
+                              (cs_opt_size in current_settings.optimizerswitches) and
+                              (taicpu(p).oper[1]^.reg = NR_AX)
+                            )
+                          ) then
+                          {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'var7',p);
+                            taicpu(p).opcode := A_AND;
+                            taicpu(p).changeopsize(S_W);
+                            taicpu(p).loadConst(0,$ff);
+                            Result := True;
+                          end
+                        else if not IsMOVZXAcceptable and
+                          GetNextInstruction(p, hp1) and
+                          (tai(hp1).typ = ait_instruction) and
+                          (taicpu(hp1).opcode = A_AND) and
+                          (taicpu(hp1).oper[0]^.typ = top_const) and
+                          (taicpu(hp1).oper[1]^.typ = top_reg) and
+                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+                        { Change "movzbw %reg1, %reg2; andw $const, %reg2"
+                          to "movw %reg1, %reg2; andw $(const and $ff), %reg2"}
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'var8',p);
+                            taicpu(p).opcode := A_MOV;
+                            taicpu(p).changeopsize(S_W);
+                            setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
+                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
+                            Result := True;
+                          end;
+                      end;
+{$ifndef i8086} { movzbl %al,%eax cannot be encoded in 16-bit mode (the machine code is equivalent to movzbw %al,%ax) }
+                    S_BL:
+                      begin
+                        if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
+                          (
+                            not IsMOVZXAcceptable
+                            { and $0xff,%eax has a smaller encoding but risks a partial write penalty }
+                            or (
+                              (cs_opt_size in current_settings.optimizerswitches) and
+                              (taicpu(p).oper[1]^.reg = NR_EAX)
+                            )
+                          ) then
+                          { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'var9',p);
+                            taicpu(p).opcode := A_AND;
+                            taicpu(p).changeopsize(S_L);
+                            taicpu(p).loadConst(0,$ff);
+                            Result := True;
+                          end
+                        else if not IsMOVZXAcceptable and
+                          GetNextInstruction(p, hp1) and
+                          (tai(hp1).typ = ait_instruction) and
+                          (taicpu(hp1).opcode = A_AND) and
+                          (taicpu(hp1).oper[0]^.typ = top_const) and
+                          (taicpu(hp1).oper[1]^.typ = top_reg) and
+                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+                          { Change "movzbl %reg1, %reg2; andl $const, %reg2"
+                            to "movl %reg1, %reg2; andl $(const and $ff), %reg2"}
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'var10',p);
+                            taicpu(p).opcode := A_MOV;
+                            taicpu(p).changeopsize(S_L);
+                            { do not use R_SUBWHOLE
+                              as movl %rdx,%eax
+                              is invalid in assembler PM }
+                            setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
+                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
+                            Result := True;
+                          end;
+                      end;
+{$endif i8086}
+                    S_WL:
+                      if not IsMOVZXAcceptable then
                         begin
-                          DebugMsg(SPeepholeOptimization + 'var12',p);
-                          taicpu(p).opcode := A_MOV;
-                          taicpu(p).changeopsize(S_L);
-                          { do not use R_SUBWHOLE
-                            as movl %rdx,%eax
-                            is invalid in assembler PM }
-                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
-                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
+                          if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
+                            { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
+                            begin
+                              DebugMsg(SPeepholeOptimization + 'var11',p);
+                              taicpu(p).opcode := A_AND;
+                              taicpu(p).changeopsize(S_L);
+                              taicpu(p).loadConst(0,$ffff);
+                              Result := True;
+                            end
+                          else if GetNextInstruction(p, hp1) and
+                            (tai(hp1).typ = ait_instruction) and
+                            (taicpu(hp1).opcode = A_AND) and
+                            (taicpu(hp1).oper[0]^.typ = top_const) and
+                            (taicpu(hp1).oper[1]^.typ = top_reg) and
+                            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+                            { Change "movzwl %reg1, %reg2; andl $const, %reg2"
+                              to "movl %reg1, %reg2; andl $(const and $ffff), %reg2"}
+                            begin
+                              DebugMsg(SPeepholeOptimization + 'var12',p);
+                              taicpu(p).opcode := A_MOV;
+                              taicpu(p).changeopsize(S_L);
+                              { do not use R_SUBWHOLE
+                                as movl %rdx,%eax
+                                is invalid in assembler PM }
+                              setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
+                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
+                              Result := True;
+                            end;
                         end;
-                    end;
-{$endif i8086}
-                  else
-                    ;
+                    else
+                      InternalError(2017050705);
+                  end;
                 end
-              else if (taicpu(p).oper[0]^.typ = top_ref) then
+              else if not IsMOVZXAcceptable and (taicpu(p).oper[0]^.typ = top_ref) then
                   begin
                     if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
@@ -5248,31 +5296,10 @@ unit aoptx86;
                               taicpu(hp1).changeopsize(S_W);
                               taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                             end;
-{$ifdef x86_64}
-                          S_BQ:
-                            begin
-                              DebugMsg(SPeepholeOptimization + 'var16',p);
-                              taicpu(hp1).changeopsize(S_Q);
-                              taicpu(hp1).loadConst(
-                                0, taicpu(hp1).oper[0]^.val and $ff);
-                            end;
-                          S_WQ:
-                            begin
-                              DebugMsg(SPeepholeOptimization + 'var17',p);
-                              taicpu(hp1).changeopsize(S_Q);
-                              taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
-                            end;
-                          S_LQ:
-                            begin
-                              DebugMsg(SPeepholeOptimization + 'var18',p);
-                              taicpu(hp1).changeopsize(S_Q);
-                              taicpu(hp1).loadConst(
-                                0, taicpu(hp1).oper[0]^.val and $ffffffff);
-                            end;
-{$endif x86_64}
                           else
                             Internalerror(2017050704)
                         end;
+                        Result := True;
                       end;
                   end;
           end;