@@ -43,6 +43,8 @@ unit aoptx86;
      function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
      function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
    protected
+      class function IsMOVZXAcceptable: Boolean; static; inline;
+
      { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
      function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
      { checks whether reading the value in reg1 depends on the value of reg2. This
@@ -851,6 +853,25 @@ unit aoptx86;
      end;
{$endif DEBUG_AOPTCPU}

+    class function TX86AsmOptimizer.IsMOVZXAcceptable: Boolean; inline;
+      begin
+{$ifdef x86_64}
+        { Always fine on x86-64 }
+        Result := True;
+{$else x86_64}
+        Result :=
+{$ifdef i8086}
+          (current_settings.cputype >= cpu_386) and
+{$endif i8086}
+          (
+            { Always accept if optimising for size }
+            (cs_opt_size in current_settings.optimizerswitches) or
+            { From the Pentium II onwards, MOVZX only takes 1 cycle. [Kit] }
+            (current_settings.optimizecputype >= cpu_Pentium2)
+          );
+{$endif x86_64}
+      end;
+
    function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
      begin
        if not SuperRegistersEqual(reg1,reg2) then
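
For reviewers who want to sanity-check the gate above outside the compiler, here is a minimal stand-alone sketch of the same decision. TCpuType and the two parameters are hypothetical stand-ins for current_settings; the x86-64 branch (always True) and the i8086-only cpu_386 check are omitted:

program movzx_gate_sketch;

type
  { hypothetical stand-in for the compiler's CPU enumeration }
  TCpuType = (cpu_8086, cpu_386, cpu_Pentium, cpu_Pentium2, cpu_Core);

{ mirrors the 16/32-bit branch of IsMOVZXAcceptable: accept MOVZX when
  optimising for size, or when the target executes it in one cycle }
function MovzxAcceptable(OptForSize: Boolean; OptCpu: TCpuType): Boolean;
begin
  Result := OptForSize or (OptCpu >= cpu_Pentium2);
end;

begin
  WriteLn(MovzxAcceptable(False, cpu_Pentium));   { FALSE: costly before the PII }
  WriteLn(MovzxAcceptable(False, cpu_Pentium2));  { TRUE: 1-cycle MOVZX }
  WriteLn(MovzxAcceptable(True,  cpu_Pentium));   { TRUE: size optimisation }
end.
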
@@ -1813,17 +1834,13 @@ unit aoptx86;
                change it to a MOVZX instruction when optimising for speed.
              }
              if not (cs_opt_size in current_settings.optimizerswitches) and
-{$ifdef i8086}
-                { MOVZX was only introduced on the 386 }
-                (current_settings.cputype >= cpu_386) and
-{$endif i8086}
-                (
-                  (taicpu(hp1).opsize < taicpu(p).opsize)
+                IsMOVZXAcceptable and
+                (taicpu(hp1).opsize < taicpu(p).opsize)
{$ifdef x86_64}
-                  { operations already implicitly set the upper 64 bits to zero }
-                  and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
+                { operations already implicitly set the upper 64 bits to zero }
+                and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
{$endif x86_64}
-                ) then
+                then
                begin
                  CurrentReg := taicpu(hp1).oper[1]^.reg;

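
The x86_64 exclusion above leans on the architectural rule that a 32-bit register write clears bits 63..32 of the destination, so an S_L source paired with an S_Q destination needs no MOVZX. A stand-alone illustration of that rule, assuming an x86-64 System V target (argument in %rdi, result in %rax); it is not part of the patch:

program upper_bits_sketch;
{$asmmode att}

{ returns the low 32 bits of x: writing %eax implicitly zeroes the upper
  half of %rax, so no movzx or and is required }
function Lower32(x: QWord): QWord; assembler; nostackframe;
asm
  movl %edi, %eax
end;

begin
  WriteLn(Lower32($12345678CAFEBABE) = $CAFEBABE);  { prints TRUE }
end.
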
@@ -1933,7 +1950,8 @@ unit aoptx86;
                        ;
                    end;
                end
-              else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
+              else if IsMOVZXAcceptable and
+                (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
                (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
                (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                then
@@ -5124,103 +5142,133 @@ unit aoptx86;
                  ;
              end;
          end;
-        { changes some movzx constructs to faster synonims (all examples
+        { changes some movzx constructs to faster synonyms (all examples
          are given with eax/ax, but are also valid for other registers)}
        if (taicpu(p).oper[1]^.typ = top_reg) then
          if (taicpu(p).oper[0]^.typ = top_reg) then
-          case taicpu(p).opsize of
-            S_BW:
-              begin
-                if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
-                  not(cs_opt_size in current_settings.optimizerswitches) then
-                  {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
-                  begin
-                    taicpu(p).opcode := A_AND;
-                    taicpu(p).changeopsize(S_W);
-                    taicpu(p).loadConst(0,$ff);
-                    DebugMsg(SPeepholeOptimization + 'var7',p);
-                  end
-                else if GetNextInstruction(p, hp1) and
-                  (tai(hp1).typ = ait_instruction) and
-                  (taicpu(hp1).opcode = A_AND) and
-                  (taicpu(hp1).oper[0]^.typ = top_const) and
-                  (taicpu(hp1).oper[1]^.typ = top_reg) and
-                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-                  { Change "movzbw %reg1, %reg2; andw $const, %reg2"
-                    to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
-                  begin
-                    DebugMsg(SPeepholeOptimization + 'var8',p);
-                    taicpu(p).opcode := A_MOV;
-                    taicpu(p).changeopsize(S_W);
-                    setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
-                    taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
-                  end;
-              end;
-            S_BL:
-              begin
-                if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
-                  not(cs_opt_size in current_settings.optimizerswitches) then
-                  { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
-                  begin
-                    taicpu(p).opcode := A_AND;
-                    taicpu(p).changeopsize(S_L);
-                    taicpu(p).loadConst(0,$ff)
-                  end
-                else if GetNextInstruction(p, hp1) and
-                  (tai(hp1).typ = ait_instruction) and
-                  (taicpu(hp1).opcode = A_AND) and
-                  (taicpu(hp1).oper[0]^.typ = top_const) and
-                  (taicpu(hp1).oper[1]^.typ = top_reg) and
-                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-                  { Change "movzbl %reg1, %reg2; andl $const, %reg2"
-                    to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
-                  begin
-                    DebugMsg(SPeepholeOptimization + 'var10',p);
-                    taicpu(p).opcode := A_MOV;
-                    taicpu(p).changeopsize(S_L);
-                    { do not use R_SUBWHOLE
-                      as movl %rdx,%eax
-                      is invalid in assembler PM }
-                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
-                    taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
-                  end
-              end;
-{$ifndef i8086}
-            S_WL:
-              begin
-                if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
-                  not(cs_opt_size in current_settings.optimizerswitches) then
-                  { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
-                  begin
-                    DebugMsg(SPeepholeOptimization + 'var11',p);
-                    taicpu(p).opcode := A_AND;
-                    taicpu(p).changeopsize(S_L);
-                    taicpu(p).loadConst(0,$ffff);
-                  end
-                else if GetNextInstruction(p, hp1) and
-                  (tai(hp1).typ = ait_instruction) and
-                  (taicpu(hp1).opcode = A_AND) and
-                  (taicpu(hp1).oper[0]^.typ = top_const) and
-                  (taicpu(hp1).oper[1]^.typ = top_reg) and
-                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-                  { Change "movzwl %reg1, %reg2; andl $const, %reg2"
-                    to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
+          begin
+            case taicpu(p).opsize of
+              { Technically, movzbw %al,%ax cannot be encoded in 32/64-bit mode
+                (the machine code is equivalent to movzbl %al,%eax), but the
+                code generator still generates that assembler instruction and
+                it is silently converted. This should probably be checked.
+                [Kit] }
+              S_BW:
+                begin
+                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
+                    (
+                      not IsMOVZXAcceptable
+                      { and $0xff,%ax has a smaller encoding but risks a partial write penalty }
+                      or (
+                        (cs_opt_size in current_settings.optimizerswitches) and
+                        (taicpu(p).oper[1]^.reg = NR_AX)
+                      )
+                    ) then
+                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'var7',p);
+                      taicpu(p).opcode := A_AND;
+                      taicpu(p).changeopsize(S_W);
+                      taicpu(p).loadConst(0,$ff);
+                      Result := True;
+                    end
+                  else if not IsMOVZXAcceptable and
+                    GetNextInstruction(p, hp1) and
+                    (tai(hp1).typ = ait_instruction) and
+                    (taicpu(hp1).opcode = A_AND) and
+                    (taicpu(hp1).oper[0]^.typ = top_const) and
+                    (taicpu(hp1).oper[1]^.typ = top_reg) and
+                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
+                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'var8',p);
+                      taicpu(p).opcode := A_MOV;
+                      taicpu(p).changeopsize(S_W);
+                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
+                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
+                      Result := True;
+                    end;
+                end;
+{$ifndef i8086} { movzbl %al,%eax cannot be encoded in 16-bit mode (the machine code is equivalent to movzbw %al,%ax) }
+              S_BL:
+                begin
+                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
+                    (
+                      not IsMOVZXAcceptable
+                      { and $0xff,%eax has a smaller encoding but risks a partial write penalty }
+                      or (
+                        (cs_opt_size in current_settings.optimizerswitches) and
+                        (taicpu(p).oper[1]^.reg = NR_EAX)
+                      )
+                    ) then
+                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'var9',p);
+                      taicpu(p).opcode := A_AND;
+                      taicpu(p).changeopsize(S_L);
+                      taicpu(p).loadConst(0,$ff);
+                      Result := True;
+                    end
+                  else if not IsMOVZXAcceptable and
+                    GetNextInstruction(p, hp1) and
+                    (tai(hp1).typ = ait_instruction) and
+                    (taicpu(hp1).opcode = A_AND) and
+                    (taicpu(hp1).oper[0]^.typ = top_const) and
+                    (taicpu(hp1).oper[1]^.typ = top_reg) and
+                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
+                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'var10',p);
+                      taicpu(p).opcode := A_MOV;
+                      taicpu(p).changeopsize(S_L);
+                      { do not use R_SUBWHOLE
+                        as movl %rdx,%eax
+                        is invalid in assembler PM }
+                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
+                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
+                      Result := True;
+                    end;
+                end;
+{$endif i8086}
+              S_WL:
+                if not IsMOVZXAcceptable then
                  begin
-                    DebugMsg(SPeepholeOptimization + 'var12',p);
-                    taicpu(p).opcode := A_MOV;
-                    taicpu(p).changeopsize(S_L);
-                    { do not use R_SUBWHOLE
-                      as movl %rdx,%eax
-                      is invalid in assembler PM }
-                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
-                    taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
+                    if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) then
+                      { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
+                      begin
+                        DebugMsg(SPeepholeOptimization + 'var11',p);
+                        taicpu(p).opcode := A_AND;
+                        taicpu(p).changeopsize(S_L);
+                        taicpu(p).loadConst(0,$ffff);
+                        Result := True;
+                      end
+                    else if GetNextInstruction(p, hp1) and
+                      (tai(hp1).typ = ait_instruction) and
+                      (taicpu(hp1).opcode = A_AND) and
+                      (taicpu(hp1).oper[0]^.typ = top_const) and
+                      (taicpu(hp1).oper[1]^.typ = top_reg) and
+                      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
+                      { Change "movzwl %reg1, %reg2; andl $const, %reg2"
+                        to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
+                      begin
+                        DebugMsg(SPeepholeOptimization + 'var12',p);
+                        taicpu(p).opcode := A_MOV;
+                        taicpu(p).changeopsize(S_L);
+                        { do not use R_SUBWHOLE
+                          as movl %rdx,%eax
+                          is invalid in assembler PM }
+                        setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
+                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
+                        Result := True;
+                      end;
                  end;
-              end;
-{$endif i8086}
-            else
-              ;
+              else
+                InternalError(2017050705);
+            end;
          end
-        else if (taicpu(p).oper[0]^.typ = top_ref) then
+        else if not IsMOVZXAcceptable and (taicpu(p).oper[0]^.typ = top_ref) then
          begin
            if GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
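
All of the "faster synonym" rewrites above rest on the identity that zero-extending a sub-register into its own super-register leaves the same value as masking the super-register, which is why movzbl %al,%eax and andl $0xff,%eax (and the analogous byte/word pairs) are interchangeable. A tiny stand-alone check of the 8-to-32-bit case, illustrative only:

program movzx_vs_and_sketch;
var
  eax_like: LongWord;
begin
  eax_like := $12345678;
  { Byte() models movzbl %al,%eax; the mask models andl $0xff,%eax }
  WriteLn(LongWord(Byte(eax_like)) = (eax_like and $ff));  { prints TRUE }
end.
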
@@ -5248,31 +5296,10 @@ unit aoptx86;
                    taicpu(hp1).changeopsize(S_W);
                    taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                  end;
-{$ifdef x86_64}
-                S_BQ:
-                  begin
-                    DebugMsg(SPeepholeOptimization + 'var16',p);
-                    taicpu(hp1).changeopsize(S_Q);
-                    taicpu(hp1).loadConst(
-                      0, taicpu(hp1).oper[0]^.val and $ff);
-                  end;
-                S_WQ:
-                  begin
-                    DebugMsg(SPeepholeOptimization + 'var17',p);
-                    taicpu(hp1).changeopsize(S_Q);
-                    taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
-                  end;
-                S_LQ:
-                  begin
-                    DebugMsg(SPeepholeOptimization + 'var18',p);
-                    taicpu(hp1).changeopsize(S_Q);
-                    taicpu(hp1).loadConst(
-                      0, taicpu(hp1).oper[0]^.val and $ffffffff);
-                  end;
-{$endif x86_64}
                else
                  Internalerror(2017050704)
              end;
+              Result := True;
            end;
        end;
    end;