19 years ago · 588cccb3ca
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@@ -81,6 +81,7 @@ interface
 
				        topcg =
			
 
				        (
			
 
				           OP_NONE,
			
 
				+          OP_MOVE,      { replaced operation with direct load }
			
 
				           OP_ADD,       { simple addition          }
			
 
				           OP_AND,       { simple logical and       }
			
 
				           OP_DIV,       { simple unsigned division }
			
@@ -593,7 +594,7 @@ implementation
 
				     function commutativeop(op: topcg): boolean;{$ifdef USEINLINE}inline;{$endif}
			
 
				       const
			
 
				         list: array[topcg] of boolean =
			
 
				-          (true,true,true,false,false,true,true,false,false,
			
 
				+          (true,false,true,true,false,false,true,true,false,false,
			
 
				            true,false,false,false,false,true);
			
 
				       begin
			
 
				         commutativeop := list[op];
			
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@@ -285,18 +285,16 @@ unit cgobj;
 
				           procedure g_flags2ref(list: taasmoutput; size: TCgSize; const f: tresflags; const ref:TReference); virtual;
			
 
				 
			
 
				           {
			
 
				-             This routine tries to optimize the const_reg opcode, and should be
			
 
				-             called at the start of a_op_const_reg. It returns the actual opcode
			
 
				-             to emit, and the constant value to emit. If this routine returns
			
 
				-             TRUE, @var(no) instruction should be emitted (.eg : imul reg by 1 )
			
 
				+             This routine tries to optimize the op_const_reg/ref opcode, and should be
			
 
				+             called at the start of a_op_const_reg/ref. It returns the actual opcode
			
 
				+             to emit, and the constant value to emit. This routine can set op to OP_NONE to
			
 
				+             remove the opcode and OP_MOVE to replace it with a simple load
			
 
				 
			
 
				              @param(op The opcode to emit, returns the opcode which must be emitted)
			
 
				              @param(a  The constant which should be emitted, returns the constant which must
			
 
				                     be emitted)
			
 
				-             @param(reg The register to emit the opcode with, returns the register with
			
 
				-                   which the opcode will be emitted)
			
 
				           }
			
 
				-          function optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg: tregister): boolean;virtual;
			
 
				+          procedure optimize_op_const(var op: topcg; var a : aint);virtual;
			
 
				 
			
 
				          {#
			
 
				              This routine is used in exception management nodes. It should
			
@@ -936,55 +934,69 @@ implementation
 
				       end;
			
 
				 
			
 
				 
			
 
				-    function tcg.optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg:tregister): boolean;
			
 
				+    procedure tcg.optimize_op_const(var op: topcg; var a : aint);
			
 
				       var
			
 
				         powerval : longint;
			
 
				       begin
			
 
				-        optimize_op_const_reg := false;
			
 
				         case op of
			
 
				-          { or with zero returns same result }
			
 
				-          OP_OR : if a = 0 then optimize_op_const_reg := true;
			
 
				-          { and with max returns same result }
			
 
				-          OP_AND : if (a = high(a)) then optimize_op_const_reg := true;
			
 
				-          { division by 1 returns result }
			
 
				+          OP_OR :
			
 
				+            begin
			
 
				+              { or with zero returns same result }
			
 
				+              if a = 0 then
			
 
				+                op:=OP_NONE
			
 
				+              else
			
 
				+              { or with max returns max }
			
 
				+                if a = -1 then
			
 
				+                  op:=OP_MOVE;
			
 
				+            end;
			
 
				+          OP_AND :
			
 
				+            begin
			
 
				+              { and with max returns same result }
			
 
				+              if (a = -1) then
			
 
				+                op:=OP_NONE
			
 
				+              else
			
 
				+              { and with 0 returns 0 }
			
 
				+                if a=0 then
			
 
				+                  op:=OP_MOVE;
			
 
				+            end;
			
 
				           OP_DIV :
			
 
				             begin
			
 
				+              { division by 1 returns result }
			
 
				               if a = 1 then
			
 
				-                optimize_op_const_reg := true
			
 
				+                op:=OP_NONE
			
 
				               else if ispowerof2(int64(a), powerval) then
			
 
				                 begin
			
 
				                   a := powerval;
			
 
				                   op:= OP_SHR;
			
 
				                 end;
			
 
				-              exit;
			
 
				             end;
			
 
				           OP_IDIV:
			
 
				             begin
			
 
				               if a = 1 then
			
 
				-                optimize_op_const_reg := true
			
 
				-              else if ispowerof2(int64(a), powerval) then
			
 
				-                begin
			
 
				-                  a := powerval;
			
 
				-                  op:= OP_SAR;
			
 
				-                end;
			
 
				-               exit;
			
 
				+                op:=OP_NONE;
			
 
				             end;
			
 
				-        OP_MUL,OP_IMUL:
			
 
				+         OP_MUL,OP_IMUL:
			
 
				             begin
			
 
				                if a = 1 then
			
 
				-                  optimize_op_const_reg := true
			
 
				+                 op:=OP_NONE
			
 
				+               else
			
 
				+                 if a=0 then
			
 
				+                   op:=OP_MOVE
			
 
				                else if ispowerof2(int64(a), powerval) then
			
 
				                  begin
			
 
				                    a := powerval;
			
 
				                    op:= OP_SHL;
			
 
				                  end;
			
 
				-               exit;
			
 
				+            end;
			
 
				+        OP_ADD,OP_SUB:
			
 
				+            begin
			
 
				+               if a = 0 then
			
 
				+                 op:=OP_NONE;
			
 
				             end;
			
 
				         OP_SAR,OP_SHL,OP_SHR:
			
 
				            begin
			
 
				               if a = 0 then
			
 
				-                 optimize_op_const_reg := true;
			
 
				-              exit;
			
 
				+                op:=OP_NONE;
			
 
				            end;
			
 
				         end;
			
 
				       end;
			
@@ -1729,7 +1741,7 @@ implementation
 
				 {$endif}
			
 
				                 if to_signed then
			
 
				                   begin
			
 
				-                    { calculation of the low/high ranges must not overflow 64 bit 
			
 
				+                    { calculation of the low/high ranges must not overflow 64 bit
			
 
				                      otherwise we end up comparing with zero for 64 bit data types on
			
 
				                      64 bit processors }
			
 
				                     if (lto = (int64(-1) << (tosize * 8 - 1))) and
			
@@ -1738,7 +1750,7 @@ implementation
 
				                   end
			
 
				                 else
			
 
				                   begin
			
 
				-                    { calculation of the low/high ranges must not overflow 64 bit 
			
 
				+                    { calculation of the low/high ranges must not overflow 64 bit
			
 
				                      otherwise we end up having all zeros for 64 bit data types on
			
 
				                      64 bit processors }
			
 
				                     if (lto = 0) and
			
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@@ -349,6 +349,8 @@ interface
 
				         hl4 : tasmlabel;
			
 
				 
			
 
				     begin
			
 
				+      pass_left_right;
			
 
				+
			
 
				       {The location.register will be filled in later (JM)}
			
 
				       location_reset(location,LOC_REGISTER,OS_INT);
			
 
				       {Get a temp register and load the left value into it
			
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@@ -33,9 +33,8 @@ interface
 
				          procedure pass_2;override;
			
 
				       end;
			
 
				 
			
 
				-      ti386shlshrnode = class(tshlshrnode)
			
 
				-         procedure pass_2;override;
			
 
				-         { everything will be handled in pass_2 }
			
 
				+      ti386shlshrnode = class(tcgshlshrnode)
			
 
				+         procedure second_64bit;override;
			
 
				          function first_shlshr64bitint: tnode; override;
			
 
				       end;
			
 
				 
			
@@ -165,7 +164,7 @@ implementation
 
				                         m_high:=m_high shr 1;
			
 
				                         dec(l);
			
 
				                       end;
			
 
				-                    m:=m_high;
			
 
				+                    m:=dword(m_high);
			
 
				                     s:=l;
			
 
				                     if (m_high shr 31)<>0 then
			
 
				                       a:=1
			
@@ -223,7 +222,7 @@ implementation
 
				                     d:=tordconstnode(right).value;
			
 
				                     if d>=$80000000 then
			
 
				                       begin
			
 
				-                        emit_const_reg(A_CMP,S_L,d,hreg1);
			
 
				+                        emit_const_reg(A_CMP,S_L,aint(d),hreg1);
			
 
				                         location.register:=cg.getintregister(exprasmlist,OS_INT);
			
 
				                         emit_const_reg(A_MOV,S_L,0,location.register);
			
 
				                         emit_const_reg(A_SBB,S_L,-1,location.register);
			
@@ -359,147 +358,111 @@ implementation
 
				 
			
 
				 
			
 
				     function ti386shlshrnode.first_shlshr64bitint: tnode;
			
 
				+      begin
			
 
				+        result := nil;
			
 
				+      end;
			
 
				 
			
 
				-    begin
			
 
				-      result := nil;
			
 
				-    end;
			
 
				-
			
 
				-    procedure ti386shlshrnode.pass_2;
			
 
				-
			
 
				-    var hreg64hi,hreg64lo:Tregister;
			
 
				-        op:Tasmop;
			
 
				+    procedure ti386shlshrnode.second_64bit;
			
 
				+      var
			
 
				+        hreg64hi,hreg64lo:Tregister;
			
 
				         v : TConstExprInt;
			
 
				         l1,l2,l3:Tasmlabel;
			
 
				+      begin
			
 
				+        location_reset(location,LOC_REGISTER,OS_64);
			
 
				+
			
 
				+        { load left operand in a register }
			
 
				+        location_force_reg(exprasmlist,left.location,OS_64,false);
			
 
				+        hreg64hi:=left.location.register64.reghi;
			
 
				+        hreg64lo:=left.location.register64.reglo;
			
 
				 
			
 
				-    begin
			
 
				-      secondpass(left);
			
 
				-      secondpass(right);
			
 
				-
			
 
				-      { determine operator }
			
 
				-      if nodetype=shln then
			
 
				-        op:=A_SHL
			
 
				-      else
			
 
				-        op:=A_SHR;
			
 
				-
			
 
				-      if is_64bitint(left.resulttype.def) then
			
 
				-        begin
			
 
				-          location_reset(location,LOC_REGISTER,OS_64);
			
 
				-
			
 
				-          { load left operator in a register }
			
 
				-          location_force_reg(exprasmlist,left.location,OS_64,false);
			
 
				-          hreg64hi:=left.location.register64.reghi;
			
 
				-          hreg64lo:=left.location.register64.reglo;
			
 
				-
			
 
				-          { shifting by a constant directly coded: }
			
 
				-          if (right.nodetype=ordconstn) then
			
 
				-            begin
			
 
				-              v:=Tordconstnode(right).value and 63;
			
 
				-              if v>31 then
			
 
				-                begin
			
 
				-                  if nodetype=shln then
			
 
				-                    begin
			
 
				-                      emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
			
 
				-                      if ((v and 31) <> 0) then
			
 
				-                        emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
			
 
				-                    end
			
 
				-                  else
			
 
				-                    begin
			
 
				-                      emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
			
 
				-                      if ((v and 31) <> 0) then
			
 
				-                        emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
			
 
				-                    end;
			
 
				-                  location.register64.reghi:=hreg64lo;
			
 
				-                  location.register64.reglo:=hreg64hi;
			
 
				-                end
			
 
				-              else
			
 
				-                begin
			
 
				-                  if nodetype=shln then
			
 
				-                    begin
			
 
				-                      emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
			
 
				+        { shifting by a constant directly coded: }
			
 
				+        if (right.nodetype=ordconstn) then
			
 
				+          begin
			
 
				+            v:=Tordconstnode(right).value and 63;
			
 
				+            if v>31 then
			
 
				+              begin
			
 
				+                if nodetype=shln then
			
 
				+                  begin
			
 
				+                    emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
			
 
				+                    if ((v and 31) <> 0) then
			
 
				                       emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
			
 
				-                    end
			
 
				-                  else
			
 
				-                    begin
			
 
				-                      emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
			
 
				+                  end
			
 
				+                else
			
 
				+                  begin
			
 
				+                    emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
			
 
				+                    if ((v and 31) <> 0) then
			
 
				                       emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
			
 
				-                    end;
			
 
				-                  location.register64.reglo:=hreg64lo;
			
 
				-                  location.register64.reghi:=hreg64hi;
			
 
				-                end;
			
 
				-            end
			
 
				-          else
			
 
				-            begin
			
 
				-              { load right operators in a register }
			
 
				-              cg.getcpuregister(exprasmlist,NR_ECX);
			
 
				-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
			
 
				-
			
 
				-              { left operator is already in a register }
			
 
				-              { hence are both in a register }
			
 
				-              { is it in the case ECX ? }
			
 
				-
			
 
				-              { the damned shift instructions work only til a count of 32 }
			
 
				-              { so we've to do some tricks here                           }
			
 
				-              objectlibrary.getjumplabel(l1);
			
 
				-              objectlibrary.getjumplabel(l2);
			
 
				-              objectlibrary.getjumplabel(l3);
			
 
				-              emit_const_reg(A_CMP,S_L,64,NR_ECX);
			
 
				-              cg.a_jmp_flags(exprasmlist,F_L,l1);
			
 
				-              emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
			
 
				-              emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
			
 
				-              cg.a_jmp_always(exprasmlist,l3);
			
 
				-              cg.a_label(exprasmlist,l1);
			
 
				-              emit_const_reg(A_CMP,S_L,32,NR_ECX);
			
 
				-              cg.a_jmp_flags(exprasmlist,F_L,l2);
			
 
				-              emit_const_reg(A_SUB,S_L,32,NR_ECX);
			
 
				-              if nodetype=shln then
			
 
				-                begin
			
 
				-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
			
 
				-                  emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
			
 
				-                  emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
			
 
				-                  cg.a_jmp_always(exprasmlist,l3);
			
 
				-                  cg.a_label(exprasmlist,l2);
			
 
				-                  emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
			
 
				-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
			
 
				-                end
			
 
				-              else
			
 
				-                begin
			
 
				-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
			
 
				-                  emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
			
 
				-                  emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
			
 
				-                  cg.a_jmp_always(exprasmlist,l3);
			
 
				-                  cg.a_label(exprasmlist,l2);
			
 
				-                  emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
			
 
				-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
			
 
				-                end;
			
 
				-              cg.a_label(exprasmlist,l3);
			
 
				-
			
 
				-              cg.ungetcpuregister(exprasmlist,NR_ECX);
			
 
				-              location.register64.reglo:=hreg64lo;
			
 
				-              location.register64.reghi:=hreg64hi;
			
 
				-            end;
			
 
				-        end
			
 
				-      else
			
 
				-        begin
			
 
				-          { load left operators in a register }
			
 
				-          location_copy(location,left.location);
			
 
				-          location_force_reg(exprasmlist,location,OS_INT,false);
			
 
				-
			
 
				-          { shifting by a constant directly coded: }
			
 
				-          if (right.nodetype=ordconstn) then
			
 
				-            { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)}
			
 
				-            emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
			
 
				-          else
			
 
				-            begin
			
 
				-              { load right operators in a ECX }
			
 
				-              cg.getcpuregister(exprasmlist,NR_ECX);
			
 
				-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
			
 
				-
			
 
				-              { right operand is in ECX }
			
 
				-              cg.ungetcpuregister(exprasmlist,NR_ECX);
			
 
				-              emit_reg_reg(op,S_L,NR_CL,location.register);
			
 
				-            end;
			
 
				-        end;
			
 
				-    end;
			
 
				+                  end;
			
 
				+                location.register64.reghi:=hreg64lo;
			
 
				+                location.register64.reglo:=hreg64hi;
			
 
				+              end
			
 
				+            else
			
 
				+              begin
			
 
				+                if nodetype=shln then
			
 
				+                  begin
			
 
				+                    emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
			
 
				+                    emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
			
 
				+                  end
			
 
				+                else
			
 
				+                  begin
			
 
				+                    emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
			
 
				+                    emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
			
 
				+                  end;
			
 
				+                location.register64.reglo:=hreg64lo;
			
 
				+                location.register64.reghi:=hreg64hi;
			
 
				+              end;
			
 
				+          end
			
 
				+        else
			
 
				+          begin
			
 
				+            { load right operand in a register }
			
 
				+            cg.getcpuregister(exprasmlist,NR_ECX);
			
 
				+            cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
			
 
				+
			
 
				+            { left operator is already in a register }
			
 
				+            { hence are both in a register }
			
 
				+            { is it in the case ECX ? }
			
 
				+
			
 
				+            { the damned shift instructions work only til a count of 32 }
			
 
				+            { so we've to do some tricks here                           }
			
 
				+            objectlibrary.getjumplabel(l1);
			
 
				+            objectlibrary.getjumplabel(l2);
			
 
				+            objectlibrary.getjumplabel(l3);
			
 
				+            emit_const_reg(A_CMP,S_L,64,NR_ECX);
			
 
				+            cg.a_jmp_flags(exprasmlist,F_L,l1);
			
 
				+            emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
			
 
				+            emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
			
 
				+            cg.a_jmp_always(exprasmlist,l3);
			
 
				+            cg.a_label(exprasmlist,l1);
			
 
				+            emit_const_reg(A_CMP,S_L,32,NR_ECX);
			
 
				+            cg.a_jmp_flags(exprasmlist,F_L,l2);
			
 
				+            emit_const_reg(A_SUB,S_L,32,NR_ECX);
			
 
				+            if nodetype=shln then
			
 
				+              begin
			
 
				+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
			
 
				+                emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
			
 
				+                emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
			
 
				+                cg.a_jmp_always(exprasmlist,l3);
			
 
				+                cg.a_label(exprasmlist,l2);
			
 
				+                emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
			
 
				+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
			
 
				+              end
			
 
				+            else
			
 
				+              begin
			
 
				+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
			
 
				+                emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
			
 
				+                emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
			
 
				+                cg.a_jmp_always(exprasmlist,l3);
			
 
				+                cg.a_label(exprasmlist,l2);
			
 
				+                emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
			
 
				+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
			
 
				+              end;
			
 
				+            cg.a_label(exprasmlist,l3);
			
 
				+
			
 
				+            cg.ungetcpuregister(exprasmlist,NR_ECX);
			
 
				+            location.register64.reglo:=hreg64lo;
			
 
				+            location.register64.reghi:=hreg64hi;
			
 
				+          end;
			
 
				+      end;
			
 
				 
			
 
				 
			
 
				 begin
			
--- a/compiler/m68k/cgcpu.pas
+++ b/compiler/m68k/cgcpu.pas
@@ -404,11 +404,13 @@ unit cgcpu;
 
				        opcode : tasmop;
			
 
				        r,r2 : Tregister;
			
 
				       begin
			
 
				-        { need to emit opcode? }
			
 
				-        if optimize_op_const_reg(list, op, a, reg) then
			
 
				-           exit;
			
 
				+        optimize_op_const(op, a);
			
 
				         opcode := topcg2tasmop[op];
			
 
				         case op of
			
 
				+          OP_NONE :
			
 
				+              begin
			
 
				+                { Opcode is optimized away }
			
 
				+              end;
			
 
				           OP_ADD :
			
 
				               begin
			
 
				                 if (a >= 1) and (a <= 8) then
			
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@@ -459,8 +459,7 @@ interface
 
				         ovloc.loc:=LOC_VOID;
			
 
				 
			
 
				         pass_left_right;
			
 
				-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
			
 
				-                                   (nodetype in [addn,subn]));
			
 
				+        force_reg_left_right(false,true);
			
 
				         set_result_location_reg;
			
 
				 
			
 
				         { assume no overflow checking is required }
			
@@ -634,8 +633,7 @@ interface
 
				         ovloc.loc:=LOC_VOID;
			
 
				 
			
 
				         pass_left_right;
			
 
				-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
			
 
				-                                   (nodetype in [addn,subn,muln]));
			
 
				+        force_reg_left_right(false,true);
			
 
				         set_result_location_reg;
			
 
				 
			
 
				         { determine if the comparison will be unsigned }
			
@@ -680,7 +678,7 @@ interface
 
				 
			
 
				        if nodetype<>subn then
			
 
				         begin
			
 
				-          if (right.location.loc >LOC_CONSTANT) then
			
 
				+          if (right.location.loc<>LOC_CONSTANT) then
			
 
				             cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,cgop,location.size,
			
 
				                left.location.register,right.location.register,
			
 
				                location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc)
			
@@ -708,7 +706,7 @@ interface
 
				             begin
			
 
				               tmpreg:=cg.getintregister(exprasmlist,location.size);
			
 
				               cg.a_load_const_reg(exprasmlist,location.size,
			
 
				-                aword(left.location.value),tmpreg);
			
 
				+                left.location.value,tmpreg);
			
 
				               cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,OP_SUB,location.size,
			
 
				                 right.location.register,tmpreg,location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc);
			
 
				             end;
			
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@@ -63,11 +63,6 @@ unit cgx86;
 
				         procedure a_op_ref_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
			
 
				         procedure a_op_reg_ref(list : taasmoutput; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;
			
 
				 
			
 
				-        procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
			
 
				-          size: tcgsize; a: aint; src, dst: tregister); override;
			
 
				-        procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
			
 
				-          size: tcgsize; src1, src2, dst: tregister); override;
			
 
				-
			
 
				         { move instructions }
			
 
				         procedure a_load_const_reg(list : taasmoutput; tosize: tcgsize; a : aint;reg : tregister);override;
			
 
				         procedure a_load_const_ref(list : taasmoutput; tosize: tcgsize; a : aint;const ref : treference);override;
			
@@ -160,8 +155,8 @@ unit cgx86;
 
				        fmodule;
			
 
				 
			
 
				     const
			
 
				-      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_ADD,A_AND,A_DIV,
			
 
				-                            A_IDIV,A_MUL, A_IMUL, A_NEG,A_NOT,A_OR,
			
 
				+      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
			
 
				+                            A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
			
 
				                             A_SAR,A_SHL,A_SHR,A_SUB,A_XOR);
			
 
				 
			
 
				       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
			
@@ -572,7 +567,7 @@ unit cgx86;
 
				         sym : tasmsymbol;
			
 
				         r : treference;
			
 
				       begin
			
 
				- 
			
 
				+
			
 
				         if (target_info.system <> system_i386_darwin) then
			
 
				           begin
			
 
				             sym:=objectlibrary.newasmsymbol(s,AB_EXTERNAL,AT_FUNCTION);
			
@@ -984,10 +979,10 @@ unit cgx86;
 
				         opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
			
 
				           ( { scalar }
			
 
				             ( { OS_F32 }
			
 
				-              A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
			
 
				+              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
			
 
				             ),
			
 
				             ( { OS_F64 }
			
 
				-              A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
			
 
				+              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
			
 
				             )
			
 
				           ),
			
 
				           ( { vectorized/packed }
			
@@ -995,10 +990,10 @@ unit cgx86;
 
				               these
			
 
				             }
			
 
				             ( { OS_F32 }
			
 
				-              A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
			
 
				+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
			
 
				             ),
			
 
				             ( { OS_F64 }
			
 
				-              A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
			
 
				+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
			
 
				             )
			
 
				           )
			
 
				         );
			
@@ -1062,9 +1057,11 @@ unit cgx86;
 
				         tmpreg : tregister;
			
 
				 {$endif x86_64}
			
 
				       begin
			
 
				+        optimize_op_const(op, a);
			
 
				 {$ifdef x86_64}
			
 
				         { x86_64 only supports signed 32 bits constants directly }
			
 
				-        if (size in [OS_S64,OS_64]) and
			
 
				+        if not(op in [OP_NONE,OP_MOVE]) and
			
 
				+           (size in [OS_S64,OS_64]) and
			
 
				             ((a<low(longint)) or (a>high(longint))) then
			
 
				           begin
			
 
				             tmpreg:=getintregister(list,size);
			
@@ -1075,6 +1072,15 @@ unit cgx86;
 
				 {$endif x86_64}
			
 
				         check_register_size(size,reg);
			
 
				         case op of
			
 
				+          OP_NONE :
			
 
				+            begin
			
 
				+              { Opcode is optimized away }
			
 
				+            end;
			
 
				+          OP_MOVE :
			
 
				+            begin
			
 
				+              { Optimized, replaced with a simple load }
			
 
				+              a_load_const_reg(list,size,a,reg);
			
 
				+            end;
			
 
				           OP_DIV, OP_IDIV:
			
 
				             begin
			
 
				               if ispowerof2(int64(a),power) then
			
@@ -1155,11 +1161,13 @@ unit cgx86;
 
				 {$endif x86_64}
			
 
				         tmpref  : treference;
			
 
				       begin
			
 
				+        optimize_op_const(op, a);
			
 
				         tmpref:=ref;
			
 
				         make_simple_ref(list,tmpref);
			
 
				 {$ifdef x86_64}
			
 
				         { x86_64 only supports signed 32 bits constants directly }
			
 
				-        if (size in [OS_S64,OS_64]) and
			
 
				+        if not(op in [OP_NONE,OP_MOVE]) and
			
 
				+           (size in [OS_S64,OS_64]) and
			
 
				             ((a<low(longint)) or (a>high(longint))) then
			
 
				           begin
			
 
				             tmpreg:=getintregister(list,size);
			
@@ -1169,6 +1177,15 @@ unit cgx86;
 
				           end;
			
 
				 {$endif x86_64}
			
 
				         Case Op of
			
 
				+          OP_NONE :
			
 
				+            begin
			
 
				+              { Opcode is optimized away }
			
 
				+            end;
			
 
				+          OP_MOVE :
			
 
				+            begin
			
 
				+              { Optimized, replaced with a simple load }
			
 
				+              a_load_const_ref(list,size,a,ref);
			
 
				+            end;
			
 
				           OP_DIV, OP_IDIV:
			
 
				             Begin
			
 
				               if ispowerof2(int64(a),power) then
			
@@ -1266,10 +1283,11 @@ unit cgx86;
 
				             internalerror(200109233);
			
 
				           OP_SHR,OP_SHL,OP_SAR:
			
 
				             begin
			
 
				-              getcpuregister(list,NR_CL);
			
 
				-              a_load_reg_reg(list,OS_8,OS_8,makeregsize(list,src,OS_8),NR_CL);
			
 
				-              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,src));
			
 
				-              ungetcpuregister(list,NR_CL);
			
 
				+              { Use ecx to load the value, which allows better coalescing }
			
 
				+              getcpuregister(list,NR_ECX);
			
 
				+              a_load_reg_reg(list,size,OS_32,src,NR_ECX);
			
 
				+              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));
			
 
				+              ungetcpuregister(list,NR_ECX);
			
 
				             end;
			
 
				           else
			
 
				             begin
			
@@ -1338,98 +1356,6 @@ unit cgx86;
 
				       end;
			
 
				 
			
 
				 
			
 
				-    procedure tcgx86.a_op_const_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister);
			
 
				-      var
			
 
				-        tmpref: treference;
			
 
				-        power: longint;
			
 
				-{$ifdef x86_64}
			
 
				-        tmpreg : tregister;
			
 
				-{$endif x86_64}
			
 
				-      begin
			
 
				-{$ifdef x86_64}
			
 
				-        { x86_64 only supports signed 32 bits constants directly }
			
 
				-        if (size in [OS_S64,OS_64]) and
			
 
				-            ((a<low(longint)) or (a>high(longint))) then
			
 
				-          begin
			
 
				-            tmpreg:=getintregister(list,size);
			
 
				-            a_load_const_reg(list,size,a,tmpreg);
			
 
				-            a_op_reg_reg_reg(list,op,size,tmpreg,src,dst);
			
 
				-            exit;
			
 
				-          end;
			
 
				-{$endif x86_64}
			
 
				-        check_register_size(size,src);
			
 
				-        check_register_size(size,dst);
			
 
				-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
			
 
				-          begin
			
 
				-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
			
 
				-            exit;
			
 
				-          end;
			
 
				-        { if we get here, we have to do a 32 bit calculation, guaranteed }
			
 
				-        case op of
			
 
				-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
			
 
				-          OP_SAR:
			
 
				-            { can't do anything special for these }
			
 
				-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
			
 
				-          OP_IMUL:
			
 
				-            begin
			
 
				-              if not(cs_check_overflow in aktlocalswitches) and
			
 
				-                 ispowerof2(int64(a),power) then
			
 
				-                { can be done with a shift }
			
 
				-                begin
			
 
				-                  inherited a_op_const_reg_reg(list,op,size,a,src,dst);
			
 
				-                  exit;
			
 
				-                end;
			
 
				-              list.concat(taicpu.op_const_reg_reg(A_IMUL,tcgsize2opsize[size],a,src,dst));
			
 
				-            end;
			
 
				-          OP_ADD, OP_SUB:
			
 
				-            if (a = 0) then
			
 
				-              a_load_reg_reg(list,size,size,src,dst)
			
 
				-            else
			
 
				-              begin
			
 
				-                reference_reset(tmpref);
			
 
				-                tmpref.base := src;
			
 
				-                tmpref.offset := longint(a);
			
 
				-                if op = OP_SUB then
			
 
				-                  tmpref.offset := -tmpref.offset;
			
 
				-                list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
			
 
				-              end
			
 
				-          else internalerror(200112302);
			
 
				-        end;
			
 
				-      end;
			
 
				-
			
 
				-
			
 
				-    procedure tcgx86.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;size: tcgsize; src1, src2, dst: tregister);
			
 
				-      var
			
 
				-        tmpref: treference;
			
 
				-      begin
			
 
				-        check_register_size(size,src1);
			
 
				-        check_register_size(size,src2);
			
 
				-        check_register_size(size,dst);
			
 
				-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
			
 
				-          begin
			
 
				-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
			
 
				-            exit;
			
 
				-          end;
			
 
				-        { if we get here, we have to do a 32 bit calculation, guaranteed }
			
 
				-        Case Op of
			
 
				-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
			
 
				-          OP_SAR,OP_SUB,OP_NOT,OP_NEG:
			
 
				-            { can't do anything special for these }
			
 
				-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
			
 
				-          OP_IMUL:
			
 
				-            list.concat(taicpu.op_reg_reg_reg(A_IMUL,tcgsize2opsize[size],src1,src2,dst));
			
 
				-          OP_ADD:
			
 
				-            begin
			
 
				-              reference_reset(tmpref);
			
 
				-              tmpref.base := src1;
			
 
				-              tmpref.index := src2;
			
 
				-              tmpref.scalefactor := 1;
			
 
				-              list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
			
 
				-            end
			
 
				-          else internalerror(200112303);
			
 
				-        end;
			
 
				-      end;
			
 
				-
			
 
				 {*************** compare instructions ****************}
			
 
				 
			
 
				     procedure tcgx86.a_cmp_const_reg_label(list : taasmoutput;size : tcgsize;cmp_op : topcmp;a : aint;reg : tregister;
			
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@@ -978,65 +978,17 @@ unit nx86add;
 
				 *****************************************************************************}
			
 
				 
			
 
				     procedure tx86addnode.second_addordinal;
			
 
				-      var
			
 
				-         mboverflow : boolean;
			
 
				-         op : tasmop;
			
 
				-         opsize : tcgsize;
			
 
				-         { true, if unsigned types are compared }
			
 
				-         unsigned : boolean;
			
 
				-         { true, if for sets subtractions the extra not should generated }
			
 
				-         extra_not : boolean;
			
 
				       begin
			
 
				-         { defaults }
			
 
				-         extra_not:=false;
			
 
				-         mboverflow:=false;
			
 
				-         unsigned:=not(is_signed(left.resulttype.def)) or
			
 
				-                   not(is_signed(right.resulttype.def));
			
 
				-         opsize:=def_cgsize(left.resulttype.def);
			
 
				-
			
 
				-         pass_left_right;
			
 
				-
			
 
				-         case nodetype of
			
 
				-           addn :
			
 
				-             begin
			
 
				-               op:=A_ADD;
			
 
				-               mboverflow:=true;
			
 
				-             end;
			
 
				-           muln :
			
 
				-             begin
			
 
				-               if unsigned then
			
 
				-                 op:=A_MUL
			
 
				-               else
			
 
				-                 op:=A_IMUL;
			
 
				-               mboverflow:=true;
			
 
				-             end;
			
 
				-           subn :
			
 
				-             begin
			
 
				-               op:=A_SUB;
			
 
				-               mboverflow:=true;
			
 
				-             end;
			
 
				-           xorn :
			
 
				-             op:=A_XOR;
			
 
				-           orn :
			
 
				-             op:=A_OR;
			
 
				-           andn :
			
 
				-             op:=A_AND;
			
 
				-           else
			
 
				-             internalerror(200304229);
			
 
				-         end;
			
 
				-
			
 
				-         { filter MUL, which requires special handling }
			
 
				-         if op=A_MUL then
			
 
				+         { filter unsigned MUL opcode, which requires special handling }
			
 
				+         if (nodetype=muln) and
			
 
				+            (not(is_signed(left.resulttype.def)) or
			
 
				+             not(is_signed(right.resulttype.def))) then
			
 
				            begin
			
 
				              second_mul;
			
 
				              exit;
			
 
				            end;
			
 
				 
			
 
				-         left_must_be_reg(opsize,false);
			
 
				-         emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);
			
 
				-         location_freetemp(exprasmlist,right.location);
			
 
				-
			
 
				-         set_result_location_reg;
			
 
				+         inherited second_addordinal;
			
 
				       end;