19 years ago · 588cccb3ca
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@@ -81,6 +81,7 @@ interface
 
															        topcg =
														
 
															        (
														
 
															           OP_NONE,
														
 
															+          OP_MOVE,      { replaced operation with direct load }
														
 
															           OP_ADD,       { simple addition          }
														
 
															           OP_AND,       { simple logical and       }
														
 
															           OP_DIV,       { simple unsigned division }
														
@@ -593,7 +594,7 @@ implementation
 
															     function commutativeop(op: topcg): boolean;{$ifdef USEINLINE}inline;{$endif}
														
 
															       const
														
 
															         list: array[topcg] of boolean =
														
 
															-          (true,true,true,false,false,true,true,false,false,
														
 
															+          (true,false,true,true,false,false,true,true,false,false,
														
 
															            true,false,false,false,false,true);
														
 
															       begin
														
 
															         commutativeop := list[op];
														
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@@ -285,18 +285,16 @@ unit cgobj;
 
															           procedure g_flags2ref(list: taasmoutput; size: TCgSize; const f: tresflags; const ref:TReference); virtual;
														
 
															           {
														
 
															-             This routine tries to optimize the const_reg opcode, and should be
														
 
															-             called at the start of a_op_const_reg. It returns the actual opcode
														
 
															-             to emit, and the constant value to emit. If this routine returns
														
 
															-             TRUE, @var(no) instruction should be emitted (.eg : imul reg by 1 )
														
 
															+             This routine tries to optimize the op_const_reg/ref opcode, and should be
														
 
															+             called at the start of a_op_const_reg/ref. It returns the actual opcode
														
 
															+             to emit, and the constant value to emit. This function can return opcode OP_NONE to
														
 
															+             remove the opcode and OP_MOVE to replace it with a simple load
														
 
															              @param(op The opcode to emit, returns the opcode which must be emitted)
														
 
															              @param(a  The constant which should be emitted, returns the constant which must
														
 
															                     be emitted)
														
 
															-             @param(reg The register to emit the opcode with, returns the register with
														
 
															-                   which the opcode will be emitted)
														
 
															           }
														
 
															-          function optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg: tregister): boolean;virtual;
														
 
															+          procedure optimize_op_const(var op: topcg; var a : aint);virtual;
														
 
															          {#
														
 
															              This routine is used in exception management nodes. It should
														
@@ -936,55 +934,69 @@ implementation
 
															       end;
														
 
															-    function tcg.optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg:tregister): boolean;
														
 
															+    procedure tcg.optimize_op_const(var op: topcg; var a : aint);
														
 
															       var
														
 
															         powerval : longint;
														
 
															       begin
														
 
															-        optimize_op_const_reg := false;
														
 
															         case op of
														
 
															-          { or with zero returns same result }
														
 
															-          OP_OR : if a = 0 then optimize_op_const_reg := true;
														
 
															-          { and with max returns same result }
														
 
															-          OP_AND : if (a = high(a)) then optimize_op_const_reg := true;
														
 
															-          { division by 1 returns result }
														
 
															+          OP_OR :
														
 
															+            begin
														
 
															+              { or with zero returns same result }
														
 
															+              if a = 0 then
														
 
															+                op:=OP_NONE
														
 
															+              else
														
 
															+              { or with max returns max }
														
 
															+                if a = -1 then
														
 
															+                  op:=OP_MOVE;
														
 
															+            end;
														
 
															+          OP_AND :
														
 
															+            begin
														
 
															+              { and with max returns same result }
														
 
															+              if (a = -1) then
														
 
															+                op:=OP_NONE
														
 
															+              else
														
 
															+              { and with 0 returns 0 }
														
 
															+                if a=0 then
														
 
															+                  op:=OP_MOVE;
														
 
															+            end;
														
 
															           OP_DIV :
														
 
															             begin
														
 
															+              { division by 1 returns result }
														
 
															               if a = 1 then
														
 
															-                optimize_op_const_reg := true
														
 
															+                op:=OP_NONE
														
 
															               else if ispowerof2(int64(a), powerval) then
														
 
															                 begin
														
 
															                   a := powerval;
														
 
															                   op:= OP_SHR;
														
 
															                 end;
														
 
															-              exit;
														
 
															             end;
														
 
															           OP_IDIV:
														
 
															             begin
														
 
															               if a = 1 then
														
 
															-                optimize_op_const_reg := true
														
 
															-              else if ispowerof2(int64(a), powerval) then
														
 
															-                begin
														
 
															-                  a := powerval;
														
 
															-                  op:= OP_SAR;
														
 
															-                end;
														
 
															-               exit;
														
 
															+                op:=OP_NONE;
														
 
															             end;
														
 
															-        OP_MUL,OP_IMUL:
														
 
															+         OP_MUL,OP_IMUL:
														
 
															             begin
														
 
															                if a = 1 then
														
 
															-                  optimize_op_const_reg := true
														
 
															+                 op:=OP_NONE
														
 
															+               else
														
 
															+                 if a=0 then
														
 
															+                   op:=OP_MOVE
														
 
															                else if ispowerof2(int64(a), powerval) then
														
 
															                  begin
														
 
															                    a := powerval;
														
 
															                    op:= OP_SHL;
														
 
															                  end;
														
 
															-               exit;
														
 
															+            end;
														
 
															+        OP_ADD,OP_SUB:
														
 
															+            begin
														
 
															+               if a = 0 then
														
 
															+                 op:=OP_NONE;
														
 
															             end;
														
 
															         OP_SAR,OP_SHL,OP_SHR:
														
 
															            begin
														
 
															               if a = 0 then
														
 
															-                 optimize_op_const_reg := true;
														
 
															-              exit;
														
 
															+                op:=OP_NONE;
														
 
															            end;
														
 
															         end;
														
 
															       end;
														
@@ -1729,7 +1741,7 @@ implementation
 
															 {$endif}
														
 
															                 if to_signed then
														
 
															                   begin
														
 
															-                    { calculation of the low/high ranges must not overflow 64 bit 
														
 
															+                    { calculation of the low/high ranges must not overflow 64 bit
														
 
															                      otherwise we end up comparing with zero for 64 bit data types on
														
 
															                      64 bit processors }
														
 
															                     if (lto = (int64(-1) << (tosize * 8 - 1))) and
														
@@ -1738,7 +1750,7 @@ implementation
 
															                   end
														
 
															                 else
														
 
															                   begin
														
 
															-                    { calculation of the low/high ranges must not overflow 64 bit 
														
 
															+                    { calculation of the low/high ranges must not overflow 64 bit
														
 
															                      otherwise we end up having all zeros for 64 bit data types on
														
 
															                      64 bit processors }
														
 
															                     if (lto = 0) and
														
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@@ -349,6 +349,8 @@ interface
 
															         hl4 : tasmlabel;
														
 
															     begin
														
 
															+      pass_left_right;
														
 
															+
														
 
															       {The location.register will be filled in later (JM)}
														
 
															       location_reset(location,LOC_REGISTER,OS_INT);
														
 
															       {Get a temp register and load the left value into it
														
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@@ -33,9 +33,8 @@ interface
 
															          procedure pass_2;override;
														
 
															       end;
														
 
															-      ti386shlshrnode = class(tshlshrnode)
														
 
															-         procedure pass_2;override;
														
 
															-         { everything will be handled in pass_2 }
														
 
															+      ti386shlshrnode = class(tcgshlshrnode)
														
 
															+         procedure second_64bit;override;
														
 
															          function first_shlshr64bitint: tnode; override;
														
 
															       end;
														
@@ -165,7 +164,7 @@ implementation
 
															                         m_high:=m_high shr 1;
														
 
															                         dec(l);
														
 
															                       end;
														
 
															-                    m:=m_high;
														
 
															+                    m:=dword(m_high);
														
 
															                     s:=l;
														
 
															                     if (m_high shr 31)<>0 then
														
 
															                       a:=1
														
@@ -223,7 +222,7 @@ implementation
 
															                     d:=tordconstnode(right).value;
														
 
															                     if d>=$80000000 then
														
 
															                       begin
														
 
															-                        emit_const_reg(A_CMP,S_L,d,hreg1);
														
 
															+                        emit_const_reg(A_CMP,S_L,aint(d),hreg1);
														
 
															                         location.register:=cg.getintregister(exprasmlist,OS_INT);
														
 
															                         emit_const_reg(A_MOV,S_L,0,location.register);
														
 
															                         emit_const_reg(A_SBB,S_L,-1,location.register);
														
@@ -359,147 +358,111 @@ implementation
 
															     function ti386shlshrnode.first_shlshr64bitint: tnode;
														
 
															+      begin
														
 
															+        result := nil;
														
 
															+      end;
														
 
															-    begin
														
 
															-      result := nil;
														
 
															-    end;
														
 
															-
														
 
															-    procedure ti386shlshrnode.pass_2;
														
 
															-
														
 
															-    var hreg64hi,hreg64lo:Tregister;
														
 
															-        op:Tasmop;
														
 
															+    procedure ti386shlshrnode.second_64bit;
														
 
															+      var
														
 
															+        hreg64hi,hreg64lo:Tregister;
														
 
															         v : TConstExprInt;
														
 
															         l1,l2,l3:Tasmlabel;
														
 
															+      begin
														
 
															+        location_reset(location,LOC_REGISTER,OS_64);
														
 
															+
														
 
															+        { load left operator in a register }
														
 
															+        location_force_reg(exprasmlist,left.location,OS_64,false);
														
 
															+        hreg64hi:=left.location.register64.reghi;
														
 
															+        hreg64lo:=left.location.register64.reglo;
														
 
															-    begin
														
 
															-      secondpass(left);
														
 
															-      secondpass(right);
														
 
															-
														
 
															-      { determine operator }
														
 
															-      if nodetype=shln then
														
 
															-        op:=A_SHL
														
 
															-      else
														
 
															-        op:=A_SHR;
														
 
															-
														
 
															-      if is_64bitint(left.resulttype.def) then
														
 
															-        begin
														
 
															-          location_reset(location,LOC_REGISTER,OS_64);
														
 
															-
														
 
															-          { load left operator in a register }
														
 
															-          location_force_reg(exprasmlist,left.location,OS_64,false);
														
 
															-          hreg64hi:=left.location.register64.reghi;
														
 
															-          hreg64lo:=left.location.register64.reglo;
														
 
															-
														
 
															-          { shifting by a constant directly coded: }
														
 
															-          if (right.nodetype=ordconstn) then
														
 
															-            begin
														
 
															-              v:=Tordconstnode(right).value and 63;
														
 
															-              if v>31 then
														
 
															-                begin
														
 
															-                  if nodetype=shln then
														
 
															-                    begin
														
 
															-                      emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
														
 
															-                      if ((v and 31) <> 0) then
														
 
															-                        emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
														
 
															-                    end
														
 
															-                  else
														
 
															-                    begin
														
 
															-                      emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
														
 
															-                      if ((v and 31) <> 0) then
														
 
															-                        emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
														
 
															-                    end;
														
 
															-                  location.register64.reghi:=hreg64lo;
														
 
															-                  location.register64.reglo:=hreg64hi;
														
 
															-                end
														
 
															-              else
														
 
															-                begin
														
 
															-                  if nodetype=shln then
														
 
															-                    begin
														
 
															-                      emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
														
 
															+        { shifting by a constant directly coded: }
														
 
															+        if (right.nodetype=ordconstn) then
														
 
															+          begin
														
 
															+            v:=Tordconstnode(right).value and 63;
														
 
															+            if v>31 then
														
 
															+              begin
														
 
															+                if nodetype=shln then
														
 
															+                  begin
														
 
															+                    emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
														
 
															+                    if ((v and 31) <> 0) then
														
 
															                       emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
														
 
															-                    end
														
 
															-                  else
														
 
															-                    begin
														
 
															-                      emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
														
 
															+                  end
														
 
															+                else
														
 
															+                  begin
														
 
															+                    emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
														
 
															+                    if ((v and 31) <> 0) then
														
 
															                       emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
														
 
															-                    end;
														
 
															-                  location.register64.reglo:=hreg64lo;
														
 
															-                  location.register64.reghi:=hreg64hi;
														
 
															-                end;
														
 
															-            end
														
 
															-          else
														
 
															-            begin
														
 
															-              { load right operators in a register }
														
 
															-              cg.getcpuregister(exprasmlist,NR_ECX);
														
 
															-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
														
 
															-
														
 
															-              { left operator is already in a register }
														
 
															-              { hence are both in a register }
														
 
															-              { is it in the case ECX ? }
														
 
															-
														
 
															-              { the damned shift instructions work only til a count of 32 }
														
 
															-              { so we've to do some tricks here                           }
														
 
															-              objectlibrary.getjumplabel(l1);
														
 
															-              objectlibrary.getjumplabel(l2);
														
 
															-              objectlibrary.getjumplabel(l3);
														
 
															-              emit_const_reg(A_CMP,S_L,64,NR_ECX);
														
 
															-              cg.a_jmp_flags(exprasmlist,F_L,l1);
														
 
															-              emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
														
 
															-              emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
														
 
															-              cg.a_jmp_always(exprasmlist,l3);
														
 
															-              cg.a_label(exprasmlist,l1);
														
 
															-              emit_const_reg(A_CMP,S_L,32,NR_ECX);
														
 
															-              cg.a_jmp_flags(exprasmlist,F_L,l2);
														
 
															-              emit_const_reg(A_SUB,S_L,32,NR_ECX);
														
 
															-              if nodetype=shln then
														
 
															-                begin
														
 
															-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
														
 
															-                  emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
														
 
															-                  emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
														
 
															-                  cg.a_jmp_always(exprasmlist,l3);
														
 
															-                  cg.a_label(exprasmlist,l2);
														
 
															-                  emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
														
 
															-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
														
 
															-                end
														
 
															-              else
														
 
															-                begin
														
 
															-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
														
 
															-                  emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
														
 
															-                  emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
														
 
															-                  cg.a_jmp_always(exprasmlist,l3);
														
 
															-                  cg.a_label(exprasmlist,l2);
														
 
															-                  emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
														
 
															-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
														
 
															-                end;
														
 
															-              cg.a_label(exprasmlist,l3);
														
 
															-
														
 
															-              cg.ungetcpuregister(exprasmlist,NR_ECX);
														
 
															-              location.register64.reglo:=hreg64lo;
														
 
															-              location.register64.reghi:=hreg64hi;
														
 
															-            end;
														
 
															-        end
														
 
															-      else
														
 
															-        begin
														
 
															-          { load left operators in a register }
														
 
															-          location_copy(location,left.location);
														
 
															-          location_force_reg(exprasmlist,location,OS_INT,false);
														
 
															-
														
 
															-          { shifting by a constant directly coded: }
														
 
															-          if (right.nodetype=ordconstn) then
														
 
															-            { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)}
														
 
															-            emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
														
 
															-          else
														
 
															-            begin
														
 
															-              { load right operators in a ECX }
														
 
															-              cg.getcpuregister(exprasmlist,NR_ECX);
														
 
															-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
														
 
															-
														
 
															-              { right operand is in ECX }
														
 
															-              cg.ungetcpuregister(exprasmlist,NR_ECX);
														
 
															-              emit_reg_reg(op,S_L,NR_CL,location.register);
														
 
															-            end;
														
 
															-        end;
														
 
															-    end;
														
 
															+                  end;
														
 
															+                location.register64.reghi:=hreg64lo;
														
 
															+                location.register64.reglo:=hreg64hi;
														
 
															+              end
														
 
															+            else
														
 
															+              begin
														
 
															+                if nodetype=shln then
														
 
															+                  begin
														
 
															+                    emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
														
 
															+                    emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
														
 
															+                  end
														
 
															+                else
														
 
															+                  begin
														
 
															+                    emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
														
 
															+                    emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
														
 
															+                  end;
														
 
															+                location.register64.reglo:=hreg64lo;
														
 
															+                location.register64.reghi:=hreg64hi;
														
 
															+              end;
														
 
															+          end
														
 
															+        else
														
 
															+          begin
														
 
															+            { load right operators in a register }
														
 
															+            cg.getcpuregister(exprasmlist,NR_ECX);
														
 
															+            cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
														
 
															+
														
 
															+            { left operator is already in a register }
														
 
															+            { hence are both in a register }
														
 
															+            { is it in the case ECX ? }
														
 
															+
														
 
															+            { the damned shift instructions work only til a count of 32 }
														
 
															+            { so we've to do some tricks here                           }
														
 
															+            objectlibrary.getjumplabel(l1);
														
 
															+            objectlibrary.getjumplabel(l2);
														
 
															+            objectlibrary.getjumplabel(l3);
														
 
															+            emit_const_reg(A_CMP,S_L,64,NR_ECX);
														
 
															+            cg.a_jmp_flags(exprasmlist,F_L,l1);
														
 
															+            emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
														
 
															+            emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
														
 
															+            cg.a_jmp_always(exprasmlist,l3);
														
 
															+            cg.a_label(exprasmlist,l1);
														
 
															+            emit_const_reg(A_CMP,S_L,32,NR_ECX);
														
 
															+            cg.a_jmp_flags(exprasmlist,F_L,l2);
														
 
															+            emit_const_reg(A_SUB,S_L,32,NR_ECX);
														
 
															+            if nodetype=shln then
														
 
															+              begin
														
 
															+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
														
 
															+                emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
														
 
															+                emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
														
 
															+                cg.a_jmp_always(exprasmlist,l3);
														
 
															+                cg.a_label(exprasmlist,l2);
														
 
															+                emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
														
 
															+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
														
 
															+              end
														
 
															+            else
														
 
															+              begin
														
 
															+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
														
 
															+                emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
														
 
															+                emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
														
 
															+                cg.a_jmp_always(exprasmlist,l3);
														
 
															+                cg.a_label(exprasmlist,l2);
														
 
															+                emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
														
 
															+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
														
 
															+              end;
														
 
															+            cg.a_label(exprasmlist,l3);
														
 
															+
														
 
															+            cg.ungetcpuregister(exprasmlist,NR_ECX);
														
 
															+            location.register64.reglo:=hreg64lo;
														
 
															+            location.register64.reghi:=hreg64hi;
														
 
															+          end;
														
 
															+      end;
														
 
															 begin
														
--- a/compiler/m68k/cgcpu.pas
+++ b/compiler/m68k/cgcpu.pas
@@ -404,11 +404,13 @@ unit cgcpu;
 
															        opcode : tasmop;
														
 
															        r,r2 : Tregister;
														
 
															       begin
														
 
															-        { need to emit opcode? }
														
 
															-        if optimize_op_const_reg(list, op, a, reg) then
														
 
															-           exit;
														
 
															+        optimize_op_const(op, a); { may rewrite op and a; OP_NONE means no instruction is needed }
														
 
															         opcode := topcg2tasmop[op];
														
 
															         case op of
														
 
															+          OP_NONE :
														
 
															+              begin
														
 
															+                { Opcode is optimized away }
														
 
															+              end;
														
 
															           OP_ADD :
														
 
															               begin
														
 
															                 if (a >= 1) and (a <= 8) then
														
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@@ -459,8 +459,7 @@ interface
 
															         ovloc.loc:=LOC_VOID;
														
 
															         pass_left_right;
														
 
															-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
														
 
															-                                   (nodetype in [addn,subn]));
														
 
															+        force_reg_left_right(false,true);
														
 
															         set_result_location_reg;
														
 
															         { assume no overflow checking is required }
														
@@ -634,8 +633,7 @@ interface
 
															         ovloc.loc:=LOC_VOID;
														
 
															         pass_left_right;
														
 
															-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
														
 
															-                                   (nodetype in [addn,subn,muln]));
														
 
															+        force_reg_left_right(false,true);
														
 
															         set_result_location_reg;
														
 
															         { determine if the comparison will be unsigned }
														
@@ -680,7 +678,7 @@ interface
 
															        if nodetype<>subn then
														
 
															         begin
														
 
															-          if (right.location.loc >LOC_CONSTANT) then
														
 
															+          if (right.location.loc<>LOC_CONSTANT) then
														
 
															             cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,cgop,location.size,
														
 
															                left.location.register,right.location.register,
														
 
															                location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc)
														
@@ -708,7 +706,7 @@ interface
 
															             begin
														
 
															               tmpreg:=cg.getintregister(exprasmlist,location.size);
														
 
															               cg.a_load_const_reg(exprasmlist,location.size,
														
 
															-                aword(left.location.value),tmpreg);
														
 
															+                left.location.value,tmpreg);
														
 
															               cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,OP_SUB,location.size,
														
 
															                 right.location.register,tmpreg,location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc);
														
 
															             end;
														
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@@ -63,11 +63,6 @@ unit cgx86;
 
															         procedure a_op_ref_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
														
 
															         procedure a_op_reg_ref(list : taasmoutput; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;
														
 
															-        procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
														
 
															-          size: tcgsize; a: aint; src, dst: tregister); override;
														
 
															-        procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
														
 
															-          size: tcgsize; src1, src2, dst: tregister); override;
														
 
															-
														
 
															         { move instructions }
														
 
															         procedure a_load_const_reg(list : taasmoutput; tosize: tcgsize; a : aint;reg : tregister);override;
														
 
															         procedure a_load_const_ref(list : taasmoutput; tosize: tcgsize; a : aint;const ref : treference);override;
														
@@ -160,8 +155,8 @@ unit cgx86;
 
															        fmodule;
														
 
															     const
														
 
															-      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_ADD,A_AND,A_DIV,
														
 
															-                            A_IDIV,A_MUL, A_IMUL, A_NEG,A_NOT,A_OR,
														
 
															+      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
														
 
															+                            A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
														
 
															                             A_SAR,A_SHL,A_SHR,A_SUB,A_XOR);
														
 
															       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
														
@@ -572,7 +567,7 @@ unit cgx86;
 
															         sym : tasmsymbol;
														
 
															         r : treference;
														
 
															       begin
														
 
															- 
														
 
															+
														
 
															         if (target_info.system <> system_i386_darwin) then
														
 
															           begin
														
 
															             sym:=objectlibrary.newasmsymbol(s,AB_EXTERNAL,AT_FUNCTION);
														
@@ -984,10 +979,10 @@ unit cgx86;
 
															         opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
														
 
															           ( { scalar }
														
 
															             ( { OS_F32 }
														
 
															-              A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
														
 
															+              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
														
 
															             ),
														
 
															             ( { OS_F64 }
														
 
															-              A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
														
 
															+              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
														
 
															             )
														
 
															           ),
														
 
															           ( { vectorized/packed }
														
@@ -995,10 +990,10 @@ unit cgx86;
 
															               these
														
 
															             }
														
 
															             ( { OS_F32 }
														
 
															-              A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
														
 
															+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
														
 
															             ),
														
 
															             ( { OS_F64 }
														
 
															-              A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
														
 
															+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
														
 
															             )
														
 
															           )
														
 
															         );
														
@@ -1062,9 +1057,11 @@ unit cgx86;
 
															         tmpreg : tregister;
														
 
															 {$endif x86_64}
														
 
															       begin
														
 
															+        optimize_op_const(op, a);
														
 
															 {$ifdef x86_64}
														
 
															         { x86_64 only supports signed 32 bits constants directly }
														
 
															-        if (size in [OS_S64,OS_64]) and
														
 
															+        if not(op in [OP_NONE,OP_MOVE]) and
														
 
															+           (size in [OS_S64,OS_64]) and
														
 
															             ((a<low(longint)) or (a>high(longint))) then
														
 
															           begin
														
 
															             tmpreg:=getintregister(list,size);
														
@@ -1075,6 +1072,15 @@ unit cgx86;
 
															 {$endif x86_64}
														
 
															         check_register_size(size,reg);
														
 
															         case op of
														
 
															+          OP_NONE :
														
 
															+            begin
														
 
															+              { Opcode is optimized away }
														
 
															+            end;
														
 
															+          OP_MOVE :
														
 
															+            begin
														
 
															+              { Optimized, replaced with a simple load }
														
 
															+              a_load_const_reg(list,size,a,reg);
														
 
															+            end;
														
 
															           OP_DIV, OP_IDIV:
														
 
															             begin
														
 
															               if ispowerof2(int64(a),power) then
														
@@ -1155,11 +1161,13 @@ unit cgx86;
 
															 {$endif x86_64}
														
 
															         tmpref  : treference;
														
 
															       begin
														
 
															+        optimize_op_const(op, a);
														
 
															         tmpref:=ref;
														
 
															         make_simple_ref(list,tmpref);
														
 
															 {$ifdef x86_64}
														
 
															         { x86_64 only supports signed 32 bits constants directly }
														
 
															-        if (size in [OS_S64,OS_64]) and
														
 
															+        if not(op in [OP_NONE,OP_MOVE]) and
														
 
															+           (size in [OS_S64,OS_64]) and
														
 
															             ((a<low(longint)) or (a>high(longint))) then
														
 
															           begin
														
 
															             tmpreg:=getintregister(list,size);
														
@@ -1169,6 +1177,15 @@ unit cgx86;
 
															           end;
														
 
															 {$endif x86_64}
														
 
															         Case Op of
														
 
															+          OP_NONE :
														
 
															+            begin
														
 
															+              { Opcode is optimized away }
														
 
															+            end;
														
 
															+          OP_MOVE :
														
 
															+            begin
														
 
															+              { Optimized, replaced with a simple load }
														
 
															+              a_load_const_ref(list,size,a,ref);
														
 
															+            end;
														
 
															           OP_DIV, OP_IDIV:
														
 
															             Begin
														
 
															               if ispowerof2(int64(a),power) then
														
@@ -1266,10 +1283,11 @@ unit cgx86;
 
															             internalerror(200109233);
														
 
															           OP_SHR,OP_SHL,OP_SAR:
														
 
															             begin
														
 
															-              getcpuregister(list,NR_CL);
														
 
															-              a_load_reg_reg(list,OS_8,OS_8,makeregsize(list,src,OS_8),NR_CL);
														
 
															-              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,src));
														
 
															-              ungetcpuregister(list,NR_CL);
														
 
															+              { Use ecx to load the value, that allows better coalescing }
														
 
															+              getcpuregister(list,NR_ECX);
														
 
															+              a_load_reg_reg(list,size,OS_32,src,NR_ECX);
														
 
															+              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));
														
 
															+              ungetcpuregister(list,NR_ECX);
														
 
															             end;
														
 
															           else
														
 
															             begin
														
@@ -1338,98 +1356,6 @@ unit cgx86;
 
															       end;
														
 
															-    procedure tcgx86.a_op_const_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister);
														
 
															-      var
														
 
															-        tmpref: treference;
														
 
															-        power: longint;
														
 
															-{$ifdef x86_64}
														
 
															-        tmpreg : tregister;
														
 
															-{$endif x86_64}
														
 
															-      begin
														
 
															-{$ifdef x86_64}
														
 
															-        { x86_64 only supports signed 32 bits constants directly }
														
 
															-        if (size in [OS_S64,OS_64]) and
														
 
															-            ((a<low(longint)) or (a>high(longint))) then
														
 
															-          begin
														
 
															-            tmpreg:=getintregister(list,size);
														
 
															-            a_load_const_reg(list,size,a,tmpreg);
														
 
															-            a_op_reg_reg_reg(list,op,size,tmpreg,src,dst);
														
 
															-            exit;
														
 
															-          end;
														
 
															-{$endif x86_64}
														
 
															-        check_register_size(size,src);
														
 
															-        check_register_size(size,dst);
														
 
															-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
														
 
															-          begin
														
 
															-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
														
 
															-            exit;
														
 
															-          end;
														
 
															-        { if we get here, we have to do a 32 bit calculation, guaranteed }
														
 
															-        case op of
														
 
															-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
														
 
															-          OP_SAR:
														
 
															-            { can't do anything special for these }
														
 
															-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
														
 
															-          OP_IMUL:
														
 
															-            begin
														
 
															-              if not(cs_check_overflow in aktlocalswitches) and
														
 
															-                 ispowerof2(int64(a),power) then
														
 
															-                { can be done with a shift }
														
 
															-                begin
														
 
															-                  inherited a_op_const_reg_reg(list,op,size,a,src,dst);
														
 
															-                  exit;
														
 
															-                end;
														
 
															-              list.concat(taicpu.op_const_reg_reg(A_IMUL,tcgsize2opsize[size],a,src,dst));
														
 
															-            end;
														
 
															-          OP_ADD, OP_SUB:
														
 
															-            if (a = 0) then
														
 
															-              a_load_reg_reg(list,size,size,src,dst)
														
 
															-            else
														
 
															-              begin
														
 
															-                reference_reset(tmpref);
														
 
															-                tmpref.base := src;
														
 
															-                tmpref.offset := longint(a);
														
 
															-                if op = OP_SUB then
														
 
															-                  tmpref.offset := -tmpref.offset;
														
 
															-                list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
														
 
															-              end
														
 
															-          else internalerror(200112302);
														
 
															-        end;
														
 
															-      end;
														
 
															-
														
 
															-
														
 
															-    procedure tcgx86.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;size: tcgsize; src1, src2, dst: tregister);
														
 
															-      var
														
 
															-        tmpref: treference;
														
 
															-      begin
														
 
															-        check_register_size(size,src1);
														
 
															-        check_register_size(size,src2);
														
 
															-        check_register_size(size,dst);
														
 
															-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
														
 
															-          begin
														
 
															-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
														
 
															-            exit;
														
 
															-          end;
														
 
															-        { if we get here, we have to do a 32 bit calculation, guaranteed }
														
 
															-        Case Op of
														
 
															-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
														
 
															-          OP_SAR,OP_SUB,OP_NOT,OP_NEG:
														
 
															-            { can't do anything special for these }
														
 
															-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
														
 
															-          OP_IMUL:
														
 
															-            list.concat(taicpu.op_reg_reg_reg(A_IMUL,tcgsize2opsize[size],src1,src2,dst));
														
 
															-          OP_ADD:
														
 
															-            begin
														
 
															-              reference_reset(tmpref);
														
 
															-              tmpref.base := src1;
														
 
															-              tmpref.index := src2;
														
 
															-              tmpref.scalefactor := 1;
														
 
															-              list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
														
 
															-            end
														
 
															-          else internalerror(200112303);
														
 
															-        end;
														
 
															-      end;
														
 
															-
														
 
															 {*************** compare instructructions ****************}
														
 
															     procedure tcgx86.a_cmp_const_reg_label(list : taasmoutput;size : tcgsize;cmp_op : topcmp;a : aint;reg : tregister;
														
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@@ -978,65 +978,17 @@ unit nx86add;
 
															 *****************************************************************************}
														
 
															     procedure tx86addnode.second_addordinal;
														
 
															-      var
														
 
															-         mboverflow : boolean;
														
 
															-         op : tasmop;
														
 
															-         opsize : tcgsize;
														
 
															-         { true, if unsigned types are compared }
														
 
															-         unsigned : boolean;
														
 
															-         { true, if for sets subtractions the extra not should generated }
														
 
															-         extra_not : boolean;
														
 
															       begin
														
 
															-         { defaults }
														
 
															-         extra_not:=false;
														
 
															-         mboverflow:=false;
														
 
															-         unsigned:=not(is_signed(left.resulttype.def)) or
														
 
															-                   not(is_signed(right.resulttype.def));
														
 
															-         opsize:=def_cgsize(left.resulttype.def);
														
 
															-
														
 
															-         pass_left_right;
														
 
															-
														
 
															-         case nodetype of
														
 
															-           addn :
														
 
															-             begin
														
 
															-               op:=A_ADD;
														
 
															-               mboverflow:=true;
														
 
															-             end;
														
 
															-           muln :
														
 
															-             begin
														
 
															-               if unsigned then
														
 
															-                 op:=A_MUL
														
 
															-               else
														
 
															-                 op:=A_IMUL;
														
 
															-               mboverflow:=true;
														
 
															-             end;
														
 
															-           subn :
														
 
															-             begin
														
 
															-               op:=A_SUB;
														
 
															-               mboverflow:=true;
														
 
															-             end;
														
 
															-           xorn :
														
 
															-             op:=A_XOR;
														
 
															-           orn :
														
 
															-             op:=A_OR;
														
 
															-           andn :
														
 
															-             op:=A_AND;
														
 
															-           else
														
 
															-             internalerror(200304229);
														
 
															-         end;
														
 
															-
														
 
															-         { filter MUL, which requires special handling }
														
 
															-         if op=A_MUL then
														
 
															+         { filter unsigned MUL opcode, which requires special handling }
														
 
															+         if (nodetype=muln) and
														
 
															+            (not(is_signed(left.resulttype.def)) or
														
 
															+             not(is_signed(right.resulttype.def))) then
														
 
															            begin
														
 
															              second_mul;
														
 
															              exit;
														
 
															            end;
														
 
															-         left_must_be_reg(opsize,false);
														
 
															-         emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);
														
 
															-         location_freetemp(exprasmlist,right.location);
														
 
															-
														
 
															-         set_result_location_reg;
														
 
															+         inherited second_addordinal;
														
 
															       end;