12 years ago · c8743c4826
--- a/compiler/x86_64/nx64add.pas
+++ b/compiler/x86_64/nx64add.pas
@@ -37,10 +37,10 @@ interface
 
				   implementation
			
 
				 
			
 
				     uses
			
 
				-      globtype,globals,
			
 
				+      globtype,globals,verbose,
			
 
				       aasmbase,aasmtai,aasmdata,
			
 
				       symdef,defutil,
			
 
				-      cgbase,cgutils,cga,cgobj,hlcgobj,
			
 
				+      cgbase,cgutils,cga,cgobj,hlcgobj,cgx86,
			
 
				       tgobj;
			
 
				 
			
 
				 {*****************************************************************************
			
@@ -66,59 +66,82 @@ interface
 *****************************************************************************}
 
     procedure tx8664addnode.second_mul;
-
-    var reg:Tregister;
+      { Generates an unsigned MUL for a 32 or 64 bit result. The right operand
+        is loaded into R/EAX, the left operand is used directly as register or
+        memory operand, and R/EAX (the low half of the product) is copied into
+        a newly allocated result register. With overflow checking enabled,
+        FPC_OVERFLOW is called unless F_AE holds after the MUL. }
+      var
+        reg,rega,regd:Tregister;
         ref:Treference;
         use_ref:boolean;
         hl4 : tasmlabel;
-
-    begin
-      pass_left_right;
-
-      { The location.register will be filled in later (JM) }
-      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-      { Mul supports registers and references, so if not register/reference,
-        load the location into a register}
-      use_ref:=false;
-      if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-        reg:=left.location.register
-      else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-        begin
-          ref:=left.location.reference;
-          use_ref:=true;
-        end
-      else
-        begin
-          {LOC_CONSTANT for example.}
-          reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
+        cgsize:TCgSize;
+        opsize:topsize;
+      begin
+        cgsize:=def_cgsize(resultdef);
+        opsize:=TCGSize2OpSize[cgsize];
+        case cgsize of
+          OS_S64,OS_64:
+            begin
+              rega:=NR_RAX;
+              regd:=NR_RDX;
+            end;
+          OS_S32,OS_32:
+            begin
+              rega:=NR_EAX;
+              regd:=NR_EDX;
+            end;
+          else
+            internalerror(2013102703);
         end;
-      { Allocate RAX. }
-      cg.getcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-      { Load the right value. }
-      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_RAX);
-      { Also allocate RDX, since it is also modified by a mul (JM). }
-      cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-      if use_ref then
-        emit_ref(A_MUL,S_Q,ref)
-      else
-        emit_reg(A_MUL,S_Q,reg);
-      if cs_check_overflow in current_settings.localswitches  then
-       begin
-         current_asmdata.getjumplabel(hl4);
-         cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
-         cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
-         cg.a_label(current_asmdata.CurrAsmList,hl4);
-       end;
-      { Free RDX,RAX }
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-      { Allocate a new register and store the result in RAX in it. }
-      location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-      emit_reg_reg(A_MOV,S_Q,NR_RAX,location.register);
-      location_freetemp(current_asmdata.CurrAsmList,left.location);
-      location_freetemp(current_asmdata.CurrAsmList,right.location);
-    end;
+
+        pass_left_right;
+
+        { The location.register will be filled in later (JM) }
+        location_reset(location,LOC_REGISTER,cgsize);
+        { Mul supports registers and references, so if not register/reference,
+          load the location into a register}
+        use_ref:=false;
+        if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
+          reg:=left.location.register
+        else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+          begin
+            ref:=left.location.reference;
+            use_ref:=true;
+          end
+        else
+          begin
+            {LOC_CONSTANT for example.}
+            reg:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,left.location,reg);
+          end;
+        { Allocate R/EAX (rega matches the operand size). }
+        cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+        { Load the right value. }
+        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,resultdef,right.location,rega);
+        { Also allocate R/EDX, since it is also modified by a mul (JM). }
+        cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+        if use_ref then
+          emit_ref(A_MUL,opsize,ref)
+        else
+          emit_reg(A_MUL,opsize,reg);
+        if cs_check_overflow in current_settings.localswitches  then
+         begin
+           current_asmdata.getjumplabel(hl4);
+           cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl4);
+         end;
+        { Free R/EDX,R/EAX }
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+        { Allocate a new register and store the result in R/EAX in it. }
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+        emit_reg_reg(A_MOV,opsize,rega,location.register);
+        location_freetemp(current_asmdata.CurrAsmList,left.location);
+        location_freetemp(current_asmdata.CurrAsmList,right.location);
+      end;
 
 
 begin
			
--- a/compiler/x86_64/nx64mat.pas
+++ b/compiler/x86_64/nx64mat.pas
@@ -61,9 +61,11 @@ implementation
 
     procedure tx8664moddivnode.pass_generate_code;
       var
-        hreg1,hreg2:Tregister;
+        hreg1,hreg2,rega,regd:Tregister;
         power:longint;
         op:Tasmop;
+        cgsize:TCgSize;
+        opsize:topsize;
       begin
         secondpass(left);
         if codegenerror then
@@ -73,7 +75,24 @@ implementation
           exit;
 
         { put numerator in register }
-        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        cgsize:=def_cgsize(resultdef);
+        opsize:=TCGSize2OpSize[cgsize];
+        case cgsize of
+          OS_S64,OS_64:
+            begin
+              rega:=NR_RAX;
+              regd:=NR_RDX;
+            end;
+          OS_S32,OS_32:
+            begin
+              rega:=NR_EAX;
+              regd:=NR_EDX;
+            end;
+          else
+            internalerror(2013102702);
+        end;
+
+        location_reset(location,LOC_REGISTER,cgsize);
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
         hreg1:=left.location.register;
 
@@ -88,60 +107,68 @@ implementation
                   { use a sequence without jumps, saw this in
                     comp.compilers (JM) }
                   { no jumps, but more operations }
-                  hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-                  emit_reg_reg(A_MOV,S_Q,hreg1,hreg2);
+                  hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
+                  emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
                   {If the left value is signed, hreg2=$ffffffff, otherwise 0.}
-                  emit_const_reg(A_SAR,S_Q,63,hreg2);
+                  { shift by the operand width-1 (31 or 63) so the sign bit fills hreg2 }
+                  emit_const_reg(A_SAR,opsize,resultdef.size*8-1,hreg2);
                   {If signed, hreg2=right value-1, otherwise 0.}
                   { (don't use emit_const_reg, because if value>high(longint)
                      then it must first be loaded into a register) }
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_S64,tordconstnode(right).value-1,hreg2);
+                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,tordconstnode(right).value-1,hreg2);
                   { add to the left value }
-                  emit_reg_reg(A_ADD,S_Q,hreg2,hreg1);
+                  emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
                   { do the shift }
-                  emit_const_reg(A_SAR,S_Q,power,hreg1);
+                  emit_const_reg(A_SAR,opsize,power,hreg1);
               end
             else
-              emit_const_reg(A_SHR,S_Q,power,hreg1);
+              emit_const_reg(A_SHR,opsize,power,hreg1);
             location.register:=hreg1;
           end
         else
           begin
             {Bring denominator to a register.}
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-            emit_reg_reg(A_MOV,S_Q,hreg1,NR_RAX);
-            cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDX);
+            cg.getcpuregister(current_asmdata.CurrAsmList,rega);
+            emit_reg_reg(A_MOV,opsize,hreg1,rega);
+            cg.getcpuregister(current_asmdata.CurrAsmList,regd);
             {Sign extension depends on the left type.}
-            if torddef(left.resultdef).ordtype=u64bit then
-              emit_reg_reg(A_XOR,S_Q,NR_RDX,NR_RDX)
+            if is_signed(left.resultdef) then
+              case left.resultdef.size of
+                8:
+                  emit_none(A_CQO,S_NO);
+                4:
+                  emit_none(A_CDQ,S_NO);
+                else
+                  internalerror(2013102701);
+              end
             else
-              emit_none(A_CQO,S_NO);
+              emit_reg_reg(A_XOR,opsize,regd,regd);
 
             {Division depends on the right type.}
-            if Torddef(right.resultdef).ordtype=u64bit then
-              op:=A_DIV
+            if is_signed(right.resultdef) then
+              op:=A_IDIV
             else
-              op:=A_IDIV;
+              op:=A_DIV;
 
             if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
-              emit_ref(op,S_Q,right.location.reference)
+              emit_ref(op,opsize,right.location.reference)
             else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
-              emit_reg(op,S_Q,right.location.register)
+              emit_reg(op,opsize,right.location.register)
             else
               begin
                 hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
-                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u64inttype,right.location,hreg1);
-                emit_reg(op,S_Q,hreg1);
+                hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
+                emit_reg(op,opsize,hreg1);
               end;
 
-            { Copy the result into a new register. Release RAX & RDX.}
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDX);
-            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RAX);
-            location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+            { Copy the result into a new register. Release R/EAX & R/EDX.}
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
             if nodetype=divn then
-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_RAX,location.register)
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
             else
-              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_RDX,location.register);
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
           end;
       end;