Răsfoiți Sursa

- Bug fix to new ADD/SUB optimisation where conditions are concerned
- Register allocation fixes for overflow checks

J. Gareth "Curious Kit" Moreton 3 ani în urmă
părinte
comite
2dc0995067

+ 9 - 0
compiler/cg64f32.pas

@@ -970,6 +970,8 @@ unit cg64f32;
                end;
              current_asmdata.getjumplabel(poslabel);
 
+             cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
+
              { check high dword, must be 0 (for positive numbers) }
              cg.a_cmp_const_reg_label(list,OS_32,OC_EQ,0,hreg,poslabel);
 
@@ -983,6 +985,9 @@ unit cg64f32;
                { we do not have dynamic dfa, so avoid a warning below about the unused
                  neglabel }
                neglabel:=nil;
+
+             cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
+
              { For all other values we have a range check error }
              cg.a_call_name(list,'fpc_rangeerror',false);
 
@@ -1022,7 +1027,9 @@ unit cg64f32;
                    end;
                  { get a new neglabel (JM) }
                  current_asmdata.getjumplabel(neglabel);
+                 cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
                  cg.a_cmp_const_reg_label(list,OS_32,OC_LT,0,hreg,neglabel);
+                 cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
 
                  cg.a_call_name(list,'fpc_rangeerror',false);
 
@@ -1073,7 +1080,9 @@ unit cg64f32;
                      cg.a_load_ref_reg(list,l.size,OS_32,l.reference,hreg);
                  end;
                current_asmdata.getjumplabel(poslabel);
+               cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
                cg.a_cmp_const_reg_label(list,opsize,OC_GTE,0,hreg,poslabel);
+               cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
 
                cg.a_call_name(list,'fpc_rangeerror',false);
                cg.a_label(list,poslabel);

+ 2 - 0
compiler/hlcgobj.pas

@@ -3896,10 +3896,12 @@ implementation
         a_cmp_const_reg_label(list,OS_INT,OC_GTE,aint(hto-lto),hreg,neglabel)
       else
       }
+      cg.a_reg_alloc(list, NR_DEFAULTFLAGS);
       if qword(hto-lto)>qword(aintmax) then
         a_cmp_const_reg_label(list,maxdef,OC_BE,aintmax,hreg,neglabel)
       else
         a_cmp_const_reg_label(list,maxdef,OC_BE,tcgint(int64(hto-lto)),hreg,neglabel);
+      cg.a_reg_dealloc(list, NR_DEFAULTFLAGS);
       g_call_system_proc(list,'fpc_rangeerror',[],nil).resetiftemp;
       a_label(list,neglabel);
     end;

+ 18 - 1
compiler/i386/n386add.pas

@@ -168,6 +168,9 @@ interface
         { at this point, left.location.loc should be LOC_REGISTER }
         if right.location.loc=LOC_REGISTER then
          begin
+           if mboverflow and needoverflowcheck then
+             cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
            { when swapped another result register }
            if (nodetype=subn) and (nf_swapped in flags) then
             begin
@@ -197,11 +200,18 @@ interface
               cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
               { the carry flag is still ok }
               emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
-              cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
+              { We need to keep the FLAGS register allocated for overflow checks }
+              if not mboverflow or not needoverflowcheck then
+                cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
               emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
             end
            else
             begin
+              if mboverflow and needoverflowcheck then
+                cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
               cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
                 left.location.register64);
             end;
@@ -221,6 +231,8 @@ interface
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
               else
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
+
+              cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
               cg.a_label(current_asmdata.CurrAsmList,hl4);
             end;
@@ -508,6 +520,10 @@ interface
           hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
           { Also allocate EDX, since it is also modified by a mul (JM). }
           cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
+
+          if needoverflowcheck then
+            cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
           if use_ref then
             emit_ref(asmops[unsigned],S_L,ref)
           else
@@ -518,6 +534,7 @@ interface
             begin
               current_asmdata.getjumplabel(hl4);
               cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+              cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
               cg.a_label(current_asmdata.CurrAsmList,hl4);
             end;

+ 11 - 0
compiler/i8086/n8086add.pas

@@ -264,6 +264,9 @@ interface
             end;
          end;
 
+        if mboverflow and needoverflowcheck then
+          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
         { at this point, left.location.loc should be LOC_REGISTER }
         if right.location.loc=LOC_REGISTER then
          begin
@@ -322,6 +325,7 @@ interface
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
               else
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
+              cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
               cg.a_label(current_asmdata.CurrAsmList,hl4);
             end;
@@ -1038,6 +1042,12 @@ interface
       hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_AX);
       {Also allocate DX, since it is also modified by a mul (JM).}
       cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
+
+      if overflowcheck and
+        { 16->32 bit cannot overflow }
+        (not is_32bitint(resultdef)) then
+        cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
       if use_ref then
         emit_ref(asmops[unsigned],S_W,ref)
       else
@@ -1048,6 +1058,7 @@ interface
         begin
           current_asmdata.getjumplabel(hl4);
           cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+          cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
           cg.a_label(current_asmdata.CurrAsmList,hl4);
         end;

+ 4 - 0
compiler/jvm/cpubase.pas

@@ -247,6 +247,10 @@ uses
       NR_FPU_RESULT_REG = NR_NO;
       NR_MM_RESULT_REG = NR_NO;
 
+      { No default flags }
+      NR_DEFAULTFLAGS = NR_NO;
+      RS_DEFAULTFLAGS = RS_NO;
+
 
 {*****************************************************************************
                        GCC /ABI linking information

+ 6 - 0
compiler/ncgmat.pas

@@ -218,7 +218,9 @@ implementation
             cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,
               location.register64.reglo,tr);
             current_asmdata.getjumplabel(hl);
+            cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,OS_32,OC_NE,0,tr,hl);
+            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
             cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
             cg.a_label(current_asmdata.CurrAsmList,hl);
           end;
@@ -298,12 +300,16 @@ implementation
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
         location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
         location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        if (cs_check_overflow in current_settings.localswitches) then
+          hlcg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
         hlcg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NEG,resultdef,left.location.register,location.register);
 
         if (cs_check_overflow in current_settings.localswitches) then
           begin
             current_asmdata.getjumplabel(hl);
             hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,resultdef,OC_NE,torddef(resultdef).low.svalue,location.register,hl);
+            hlcg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
             hlcg.g_call_system_proc(current_asmdata.CurrAsmList,'fpc_overflow',[],nil).resetiftemp;
             hlcg.a_label(current_asmdata.CurrAsmList,hl);
           end;

+ 4 - 0
compiler/wasm32/cpubase.pas

@@ -273,6 +273,10 @@ uses
       NR_FPU_RESULT_REG = NR_NO;
       NR_MM_RESULT_REG = NR_NO;
 
+      { No default flags }
+      NR_DEFAULTFLAGS = NR_NO;
+      RS_DEFAULTFLAGS = RS_NO;
+
 
 {*****************************************************************************
                        GCC /ABI linking information

+ 3 - 0
compiler/wasm32/hlcgcpu.pas

@@ -2136,10 +2136,13 @@ implementation
         a_load_const_stack(list,maxdef,aintmax,R_INTREGISTER)
       else
         a_load_const_stack(list,maxdef,tcgint(int64(hto-lto)),R_INTREGISTER);
+
+      a_reg_alloc(list, NR_DEFAULTFLAGS);
       a_cmp_stack_stack(list,maxdef,OC_A);
 
       current_asmdata.CurrAsmList.concat(taicpu.op_none(a_if));
       thlcgwasm(hlcg).decstack(current_asmdata.CurrAsmList,1);
+      a_reg_dealloc(list, NR_DEFAULTFLAGS);
 
       g_call_system_proc(list,'fpc_rangeerror',[],nil).resetiftemp;
 

+ 66 - 0
compiler/x86/aoptx86.pas

@@ -10641,6 +10641,7 @@ unit aoptx86;
       var
         hp1, hp2: tai;
         Opposite: TAsmOp;
+        NewCond: TAsmCond;
       begin
         Result := False;
 
@@ -10660,6 +10661,71 @@ unit aoptx86;
             else
               Opposite := A_ADD;
 
+            { Be careful if the flags are in use, because the CF flag inverts
+              when changing from ADD to SUB and vice versa }
+            if RegInUsedRegs(NR_DEFAULTFLAGS, UsedRegs) and
+              GetNextInstruction(p, hp1) then
+              begin
+                TransferUsedRegs(TmpUsedRegs);
+                TmpUsedRegs[R_SPECIALREGISTER].Update(tai(hp1.Next), True);
+
+                hp2 := hp1;
+
+                { Scan ahead to check if everything's safe }
+                while Assigned(hp1) and RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) do
+                  begin
+                    if (hp1.typ <> ait_instruction) then
+                      { Probably unsafe since the flags are still in use }
+                      Exit;
+
+                    if MatchInstruction(hp1, A_CALL, A_JMP, A_RET, []) then
+                      { Stop searching at a call, unconditional jump or return }
+                      Break;
+
+                    if (taicpu(hp1).condition = C_None) and RegInInstruction(NR_DEFAULTFLAGS, hp1) then
+                      { Instruction depends on FLAGS but has no condition that can be inverted; exit rather than just ending the scan }
+                      Exit;
+
+                    UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
+                    TmpUsedRegs[R_SPECIALREGISTER].Update(tai(hp1.Next), True);
+
+                    { Move to the next instruction }
+                    GetNextInstruction(hp1, hp1);
+                  end;
+
+                while Assigned(hp2) and (hp2 <> hp1) do
+                  begin
+                    NewCond := C_None;
+
+                    case taicpu(hp2).condition of
+                      C_A, C_NBE:
+                        NewCond := C_BE;
+                      C_B, C_C, C_NAE:
+                        NewCond := C_AE;
+                      C_AE, C_NB, C_NC:
+                        NewCond := C_B;
+                      C_BE, C_NA:
+                        NewCond := C_A;
+                      else
+                        { No change needed };
+                    end;
+
+                    if NewCond <> C_None then
+                      begin
+                        DebugMsg(SPeepholeOptimization + 'Condition changed from ' + cond2str[taicpu(hp2).condition] + ' to ' + cond2str[NewCond] +
+                          ' to accommodate ' + debug_op2str(taicpu(p).opcode) + ' -> ' + debug_op2str(opposite) + ' above', hp2);
+
+                        taicpu(hp2).condition := NewCond;
+                      end;
+
+                    { Move to the next instruction }
+                    GetNextInstruction(hp2, hp2);
+                  end;
+
+                if (hp2 <> hp1) then
+                  InternalError(2021111501);
+              end;
+
             DebugMsg(SPeepholeOptimization + debug_op2str(taicpu(p).opcode) + ' 128,' + debug_operstr(taicpu(p).oper[1]^) + ' changed to ' +
               debug_op2str(opposite) + ' -128,' + debug_operstr(taicpu(p).oper[1]^) + ' to reduce instruction size', p);
 

+ 1 - 0
compiler/x86/cgx86.pas

@@ -3711,6 +3711,7 @@ unit cgx86;
          ai.is_jmp:=true;
          list.concat(ai);
 
+         a_reg_dealloc(list, NR_DEFAULTFLAGS);
          a_call_name(list,'FPC_OVERFLOW',false);
          a_label(list,hl);
       end;

+ 33 - 2
compiler/x86/nx86add.pas

@@ -198,11 +198,18 @@ unit nx86add;
                         r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                         hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                         emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
+
+                        if mboverflow and overflowcheck then
+                          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
                         emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                      end
                    else
                      begin
-                        emit_op_right_left(op,opsize);
+                       if mboverflow and overflowcheck then
+                         cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
+                       emit_op_right_left(op,opsize);
                      end;
                  end;
             end;
@@ -221,6 +228,7 @@ unit nx86add;
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
               else
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
+              cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
               cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
               cg.a_label(current_asmdata.CurrAsmList,hl4);
             end;
@@ -1563,6 +1571,9 @@ unit nx86add;
            set_result_location_reg;
            if nodetype<>subn then
             begin
+              if checkoverflow then
+                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
               if (right.location.loc<>LOC_CONSTANT) then
                 hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                    left.location.register,right.location.register,
@@ -1578,6 +1589,9 @@ unit nx86add;
                 swapleftright;
               if left.location.loc<>LOC_CONSTANT then
                 begin
+                  if checkoverflow then
+                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
                   if right.location.loc<>LOC_CONSTANT then
                     hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                         right.location.register,left.location.register,
@@ -1592,6 +1606,10 @@ unit nx86add;
                   tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                   hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                     left.location.value,tmpreg);
+
+                  if checkoverflow then
+                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
                   hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                     right.location.register,tmpreg,location.register,checkoverflow,ovloc);
                 end;
@@ -1614,6 +1632,9 @@ unit nx86add;
            { at this point, left.location.loc should be LOC_REGISTER }
            if right.location.loc=LOC_REGISTER then
              begin
+               if checkoverflow then
+                 cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
                { when swapped another result register }
                if (nodetype=subn) and (nf_swapped in flags) then
                  begin
@@ -1631,15 +1652,25 @@ unit nx86add;
                { right.location<>LOC_REGISTER }
                if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
+
                if (nodetype=subn) and (nf_swapped in flags) then
                  begin
                    tmpreg:=left.location.register;
                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
+
+                   if checkoverflow then
+                     cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
                    cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
                  end
                else
-                 cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
+                 begin
+                   if checkoverflow then
+                     cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
+
+                   cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
+                 end;
                location_freetemp(current_asmdata.CurrAsmList,right.location);
              end;
 

+ 5 - 0
compiler/x86_64/nx64add.pas

@@ -122,6 +122,10 @@ interface
         hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,resultdef,right.location,rega);
         { Also allocate RDX, since it is also modified by a mul (JM). }
         cg.getcpuregister(current_asmdata.CurrAsmList,regd);
+
+        if needoverflowcheck then
+          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
         if use_ref then
           emit_ref(A_MUL,opsize,ref)
         else
@@ -130,6 +134,7 @@ interface
          begin
            current_asmdata.getjumplabel(hl4);
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+           cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,hl4);
          end;