Browse Source

+ optimizations in tcg8086.g_flags2reg for the case where the carry flag or the
inverted carry flag is converted to int, using shorter branchless code and
the adc/rcl/sbb instructions

git-svn-id: trunk@32106 -

nickysn 9 years ago
parent
commit
09218c88aa
1 changed files with 109 additions and 41 deletions
  1. 109 41
      compiler/i8086/cgcpu.pas

+ 109 - 41
compiler/i8086/cgcpu.pas

@@ -1660,55 +1660,123 @@ unit cgcpu;
         hl_skip: TAsmLabel;
         hl_skip: TAsmLabel;
         invf: TResFlags;
         invf: TResFlags;
         tmpsize: TCgSize;
         tmpsize: TCgSize;
+        tmpopsize: topsize;
       begin
       begin
-        invf := f;
-        inverse_flags(invf);
-
-        case size of
-          OS_8,OS_S8:
-            begin
-              tmpsize:=OS_8;
-              list.concat(Taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+        { optimized case for the carry flag, using ADC/RCL }
+        if f in [F_C,F_B,F_FB] then
+          begin
+            case size of
+              OS_8,OS_S8:
+                begin
+                  tmpsize:=OS_8;
+                  tmpopsize:=S_B;
+                end;
+              OS_16,OS_S16,OS_32,OS_S32:
+                begin
+                  tmpsize:=OS_16;
+                  tmpopsize:=S_W;
+                end;
+              else
+                internalerror(2013123101);
             end;
             end;
-          OS_16,OS_S16,OS_32,OS_S32:
-            begin
-              tmpsize:=OS_16;
-              list.concat(Taicpu.op_const_reg(A_MOV, S_W, 0, reg));
+            list.concat(Taicpu.op_const_reg(A_MOV, tmpopsize, 0, reg));
+            if f=F_FB then
+              begin
+                current_asmdata.getjumplabel(hl_skip);
+                ai:=Taicpu.op_sym(A_Jcc,S_NO,hl_skip);
+                ai.SetCondition(C_P);
+                ai.is_jmp:=true;
+                list.concat(ai);
+              end;
+            { RCL is faster than ADC on 8086/8088. On the 80286, it is
+              equally fast and it also has the same size. In these cases,
+              we still prefer it over ADC, because it's a better choice in
+              case the register is spilled. }
+            if (cs_opt_size in current_settings.optimizerswitches) or
+               (current_settings.optimizecputype<=cpu_286) then
+              list.concat(Taicpu.op_const_reg(A_RCL, tmpopsize, 1, reg))
+            else
+              { ADC is much faster on the 386. }
+              list.concat(Taicpu.op_reg_reg(A_ADC, tmpopsize, reg, reg));
+            if f=F_FB then
+              a_label(list,hl_skip);
+            a_load_reg_reg(list,tmpsize,size,reg,reg);
+          end
+        { optimized case for the inverted carry flag, using SBB }
+        else if f in [F_NC,F_AE,F_FAE] then
+          begin
+            case size of
+              OS_8,OS_S8:
+                begin
+                  tmpsize:=OS_8;
+                  list.concat(Taicpu.op_const_reg(A_MOV, S_B, 1, reg));
+                  list.concat(Taicpu.op_const_reg(A_SBB, S_B, 0, reg));
+                end;
+              OS_16,OS_S16,OS_32,OS_S32:
+                begin
+                  tmpsize:=OS_16;
+                  list.concat(Taicpu.op_const_reg(A_MOV, S_W, 1, reg));
+                  list.concat(Taicpu.op_const_reg(A_SBB, S_W, 0, reg));
+                end;
+              else
+                internalerror(2013123101);
             end;
             end;
-          else
-            internalerror(2013123101);
-        end;
+            a_load_reg_reg(list,tmpsize,size,reg,reg);
+          end
+        else
+          begin
+            invf := f;
+            inverse_flags(invf);
 
 
-        current_asmdata.getjumplabel(hl_skip);
-        { we can't just forward invf to a_jmp_flags for FA,FAE,FB and FBE, because
-          in the case of NaNs:
-           not(F_FA )<>F_FBE
-           not(F_FAE)<>F_FB
-           not(F_FB )<>F_FAE
-           not(F_FBE)<>F_FA
-        }
-        case f of
-          F_FA,F_FAE:
-            invf:=FPUFlags2Flags[invf];
-          F_FB,F_FBE:
-            begin
-              ai:=Taicpu.op_sym(A_Jcc,S_NO,hl_skip);
-              ai.SetCondition(C_P);
-              ai.is_jmp:=true;
-              list.concat(ai);
-              invf:=FPUFlags2Flags[invf];
+            case size of
+              OS_8,OS_S8:
+                begin
+                  tmpsize:=OS_8;
+                  list.concat(Taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+                end;
+              OS_16,OS_S16,OS_32,OS_S32:
+                begin
+                  tmpsize:=OS_16;
+                  list.concat(Taicpu.op_const_reg(A_MOV, S_W, 0, reg));
+                end;
+              else
+                internalerror(2013123101);
             end;
             end;
-        end;
-        a_jmp_flags(list,invf,hl_skip);
 
 
-        { 16-bit INC is shorter than 8-bit }
-        hreg16:=makeregsize(list,reg,OS_16);
-        list.concat(Taicpu.op_reg(A_INC, S_W, hreg16));
-        makeregsize(list,hreg16,tmpsize);
+            current_asmdata.getjumplabel(hl_skip);
+            { we can't just forward invf to a_jmp_flags for FA,FAE,FB and FBE, because
+              in the case of NaNs:
+               not(F_FA )<>F_FBE
+               not(F_FAE)<>F_FB
+               not(F_FB )<>F_FAE
+               not(F_FBE)<>F_FA
+            }
+            case f of
+              F_FA:
+                invf:=FPUFlags2Flags[invf];
+              F_FAE,F_FB:
+                { F_FAE and F_FB are handled above, using ADC/RCL/SBB }
+                internalerror(2015102101);
+              F_FBE:
+                begin
+                  ai:=Taicpu.op_sym(A_Jcc,S_NO,hl_skip);
+                  ai.SetCondition(C_P);
+                  ai.is_jmp:=true;
+                  list.concat(ai);
+                  invf:=FPUFlags2Flags[invf];
+                end;
+            end;
+            a_jmp_flags(list,invf,hl_skip);
 
 
-        a_label(list,hl_skip);
+            { 16-bit INC is shorter than 8-bit }
+            hreg16:=makeregsize(list,reg,OS_16);
+            list.concat(Taicpu.op_reg(A_INC, S_W, hreg16));
+            makeregsize(list,hreg16,tmpsize);
 
 
-        a_load_reg_reg(list,tmpsize,size,reg,reg);
+            a_label(list,hl_skip);
+
+            a_load_reg_reg(list,tmpsize,size,reg,reg);
+          end;
       end;
       end;