Browse Source

* do an even better optimization on i8086 for 64-bit shl/shr by a constant in
the range 32..47: swap the registers in the result, saving 2 mov instructions

git-svn-id: trunk@32062 -

nickysn 9 years ago
parent
commit
7e6ba9db2a
1 changed file with 11 additions and 15 deletions
  1. 11 15
      compiler/i8086/n8086mat.pas

+ 11 - 15
compiler/i8086/n8086mat.pas

@@ -397,6 +397,8 @@ implementation
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
         hreg64hi:=left.location.register64.reghi;
         hreg64hi:=left.location.register64.reghi;
         hreg64lo:=left.location.register64.reglo;
         hreg64lo:=left.location.register64.reglo;
+        location.register64.reglo:=hreg64lo;
+        location.register64.reghi:=hreg64hi;
 
 
         v:=0;
         v:=0;
         if right.nodetype=ordconstn then
         if right.nodetype=ordconstn then
@@ -446,25 +448,22 @@ implementation
               end;
               end;
           end
           end
         { shifting by 32..47 }
         { shifting by 32..47 }
-        else if (right.nodetype=ordconstn) and (v>=32) and (v<=47) and
-                ((not (cs_opt_size in current_settings.optimizerswitches)) or (v<=33)) then
+        else if (right.nodetype=ordconstn) and (v>=32) and (v<=47) then
           begin
           begin
             if nodetype=shln then
             if nodetype=shln then
               begin
               begin
-                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,hreg64lo,hreg64hi);
-                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,GetNextReg(hreg64lo),GetNextReg(hreg64hi));
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64lo,hreg64lo);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64lo),GetNextReg(hreg64lo));
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_32,v-32,hreg64hi);
+                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64hi,hreg64hi);
+                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64hi),GetNextReg(hreg64hi));
+                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_32,v-32,hreg64lo);
               end
               end
             else
             else
               begin
               begin
-                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,hreg64hi,hreg64lo);
-                cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,GetNextReg(hreg64hi),GetNextReg(hreg64lo));
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64hi,hreg64hi);
-                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64hi),GetNextReg(hreg64hi));
-                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_32,v-32,hreg64lo);
+                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,hreg64lo,hreg64lo);
+                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_16,GetNextReg(hreg64lo),GetNextReg(hreg64lo));
+                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_32,v-32,hreg64hi);
               end;
               end;
+            location.register64.reghi:=hreg64lo;
+            location.register64.reglo:=hreg64hi;
           end
           end
         else
         else
           begin
           begin
@@ -534,9 +533,6 @@ implementation
 
 
             cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX);
             cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX);
           end;
           end;
-
-        location.register64.reglo:=hreg64lo;
-        location.register64.reghi:=hreg64hi;
       end;
       end;