Ver código fonte

+ i386: generate optimized code for 64-bit arithmetic shifts by constant amount. Shifts by 63 and by less than 32 take just two instructions, shifts by 32..62 bits are done with 3 instructions.

git-svn-id: trunk@25880 -
sergei 11 anos atrás
pai
commit
66e82f1655
1 arquivos alterados com 86 adições e 2 exclusões
  1. 86 2
      compiler/i386/n386inl.pas

+ 86 - 2
compiler/i386/n386inl.pas

@@ -26,16 +26,100 @@ unit n386inl;
 interface
 
     uses
-       nx86inl;
+       node,nx86inl;
 
     type
        ti386inlinenode = class(tx86inlinenode)
+       public
+         function first_sar: tnode; override;
+         procedure second_rox_sar; override;
        end;
 
 implementation
 
   uses
-    ninl;
+    globtype,globals,
+    defutil,
+    aasmbase,aasmdata,
+    cgbase,pass_2,
+    cpuinfo,cpubase,
+    cga,cgutils,cgx86,cgobj,hlcgobj,
+    ninl,ncon,ncal;
+
+
+  function ti386inlinenode.first_sar: tnode;
+    begin
+      if is_64bitint(resultdef) and (
+        (inlinenumber=in_sar_x) or (
+          (inlinenumber=in_sar_x_y) and
+          (tcallparanode(left).left.nodetype=ordconstn)
+      )) then
+        begin
+          result:=nil;
+          expectloc:=LOC_REGISTER;
+        end
+      else
+        result:=inherited first_sar;
+    end;
+
+
+  procedure ti386inlinenode.second_rox_sar;
+    var
+      op1: tnode;
+      hreg64hi,hreg64lo: tregister;
+      v: aint;
+    begin
+      if is_64bitint(resultdef) and (
+        (inlinenumber=in_sar_x) or (
+          (inlinenumber=in_sar_x_y) and
+          (tcallparanode(left).left.nodetype=ordconstn)
+      )) then
+        begin
+          { x sar constant }
+          if (left.nodetype=callparan) and
+             assigned(tcallparanode(left).right) then
+            begin
+              op1:=tcallparanode(tcallparanode(left).right).left;
+              secondpass(tcallparanode(left).left);
+              v:=Tordconstnode(tcallparanode(left).left).value.svalue and 63;
+            end
+          else
+            begin
+              op1:=left;
+              v:=1;
+            end;
+          secondpass(op1);
+
+          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+
+          { load left operator in a register }
+          hlcg.location_force_reg(current_asmdata.CurrAsmList,op1.location,op1.resultdef,resultdef,false);
+          hreg64hi:=op1.location.register64.reghi;
+          hreg64lo:=op1.location.register64.reglo;
+
+          if (v=63) then
+            begin
+              emit_const_reg(A_SAR,S_L,31,hreg64hi);
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,hreg64hi,hreg64lo);
+            end
+          else if (v>31) then
+            begin
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,hreg64hi,hreg64lo);
+              emit_const_reg(A_SAR,S_L,31,hreg64hi);
+              emit_const_reg(A_SAR,S_L,v and 31,hreg64lo);
+            end
+          else
+            begin
+              emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
+              emit_const_reg(A_SAR,S_L,v and 31,hreg64hi);
+            end;
+          location.register64.reghi:=hreg64hi;
+          location.register64.reglo:=hreg64lo;
+        end
+      else
+        inherited second_rox_sar;
+    end;
+
 
 begin
    cinlinenode:=ti386inlinenode;