Przeglądaj źródła

* better heuristics to decide when a mul by a constant shall be replaced by shift/add/sub sequences

git-svn-id: trunk@22300 -
florian 13 lat temu
rodzic
commit
59012afe26
2 zmienionych plików z 52 dodań i 3 usunięć
  1. 27 3
      compiler/arm/cgcpu.pas
  2. 25 0
      tests/test/tmul1.pp

+ 27 - 3
compiler/arm/cgcpu.pas

@@ -665,7 +665,9 @@ unit cgcpu;
         bitsset : byte;
         bitsset : byte;
         negative : boolean;
         negative : boolean;
         first : boolean;
         first : boolean;
+        b,
         cycles : byte;
         cycles : byte;
+        maxeffort : byte;
       begin
       begin
         result:=true;
         result:=true;
         cycles:=0;
         cycles:=0;
@@ -677,6 +679,28 @@ unit cgcpu;
         multiplier:=dword(abs(a));
         multiplier:=dword(abs(a));
         bitsset:=popcnt(multiplier and $fffffffe);
         bitsset:=popcnt(multiplier and $fffffffe);
 
 
+        { heuristics to estimate how many instructions are reasonable to replace the mul;
+          this is currently based on XScale timings }
+        { in the simplest case, we need a mov to load the constant and a mul to carry out the
+          actual multiplication, this requires min. 1+4 cycles
+
+          because the first shift imm. might cause a stall and because we need more instructions
+          when replacing the mul we generate max. 3 instructions to replace this mul }
+        maxeffort:=3;
+
+        { if the constant is not a shifter op, we need either some mov/mvn/bic/orr sequence or
+          an ldr, so generating one more operation to replace this is beneficial }
+        if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
+          inc(maxeffort);
+
+        { if the upper 5 bits are all set or clear, mul is one cycle faster }
+        if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
+          dec(maxeffort);
+
+        { if the upper 17 bits are all set or clear, mul is another cycle faster }
+        if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
+          dec(maxeffort);
+
         { most simple cases }
         { most simple cases }
         if a=1 then
         if a=1 then
           a_load_reg_reg(list,OS_32,OS_32,src,dst)
           a_load_reg_reg(list,OS_32,OS_32,src,dst)
@@ -690,8 +714,8 @@ unit cgcpu;
           however, the least significant bit is for free, it can be hidden in the initial
           however, the least significant bit is for free, it can be hidden in the initial
           instruction
           instruction
         }
         }
-        else if (bitsset+cycles<=3) and
-          (bitsset>popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
+        else if (bitsset+cycles<=maxeffort) and
+          (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
           begin
           begin
             first:=true;
             first:=true;
             while multiplier<>0 do
             while multiplier<>0 do
@@ -714,7 +738,7 @@ unit cgcpu;
               list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
               list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
           end
           end
         { subtract from the next greater power of two? }
         { subtract from the next greater power of two? }
-        else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles<=3 then
+        else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
           begin
           begin
             first:=true;
             first:=true;
             while multiplier<>0 do
             while multiplier<>0 do

+ 25 - 0
tests/test/tmul1.pp

@@ -2,6 +2,31 @@ var
   i : longint;
   i : longint;
 
 
 begin
 begin
+  i:=5;
+  i:=i*$80010;
+  if i<>2621520 then
+    halt(1);
+
+  i:=5;
+  i:=i*$18000010;
+  if i<>2013266000 then
+    halt(1);
+
+  i:=5;
+  i:=i*$18ffffef;
+  if i<>2097151915 then
+    halt(1);
+
+  i:=5;
+  i:=i*$7ffef;
+  if i<>2621355 then
+    halt(1);
+
+  i:=5;
+  i:=i*$6fffffcf;
+  if i<>805306123 then
+    halt(1);
+
   i:=5;
   i:=5;
   i:=i*10;
   i:=i*10;
   i:=i*62;
   i:=i*62;