Преглед на файлове

+ optimized multiplication by constants with "symmetric" bit patterns on ARM

git-svn-id: trunk@49199 -
florian преди 4 години
родител
ревизия
695665c393
променени са 2 файла, в които са добавени 40 реда и са изтрити 2 реда
  1. 36 2
      compiler/arm/cgcpu.pas
  2. 4 0
      tests/test/cg/taddcard.pp

+ 36 - 2
compiler/arm/cgcpu.pas

@@ -787,10 +787,11 @@ unit cgcpu;
         shifterop : tshifterop;
         bitsset : byte;
         negative : boolean;
-        first : boolean;
+        first, doshiftadd: boolean;
         b,
         cycles : byte;
         maxeffort : byte;
+        leftmostbit,i,shiftvalue: DWord;
       begin
         result:=true;
         cycles:=0;
@@ -800,7 +801,6 @@ unit cgcpu;
         if negative then
           inc(cycles);
         multiplier:=dword(abs(a));
-        bitsset:=popcnt(multiplier and $fffffffe);
 
         { heuristics to estimate how many instructions are reasonable to replace the mul,
           this is currently based on XScale timings }
@@ -824,6 +824,30 @@ unit cgcpu;
         if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
           dec(maxeffort);
 
+        { "symmetric" bit pattern like $10101010, i.e. the bits above some shift
+          position equal the bits below it, so res:=a*$10101010 can be simplified into
+
+          temp:=a*$1010
+          res:=temp+(temp shl 16)
+        }
+        doshiftadd:=false;
+        leftmostbit:=BsrDWord(multiplier);
+        shiftvalue:=0;
+        if (maxeffort>1) and (leftmostbit>2) then
+          begin
+            for i:=2 to 31 do
+              if (multiplier shr i)=(multiplier and ($ffffffff shr (32-i))) then
+                begin
+                  doshiftadd:=true;
+                  shiftvalue:=i;
+                  dec(maxeffort);
+                  multiplier:=multiplier shr shiftvalue;
+                  break;
+                end;
+          end;
+
+        bitsset:=popcnt(multiplier and $fffffffe);
+
         { most simple cases }
         if a=1 then
           a_load_reg_reg(list,OS_32,OS_32,src,dst)
@@ -857,6 +881,11 @@ unit cgcpu;
                 first:=false;
                 dec(multiplier,1 shl shifterop.shiftimm);
               end;
+            if doshiftadd then
+              begin
+                shifterop.shiftimm:=shiftvalue;
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
+              end;
             if negative then
               list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
           end
@@ -889,6 +918,11 @@ unit cgcpu;
                     end;
                 first:=false;
               end;
+            if doshiftadd then
+              begin
+                shifterop.shiftimm:=shiftvalue;
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,shifterop));
+              end;
             if negative then
               list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
           end

+ 4 - 0
tests/test/cg/taddcard.pp

@@ -94,6 +94,10 @@ begin
  i:=i * 16;
  if i <> 160 then
     result := false;
+ i:=$10;
+ i:=i * $100010;
+ if i <> $1000100 then
+    result := false;
  j:=10000;
  i:=10000;
  i:=i * j;