소스 검색

+ strength reduction for array accesses inside for loops
* export get_mul_size so we can use it for strength reduction
* ensure loop counter is marked as being written
* typos fixed

git-svn-id: trunk@11825 -

florian 17년 전
부모
커밋
db196d76c2
5개의 파일이 변경되었습니다: 87줄 추가, 6줄 삭제
  1. 1 1
      compiler/ncgmem.pas
  2. 2 0
      compiler/nflw.pas
  3. 1 1
      compiler/nmem.pas
  4. 82 3
      compiler/optloop.pas
  5. 1 1
      compiler/optutils.pas

+ 1 - 1
compiler/ncgmem.pas

@@ -56,10 +56,10 @@ interface
        end;
 
        tcgvecnode = class(tvecnode)
+         function get_mul_size : aint;
        private
          procedure rangecheck_array;
        protected
-         function get_mul_size : aint;
          {# This routine is used to calculate the address of the reference.
             On entry reg contains the index in the array,
            and l contains the size of each element in the array.

+ 2 - 0
compiler/nflw.pas

@@ -791,6 +791,8 @@ implementation
          typecheckpass(right);
          typecheckpass(t1);
 
+         set_varstate(left,vs_written,[]);
+
          { loop unrolling }
          if cs_opt_loopunroll in current_settings.optimizerswitches then
            begin

+ 1 - 1
compiler/nmem.pas

@@ -449,7 +449,7 @@ implementation
               CGMessage(type_e_variable_id_expected);
           end;
 
-        if (mark_read_written) then
+        if mark_read_written then
           begin
             { This is actually only "read", but treat it nevertheless as  }
             { modified due to the possible use of pointers                }

+ 82 - 3
compiler/optloop.pas

@@ -34,12 +34,14 @@ unit optloop;
   implementation
 
     uses
-      cclasses,
+      cutils,cclasses,
       globtype,globals,constexp,
       symdef,symsym,
+      defutil,
       cpuinfo,
       nutils,
-      nadd,nbas,nflw,ncon,ninl,ncal,nld,
+      nadd,nbas,nflw,ncon,ninl,ncal,nld,nmem,ncnv,
+      ncgmem,
       pass_1,
       optbase,optutils,
       procinfo;
@@ -205,6 +207,13 @@ unit optloop;
                 { no definition in the loop? }
                   not(DFASetIn(loop.optinfo^.defsum,expr.optinfo^.index));
             end;
+          vecn:
+            begin
+              result:=((tvecnode(expr).left.nodetype=loadn) or is_loop_invariant(loop,tvecnode(expr).left)) and
+                is_loop_invariant(loop,tvecnode(expr).right);
+            end;
+          typeconvn:
+            result:=is_loop_invariant(loop,ttypeconvnode(expr).left);
         end;
       end;
 
@@ -223,7 +232,15 @@ unit optloop;
               if tnode(inductionexprs[i]).isequal(n) then
                 begin
                   n.free;
-                  n:=ctemprefnode.create(ttempcreatenode(templist[i]));
+                  case n.nodetype of
+                    muln:
+                      n:=ctemprefnode.create(ttempcreatenode(templist[i]));
+                    vecn:
+                      n:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(
+                        ttempcreatenode(templist[i]))),n.resultdef);
+                    else
+                      internalerror(200809211);
+                  end;
                   result:=true;
                   exit;
                 end;
@@ -243,6 +260,7 @@ unit optloop;
 
       var
         tempnode : ttempcreatenode;
+        dummy : longint;
       begin
         result:=fen_false;
         case n.nodetype of
@@ -289,6 +307,7 @@ unit optloop;
                         addstatement(calccodestatements,
                           geninlinenode(in_inc_x,false,
                           ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(taddnode(n).right.getcopy,nil))));
+
                       addstatement(initcodestatements,tempnode);
                       addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),
                         caddnode.create(muln,
@@ -308,6 +327,66 @@ unit optloop;
                   result:=fen_norecurse_false;
                 end;
             end;
+          vecn:
+            begin
+              { is the index the counter variable? }
+              if not(is_special_array(tvecnode(n).left.resultdef)) and
+                (tvecnode(n).right.isequal(tfornode(arg).left) or
+                 { fpc usually creates a type cast to access an array }
+                 ((tvecnode(n).right.nodetype=typeconvn) and
+                  ttypeconvnode(tvecnode(n).right).left.isequal(tfornode(arg).left)
+                 )
+                ) and
+                { plain read of the loop variable? }
+                not(nf_write in tvecnode(n).right.flags) and
+                not(nf_modify in tvecnode(n).right.flags) and
+                { direct array access? }
+                ((tvecnode(n).left.nodetype=loadn) or
+                { ... or loop invariant expression? }
+                is_loop_invariant(tfornode(arg),tvecnode(n).left)) and
+                { removing the multiplication is only worth the
+                  effort if it's not a simple shift }
+                not(ispowerof2(tcgvecnode(n).get_mul_size,dummy)) then
+                begin
+                  changedforloop:=true;
+                  { did we use the same expression before already? }
+                  if not(findpreviousstrengthreduction) then
+                    begin
+                      tempnode:=ctempcreatenode.create(voidpointertype,voidpointertype.size,tt_persistent,true);
+
+                      templist.Add(tempnode);
+                      inductionexprs.Add(n);
+                      CreateNodes;
+
+                      if lnf_backward in tfornode(arg).loopflags then
+                        addstatement(calccodestatements,
+                          geninlinenode(in_dec_x,false,
+                          ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
+                          cordconstnode.create(tcgvecnode(n).get_mul_size,tfornode(arg).right.resultdef,false),nil))))
+                      else
+                        addstatement(calccodestatements,
+                          geninlinenode(in_inc_x,false,
+                          ccallparanode.create(ctemprefnode.create(tempnode),ccallparanode.create(
+                          cordconstnode.create(tcgvecnode(n).get_mul_size,tfornode(arg).right.resultdef,false),nil))));
+
+                      addstatement(initcodestatements,tempnode);
+                      addstatement(initcodestatements,cassignmentnode.create(ctemprefnode.create(tempnode),
+                        caddrnode.create(
+                          cvecnode.create(tvecnode(n).left.getcopy,tfornode(arg).right.getcopy)
+                        )
+                      ));
+
+                      { finally replace the node by a temp. ref }
+                      n:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(tempnode)),n.resultdef);
+
+                      { ... and add a temp. release node }
+                      addstatement(deletecodestatements,ctempdeletenode.create(tempnode));
+                    end;
+                  { set types }
+                  do_firstpass(n);
+                  result:=fen_norecurse_false;
+                end;
+            end;
         end;
       end;
 

+ 1 - 1
compiler/optutils.pas

@@ -211,7 +211,7 @@ unit optutils;
                 Breakstack.Add(succ);
                 Continuestack.Add(p);
                 result:=p;
-                { the successor of the last node of the for body is the while node itself }
+                { the successor of the last node of the while body is the while node itself }
                 DoSet(twhilerepeatnode(p).right,p);
                 p.successor:=succ;
                 Breakstack.Delete(Breakstack.Count-1);