| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 | {    Common subexpression elimination on base blocks    Copyright (c) 2005 by Florian Klaempfl    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ****************************************************************************}unit optcse;{$i fpcdefs.inc}{ $define csedebug}{ $define csestats}  interface    uses      node;    {      the function  creates non optimal code so far:      - call para nodes are cse barriers because they can be reordered and thus the        temp. creation can be done too late      - cse's in chained expressions are not recognized: the common subexpression        in (a1 and b and c) vs. (a2 and b and c) is not recognized because there is no common        subtree b and c      - the cse knows nothing about register pressure. In case of high register pressure, cse might        have a negative impact      - assignment nodes are currently cse borders: things like a[i,j]:=a[i,j]+1; are not improved      - the list of cseinvariant node types and inline numbers is not complete yet      Further, it could be done probably in a faster way though the complexity can't probably not reduced    }    function do_optcse(var rootnode : tnode) : tnode;  implementation    uses      globtype,      cclasses,      verbose,      nutils,      procinfo,      nbas,nld,ninl,ncal,ncnv,nadd,      pass_1,      symconst,symtype,symdef,symsym,      defutil,      optbase;    const      cseinvariant : set of tnodetype = [addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,        derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,        inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,        isn,asn,starstarn,nothingn,temprefn,loadparentfpn {,callparan}];    function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;      begin        if (n.nodetype in cseinvariant) or          ((n.nodetype=inlinen) and           (tinlinenode(n).inlinenumber in [in_assigned_x])          ) or          ((n.nodetype=loadn) and            not((tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) and                (vo_volatile in tabstractvarsym(tloadnode(n).symtableentry).varoptions))          ) then          result:=fen_true        else          begin            pboolean(arg)^:=false;            result:=fen_norecurse_true;          end;      end;    type      tlists = record        nodelist : tfplist;        locationlist : tfplist;        equalto : tfplist;        refs : tfplist;        avail : TDFASet;      end;      plists = ^tlists;    { collectnodes needs the address of itself to call foreachnodestatic,      so we need a wrapper because @<func> inside <func doesn't work }    function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;forward;    function collectnodes2(var n:tnode; arg: pointer) : foreachnoderesult;      begin        result:=collectnodes(n,arg);      end;    function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;      var        i,j : longint;      begin        result:=fen_false;        { don't add the tree below an untyped const parameter: there is          no information available that this kind of tree actually needs          to be addresable, this could be improved }        if ((n.nodetype=callparan) and          (tcallparanode(n).left.resultdef.typ=formaldef) and          (tcallparanode(n).parasym.varspez=vs_const)) then          begin            result:=fen_norecurse_false;            exit;          end;        { so far, we can handle only nodes being read }        if (n.flags*[nf_write,nf_modify]=[]) and          { node possible to add? }          assigned(n.resultdef) and          (tstoreddef(n.resultdef).is_intregable or tstoreddef(n.resultdef).is_fpuregable) and          { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }          not(n.resultdef.typ in [arraydef,recorddef]) and          { same for voiddef }          not(is_void(n.resultdef)) and          { adding tempref nodes is worthless but their complexity is probably <= 1 anyways }          not(n.nodetype in [temprefn]) and          { node worth to add?            We consider almost every node because even loading a variables from            a register instead of memory is more beneficial. This behaviour should            not increase register pressure because if a variable is already            in a register, the reg. allocator can merge the nodes. If a variable            is loaded from memory, loading this variable and spilling another register            should not add a speed penalty.          }          {            load nodes are not considered if they load para or local symbols from the            current stack frame, those are in registers anyways if possible          }          (not(n.nodetype=loadn) or           not(tloadnode(n).symtableentry.typ in [paravarsym,localvarsym]) or           (tloadnode(n).symtable.symtablelevel<>current_procinfo.procdef.parast.symtablelevel)          ) and          {            Const nodes however are only considered if their complexity is >1            This might be the case for the risc architectures if they need            more than one instruction to load this particular value          }          (not(is_constnode(n)) or (node_complexity(n)>1)) then          begin            plists(arg)^.nodelist.Add(n);            plists(arg)^.locationlist.Add(@n);            plists(arg)^.refs.Add(nil);            plists(arg)^.equalto.Add(pointer(-1));            DFASetInclude(plists(arg)^.avail,plists(arg)^.nodelist.count-1);            for i:=0 to plists(arg)^.nodelist.count-2 do              begin                if tnode(plists(arg)^.nodelist[i]).isequal(n) and DFASetIn(plists(arg)^.avail,i) then                  begin                    { use always the first occurence }                    if plists(arg)^.equalto[i]<>pointer(-1) then                      plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]                    else                      plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(ptrint(i));                    plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;                    break;                  end;              end;            { boolean and/or require a special handling: after evaluating the and/or node,              the expressions of the right side might not be available due to short boolean              evaluation, so after handling the right side, mark those expressions              as unavailable }            if (n.nodetype in [orn,andn]) and is_boolean(taddnode(n).left.resultdef) then              begin                foreachnodestatic(pm_postprocess,taddnode(n).left,@collectnodes2,arg);                j:=plists(arg)^.nodelist.count;                foreachnodestatic(pm_postprocess,taddnode(n).right,@collectnodes2,arg);                for i:=j to plists(arg)^.nodelist.count-1 do                  DFASetExclude(plists(arg)^.avail,i);                result:=fen_norecurse_false;              end;          end;      end;    function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;      var        csedomain : boolean;        lists : tlists;        templist : tfplist;        i : longint;        def : tstoreddef;        nodes : tblocknode;        creates,        statements : tstatementnode;        hp : ttempcreatenode;      begin        result:=fen_false;        if n.nodetype in cseinvariant then          begin            csedomain:=true;            foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);            { found a cse domain }            if csedomain then              begin                statements:=nil;                result:=fen_norecurse_true;{$ifdef csedebug}                writeln('============ cse domain ==================');                printnode(output,n);                writeln('Complexity: ',node_complexity(n));{$endif csedebug}                lists.nodelist:=tfplist.create;                lists.locationlist:=tfplist.create;                lists.equalto:=tfplist.create;                lists.refs:=tfplist.create;                foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);                templist:=tfplist.create;                templist.count:=lists.nodelist.count;                { check all nodes if one is used more than once }                for i:=0 to lists.nodelist.count-1 do                  begin                    { current node used more than once? }                    if assigned(lists.refs[i]) then                      begin                        if not(assigned(statements)) then                          begin                            nodes:=internalstatements(statements);                            addstatement(statements,internalstatements(creates));                          end;                        def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);                        templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,                          def.is_intregable or def.is_fpuregable,tnode(lists.nodelist[i]));                        { make debugging easier and set temp. location to the original location }                        tnode(templist[i]).fileinfo:=tnode(lists.nodelist[i]).fileinfo;                        addstatement(creates,tnode(templist[i]));                        { make debugging easier and set temp. location to the original location }                        creates.fileinfo:=tnode(lists.nodelist[i]).fileinfo;                        hp:=ttempcreatenode(templist[i]);                        do_firstpass(tnode(hp));                        templist[i]:=hp;                        pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));                        { make debugging easier and set temp. location to the original location }                        pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;                        do_firstpass(pnode(lists.locationlist[i])^);{$ifdef csedebug}                        printnode(output,statements);{$endif csedebug}                      end                    { current node reference to another node? }                    else if lists.equalto[i]<>pointer(-1) then                      begin{$if defined(csedebug) or defined(csestats)}                        printnode(output,tnode(lists.nodelist[i]));                        writeln(i,'    equals   ',ptrint(lists.equalto[i]));                        printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));{$endif defined(csedebug) or defined(csestats)}                        templist[i]:=templist[ptrint(lists.equalto[i])];                        pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));                        { make debugging easier and set temp. location to the original location }                        pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;                        do_firstpass(pnode(lists.locationlist[i])^);                      end;                  end;                { clean up unused trees }                for i:=0 to lists.nodelist.count-1 do                  if lists.equalto[i]<>pointer(-1) then                    tnode(lists.nodelist[i]).free;{$ifdef csedebug}                writeln('nodes: ',lists.nodelist.count);                writeln('==========================================');{$endif csedebug}                lists.nodelist.free;                lists.locationlist.free;                lists.equalto.free;                lists.refs.free;                templist.free;                if assigned(statements) then                  begin                    { call para nodes need a special handling because                      they can be only children nodes of call nodes                      so the initialization code is inserted below the                      call para node                    }                    if n.nodetype=callparan then                      begin                        addstatement(statements,tcallparanode(n).left);                        tcallparanode(n).left:=nodes;                        do_firstpass(tcallparanode(n).left);                      end                    else                      begin                        addstatement(statements,n);                        n:=nodes;                        do_firstpass(n);                      end;{$ifdef csedebug}                    printnode(output,nodes);{$endif csedebug}                  end;              end          end;      end;    function do_optcse(var rootnode : tnode) : tnode;      begin        foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,nil);        result:=nil;      end;end.
 |