123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856 |
- {
- Common subexpression elimination on base blocks
- Copyright (c) 2005-2012 by Florian Klaempfl
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit optcse;
- {$i fpcdefs.inc}
- { $define csedebug}
- { $define csestats}
- interface
- uses
- node;
- {
- the function creates non optimal code so far:
- - call para nodes are cse barriers because they can be reordered and thus the
- temp. creation could be done too late
- - the cse knows nothing about register pressure. In case of high register pressure, cse might
- have a negative impact
- - the list of cseinvariant node types and inline numbers is not complete yet
- Further, it could be done probably in a faster way though the complexity can't probably not reduced
- }
- function do_optcse(var rootnode : tnode) : tnode;
- function do_consttovar(var rootnode : tnode) : tnode;
- implementation
- uses
- globtype,globals,
- systems,
- cutils,cclasses,
- nutils,compinnr,
- nbas,nld,ninl,ncal,nadd,nmem,ncnv,
- pass_1,
- procinfo,
- paramgr,
- cpubase,
- symconst,symdef,symsym,symtable,symtype,
- defutil,
- optbase;
- const
- cseinvariant : set of tnodetype = [addn,muln,subn,divn,slashn,modn,andn,orn,xorn,notn,vecn,
- derefn,equaln,unequaln,ltn,gtn,lten,gten,typeconvn,subscriptn,
- inn,symdifn,shrn,shln,ordconstn,realconstn,unaryminusn,pointerconstn,stringconstn,setconstn,niln,
- setelementn,{arrayconstructorn,arrayconstructorrangen,}
- isn,asn,starstarn,nothingn,temprefn,loadparentfpn {,callparan},assignn,addrn];
- function searchsubdomain(var n:tnode; arg: pointer) : foreachnoderesult;
- begin
- if (n.nodetype in cseinvariant) or
- ((n.nodetype=inlinen) and
- (tinlinenode(n).inlinenumber in [in_length_x,in_assigned_x,in_sqr_real,in_sqrt_real,in_sin_real,in_cos_real,in_abs_long,
- in_abs_real,in_exp_real,in_ln_real,in_pi_real,in_popcnt_x,in_arctan_real,in_round_real,in_trunc_real,
- { cse on fma will still not work because it would require proper handling of call nodes
- with more than one parameter }
- in_fma_single,in_fma_double,in_fma_extended,in_fma_float128,
- in_min_single,in_min_double,in_max_single,in_max_double,
- in_max_longint,in_max_dword,in_min_longint,in_min_dword,
- in_max_int64,in_max_qword,in_min_int64,in_min_qword
- ])
- ) or
- ((n.nodetype=callparan) and not(assigned(tcallparanode(n).right))) or
- ((n.nodetype=loadn) and
- not((tloadnode(n).symtableentry.typ in [staticvarsym,localvarsym,paravarsym]) and
- (vo_volatile in tabstractvarsym(tloadnode(n).symtableentry).varoptions))
- ) then
- result:=fen_true
- else
- begin
- pboolean(arg)^:=false;
- result:=fen_norecurse_true;
- end;
- end;
- type
- tlists = record
- nodelist : tfplist;
- locationlist : tfplist;
- equalto : tfplist;
- refs : tfplist;
- avail : TDFASet;
- end;
- plists = ^tlists;
- { collectnodes needs the address of itself to call foreachnodestatic,
- so we need a wrapper because @<func> inside <func doesn't work }
- function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;forward;
- function collectnodes2(var n:tnode; arg: pointer) : foreachnoderesult;
- begin
- result:=collectnodes(n,arg);
- end;
- function collectnodes(var n:tnode; arg: pointer) : foreachnoderesult;
- { when compiling a tree like
- and
- / \
- and C
- / \
- A B
- all expressions of B are available during evaluation of C. However considerung the whole expression,
- values of B and C might not be available due to short boolean evaluation.
- So recurseintobooleanchain detectes such chained and/or expressions and makes sub-expressions of B
- available during the evaluation of C
- firstleftend is later used to remove all sub expressions of B and C by storing the expression count
- in the cse table after handling A
- }
- var
- firstleftend : longint;
- procedure recurseintobooleanchain(t : tnodetype;n : tnode);
- begin
- if (tbinarynode(n).left.nodetype=t) and is_boolean(tbinarynode(n).left.resultdef) then
- recurseintobooleanchain(t,tbinarynode(n).left)
- else
- foreachnodestatic(pm_postprocess,tbinarynode(n).left,@collectnodes2,arg);
- firstleftend:=min(plists(arg)^.nodelist.count,firstleftend);
- foreachnodestatic(pm_postprocess,tbinarynode(n).right,@collectnodes2,arg);
- end;
- var
- i : longint;
- tempdef : tdef;
- begin
- result:=fen_false;
- { don't add the tree below an untyped const parameter: there is
- no information available that this kind of tree actually needs
- to be addresable, this could be improved }
- { the nodes below a type conversion node created for an absolute
- reference cannot be handled separately, because the absolute reference
- may have special requirements (no regability, must be in memory, ...)
- }
- if (((n.nodetype=callparan) and
- (tcallparanode(n).left.resultdef.typ=formaldef) and
- (tcallparanode(n).parasym.varspez=vs_const)) or
- ((n.nodetype=typeconvn) and
- (nf_absolute in n.flags))
- ) then
- begin
- result:=fen_norecurse_false;
- exit;
- end;
- if
- { node possible to add? }
- assigned(n.resultdef) and
- ((
- { regable expressions }
- (actualtargetnode(@n)^.flags*[nf_write,nf_modify,nf_address_taken]=[]) and
- (
- tstoreddef(n.resultdef).is_intregable or
- tstoreddef(n.resultdef).is_fpuregable or
- tstoreddef(n.resultdef).is_const_intregable
- ) and
- { is_int/fpuregable allows arrays and records to be in registers, cse cannot handle this }
- (
- not(n.resultdef.typ in [arraydef,recorddef]) or
- (
- (
- (n.resultdef.typ = recorddef) and
- tabstractrecordsymtable(tabstractrecorddef(n.resultdef).symtable).has_single_field(tempdef)
- ) or
- is_dynamic_array(n.resultdef) or
- (
- not(is_special_array(tstoreddef(n.resultdef))) and
- not(tstoreddef(n.resultdef).is_intregable) and
- not(tstoreddef(n.resultdef).is_fpuregable)
- )
- )
- ) and
- { same for voiddef }
- not(is_void(n.resultdef)) and
- { adding tempref and callpara nodes itself is worthless but
- their complexity is probably <= 1 anyways
- neither add setelementn nodes because the compiler sometimes depends on the fact
- that a certain node stays a setelementn, this does not hurt either because
- setelementn nodes itself generate no real code (except moving data into register) }
- not(n.nodetype in [temprefn,callparan,setelementn]) and
- { node worth to add?
- We consider almost every node because even loading a variables from
- a register instead of memory is more beneficial. This behaviour should
- not increase register pressure because if a variable is already
- in a register, the reg. allocator can merge the nodes. If a variable
- is loaded from memory, loading this variable and spilling another register
- should not add a speed penalty.
- }
- {
- load nodes are not considered if they load para or local symbols from the
- current stack frame, those are in registers anyways if possible
- }
- (not(actualtargetnode(@n)^.nodetype=loadn) or
- not(tloadnode(actualtargetnode(@n)^).symtableentry.typ in [paravarsym,localvarsym,staticvarsym]) or
- { apply cse on non-regable variables }
- ((tloadnode(actualtargetnode(@n)^).symtableentry.typ in [paravarsym,localvarsym,staticvarsym]) and
- not(tabstractvarsym(tloadnode(actualtargetnode(@n)^).symtableentry).is_regvar(true)) and
- not(vo_volatile in tabstractvarsym(tloadnode(actualtargetnode(@n)^).symtableentry).varoptions)) or
- (node_complexity(n)>1)
- ) and
- {
- Const nodes however are only considered if their complexity is >1
- This might be the case for the risc architectures if they need
- more than one instruction to load this particular value
- }
- (not(is_constnode(n)) or (node_complexity(n)>1)))
- {$if not(defined(i386)) and not(defined(i8086))}
- or
- { store reference of expression? }
- { loading the address of a global symbol takes typically more than
- one instruction on every platform except i8086/i386
- so consider in this case loading the address of the data
- }
- (((n.resultdef.typ in [arraydef,recorddef]) or is_object(n.resultdef)) and not(is_dynamic_array(n.resultdef)) and
- (n.nodetype=loadn) and
- (tloadnode(n).symtableentry.typ=staticvarsym)
- )
- {$endif not(defined(i386)) and not(defined(i8086))}
- ) then
- begin
- plists(arg)^.nodelist.Add(n);
- plists(arg)^.locationlist.Add(@n);
- plists(arg)^.refs.Add(nil);
- plists(arg)^.equalto.Add(pointer(-1));
- DFASetInclude(plists(arg)^.avail,plists(arg)^.nodelist.count-1);
- for i:=0 to plists(arg)^.nodelist.count-2 do
- begin
- if tnode(plists(arg)^.nodelist[i]).isequal(n) and DFASetIn(plists(arg)^.avail,i) then
- begin
- { use always the first occurence }
- if plists(arg)^.equalto[i]<>pointer(-1) then
- plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=plists(arg)^.equalto[i]
- else
- plists(arg)^.equalto[plists(arg)^.nodelist.count-1]:=pointer(ptrint(i));
- plists(arg)^.refs[i]:=pointer(plists(arg)^.refs[i])+1;
- { tree has been found, no need to search further,
- sub-trees have been added by the first occurence of
- the tree already }
- result:=fen_norecurse_false;
- break;
- end;
- end;
- end;
- { boolean and/or require a special handling: after evaluating the and/or node,
- the expressions of the right side might not be available due to short boolean
- evaluation, so after handling the right side, mark those expressions
- as unavailable }
- if (n.nodetype in [orn,andn]) and is_boolean(taddnode(n).left.resultdef) then
- begin
- firstleftend:=high(longint);
- recurseintobooleanchain(n.nodetype,n);
- for i:=firstleftend to plists(arg)^.nodelist.count-1 do
- DFASetExclude(plists(arg)^.avail,i);
- result:=fen_norecurse_false;
- end;
- {$ifdef cpuhighleveltarget}
- { The high level targets use the functionality from ncgnstld for
- nested accesses, and that one stores the complete location of the
- nested variable in tloadnode.left rather than only the location of
- the parent context containing it. This causes problems with the
- CSE in case the nested variable is used as an lvalue, so disable
- CSE in that case
- }
- if (n.nodetype=loadn) and assigned(tloadnode(n).left) then
- result:=fen_norecurse_false;
- {$endif}
- end;
- function searchcsedomain(var n: tnode; arg: pointer) : foreachnoderesult;
- var
- csedomain : boolean;
- lists : tlists;
- templist : tfplist;
- i : longint;
- def : tstoreddef;
- nodes : tblocknode;
- creates,
- statements : tstatementnode;
- deletetemp : ttempdeletenode;
- hp : ttempcreatenode;
- addrstored : boolean;
- hp2 : tnode;
- begin
- result:=fen_false;
- nodes:=nil;
- if (n.nodetype in cseinvariant) and
- { a setelement node is cseinvariant, but it might not be replaced by a block so
- it cannot be the root of the cse search }
- (n.nodetype<>setelementn) then
- begin
- csedomain:=true;
- foreachnodestatic(pm_postprocess,n,@searchsubdomain,@csedomain);
- if not(csedomain) then
- begin
- { try to transform the tree to get better cse domains, consider:
- + (1)
- / \
- (2) + C
- / \
- A B
- if A is not cse'able but B and C are, then the compiler cannot do cse so the tree is transformed into
- +
- / \
- A +
- / \
- B C
- Because A could be another tree of this kind, the whole process is done in a while loop
- }
- if (n.nodetype in [andn,orn,addn,muln]) and
- (n.nodetype=tbinarynode(n).left.nodetype) and
- { do is optimizations only for integers, reals (no currency!), vectors, sets or booleans }
- (is_integer(n.resultdef) or is_real(n.resultdef) or is_vector(n.resultdef) or is_set(n.resultdef) or
- is_boolean(n.resultdef)) and
- { either if fastmath is on }
- ((cs_opt_fastmath in current_settings.optimizerswitches) or
- { or for the logical operators, they cannot overflow }
- (n.nodetype in [andn,orn]) or
- { or for integers if range checking is off }
- ((is_integer(n.resultdef) and
- (n.localswitches*[cs_check_range,cs_check_overflow]=[]) and
- (tbinarynode(n).left.localswitches*[cs_check_range,cs_check_overflow]=[]))) or
- { for sets, we can do this always }
- (is_set(n.resultdef))
- ) then
- while (n.nodetype=tbinarynode(n).left.nodetype) and
- { if not both or none of the nodes is short/full boolean evaluated, we cannot do this optimization as it might change
- semantics, there are border cases where this might not apply, but so far it is not worth the effort to check }
- (not(is_boolean(n.resultdef)) or not(n.nodetype in [andn,orn]) or (doshortbooleval(n)=doshortbooleval(tbinarynode(n).left))) and
- { the resulttypes of the operands we'll swap must be equal,
- required in case of a 32x32->64 multiplication, then we
- cannot swap out one of the 32 bit operands for a 64 bit one
- }
- (tbinarynode(tbinarynode(n).left).left.resultdef=tbinarynode(n).left.resultdef) and
- (tbinarynode(n).left.resultdef=tbinarynode(n).right.resultdef) do
- begin
- csedomain:=true;
- foreachnodestatic(pm_postprocess,tbinarynode(n).right,@searchsubdomain,@csedomain);
- if csedomain then
- begin
- csedomain:=true;
- foreachnodestatic(pm_postprocess,tbinarynode(tbinarynode(n).left).right,@searchsubdomain,@csedomain);
- if csedomain then
- begin
- {
- move the full boolean evaluation of (2) to (1), if it was there (so it again applies to A and
- what follows) }
- { this cannot happen anymore, see above, so the condition is not needed anymore
- if not(doshortbooleval(tbinarynode(n).left)) and
- doshortbooleval(n) then }
- begin
- n.localswitches:=n.localswitches+(tbinarynode(n).left.localswitches*[cs_full_boolean_eval]);
- exclude(tbinarynode(n).left.localswitches,cs_full_boolean_eval);
- if (n.nodetype in [orn,andn]) then
- begin
- if (tbinarynode(n).left.nodetype in [orn,andn]) then
- taddnode(tbinarynode(n).left).addnodeflags:=taddnode(tbinarynode(n).left).addnodeflags+
- (taddnode(n).addnodeflags*[anf_short_bool]);
- exclude(taddnode(n).addnodeflags,anf_short_bool);
- end;
- end;
- hp2:=tbinarynode(tbinarynode(n).left).left;
- tbinarynode(tbinarynode(n).left).left:=tbinarynode(tbinarynode(n).left).right;
- tbinarynode(tbinarynode(n).left).right:=tbinarynode(n).right;
- tbinarynode(n).right:=tbinarynode(n).left;
- tbinarynode(n).left:=hp2;
- { the transformed tree could result in new possibilities to fold constants
- so force a firstpass on the root node }
- exclude(tbinarynode(n).right.transientflags,tnf_pass1_done);
- do_firstpass(tbinarynode(n).right);
- end
- else
- break;
- end
- else
- break;
- end;
- end
- else
- begin
- statements:=nil;
- result:=fen_norecurse_true;
- {$ifdef csedebug}
- writeln('============ cse domain ==================');
- printnode(output,n);
- writeln('Complexity: ',node_complexity(n));
- {$endif csedebug}
- lists.nodelist:=tfplist.create;
- lists.locationlist:=tfplist.create;
- lists.equalto:=tfplist.create;
- lists.refs:=tfplist.create;
- foreachnodestatic(pm_postprocess,n,@collectnodes,@lists);
- templist:=tfplist.create;
- templist.count:=lists.nodelist.count;
- { check all nodes if one is used more than once }
- for i:=0 to lists.nodelist.count-1 do
- begin
- { current node used more than once? }
- if assigned(lists.refs[i]) then
- begin
- if not(assigned(statements)) then
- begin
- nodes:=internalstatements(statements);
- addstatement(statements,internalstatements(creates));
- end;
- def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
- { we cannot handle register stored records or array in CSE yet
- but we can store their reference }
- addrstored:=((def.typ in [arraydef,recorddef]) or is_object(def)) and not(is_dynamic_array(def));
- if addrstored then
- templist[i]:=ctempcreatenode.create_value(cpointerdef.getreusable(def),voidpointertype.size,tt_persistent,
- true,caddrnode.create_internal(tnode(lists.nodelist[i])))
- else
- templist[i]:=ctempcreatenode.create_value(def,def.size,tt_persistent,
- def.is_intregable or def.is_fpuregable or def.is_const_intregable,tnode(lists.nodelist[i]));
- { the value described by the temp. is immutable and the temp. can be always in register
- ttempcreatenode.create normally takes care of the register location but it does not
- know about immutability so it cannot take care of managed types }
- ttempcreatenode(templist[i]).includetempflag(ti_const);
- ttempcreatenode(templist[i]).includetempflag(ti_may_be_in_reg);
- { make debugging easier and set temp. location to the original location }
- tnode(templist[i]).fileinfo:=tnode(lists.nodelist[i]).fileinfo;
- addstatement(creates,tnode(templist[i]));
- { the delete node has no semantic use yet, it is just used to clean up memory }
- deletetemp:=ctempdeletenode.create(ttempcreatenode(templist[i]));
- deletetemp.includetempflag(ti_cleanup_only);
- addstatement(tstatementnode(arg^),deletetemp);
- { make debugging easier and set temp. location to the original location }
- creates.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
- hp:=ttempcreatenode(templist[i]);
- do_firstpass(tnode(hp));
- templist[i]:=hp;
- if addrstored then
- pnode(lists.locationlist[i])^:=cderefnode.Create(ctemprefnode.create(ttempcreatenode(templist[i])))
- else
- pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[i]));
- { make debugging easier and set temp. location to the original location }
- pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
- do_firstpass(pnode(lists.locationlist[i])^);
- {$ifdef csedebug}
- printnode(output,statements);
- {$endif csedebug}
- end
- { current node reference to another node? }
- else if lists.equalto[i]<>pointer(-1) then
- begin
- def:=tstoreddef(tnode(lists.nodelist[i]).resultdef);
- { we cannot handle register stored records or array in CSE yet
- but we can store their reference }
- addrstored:=((def.typ in [arraydef,recorddef]) or is_object(def)) and not(is_dynamic_array(def));
- {$if defined(csedebug) or defined(csestats)}
- writeln;
- writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
- writeln('Complexity: ',node_complexity(tnode(lists.nodelist[i])),' Node ',i,' equals Node ',ptrint(lists.equalto[i]));
- printnode(output,tnode(lists.nodelist[i]));
- printnode(output,tnode(lists.nodelist[ptrint(lists.equalto[i])]));
- writeln('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
- writeln;
- {$endif defined(csedebug) or defined(csestats)}
- templist[i]:=templist[ptrint(lists.equalto[i])];
- if addrstored then
- pnode(lists.locationlist[i])^:=cderefnode.Create(ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])])))
- else
- pnode(lists.locationlist[i])^:=ctemprefnode.create(ttempcreatenode(templist[ptrint(lists.equalto[i])]));
- { make debugging easier and set temp. location to the original location }
- pnode(lists.locationlist[i])^.fileinfo:=tnode(lists.nodelist[i]).fileinfo;
- do_firstpass(pnode(lists.locationlist[i])^);
- end;
- end;
- { clean up unused trees }
- for i:=0 to lists.nodelist.count-1 do
- if lists.equalto[i]<>pointer(-1) then
- tnode(lists.nodelist[i]).free;
- {$ifdef csedebug}
- writeln('nodes: ',lists.nodelist.count);
- writeln('==========================================');
- {$endif csedebug}
- lists.nodelist.free;
- lists.locationlist.free;
- lists.equalto.free;
- lists.refs.free;
- templist.free;
- if assigned(statements) then
- begin
- { call para nodes need a special handling because
- they can be only children nodes of call nodes
- so the initialization code is inserted below the
- call para node
- }
- if n.nodetype=callparan then
- begin
- addstatement(statements,tcallparanode(n).left);
- tcallparanode(n).left:=nodes;
- do_firstpass(tcallparanode(n).left);
- end
- else
- begin
- addstatement(statements,n);
- n:=nodes;
- do_firstpass(n);
- end;
- {$ifdef csedebug}
- printnode(output,nodes);
- {$endif csedebug}
- end;
- end
- end;
- end;
- function do_optcse(var rootnode : tnode) : tnode;
- var
- deletes,
- statements : tstatementnode;
- deleteblock,
- rootblock : tblocknode;
- begin
- {$ifdef csedebug}
- writeln('====================================================================================');
- writeln('CSE optimization pass started');
- writeln('====================================================================================');
- printnode(rootnode);
- writeln('====================================================================================');
- writeln;
- {$endif csedebug}
- deleteblock:=internalstatements(deletes);
- foreachnodestatic(pm_postprocess,rootnode,@searchcsedomain,@deletes);
- rootblock:=internalstatements(statements);
- addstatement(statements,rootnode);
- addstatement(statements,deleteblock);
- rootnode:=rootblock;
- do_firstpass(rootnode);
- {$ifdef csedebug}
- writeln('====================================================================================');
- writeln('CSE optimization result');
- writeln('====================================================================================');
- printnode(rootnode);
- writeln('====================================================================================');
- writeln;
- {$endif csedebug}
- result:=nil;
- end;
- type
- tconstentry = record
- valuenode : tnode;
- weight : TCGInt;
- temp : ttempcreatenode;
- end;
- pconstentry = ^tconstentry;
- tconstentries = array of tconstentry;
- pconstentries = ^tconstentries;
- function CSEOnReference(n : tnode) : Boolean;
- begin
- Result:=(n.nodetype=loadn) and (tloadnode(n).symtableentry.typ=staticvarsym)
- and ((vo_is_thread_var in tstaticvarsym(tloadnode(n).symtableentry).varoptions) or
- (cs_create_pic in current_settings.moduleswitches)
- {$if defined(aarch64) or defined(sparc) or defined(sparc64) or defined(riscv)}
- or (not(tabstractvarsym(tloadnode(n).symtableentry).is_regvar(false)))
- {$endif defined(aarch64) or defined(sparc) or defined(sparc64) or defined(riscv)}
- );
- end;
- function collectconsts(var n:tnode; arg: pointer) : foreachnoderesult;
- var
- consts: pconstentries;
- found: Boolean;
- i: Integer;
- begin
- result:=fen_true;
- consts:=pconstentries(arg);
- if ((n.nodetype=realconstn)
- {$ifdef x86}
- { x87 consts would end up in memory, so loading them in temps. makes no sense }
- and use_vectorfpu(n.resultdef)
- {$endif x86}
- ) or
- CSEOnReference(n) then
- begin
- found:=false;
- i:=0;
- for i:=0 to High(consts^) do
- begin
- if tnode(consts^[i].valuenode).isequal(n) then
- begin
- found:=true;
- break;
- end;
- end;
- if found then
- begin
- inc(consts^[i].weight);
- { non-sectioned threadvars really hurt so do more aggressive cse on them }
- if not(tf_section_threadvars in target_info.flags) and (n.nodetype=loadn) and (tloadnode(n).symtableentry.typ=staticvarsym) and (vo_is_thread_var in tstaticvarsym(tloadnode(n).symtableentry).varoptions) then
- inc(consts^[i].weight);
- end
- else
- begin
- SetLength(consts^,length(consts^)+1);
- with consts^[high(consts^)] do
- begin
- valuenode:=n.getcopy;
- valuenode.fileinfo:=current_procinfo.entrypos;
- weight:=1;
- end;
- end;
- end;
- end;
- function replaceconsts(var n:tnode; arg: pointer) : foreachnoderesult;
- var
- hp: tnode;
- begin
- result:=fen_true;
- if tnode(pconstentry(arg)^.valuenode).isequal(n) then
- begin
- { shall we take the address? }
- if CSEOnReference(pconstentry(arg)^.valuenode) then
- begin
- hp:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(pconstentry(arg)^.temp)),pconstentry(arg)^.valuenode.resultdef);
- ttypeconvnode(hp).left.fileinfo:=n.fileinfo;
- end
- else
- hp:=ctemprefnode.create(pconstentry(arg)^.temp);
- hp.fileinfo:=n.fileinfo;
- n.Free;
- n:=hp;
- do_firstpass(n);
- end;
- end;
- function do_consttovar(var rootnode : tnode) : tnode;
- var
- constentries : tconstentries;
- Procedure QuickSort(L, R : Longint);
- var
- I, J, P: Longint;
- Item, Q : tconstentry;
- begin
- repeat
- I := L;
- J := R;
- P := (L + R) div 2;
- repeat
- Item := constentries[P];
- while Item.weight>constentries[I].weight do
- I := I + 1;
- while Item.weight<constentries[J].weight do
- J := J - 1;
- If I <= J then
- begin
- Q := constentries[I];
- constentries[I] := constentries[J];
- constentries[J] := Q;
- if P = I then
- P := J
- else if P = J then
- P := I;
- I := I + 1;
- J := J - 1;
- end;
- until I > J;
- if L < J then
- QuickSort(L, J);
- L := I;
- until I >= R;
- end;
- var
- creates,
- deletes,
- statements : tstatementnode;
- createblock,
- deleteblock,
- rootblock : tblocknode;
- i, max_fpu_regs_assigned, fpu_regs_assigned,
- max_int_regs_assigned, int_regs_assigned: Integer;
- old_current_filepos: tfileposinfo;
- begin
- {$ifdef csedebug}
- writeln('====================================================================================');
- writeln('Const optimization pass started');
- writeln('====================================================================================');
- printnode(rootnode);
- writeln('====================================================================================');
- writeln;
- {$endif csedebug}
- foreachnodestatic(pm_postprocess,rootnode,@collectconsts,@constentries);
- createblock:=nil;
- deleteblock:=nil;
- rootblock:=nil;
- { estimate how many int registers can be used }
- if pi_do_call in current_procinfo.flags then
- max_int_regs_assigned:=length(paramanager.get_saved_registers_int(current_procinfo.procdef.proccalloption))
- { we store only addresses, so take care of the relation between address sizes and register sizes }
- div max(sizeof(PtrUInt) div sizeof(ALUUInt),1)
- { heuristics, just use a quarter of all registers at maximum }
- div 4
- else
- max_int_regs_assigned:=max(first_int_imreg div 4,1);
- {$if defined(x86) or defined(aarch64) or defined(arm)}
- { x86, aarch64 and arm (neglecting fpa) use mm registers for floats }
- if pi_do_call in current_procinfo.flags then
- { heuristics, just use a fifth of all registers at maximum }
- max_fpu_regs_assigned:=length(paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption)) div 5
- else
- max_fpu_regs_assigned:=max(first_mm_imreg div 5,1);
- {$else defined(x86) or defined(aarch64) or defined(arm)}
- if pi_do_call in current_procinfo.flags then
- { heuristics, just use a fifth of all registers at maximum }
- max_fpu_regs_assigned:=length(paramanager.get_saved_registers_fpu(current_procinfo.procdef.proccalloption)) div 5
- else
- max_fpu_regs_assigned:=max(first_fpu_imreg div 5,1);
- {$endif defined(x86) or defined(aarch64) or defined(arm)}
- fpu_regs_assigned:=0;
- int_regs_assigned:=0;
- if Length(constentries)>0 then
- begin
- { sort entries by weight }
- QuickSort(0,High(constentries));
- { assign only the constants with the highest weight to a register }
- for i:=High(constentries) downto 0 do
- begin
- if (constentries[i].valuenode.nodetype=realconstn) and
- { if there is a call, we need most likely to save/restore a register }
- ((constentries[i].weight>3) or
- ((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags)))
- then
- begin
- if fpu_regs_assigned>=max_fpu_regs_assigned then
- break;
- old_current_filepos:=current_filepos;
- current_filepos:=current_procinfo.entrypos;
- if not(assigned(createblock)) then
- begin
- rootblock:=internalstatements(statements);
- createblock:=internalstatements(creates);
- deleteblock:=internalstatements(deletes);
- end;
- constentries[i].temp:=ctempcreatenode.create(constentries[i].valuenode.resultdef,
- constentries[i].valuenode.resultdef.size,tt_persistent,true);
- addstatement(creates,constentries[i].temp);
- addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),constentries[i].valuenode));
- current_filepos:=old_current_filepos;
- foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]);
- inc(fpu_regs_assigned);
- end
- else if CSEOnReference(constentries[i].valuenode) and
- { if there is a call, we need most likely to save/restore a register }
- ((constentries[i].weight>2) or
- ((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags)))
- then
- begin
- if int_regs_assigned>=max_int_regs_assigned then
- break;
- old_current_filepos:=current_filepos;
- current_filepos:=current_procinfo.entrypos;
- if not(assigned(createblock)) then
- begin
- rootblock:=internalstatements(statements);
- createblock:=internalstatements(creates);
- deleteblock:=internalstatements(deletes);
- end;
- constentries[i].temp:=ctempcreatenode.create(cpointerdef.getreusable(constentries[i].valuenode.resultdef),
- voidpointertype.size,tt_persistent,true);
- addstatement(creates,constentries[i].temp);
- addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),
- caddrnode.create_internal(constentries[i].valuenode)));
- current_filepos:=old_current_filepos;
- foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]);
- inc(int_regs_assigned);
- end;
- end;
- end;
- if assigned(createblock) then
- begin
- addstatement(statements,createblock);
- addstatement(statements,rootnode);
- addstatement(statements,deleteblock);
- rootnode:=rootblock;
- do_firstpass(rootnode);
- end;
- {$ifdef csedebug}
- writeln('====================================================================================');
- writeln('Const optimization result');
- writeln('====================================================================================');
- printnode(rootnode);
- writeln('====================================================================================');
- writeln;
- {$endif csedebug}
- result:=nil;
- end;
- end.
|