123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- {
- Copyright (c) 2014 by Jonas Maebe
- Generate LLVM bytecode for inline nodes
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit nllvminl;
- {$i fpcdefs.inc}
- interface
- uses
- node,
- ncginl;
- type
-   { Inline node used by the LLVM code generator. It derives from
-     tcginlinenode and overrides a number of first-pass handlers (which
-     either rewrite the node into another tree or only set expectloc) and
-     second-pass handlers (which emit code). }
-   tllvminlinenode = class(tcginlinenode)
-    protected
-     { drops an implicit type conversion of a float operand in front of
-       trunc()/round() }
-     procedure maybe_remove_round_trunc_typeconv;
-     { first-pass handlers that replace the node by a new tree }
-     function first_get_frame: tnode; override;
-     function first_abs_real: tnode; override;
-     function first_bitscan: tnode; override;
-     function first_popcnt: tnode; override;
-     function first_fma: tnode; override;
-     function first_sqrt_real: tnode; override;
-     { first-pass handlers that (can) keep the node for the second pass }
-     function first_sqr_real: tnode; override;
-     function first_trunc_real: tnode; override;
-    public
-     { second-pass handlers }
-     procedure second_high; override;
-     procedure second_length; override;
-     procedure second_sqr_real; override;
-     procedure second_trunc_real; override;
-   end;
- implementation
- uses
- verbose,globals,globtype,constexp,cutils,
- aasmbase, aasmdata,
- symconst,symtype,symdef,defutil,
- compinnr,
- nutils,nadd,nbas,ncal,ncnv,ncon,nflw,ninl,nld,nmat,
- pass_2,
- cgbase,cgutils,tgobj,hlcgobj,
- cpubase,
- llvmbase,aasmllvm,aasmllvmmetadata;
- procedure tllvminlinenode.maybe_remove_round_trunc_typeconv;
-   var
-     conv: ttypeconvnode;
-     operand: tnode;
-   begin
-     { trunc()/round() are declared in the system unit with a valreal
-       parameter, so the argument always arrives converted to extended.
-       Strip that conversion here when it is implicit and its operand is
-       already a float. This is not done in ninl, because other code
-       generators (e.g. PowerPC) rely on the parameter having been
-       converted to valreal.
-       (copy from code in nx64inl, should be refactored) }
-     if left.nodetype<>typeconvn then
-       exit;
-     conv:=ttypeconvnode(left);
-     if (nf_explicit in conv.flags) or
-        (conv.left.resultdef.typ<>floatdef) then
-       exit;
-     { unhook the operand before freeing the conversion node, so the
-       operand itself survives; afterwards use_vectorfpu will be applied
-       to the original type }
-     operand:=conv.left;
-     conv.left:=nil;
-     conv.free;
-     left:=operand;
-   end;
- function tllvminlinenode.first_get_frame: tnode;
-   var
-     para: tnode;
-   begin
-     { replace the node by a call to the 'llvm_frameaddress' helper with
-       the integer constant 0 as its only argument (presumably the frame
-       level, with 0 denoting the current frame -- confirm against the
-       helper's declaration) }
-     para:=ccallparanode.create(genintconstnode(0),nil);
-     result:=ccallnode.createintern('llvm_frameaddress',para);
-   end;
- { in general, generate regular (ordinary) expressions rather than intrinsics: according
- to the "Performance Tips for Frontend Authors", "The optimizer is quite
- good at reasoning about general control flow and arithmetic, it is not
- anywhere near as strong at reasoning about the various intrinsics. If
- profitable for code generation purposes, the optimizer will likely form
- the intrinsics itself late in the optimization pipeline." }
- function tllvminlinenode.first_abs_real: tnode;
-   var
-     argdef: tdef;
-     argtemp,
-     abstemp: ttempcreatenode;
-     stmts: tstatementnode;
-     nonnegative,
-     keepvalue,
-     negatevalue: tnode;
-   begin
-     { Rewrites abs(left) into the statement block
-         argtemp:=left;
-         if argtemp>=0 then
-           abstemp:=argtemp
-         else
-           abstemp:=-argtemp;
-         <value of abstemp>
-       NOTE(review): with IEEE semantics "-0.0>=0" holds, so a negative
-       zero appears to keep its sign here -- confirm this is acceptable. }
-     argdef:=left.resultdef;
-     result:=internalstatements(stmts);
-     argtemp:=ctempcreatenode.create(argdef,argdef.size,tt_persistent,true);
-     { assigned twice -> will be spilled if put in register }
-     abstemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
-     addstatement(stmts,argtemp);
-     addstatement(stmts,abstemp);
-     { argtemp:=left }
-     addstatement(stmts,
-       cassignmentnode.create(ctemprefnode.create(argtemp),left)
-     );
-     { argtemp>=0.0 }
-     nonnegative:=
-       caddnode.create(
-         gten,
-         ctemprefnode.create(argtemp),
-         crealconstnode.create(0.0,argdef)
-       );
-     { abstemp:=argtemp }
-     keepvalue:=
-       cassignmentnode.create(
-         ctemprefnode.create(abstemp),
-         ctemprefnode.create(argtemp)
-       );
-     { abstemp:=-argtemp }
-     negatevalue:=
-       cassignmentnode.create(
-         ctemprefnode.create(abstemp),
-         cunaryminusnode.create(ctemprefnode.create(argtemp))
-       );
-     addstatement(stmts,cifnode.create(nonnegative,keepvalue,negatevalue));
-     addstatement(stmts,ctempdeletenode.create(argtemp));
-     addstatement(stmts,ctempdeletenode.create_normal_temp(abstemp));
-     { the block evaluates to abstemp }
-     addstatement(stmts,ctemprefnode.create(abstemp));
-     { the operand now belongs to the new tree }
-     left:=nil;
-   end;
- function tllvminlinenode.first_bitscan: tnode;
-   var
-     argdef: tdef;
-     restemp,
-     argtemp: ttempcreatenode;
-     stmts: tstatementnode;
-     blk: tblocknode;
-     bitindex,
-     nonzero,
-     storeindex,
-     storedefault: tnode;
-     intrinsic: string[15];
-     reverse: boolean;
-   begin
-     { Rewrites the bit scan into
-         if left<>0 then
-           result:=llvm_ctlz/cttz(unsigned(left),true)
-         else
-           result:=255;
-       where in_bsr_x selects ctlz and every other inline number handled
-       here selects cttz. }
-     reverse:=inlinenumber=in_bsr_x;
-     if reverse then
-       intrinsic:='LLVM_CTLZ'
-     else
-       intrinsic:='LLVM_CTTZ';
-     { remember the operand type before left is possibly replaced }
-     argdef:=left.resultdef;
-     blk:=internalstatements(stmts);
-     restemp:=ctempcreatenode.create(resultdef,resultdef.size,tt_persistent,false);
-     addstatement(stmts,restemp);
-     { left is used twice below (intrinsic argument and zero test) }
-     argtemp:=maybereplacewithtemp(left,blk,stmts,argdef.size,true);
-     bitindex:=
-       ccallnode.createintern(
-         intrinsic,
-         ccallparanode.create(cordconstnode.create(1,llvmbool1type,false),
-           ccallparanode.create(
-             ctypeconvnode.create_explicit(left,get_unsigned_inttype(argdef)),nil
-           )
-         )
-       );
-     { ctlz returns the number of leading zero bits, while bsr returns the bit
-       number of the first non-zero bit (with the least significant bit as 0)
-       -> invert result }
-     if reverse then
-       bitindex:=
-         caddnode.create(xorn,
-           bitindex,
-           genintconstnode(argdef.size*8-1)
-         );
-     { left<>0 }
-     nonzero:=caddnode.create(unequaln,left.getcopy,genintconstnode(0));
-     storeindex:=
-       cassignmentnode.create(
-         ctemprefnode.create(restemp),
-         bitindex
-       );
-     storedefault:=
-       cassignmentnode.create(
-         ctemprefnode.create(restemp),
-         genintconstnode(255)
-       );
-     addstatement(stmts,cifnode.create(nonzero,storeindex,storedefault));
-     if assigned(argtemp) then
-       addstatement(stmts,ctempdeletenode.create(argtemp));
-     addstatement(stmts,ctempdeletenode.create_normal_temp(restemp));
-     addstatement(stmts,ctemprefnode.create(restemp));
-     { the operand now belongs to the new tree }
-     left:=nil;
-     result:=blk;
-   end;
- function tllvminlinenode.first_fma: tnode;
-   var
-     exceptstr,
-     roundstr: ansistring;
-     helpername: string[40];
-     args: tnode;
-   begin
-     { Replaces the fma inline node by a call to a helper routine. Without
-       fastmath the constrained variant is called, which receives two
-       additional metadata string arguments (the exception mode string and
-       'round.dynamic') in front of the original parameters; with fastmath
-       the type-specific helper gets the original parameters only. }
-     if not(cs_opt_fastmath in current_settings.optimizerswitches) then
-       begin
-         case inlinenumber of
-           in_fma_single,
-           in_fma_double,
-           in_fma_extended,
-           in_fma_float128:
-             helpername:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
-           else
-             internalerror(2019122811);
-         end;
-         exceptstr:=llvm_constrainedexceptmodestring;
-         roundstr:='round.dynamic';
-         { prepend the rounding mode, then the exception mode }
-         args:=
-           ccallparanode.create(
-             cstringconstnode.createpchar(ansistring2pchar(roundstr),length(roundstr),llvm_metadatatype),
-             left
-           );
-         args:=
-           ccallparanode.create(
-             cstringconstnode.createpchar(ansistring2pchar(exceptstr),length(exceptstr),llvm_metadatatype),
-             args
-           );
-         result:=ccallnode.createintern(helpername,args);
-       end
-     else
-       begin
-         case inlinenumber of
-           in_fma_single:
-             helpername:='llvm_fma_f32';
-           in_fma_double:
-             helpername:='llvm_fma_f64';
-           in_fma_extended:
-             helpername:='llvm_fma_f80';
-           in_fma_float128:
-             helpername:='llvm_fma_f128';
-           else
-             internalerror(2018122101);
-         end;
-         result:=ccallnode.createintern(helpername,left);
-       end;
-     { the parameters now belong to the call node }
-     left:=nil;
-   end;
- function tllvminlinenode.first_sqr_real: tnode;
-   begin
-     { the node is kept (nil result); only the expected location of the
-       result is chosen: an MM register when use_vectorfpu accepts the
-       operand type, an FPU register otherwise }
-     result:=nil;
-     expectloc:=LOC_FPUREGISTER;
-     if use_vectorfpu(left.resultdef) then
-       expectloc:=LOC_MMREGISTER;
-   end;
- function tllvminlinenode.first_sqrt_real: tnode;
-   var
-     exceptstr,
-     roundstr: ansistring;
-     helpername: string[40];
-     args: tnode;
-   begin
-     { Replaces sqrt(left) by a call to a helper routine. Without fastmath
-       the constrained variant is called with the exception mode string
-       and 'round.dynamic' as leading metadata arguments; with fastmath a
-       helper selected by the operand's float type is called with the
-       operand only. A non-float operand is an internal error. }
-     if left.resultdef.typ<>floatdef then
-       internalerror(2018121601);
-     if not(cs_opt_fastmath in current_settings.optimizerswitches) then
-       begin
-         case tfloatdef(left.resultdef).floattype of
-           s32real,
-           s64real,
-           s80real,sc80real,
-           s128real:
-             helpername:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
-           else
-             internalerror(2019122810);
-         end;
-         exceptstr:=llvm_constrainedexceptmodestring;
-         roundstr:='round.dynamic';
-         { operand last, preceded by rounding mode and exception mode }
-         args:=ccallparanode.create(left,nil);
-         args:=
-           ccallparanode.create(
-             cstringconstnode.createpchar(ansistring2pchar(roundstr),length(roundstr),llvm_metadatatype),
-             args
-           );
-         args:=
-           ccallparanode.create(
-             cstringconstnode.createpchar(ansistring2pchar(exceptstr),length(exceptstr),llvm_metadatatype),
-             args
-           );
-         result:=ccallnode.createintern(helpername,args);
-       end
-     else
-       begin
-         case tfloatdef(left.resultdef).floattype of
-           s32real:
-             helpername:='llvm_sqrt_f32';
-           s64real:
-             helpername:='llvm_sqrt_f64';
-           s80real,sc80real:
-             helpername:='llvm_sqrt_f80';
-           s128real:
-             helpername:='llvm_sqrt_f128';
-           else
-             internalerror(2018121602);
-         end;
-         result:=ccallnode.createintern(helpername, ccallparanode.create(left,nil));
-       end;
-     { the operand now belongs to the call node }
-     left:=nil;
-   end;
- function tllvminlinenode.first_trunc_real: tnode;
-   begin
-     { fptosi is undefined if the value is out of range -> only generate
-       it in case of fastmath; otherwise defer to the inherited handler }
-     if not(cs_opt_fastmath in current_settings.optimizerswitches) then
-       begin
-         result:=inherited;
-         exit;
-       end;
-     { keep the node for second_trunc_real, operating on the unconverted
-       operand where possible }
-     maybe_remove_round_trunc_typeconv;
-     expectloc:=LOC_REGISTER;
-     result:=nil;
-   end;
- function tllvminlinenode.first_popcnt: tnode;
-   var
-     popcall: tnode;
-   begin
-     { call the 'LLVM_CTPOP' helper on the operand and convert the call's
-       result to this node's result type }
-     popcall:=ccallnode.createintern('LLVM_CTPOP', ccallparanode.create(left,nil));
-     { the operand now belongs to the call node }
-     left:=nil;
-     result:=ctypeconvnode.create(popcall,resultdef);
-   end;
- procedure tllvminlinenode.second_length;
-   var
-     lenreg: tregister;
-   begin
-     { reuse the code generation of high(); for everything except dynamic
-       arrays its result is used unchanged }
-     second_high;
-     if not is_dynamic_array(left.resultdef) then
-       exit;
-     { Dynamic arrays do not have their length attached but their maximum
-       index -> length = high+1, computed into a fresh register }
-     lenreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
-     hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_ADD,resultdef,1,location.register,lenreg);
-     location.register:=lenreg;
-   end;
- procedure tllvminlinenode.second_high;
-   var
-     donelab, emptylab: tasmlabel;
-     resreg: tregister;
-     lenref: treference;
-     lendef,
-     lenptrdef,
-     resptrdef: tdef;
-     iswide: boolean;
-   begin
-     { Generates code for high()/length(): a shortstring yields a reference
-       to its length byte; for the other handled types the value stored in
-       front of the data is loaded, unless the pointer is nil. }
-     secondpass(left);
-     if is_shortstring(left.resultdef) then
-       begin
-         if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-           internalerror(2014080806);
-         resptrdef:=cpointerdef.getreusable(resultdef);
-         { typecast the shortstring reference into a length byte reference }
-         location_reset_ref(location,left.location.loc,def_cgsize(resultdef),left.location.reference.alignment,left.location.reference.volatility);
-         resreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,resptrdef);
-         hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.resultdef,resptrdef,left.location.reference,resreg);
-         hlcg.reference_reset_base(location.reference,resptrdef,resreg,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
-         exit;
-       end;
-     { length in ansi/wide strings and high in dynamic arrays is at offset
-       -sizeof(sizeint), for widestrings it's at -4 }
-     iswide:=is_widestring(left.resultdef);
-     if iswide then
-       lendef:=u32inttype
-     else
-       lendef:=ossinttype;
-     lenptrdef:=cpointerdef.getreusable(lendef);
-     hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
-       left.resultdef,lenptrdef,true);
-     current_asmdata.getjumplabel(emptylab);
-     current_asmdata.getjumplabel(donelab);
-     { nil pointer -> skip the load and use the constant further below }
-     hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,lenptrdef,OC_EQ,0,left.location.register,emptylab);
-     { volatility of the ansistring/widestring refers to the volatility of the
-       string pointer, not of the string data }
-     hlcg.reference_reset_base(lenref,lenptrdef,left.location.register,-lendef.size,ctempposinvalid,lendef.alignment,[]);
-     resreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
-     hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,lendef,resultdef,lenref,resreg);
-     { the loaded widestring value is shifted right by one bit }
-     if iswide then
-       hlcg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,resultdef,1,resreg);
-     hlcg.a_jmp_always(current_asmdata.CurrAsmList,donelab);
-     hlcg.a_label(current_asmdata.CurrAsmList,emptylab);
-     { nil: -1 for a dynamic array, 0 for everything else }
-     if is_dynamic_array(left.resultdef) then
-       hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,-1,resreg)
-     else
-       hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,0,resreg);
-     hlcg.a_label(current_asmdata.CurrAsmList,donelab);
-     location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-     location.register:=resreg;
-   end;
- procedure tllvminlinenode.second_sqr_real;
-   begin
-     { Generates code for sqr(left) on a real operand: the operand is
-       forced into an MM or FPU register (as decided in first_sqr_real via
-       expectloc) and multiplied with itself using an fmul instruction. }
-     secondpass(left);
-     { initialise the complete result location (including its size)
-       instead of only setting the loc field, like the other second-pass
-       handlers of this unit do }
-     location_reset(location,expectloc,def_cgsize(resultdef));
-     if expectloc=LOC_MMREGISTER then
-       begin
-         hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-         location.register:=hlcg.getmmregister(current_asmdata.CurrAsmList,resultdef);
-       end
-     else
-       begin
-         hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-         location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
-       end;
-     { result:=left*left }
-     current_asmdata.CurrAsmList.concat(
-       taillvm.op_reg_size_reg_reg(la_fmul,
-         location.register,resultdef,
-         left.location.register,left.location.register
-       )
-     );
-   end;
- procedure tllvminlinenode.second_trunc_real;
-   var
-     srcdef: tdef;
-     conversion: taillvm;
-   begin
-     { Generates code for trunc(left): the float operand is forced into an
-       MM or FPU register and converted into a new register of the result
-       type with an fptosi instruction. }
-     secondpass(left);
-     srcdef:=left.resultdef;
-     if not use_vectorfpu(srcdef) then
-       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,srcdef,true)
-     else
-       hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,srcdef,true);
-     location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
-     location.register:=hlcg.getregisterfordef(current_asmdata.CurrAsmList,resultdef);
-     conversion:=taillvm.op_reg_size_reg_size(la_fptosi,location.register,srcdef,left.location.register,resultdef);
-     current_asmdata.CurrAsmList.concat(conversion);
-   end;
- begin
- cinlinenode:=tllvminlinenode; { unit initialization: point cinlinenode at the LLVM inline node class (presumably the class reference used to instantiate inline nodes -- confirm in ninl) }
- end.
|