sphinxexpr.cpp 215 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470
  1. //
  2. // $Id$
  3. //
  4. //
  5. // Copyright (c) 2001-2016, Andrew Aksyonoff
  6. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  7. // All rights reserved
  8. //
  9. // This program is free software; you can redistribute it and/or modify
  10. // it under the terms of the GNU General Public License. You should have
  11. // received a copy of the GPL license along with this program; if you
  12. // did not, you can find it at http://www.gnu.org/
  13. //
  14. #include "sphinx.h"
  15. #include "sphinxexpr.h"
  16. #include "sphinxplugin.h"
  17. #include "sphinxutils.h"
  18. #include "sphinxint.h"
  19. #include "sphinxjson.h"
  20. #include <time.h>
  21. #include <math.h>
  // Fallbacks for toolchains whose <math.h> does not define these constants.
  // M_LOG2E is log2(e).
  #if !defined(M_LOG2E)
  #define M_LOG2E 1.44269504088896340736
  #endif
  // M_LOG10E is log10(e).
  #if !defined(M_LOG10E)
  #define M_LOG10E 0.434294481903251827651
  #endif
  28. // hack hack hack
  29. UservarIntSet_c * ( *g_pUservarsHook )( const CSphString & sUservar );
  30. //////////////////////////////////////////////////////////////////////////
  31. // EVALUATION ENGINE
  32. //////////////////////////////////////////////////////////////////////////
  // EXPR_CLASS_NAME_NOCHECK ( "ClassName" ) declares the two locals that the
  // CALC_* hashing macros expect to find in scope:
  //   szClassName - the name passed to the macro
  //   uHash       - running hash, initialised from the enclosing uPrevHash
  #define EXPR_CLASS_NAME_NOCHECK(name) \
      const char * szClassName = name; \
      uint64_t uHash = uPrevHash;

  // EXPR_CLASS_NAME declares the same locals. In Windows debug builds it first
  // asserts that the supplied name equals the leading part of __FUNCTION__
  // (everything before "<" when one is present, otherwise before "::").
  #if USE_WINDOWS && !defined(NDEBUG)
  #define EXPR_CLASS_NAME(name) \
      {\
          const char * szFuncName = __FUNCTION__; \
          const char * szClassNameEnd = strstr ( szFuncName, "::" ); \
          assert ( szClassNameEnd ); \
          const char * szTemplateNameEnd = strstr ( szFuncName, "<" ); \
          if ( szTemplateNameEnd ) szClassNameEnd = szTemplateNameEnd; \
          size_t iLen = szClassNameEnd-szFuncName; \
          assert ( strlen(name)==iLen && "Wrong expression name specified in ::GetHash" ); \
          assert ( !strncmp(name, szFuncName, iLen) && "Wrong expression name specified in ::GetHash" ); \
      }\
      EXPR_CLASS_NAME_NOCHECK(name)
  #else
  #define EXPR_CLASS_NAME(name) EXPR_CLASS_NAME_NOCHECK(name)
  #endif
  // Hashing helpers for GetHash() implementations. They expect szClassName and
  // uHash (as declared by EXPR_CLASS_NAME) plus the GetHash() arguments
  // tSorterSchema and bDisable to be in scope. Every expansion already ends
  // with ';', which is kept for compatibility with existing call sites.
  //
  // Macro parameters are parenthesised so that a compound argument (for example
  // 'a|b' or 'bCond ? pA : pB') is expanded as a single operand instead of being
  // re-associated with the surrounding operators.
  //
  // CALC_DEP_HASHES / CALC_PARENT_HASH    forward the running hash to sphCalcExprDepHash / CalcHash
  // *_EX(hash)                            same, with 'hash' XOR-ed into the running hash first
  // CALC_POD_HASH(value)                  fold the raw bytes of one value into uHash
  // CALC_POD_HASHES(values)               fold GetLength() elements starting at Begin() into uHash
  // CALC_STR_HASH(str,len)                fold 'len' bytes of str.cstr() into uHash
  // CALC_CHILD_HASH / CALC_CHILD_HASHES   chain GetHash() of non-null child expression(s)
  #define CALC_DEP_HASHES() sphCalcExprDepHash ( szClassName, this, tSorterSchema, uHash, bDisable );
  #define CALC_DEP_HASHES_EX(hash) sphCalcExprDepHash ( szClassName, this, tSorterSchema, uHash^(hash), bDisable );
  #define CALC_PARENT_HASH() CalcHash ( szClassName, tSorterSchema, uHash, bDisable );
  #define CALC_PARENT_HASH_EX(hash) CalcHash ( szClassName, tSorterSchema, uHash^(hash), bDisable );
  #define CALC_POD_HASH(value) uHash = sphFNV64 ( &(value), sizeof(value), uHash );
  #define CALC_POD_HASHES(values) uHash = sphFNV64 ( (values).Begin(), (values).GetLength()*sizeof((values)[0]), uHash );
  #define CALC_STR_HASH(str,len) uHash = sphFNV64 ( (str).cstr(), len, uHash );
  #define CALC_CHILD_HASH(child) if (child) uHash = (child)->GetHash ( tSorterSchema, uHash, bDisable );
  #define CALC_CHILD_HASHES(children) ARRAY_FOREACH ( i, children ) if ((children)[i]) uHash = (children)[i]->GetHash ( tSorterSchema, uHash, bDisable );
  70. struct ExprLocatorTraits_t
  71. {
  72. CSphAttrLocator m_tLocator;
  73. int m_iLocator; // used by SPH_EXPR_GET_DEPENDENT_COLS
  74. ExprLocatorTraits_t ( const CSphAttrLocator & tLocator, int iLocator ) : m_tLocator ( tLocator ), m_iLocator ( iLocator ) {}
  75. void HandleCommand ( ESphExprCommand eCmd, void * pArg )
  76. {
  77. if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS && m_iLocator!=-1 )
  78. static_cast < CSphVector<int>* >(pArg)->Add ( m_iLocator );
  79. }
  80. };
  81. struct Expr_WithLocator_c : public ISphExpr, public ExprLocatorTraits_t
  82. {
  83. public:
  84. Expr_WithLocator_c ( const CSphAttrLocator & tLocator, int iLocator )
  85. : ExprLocatorTraits_t ( tLocator, iLocator )
  86. {}
  87. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  88. {
  89. HandleCommand ( eCmd, pArg );
  90. }
  91. };
  92. struct Expr_GetInt_c : public Expr_WithLocator_c
  93. {
  94. Expr_GetInt_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ) {}
  95. virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); } // FIXME! OPTIMIZE!!! we can go the short route here
  96. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
  97. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
  98. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  99. {
  100. EXPR_CLASS_NAME("Expr_GetInt_c");
  101. return CALC_DEP_HASHES();
  102. }
  103. };
  104. struct Expr_GetBits_c : public Expr_WithLocator_c
  105. {
  106. Expr_GetBits_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ) {}
  107. virtual float Eval ( const CSphMatch & tMatch ) const { return (float) tMatch.GetAttr ( m_tLocator ); }
  108. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
  109. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.GetAttr ( m_tLocator ); }
  110. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  111. {
  112. EXPR_CLASS_NAME("Expr_GetBits_c");
  113. return CALC_DEP_HASHES();
  114. }
  115. };
  116. struct Expr_GetSint_c : public Expr_WithLocator_c
  117. {
  118. Expr_GetSint_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ) {}
  119. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)(int)tMatch.GetAttr ( m_tLocator ); }
  120. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
  121. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
  122. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  123. {
  124. EXPR_CLASS_NAME("Expr_GetSint_c");
  125. return CALC_DEP_HASHES();
  126. }
  127. };
  128. struct Expr_GetFloat_c : public Expr_WithLocator_c
  129. {
  130. Expr_GetFloat_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ) {}
  131. virtual float Eval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrFloat ( m_tLocator ); }
  132. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  133. {
  134. EXPR_CLASS_NAME("Expr_GetFloat_c");
  135. return CALC_DEP_HASHES();
  136. }
  137. };
  138. struct Expr_GetString_c : public Expr_WithLocator_c
  139. {
  140. const BYTE * m_pStrings;
  141. Expr_GetString_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ) {}
  142. virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
  143. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  144. {
  145. Expr_WithLocator_c::Command ( eCmd, pArg );
  146. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  147. m_pStrings = (const BYTE*)pArg;
  148. }
  149. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  150. {
  151. SphAttr_t iOff = tMatch.GetAttr ( m_tLocator );
  152. if ( iOff>0 )
  153. return sphUnpackStr ( m_pStrings + iOff, ppStr );
  154. *ppStr = NULL;
  155. return 0;
  156. }
  157. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  158. {
  159. EXPR_CLASS_NAME("Expr_GetString_c");
  160. return CALC_DEP_HASHES();
  161. }
  162. };
  163. struct Expr_GetMva_c : public Expr_WithLocator_c
  164. {
  165. const DWORD * m_pMva;
  166. bool m_bArenaProhibit;
  167. Expr_GetMva_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ), m_pMva ( NULL ), m_bArenaProhibit ( false ) {}
  168. virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
  169. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  170. {
  171. Expr_WithLocator_c::Command ( eCmd, pArg );
  172. if ( eCmd==SPH_EXPR_SET_MVA_POOL )
  173. {
  174. const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
  175. assert ( pPool );
  176. m_pMva = pPool->m_pMva;
  177. m_bArenaProhibit = pPool->m_bArenaProhibit;
  178. }
  179. }
  180. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.GetAttr ( m_tLocator ); }
  181. virtual const DWORD * MvaEval ( const CSphMatch & tMatch ) const { return tMatch.GetAttrMVA ( m_tLocator, m_pMva, m_bArenaProhibit ); }
  182. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  183. {
  184. EXPR_CLASS_NAME("Expr_GetMva_c");
  185. CALC_POD_HASH(m_bArenaProhibit);
  186. return CALC_DEP_HASHES();
  187. }
  188. };
  189. struct Expr_GetFactorsAttr_c : public Expr_WithLocator_c
  190. {
  191. Expr_GetFactorsAttr_c ( const CSphAttrLocator & tLocator, int iLocator ) : Expr_WithLocator_c ( tLocator, iLocator ) {}
  192. virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
  193. virtual const DWORD * FactorEval ( const CSphMatch & tMatch ) const { return (DWORD *)tMatch.GetAttr ( m_tLocator ); }
  194. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  195. {
  196. EXPR_CLASS_NAME("Expr_GetFactorsAttr_c");
  197. return CALC_DEP_HASHES();
  198. }
  199. };
  200. struct Expr_GetConst_c : public ISphExpr
  201. {
  202. float m_fValue;
  203. explicit Expr_GetConst_c ( float fValue ) : m_fValue ( fValue ) {}
  204. virtual float Eval ( const CSphMatch & ) const { return m_fValue; }
  205. virtual int IntEval ( const CSphMatch & ) const { return (int)m_fValue; }
  206. virtual int64_t Int64Eval ( const CSphMatch & ) const { return (int64_t)m_fValue; }
  207. virtual bool IsConst () const { return true; }
  208. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  209. {
  210. EXPR_CLASS_NAME("Expr_GetConst_c");
  211. CALC_POD_HASH(m_fValue);
  212. return CALC_DEP_HASHES();
  213. }
  214. };
  215. struct Expr_GetIntConst_c : public ISphExpr
  216. {
  217. int m_iValue;
  218. explicit Expr_GetIntConst_c ( int iValue ) : m_iValue ( iValue ) {}
  219. virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
  220. virtual int IntEval ( const CSphMatch & ) const { return m_iValue; }
  221. virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
  222. virtual bool IsConst () const { return true; }
  223. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  224. {
  225. EXPR_CLASS_NAME("Expr_GetIntConst_c");
  226. CALC_POD_HASH(m_iValue);
  227. return CALC_DEP_HASHES();
  228. }
  229. };
  230. struct Expr_GetInt64Const_c : public ISphExpr
  231. {
  232. int64_t m_iValue;
  233. explicit Expr_GetInt64Const_c ( int64_t iValue ) : m_iValue ( iValue ) {}
  234. virtual float Eval ( const CSphMatch & ) const { return (float) m_iValue; } // no assert() here cause generic float Eval() needs to work even on int-evaluator tree
  235. virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return (int)m_iValue; }
  236. virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_iValue; }
  237. virtual bool IsConst () const { return true; }
  238. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  239. {
  240. EXPR_CLASS_NAME("Expr_GetInt64Const_c");
  241. CALC_POD_HASH(m_iValue);
  242. return CALC_DEP_HASHES();
  243. }
  244. };
  245. struct Expr_GetStrConst_c : public ISphStringExpr
  246. {
  247. CSphString m_sVal;
  248. int m_iLen;
  249. explicit Expr_GetStrConst_c ( const char * sVal, int iLen, bool bUnescape )
  250. {
  251. if ( iLen>0 )
  252. {
  253. if ( bUnescape )
  254. SqlUnescape ( m_sVal, sVal, iLen );
  255. else
  256. m_sVal.SetBinary ( sVal, iLen );
  257. }
  258. m_iLen = m_sVal.Length();
  259. }
  260. virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
  261. {
  262. *ppStr = (const BYTE*) m_sVal.cstr();
  263. return m_iLen;
  264. }
  265. virtual float Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
  266. virtual int IntEval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
  267. virtual int64_t Int64Eval ( const CSphMatch & ) const { assert ( 0 ); return 0; }
  268. virtual bool IsConst () const { return true; }
  269. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  270. {
  271. EXPR_CLASS_NAME("Expr_GetStrConst_c");
  272. CALC_STR_HASH(m_sVal, m_iLen);
  273. return CALC_DEP_HASHES();
  274. }
  275. };
  276. class Expr_StrLength_c : public ISphExpr
  277. {
  278. public:
  279. Expr_StrLength_c ( ISphExpr * pArg )
  280. : m_pArg ( pArg )
  281. {}
  282. virtual int IntEval ( const CSphMatch & tMatch ) const
  283. {
  284. const BYTE * pStr;
  285. return m_pArg->StringEval ( tMatch, &pStr );
  286. }
  287. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  288. {
  289. m_pArg->Command ( eCmd, pArg );
  290. }
  291. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  292. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  293. {
  294. EXPR_CLASS_NAME("Expr_StrLength_c");
  295. CALC_CHILD_HASH(m_pArg);
  296. return CALC_DEP_HASHES();
  297. }
  298. protected:
  299. ISphExpr * m_pArg;
  300. };
  301. struct Expr_GetZonespanlist_c : public ISphStringExpr
  302. {
  303. const CSphVector<int> * m_pData;
  304. mutable CSphStringBuilder m_sBuilder;
  305. explicit Expr_GetZonespanlist_c ()
  306. : m_pData ( NULL )
  307. {}
  308. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  309. {
  310. assert ( ppStr );
  311. if ( !m_pData || !m_pData->GetLength() )
  312. {
  313. *ppStr = NULL;
  314. return 0;
  315. }
  316. m_sBuilder.Clear();
  317. const CSphVector<int> & dSpans = *m_pData;
  318. int iStart = tMatch.m_iTag + 1; // spans[tag] contains the length, so the 1st data index is tag+1
  319. int iEnd = iStart + dSpans [ tMatch.m_iTag ]; // [start,end) now covers all data indexes
  320. for ( int i=iStart; i<iEnd; i+=2 )
  321. m_sBuilder.Appendf ( " %d:%d", 1+dSpans[i], 1+dSpans[i+1] ); // convert our 0-based span numbers to human 1-based ones
  322. *ppStr = (const BYTE *) CSphString ( m_sBuilder.cstr() ).Leak();
  323. return m_sBuilder.Length();
  324. }
  325. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  326. {
  327. if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
  328. static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_ZONESPANS, (void**)&m_pData );
  329. }
  330. virtual bool IsStringPtr() const
  331. {
  332. return true;
  333. }
  334. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  335. {
  336. bDisable = true; // disable caching for now, might add code to process if necessary
  337. return 0;
  338. }
  339. };
  340. struct Expr_GetRankFactors_c : public ISphStringExpr
  341. {
  342. /// hash type MUST BE IN SYNC with RankerState_Export_fn in sphinxsearch.cpp
  343. CSphOrderedHash < CSphString, SphDocID_t, IdentityHash_fn, 256 > * m_pFactors;
  344. explicit Expr_GetRankFactors_c ()
  345. : m_pFactors ( NULL )
  346. {}
  347. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  348. {
  349. assert ( ppStr );
  350. if ( !m_pFactors )
  351. {
  352. *ppStr = NULL;
  353. return 0;
  354. }
  355. CSphString * sVal = (*m_pFactors) ( tMatch.m_uDocID );
  356. if ( !sVal )
  357. {
  358. *ppStr = NULL;
  359. return 0;
  360. }
  361. int iLen = sVal->Length();
  362. *ppStr = (const BYTE*)sVal->Leak();
  363. m_pFactors->Delete ( tMatch.m_uDocID );
  364. return iLen;
  365. }
  366. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  367. {
  368. if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
  369. static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_RANKFACTORS, (void**)&m_pFactors );
  370. }
  371. virtual bool IsStringPtr() const
  372. {
  373. return true;
  374. }
  375. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  376. {
  377. bDisable = true; // disable caching for now, might add code to process if necessary
  378. return 0;
  379. }
  380. };
  381. struct Expr_GetPackedFactors_c : public ISphStringExpr
  382. {
  383. SphFactorHash_t * m_pHash;
  384. explicit Expr_GetPackedFactors_c ()
  385. : m_pHash ( NULL )
  386. {}
  387. virtual const DWORD * FactorEval ( const CSphMatch & tMatch ) const
  388. {
  389. if ( !m_pHash || !m_pHash->GetLength() )
  390. return NULL;
  391. SphFactorHashEntry_t * pEntry = (*m_pHash)[ (int)( tMatch.m_uDocID % m_pHash->GetLength() ) ];
  392. assert ( pEntry );
  393. while ( pEntry && pEntry->m_iId!=tMatch.m_uDocID )
  394. pEntry = pEntry->m_pNext;
  395. if ( !pEntry )
  396. return NULL;
  397. DWORD uDataLen = (BYTE *)pEntry - (BYTE *)pEntry->m_pData;
  398. BYTE * pData = new BYTE[uDataLen];
  399. memcpy ( pData, pEntry->m_pData, uDataLen );
  400. return (DWORD *)pData;
  401. }
  402. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  403. {
  404. if ( eCmd==SPH_EXPR_SET_EXTRA_DATA )
  405. static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_PACKEDFACTORS, (void**)&m_pHash );
  406. }
  407. virtual bool IsStringPtr() const
  408. {
  409. return true;
  410. }
  411. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  412. {
  413. bDisable = true; // disable caching for now, might add code to process if necessary
  414. return 0;
  415. }
  416. };
  417. struct Expr_BM25F_c : public ISphExpr
  418. {
  419. SphExtraDataRankerState_t m_tRankerState;
  420. float m_fK1;
  421. float m_fB;
  422. float m_fWeightedAvgDocLen;
  423. CSphVector<int> m_dWeights; ///< per field weights
  424. SphFactorHash_t * m_pHash;
  425. CSphVector<CSphNamedVariant> m_dFieldWeights;
  426. Expr_BM25F_c ( float k1, float b, CSphVector<CSphNamedVariant> * pFieldWeights )
  427. : m_pHash ( NULL )
  428. {
  429. // bind k1, b
  430. m_fK1 = k1;
  431. m_fB = b;
  432. if ( pFieldWeights )
  433. m_dFieldWeights.SwapData ( *pFieldWeights );
  434. }
  435. float Eval ( const CSphMatch & tMatch ) const
  436. {
  437. if ( !m_pHash || !m_pHash->GetLength() )
  438. return 0.0f;
  439. SphFactorHashEntry_t * pEntry = (*m_pHash)[ (int)( tMatch.m_uDocID % m_pHash->GetLength() ) ];
  440. assert ( pEntry );
  441. while ( pEntry && pEntry->m_iId!=tMatch.m_uDocID )
  442. pEntry = pEntry->m_pNext;
  443. if ( !pEntry )
  444. return 0.0f;
  445. SPH_UDF_FACTORS tUnpacked;
  446. sphinx_factors_init ( &tUnpacked );
  447. #ifndef NDEBUG
  448. Verify ( sphinx_factors_unpack ( (const unsigned int*)pEntry->m_pData, &tUnpacked )==0 );
  449. #else
  450. sphinx_factors_unpack ( (const unsigned int*)pEntry->m_pData, &tUnpacked ); // fix MSVC Release warning
  451. #endif
  452. // compute document length
  453. // OPTIMIZE? could precompute and store total dl in attrs, but at a storage cost
  454. // OPTIMIZE? could at least share between multiple BM25F instances, if there are many
  455. float dl = 0;
  456. CSphAttrLocator tLoc = m_tRankerState.m_tFieldLensLoc;
  457. if ( tLoc.m_iBitOffset>=0 )
  458. {
  459. for ( int i=0; i<m_tRankerState.m_iFields; i++ )
  460. {
  461. dl += tMatch.GetAttr ( tLoc ) * m_dWeights[i];
  462. tLoc.m_iBitOffset += 32;
  463. }
  464. }
  465. // compute (the current instance of) BM25F
  466. float fRes = 0.0f;
  467. for ( int iWord=0; iWord<m_tRankerState.m_iMaxQpos; iWord++ )
  468. {
  469. if ( !tUnpacked.term[iWord].keyword_mask )
  470. continue;
  471. // compute weighted TF
  472. float tf = 0.0f;
  473. for ( int i=0; i<m_tRankerState.m_iFields; i++ )
  474. {
  475. tf += tUnpacked.field_tf[ iWord + 1 + i * ( 1 + m_tRankerState.m_iMaxQpos ) ] * m_dWeights[i];
  476. }
  477. float idf = tUnpacked.term[iWord].idf; // FIXME? zeroed out for dupes!
  478. fRes += tf / ( tf + m_fK1 * ( 1.0f - m_fB + m_fB * dl / m_fWeightedAvgDocLen ) ) * idf;
  479. }
  480. sphinx_factors_deinit ( &tUnpacked );
  481. return fRes + 0.5f; // map to [0..1] range
  482. }
  483. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  484. {
  485. if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
  486. return;
  487. bool bGotHash = static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_PACKEDFACTORS, (void**)&m_pHash );
  488. if ( !bGotHash )
  489. return;
  490. bool bGotState = static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_DATA_RANKER_STATE, (void**)&m_tRankerState );
  491. if ( !bGotState )
  492. return;
  493. // bind weights
  494. m_dWeights.Resize ( m_tRankerState.m_iFields );
  495. m_dWeights.Fill ( 1 );
  496. if ( m_dFieldWeights.GetLength() )
  497. {
  498. ARRAY_FOREACH ( i, m_dFieldWeights )
  499. {
  500. // FIXME? report errors if field was not found?
  501. CSphString & sField = m_dFieldWeights[i].m_sKey;
  502. int iField = m_tRankerState.m_pSchema->GetFieldIndex ( sField.cstr() );
  503. if ( iField>=0 )
  504. m_dWeights[iField] = m_dFieldWeights[i].m_iValue;
  505. }
  506. }
  507. // compute weighted avgdl
  508. m_fWeightedAvgDocLen = 1.0f;
  509. if ( m_tRankerState.m_pFieldLens )
  510. {
  511. m_fWeightedAvgDocLen = 0.0f;
  512. ARRAY_FOREACH ( i, m_dWeights )
  513. m_fWeightedAvgDocLen += m_tRankerState.m_pFieldLens[i] * m_dWeights[i];
  514. }
  515. m_fWeightedAvgDocLen /= m_tRankerState.m_iTotalDocuments;
  516. }
  517. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  518. {
  519. bDisable = true; // disable caching for now, might add code to process if necessary
  520. return 0;
  521. }
  522. };
  523. struct Expr_GetId_c : public ISphExpr
  524. {
  525. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_uDocID; }
  526. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_uDocID; }
  527. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_uDocID; }
  528. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  529. {
  530. EXPR_CLASS_NAME("Expr_GetId_c");
  531. return CALC_DEP_HASHES();
  532. }
  533. };
  534. struct Expr_GetWeight_c : public ISphExpr
  535. {
  536. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)tMatch.m_iWeight; }
  537. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)tMatch.m_iWeight; }
  538. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)tMatch.m_iWeight; }
  539. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  540. {
  541. EXPR_CLASS_NAME("Expr_GetWeight_c");
  542. return CALC_DEP_HASHES();
  543. }
  544. };
  545. //////////////////////////////////////////////////////////////////////////
  546. struct Expr_Arglist_c : public ISphExpr
  547. {
  548. CSphVector<ISphExpr *> m_dArgs;
  549. Expr_Arglist_c ( ISphExpr * pLeft, ISphExpr * pRight )
  550. {
  551. AddArgs ( pLeft );
  552. AddArgs ( pRight );
  553. }
  554. ~Expr_Arglist_c ()
  555. {
  556. ARRAY_FOREACH ( i, m_dArgs )
  557. SafeRelease ( m_dArgs[i] );
  558. }
  559. void AddArgs ( ISphExpr * pExpr )
  560. {
  561. // not an arglist? just add it
  562. if ( !pExpr->IsArglist() )
  563. {
  564. m_dArgs.Add ( pExpr );
  565. return;
  566. }
  567. // arglist? take ownership of its args, and dismiss it
  568. Expr_Arglist_c * pArgs = (Expr_Arglist_c *) pExpr;
  569. ARRAY_FOREACH ( i, pArgs->m_dArgs )
  570. {
  571. m_dArgs.Add ( pArgs->m_dArgs[i] );
  572. pArgs->m_dArgs[i] = NULL;
  573. }
  574. SafeRelease ( pExpr );
  575. }
  576. virtual bool IsArglist () const
  577. {
  578. return true;
  579. }
  580. virtual ISphExpr * GetArg ( int i ) const
  581. {
  582. if ( i>=m_dArgs.GetLength() )
  583. return NULL;
  584. return m_dArgs[i];
  585. }
  586. virtual int GetNumArgs() const
  587. {
  588. return m_dArgs.GetLength();
  589. }
  590. virtual float Eval ( const CSphMatch & ) const
  591. {
  592. assert ( 0 && "internal error: Eval() must not be explicitly called on arglist" );
  593. return 0.0f;
  594. }
  595. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  596. {
  597. ARRAY_FOREACH ( i, m_dArgs )
  598. m_dArgs[i]->Command ( eCmd, pArg );
  599. }
  600. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & )
  601. {
  602. assert ( 0 && "internal error: GetHash() must not be explicitly called on arglist" );
  603. return 0;
  604. }
  605. };
  606. //////////////////////////////////////////////////////////////////////////
  607. struct Expr_Unary_c : public ISphExpr
  608. {
  609. ISphExpr * m_pFirst;
  610. const char * m_szExprName;
  611. explicit Expr_Unary_c ( const char * szClassName, ISphExpr * pFirst )
  612. : m_pFirst ( pFirst )
  613. , m_szExprName ( szClassName )
  614. {}
  615. virtual ~Expr_Unary_c()
  616. {
  617. SafeRelease ( m_pFirst );
  618. }
  619. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  620. {
  621. m_pFirst->Command ( eCmd, pArg );
  622. }
  623. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  624. {
  625. EXPR_CLASS_NAME_NOCHECK(m_szExprName);
  626. CALC_CHILD_HASH(m_pFirst);
  627. return CALC_DEP_HASHES();
  628. }
  629. };
  630. struct Expr_Binary_c : public ISphExpr
  631. {
  632. ISphExpr * m_pFirst;
  633. ISphExpr * m_pSecond;
  634. const char * m_szExprName;
  635. explicit Expr_Binary_c ( const char * szClassName, ISphExpr * pFirst, ISphExpr * pSecond )
  636. : m_pFirst ( pFirst )
  637. , m_pSecond ( pSecond )
  638. , m_szExprName ( szClassName )
  639. {}
  640. virtual ~Expr_Binary_c()
  641. {
  642. SafeRelease ( m_pFirst );
  643. SafeRelease ( m_pSecond );
  644. }
  645. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  646. {
  647. m_pFirst->Command ( eCmd, pArg );
  648. m_pSecond->Command ( eCmd, pArg );
  649. }
  650. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  651. {
  652. EXPR_CLASS_NAME_NOCHECK(m_szExprName);
  653. CALC_CHILD_HASH(m_pFirst);
  654. CALC_CHILD_HASH(m_pSecond);
  655. return CALC_DEP_HASHES();
  656. }
  657. };
  658. //////////////////////////////////////////////////////////////////////////
  659. struct Expr_Crc32_c : public Expr_Unary_c
  660. {
  661. explicit Expr_Crc32_c ( ISphExpr * pFirst ) : Expr_Unary_c ( "Expr_Crc32_c", pFirst ) {}
  662. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  663. virtual int IntEval ( const CSphMatch & tMatch ) const
  664. {
  665. const BYTE * pStr;
  666. int iLen = m_pFirst->StringEval ( tMatch, &pStr );
  667. DWORD uCrc = sphCRC32 ( pStr, iLen );
  668. if ( m_pFirst->IsStringPtr() )
  669. SafeDeleteArray ( pStr );
  670. return uCrc;
  671. }
  672. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)(DWORD)IntEval ( tMatch ); }
  673. };
  674. static inline int Fibonacci ( int i )
  675. {
  676. if ( i<0 )
  677. return 0;
  678. int f0 = 0;
  679. int f1 = 1;
  680. int j = 0;
  681. for ( j=0; j+1<i; j+=2 )
  682. {
  683. f0 += f1; // f_j
  684. f1 += f0; // f_{j+1}
  685. }
  686. return ( i & 1 ) ? f1 : f0;
  687. }
  688. struct Expr_Fibonacci_c : public Expr_Unary_c
  689. {
  690. explicit Expr_Fibonacci_c ( ISphExpr * pFirst ) : Expr_Unary_c ( "Expr_Fibonacci_c", pFirst ) {}
  691. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  692. virtual int IntEval ( const CSphMatch & tMatch ) const { return Fibonacci ( m_pFirst->IntEval ( tMatch ) ); }
  693. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
  694. };
  695. struct Expr_ToString_c : public Expr_Unary_c
  696. {
  697. protected:
  698. ESphAttr m_eArg;
  699. mutable CSphStringBuilder m_sBuilder;
  700. const BYTE * m_pStrings;
  701. public:
  702. Expr_ToString_c ( ISphExpr * pArg, ESphAttr eArg )
  703. : Expr_Unary_c ( "Expr_ToString_c", pArg )
  704. , m_eArg ( eArg )
  705. , m_pStrings ( NULL )
  706. {}
  707. virtual float Eval ( const CSphMatch & ) const
  708. {
  709. assert ( 0 );
  710. return 0.0f;
  711. }
  712. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  713. {
  714. m_sBuilder.Clear();
  715. int64_t iPacked = 0;
  716. ESphJsonType eJson = JSON_NULL;
  717. DWORD uOff = 0;
  718. int iLen = 0;
  719. switch ( m_eArg )
  720. {
  721. case SPH_ATTR_INTEGER: m_sBuilder.Appendf ( "%u", m_pFirst->IntEval ( tMatch ) ); break;
  722. case SPH_ATTR_BIGINT: m_sBuilder.Appendf ( INT64_FMT, m_pFirst->Int64Eval ( tMatch ) ); break;
  723. case SPH_ATTR_FLOAT: m_sBuilder.Appendf ( "%f", m_pFirst->Eval ( tMatch ) ); break;
  724. case SPH_ATTR_UINT32SET:
  725. case SPH_ATTR_INT64SET:
  726. {
  727. const DWORD * pValues = m_pFirst->MvaEval ( tMatch );
  728. if ( !pValues || !*pValues )
  729. break;
  730. DWORD nValues = *pValues++;
  731. assert (!( m_eArg==SPH_ATTR_INT64SET && ( nValues & 1 ) ));
  732. // OPTIMIZE? minibuffer on stack, less allocs, manual formatting vs printf, etc
  733. if ( m_eArg==SPH_ATTR_UINT32SET )
  734. {
  735. while ( nValues-- )
  736. {
  737. if ( m_sBuilder.Length() )
  738. m_sBuilder += ",";
  739. m_sBuilder.Appendf ( "%u", *pValues++ );
  740. }
  741. } else
  742. {
  743. for ( ; nValues; nValues-=2, pValues+=2 )
  744. {
  745. if ( m_sBuilder.Length() )
  746. m_sBuilder += ",";
  747. m_sBuilder.Appendf ( INT64_FMT, MVA_UPSIZE ( pValues ) );
  748. }
  749. }
  750. }
  751. break;
  752. case SPH_ATTR_STRINGPTR:
  753. return m_pFirst->StringEval ( tMatch, ppStr );
  754. case SPH_ATTR_JSON_FIELD:
  755. iPacked = m_pFirst->Int64Eval ( tMatch );
  756. eJson = ESphJsonType ( iPacked>>32 );
  757. uOff = (DWORD)iPacked;
  758. if ( !uOff || eJson==JSON_NULL )
  759. {
  760. *ppStr = NULL;
  761. iLen = 0;
  762. } else
  763. {
  764. CSphVector<BYTE> dTmp;
  765. sphJsonFieldFormat ( dTmp, m_pStrings+uOff, eJson, false );
  766. if ( dTmp.GetLength() )
  767. dTmp.Add ( '\0' );
  768. iLen = dTmp.GetLength();
  769. *ppStr = dTmp.LeakData();
  770. }
  771. return iLen;
  772. default:
  773. assert ( 0 && "unhandled arg type in TO_STRING()" );
  774. break;
  775. }
  776. if ( !m_sBuilder.Length() )
  777. {
  778. *ppStr = NULL;
  779. return 0;
  780. }
  781. *ppStr = (const BYTE *) CSphString ( m_sBuilder.cstr() ).Leak();
  782. return m_sBuilder.Length();
  783. }
  784. virtual bool IsStringPtr() const
  785. {
  786. return true;
  787. }
  788. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  789. {
  790. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  791. m_pStrings = (const BYTE*)pArg;
  792. m_pFirst->Command ( eCmd, pArg );
  793. }
  794. };
  795. //////////////////////////////////////////////////////////////////////////
  796. /// generic JSON value evaluation
  797. /// can handle arbitrary stacks of jsoncol.key1.arr2[indexexpr3].key4[keynameexpr5]
  798. /// m_dArgs holds the expressions that return actual accessors (either keynames or indexes)
  799. /// m_dRetTypes holds their respective types
  800. struct Expr_JsonField_c : public Expr_WithLocator_c
  801. {
  802. protected:
  803. const BYTE * m_pStrings;
  804. CSphVector<ISphExpr *> m_dArgs;
  805. CSphVector<ESphAttr> m_dRetTypes;
  806. public:
  807. /// takes over the expressions
  808. Expr_JsonField_c ( const CSphAttrLocator & tLocator, int iLocator, CSphVector<ISphExpr*> & dArgs, CSphVector<ESphAttr> & dRetTypes )
  809. : Expr_WithLocator_c ( tLocator, iLocator )
  810. , m_pStrings ( NULL )
  811. {
  812. assert ( dArgs.GetLength()==dRetTypes.GetLength() );
  813. m_dArgs.SwapData ( dArgs );
  814. m_dRetTypes.SwapData ( dRetTypes );
  815. }
  816. ~Expr_JsonField_c ()
  817. {
  818. ARRAY_FOREACH ( i, m_dArgs )
  819. SafeRelease ( m_dArgs[i] );
  820. }
  821. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  822. {
  823. Expr_WithLocator_c::Command ( eCmd, pArg );
  824. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  825. m_pStrings = (const BYTE*)pArg;
  826. else if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS && m_iLocator!=-1 )
  827. static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLocator );
  828. ARRAY_FOREACH ( i, m_dArgs )
  829. if ( m_dArgs[i] )
  830. m_dArgs[i]->Command ( eCmd, pArg );
  831. }
  832. virtual float Eval ( const CSphMatch & ) const
  833. {
  834. assert ( 0 && "one just does not simply evaluate a JSON as float" );
  835. return 0;
  836. }
  837. virtual int64_t DoEval ( ESphJsonType eJson, const BYTE * pVal, const CSphMatch & tMatch ) const
  838. {
  839. int iLen;
  840. const BYTE * pStr;
  841. ARRAY_FOREACH ( i, m_dRetTypes )
  842. {
  843. switch ( m_dRetTypes[i] )
  844. {
  845. case SPH_ATTR_INTEGER: eJson = sphJsonFindByIndex ( eJson, &pVal, m_dArgs[i]->IntEval ( tMatch ) ); break;
  846. case SPH_ATTR_BIGINT: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)m_dArgs[i]->Int64Eval ( tMatch ) ); break;
  847. case SPH_ATTR_FLOAT: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)m_dArgs[i]->Eval ( tMatch ) ); break;
  848. case SPH_ATTR_STRING:
  849. // is this assert will fail someday it's ok
  850. // just remove it and add this code instead to handle possible memory leak
  851. // if ( m_dArgv[i]->IsStringPtr() ) SafeDeleteArray ( pStr );
  852. assert ( !m_dArgs[i]->IsStringPtr() );
  853. iLen = m_dArgs[i]->StringEval ( tMatch, &pStr );
  854. eJson = sphJsonFindByKey ( eJson, &pVal, (const void *)pStr, iLen, sphJsonKeyMask ( (const char *)pStr, iLen ) );
  855. break;
  856. case SPH_ATTR_JSON_FIELD: // handle cases like "json.a [ json.b ]"
  857. {
  858. uint64_t uValue = m_dArgs[i]->Int64Eval ( tMatch );
  859. const BYTE * p = m_pStrings + ( uValue & 0xffffffff );
  860. ESphJsonType eType = (ESphJsonType)( uValue >> 32 );
  861. switch ( eType )
  862. {
  863. case JSON_INT32: eJson = sphJsonFindByIndex ( eJson, &pVal, sphJsonLoadInt ( &p ) ); break;
  864. case JSON_INT64: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)sphJsonLoadBigint ( &p ) ); break;
  865. case JSON_DOUBLE: eJson = sphJsonFindByIndex ( eJson, &pVal, (int)sphQW2D ( sphJsonLoadBigint ( &p ) ) ); break;
  866. case JSON_STRING:
  867. iLen = sphJsonUnpackInt ( &p );
  868. eJson = sphJsonFindByKey ( eJson, &pVal, (const void *)p, iLen, sphJsonKeyMask ( (const char *)p, iLen ) );
  869. break;
  870. default:
  871. return 0;
  872. }
  873. break;
  874. }
  875. default:
  876. return 0;
  877. }
  878. if ( eJson==JSON_EOF )
  879. return 0;
  880. }
  881. // keep actual attribute type and offset to data packed
  882. int64_t iPacked = ( ( (int64_t)( pVal-m_pStrings ) ) | ( ( (int64_t)eJson )<<32 ) );
  883. return iPacked;
  884. }
  885. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  886. {
  887. if ( !m_pStrings )
  888. return 0;
  889. uint64_t uOffset = tMatch.GetAttr ( m_tLocator );
  890. if ( !uOffset )
  891. return 0;
  892. if ( m_tLocator.m_bDynamic )
  893. {
  894. // extends precalculated (aliased) field
  895. const BYTE * pVal = m_pStrings + ( uOffset & 0xffffffff );
  896. ESphJsonType eJson = (ESphJsonType)( uOffset >> 32 );
  897. return DoEval ( eJson, pVal, tMatch );
  898. }
  899. const BYTE * pVal = NULL;
  900. sphUnpackStr ( m_pStrings + uOffset, &pVal );
  901. if ( !pVal )
  902. return 0;
  903. ESphJsonType eJson = sphJsonFindFirst ( &pVal );
  904. return DoEval ( eJson, pVal, tMatch );
  905. }
  906. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  907. {
  908. EXPR_CLASS_NAME("Expr_JsonField_c");
  909. CALC_POD_HASHES(m_dRetTypes);
  910. CALC_CHILD_HASHES(m_dArgs);
  911. return CALC_DEP_HASHES();
  912. }
  913. };
  914. /// fastpath (instead of generic JsonField_c) for jsoncol.key access by a static key name
  915. struct Expr_JsonFastKey_c : public Expr_WithLocator_c
  916. {
  917. protected:
  918. const BYTE * m_pStrings;
  919. CSphString m_sKey;
  920. int m_iKeyLen;
  921. DWORD m_uKeyBloom;
  922. public:
  923. /// takes over the expressions
  924. Expr_JsonFastKey_c ( const CSphAttrLocator & tLocator, int iLocator, ISphExpr * pArg )
  925. : Expr_WithLocator_c ( tLocator, iLocator )
  926. , m_pStrings ( NULL )
  927. {
  928. assert ( ( tLocator.m_iBitOffset % ROWITEM_BITS )==0 );
  929. assert ( tLocator.m_iBitCount==ROWITEM_BITS );
  930. Expr_GetStrConst_c * pKey = (Expr_GetStrConst_c*)pArg;
  931. m_sKey = pKey->m_sVal;
  932. m_iKeyLen = pKey->m_iLen;
  933. m_uKeyBloom = sphJsonKeyMask ( m_sKey.cstr(), m_iKeyLen );
  934. SafeRelease ( pArg );
  935. }
  936. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  937. {
  938. Expr_WithLocator_c::Command ( eCmd, pArg );
  939. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  940. m_pStrings = (const BYTE*)pArg;
  941. }
  942. virtual float Eval ( const CSphMatch & ) const
  943. {
  944. assert ( 0 && "one just does not simply evaluate a JSON as float" );
  945. return 0;
  946. }
  947. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  948. {
  949. // get pointer to JSON blob data
  950. assert ( m_pStrings );
  951. DWORD uOffset = m_tLocator.m_bDynamic
  952. ? tMatch.m_pDynamic [ m_tLocator.m_iBitOffset >> ROWITEM_SHIFT ]
  953. : tMatch.m_pStatic [ m_tLocator.m_iBitOffset >> ROWITEM_SHIFT ];
  954. if ( !uOffset )
  955. return 0;
  956. const BYTE * pJson;
  957. sphUnpackStr ( m_pStrings + uOffset, &pJson );
  958. // all root objects start with a Bloom mask; quickly check it
  959. if ( ( sphGetDword(pJson) & m_uKeyBloom )!=m_uKeyBloom )
  960. return 0;
  961. // OPTIMIZE? FindByKey does an extra (redundant) bloom check inside
  962. ESphJsonType eJson = sphJsonFindByKey ( JSON_ROOT, &pJson, m_sKey.cstr(), m_iKeyLen, m_uKeyBloom );
  963. if ( eJson==JSON_EOF )
  964. return 0;
  965. // keep actual attribute type and offset to data packed
  966. int64_t iPacked = ( ( (int64_t)( pJson-m_pStrings ) ) | ( ( (int64_t)eJson )<<32 ) );
  967. return iPacked;
  968. }
  969. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  970. {
  971. EXPR_CLASS_NAME("Expr_JsonFastKey_c");
  972. CALC_STR_HASH(m_sKey,m_iKeyLen);
  973. return CALC_DEP_HASHES();
  974. }
  975. };
  976. struct Expr_JsonFieldConv_c : public ISphExpr
  977. {
  978. protected:
  979. const BYTE * m_pStrings;
  980. ISphExpr * m_pArg;
  981. public:
  982. explicit Expr_JsonFieldConv_c ( ISphExpr * pArg )
  983. : m_pStrings ( NULL )
  984. , m_pArg ( pArg )
  985. {}
  986. ~Expr_JsonFieldConv_c()
  987. {
  988. SafeRelease ( m_pArg );
  989. }
  990. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  991. {
  992. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  993. m_pStrings = (const BYTE*)pArg;
  994. if ( m_pArg )
  995. m_pArg->Command ( eCmd, pArg );
  996. }
  997. protected:
  998. virtual ESphJsonType GetKey ( const BYTE ** ppKey, const CSphMatch & tMatch ) const
  999. {
  1000. assert ( ppKey );
  1001. if ( !m_pStrings || !m_pArg )
  1002. return JSON_EOF;
  1003. uint64_t uValue = m_pArg->Int64Eval ( tMatch );
  1004. *ppKey = m_pStrings + ( uValue & 0xffffffff );
  1005. return (ESphJsonType)( uValue >> 32 );
  1006. }
  1007. // generic evaluate
  1008. template < typename T >
  1009. T DoEval ( const CSphMatch & tMatch ) const
  1010. {
  1011. const BYTE * pVal = NULL;
  1012. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1013. switch ( eJson )
  1014. {
  1015. case JSON_INT32: return (T)sphJsonLoadInt ( &pVal );
  1016. case JSON_INT64: return (T)sphJsonLoadBigint ( &pVal );
  1017. case JSON_DOUBLE: return (T)sphQW2D ( sphJsonLoadBigint ( &pVal ) );
  1018. case JSON_TRUE: return 1;
  1019. case JSON_STRING:
  1020. {
  1021. if ( !g_bJsonAutoconvNumbers )
  1022. return 0;
  1023. int iLen = sphJsonUnpackInt ( &pVal );
  1024. int64_t iVal;
  1025. double fVal;
  1026. ESphJsonType eType;
  1027. if ( sphJsonStringToNumber ( (const char*)pVal, iLen, eType, iVal, fVal ) )
  1028. return eType==JSON_DOUBLE ? (T)fVal : (T)iVal;
  1029. }
  1030. default: return 0;
  1031. }
  1032. }
  1033. virtual uint64_t CalcHash ( const char * szTag, const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1034. {
  1035. EXPR_CLASS_NAME_NOCHECK(szTag);
  1036. CALC_CHILD_HASH(m_pArg);
  1037. return CALC_DEP_HASHES();
  1038. }
  1039. public:
  1040. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  1041. {
  1042. const BYTE * pVal = NULL;
  1043. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1044. return ( eJson==JSON_STRING ) ? sphUnpackStr ( pVal, ppStr ) : 0;
  1045. }
  1046. virtual float Eval ( const CSphMatch & tMatch ) const { return DoEval<float> ( tMatch ); }
  1047. virtual int IntEval ( const CSphMatch & tMatch ) const { return DoEval<int> ( tMatch ); }
  1048. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return DoEval<int64_t> ( tMatch ); }
  1049. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1050. {
  1051. EXPR_CLASS_NAME("Expr_JsonFieldConv_c");
  1052. return CALC_PARENT_HASH();
  1053. }
  1054. };
  1055. template <typename T>
  1056. T JsonAggr ( ESphJsonType eJson, const BYTE * pVal, ESphAggrFunc eFunc, CSphString * pBuf )
  1057. {
  1058. if ( !pVal || ( eFunc!=SPH_AGGR_MIN && eFunc!=SPH_AGGR_MAX ) )
  1059. return 0;
  1060. switch ( eJson )
  1061. {
  1062. case JSON_INT32_VECTOR:
  1063. {
  1064. int iVals = sphJsonUnpackInt ( &pVal );
  1065. if ( iVals==0 )
  1066. return 0;
  1067. const int * p = (const int*) pVal;
  1068. int iRes = *p; // first value
  1069. switch ( eFunc )
  1070. {
  1071. case SPH_AGGR_MIN: while ( --iVals ) if ( *++p<iRes ) iRes = *p; break;
  1072. case SPH_AGGR_MAX: while ( --iVals ) if ( *++p>iRes ) iRes = *p; break;
  1073. default: return 0;
  1074. }
  1075. return (T)iRes;
  1076. }
  1077. case JSON_DOUBLE_VECTOR:
  1078. {
  1079. int iLen = sphJsonUnpackInt ( &pVal );
  1080. if ( !iLen || ( eFunc!=SPH_AGGR_MIN && eFunc!=SPH_AGGR_MAX ) )
  1081. return 0;
  1082. double fRes = ( eFunc==SPH_AGGR_MIN ? FLT_MAX : FLT_MIN );
  1083. const BYTE * p = pVal;
  1084. for ( int i=0; i<iLen; i++ )
  1085. {
  1086. double fStored = sphQW2D ( sphJsonLoadBigint ( &p ) );
  1087. switch ( eFunc )
  1088. {
  1089. case SPH_AGGR_MIN:
  1090. fRes = Min ( fRes, fStored );
  1091. break;
  1092. case SPH_AGGR_MAX:
  1093. fRes = Max ( fRes, fStored );
  1094. break;
  1095. default: return 0;
  1096. }
  1097. }
  1098. return (T)fRes;
  1099. }
  1100. case JSON_STRING_VECTOR:
  1101. {
  1102. if ( !pBuf )
  1103. return 0;
  1104. sphJsonUnpackInt ( &pVal ); // skip node length
  1105. int iVals = sphJsonUnpackInt ( &pVal );
  1106. if ( iVals==0 )
  1107. return 0;
  1108. // first value
  1109. int iLen = sphJsonUnpackInt ( &pVal );
  1110. const char * pRes = (const char* )pVal;
  1111. int iResLen = iLen;
  1112. while ( --iVals )
  1113. {
  1114. pVal += iLen;
  1115. iLen = sphJsonUnpackInt ( &pVal );
  1116. // binary string comparison
  1117. int iCmp = memcmp ( pRes, (const char*)pVal, iLen<iResLen ? iLen : iResLen );
  1118. if ( iCmp==0 && iLen!=iResLen )
  1119. iCmp = iResLen-iLen;
  1120. if ( ( eFunc==SPH_AGGR_MIN && iCmp>0 ) || ( eFunc==SPH_AGGR_MAX && iCmp<0 ) )
  1121. {
  1122. pRes = (const char*)pVal;
  1123. iResLen = iLen;
  1124. }
  1125. }
  1126. pBuf->SetBinary ( pRes, iResLen );
  1127. return (T)iResLen;
  1128. }
  1129. case JSON_MIXED_VECTOR:
  1130. {
  1131. sphJsonUnpackInt ( &pVal ); // skip node length
  1132. int iLen = sphJsonUnpackInt ( &pVal );
  1133. if ( !iLen || ( eFunc!=SPH_AGGR_MIN && eFunc!=SPH_AGGR_MAX ) )
  1134. return 0;
  1135. double fRes = ( eFunc==SPH_AGGR_MIN ? FLT_MAX : FLT_MIN );
  1136. for ( int i=0; i<iLen; i++ )
  1137. {
  1138. double fVal = ( eFunc==SPH_AGGR_MIN ? FLT_MAX : FLT_MIN );
  1139. ESphJsonType eType = (ESphJsonType)*pVal++;
  1140. switch (eType)
  1141. {
  1142. case JSON_INT32:
  1143. case JSON_INT64:
  1144. fVal = (double)( eType==JSON_INT32 ? sphJsonLoadInt ( &pVal ) : sphJsonLoadBigint ( &pVal ) );
  1145. break;
  1146. case JSON_DOUBLE:
  1147. fVal = sphQW2D ( sphJsonLoadBigint ( &pVal ) );
  1148. break;
  1149. default:
  1150. sphJsonSkipNode ( eType, &pVal );
  1151. break; // for weird subobjects, just let min
  1152. }
  1153. switch ( eFunc )
  1154. {
  1155. case SPH_AGGR_MIN:
  1156. fRes = Min ( fRes, fVal );
  1157. break;
  1158. case SPH_AGGR_MAX:
  1159. fRes = Max ( fRes, fVal );
  1160. break;
  1161. default: return 0;
  1162. }
  1163. }
  1164. return (T)fRes;
  1165. }
  1166. default: return 0;
  1167. }
  1168. }
  1169. struct Expr_JsonFieldAggr_c : public Expr_JsonFieldConv_c
  1170. {
  1171. protected:
  1172. ESphAggrFunc m_eFunc;
  1173. public:
  1174. Expr_JsonFieldAggr_c ( ISphExpr * pArg, ESphAggrFunc eFunc )
  1175. : Expr_JsonFieldConv_c ( pArg )
  1176. , m_eFunc ( eFunc )
  1177. {}
  1178. virtual int IntEval ( const CSphMatch & tMatch ) const
  1179. {
  1180. const BYTE * pVal = NULL;
  1181. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1182. return JsonAggr<int> ( eJson, pVal, m_eFunc, NULL );
  1183. }
  1184. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  1185. {
  1186. CSphString sBuf;
  1187. *ppStr = NULL;
  1188. const BYTE * pVal = NULL;
  1189. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1190. int iLen = 0;
  1191. int iVal = 0;
  1192. float fVal = 0.0f;
  1193. switch ( eJson )
  1194. {
  1195. case JSON_INT32_VECTOR:
  1196. iVal = JsonAggr<int> ( eJson, pVal, m_eFunc, NULL );
  1197. sBuf.SetSprintf ( "%u", iVal );
  1198. iLen = sBuf.Length();
  1199. *ppStr = (const BYTE *) sBuf.Leak();
  1200. return iLen;
  1201. case JSON_STRING_VECTOR:
  1202. JsonAggr<int> ( eJson, pVal, m_eFunc, &sBuf );
  1203. iLen = sBuf.Length();
  1204. *ppStr = (const BYTE *) sBuf.Leak();
  1205. return iLen;
  1206. case JSON_DOUBLE_VECTOR:
  1207. fVal = JsonAggr<float> ( eJson, pVal, m_eFunc, NULL );
  1208. sBuf.SetSprintf ( "%f", fVal );
  1209. iLen = sBuf.Length();
  1210. *ppStr = (const BYTE *) sBuf.Leak();
  1211. return iLen;
  1212. case JSON_MIXED_VECTOR:
  1213. fVal = JsonAggr<float> ( eJson, pVal, m_eFunc, NULL );
  1214. sBuf.SetSprintf ( "%f", fVal );
  1215. iLen = sBuf.Length();
  1216. *ppStr = (const BYTE *) sBuf.Leak();
  1217. return iLen;
  1218. default: return 0;
  1219. }
  1220. }
  1221. virtual float Eval ( const CSphMatch & tMatch ) const
  1222. {
  1223. const BYTE * pVal = NULL;
  1224. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1225. return JsonAggr<float> ( eJson, pVal, m_eFunc, NULL );
  1226. }
  1227. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  1228. {
  1229. const BYTE * pVal = NULL;
  1230. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1231. return JsonAggr<int64_t> ( eJson, pVal, m_eFunc, NULL );
  1232. }
  1233. virtual bool IsStringPtr() const { return true; }
  1234. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1235. {
  1236. EXPR_CLASS_NAME("Expr_JsonFieldAggr_c");
  1237. CALC_POD_HASH(m_eFunc);
  1238. return CALC_PARENT_HASH();
  1239. }
  1240. };
  1241. struct Expr_JsonFieldLength_c : public Expr_JsonFieldConv_c
  1242. {
  1243. public:
  1244. explicit Expr_JsonFieldLength_c ( ISphExpr * pArg )
  1245. : Expr_JsonFieldConv_c ( pArg )
  1246. {}
  1247. virtual int IntEval ( const CSphMatch & tMatch ) const
  1248. {
  1249. const BYTE * pVal = NULL;
  1250. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1251. return sphJsonFieldLength ( eJson, pVal );
  1252. }
  1253. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1254. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1255. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1256. {
  1257. EXPR_CLASS_NAME("Expr_JsonFieldLength_c");
  1258. return CALC_PARENT_HASH();
  1259. }
  1260. };
  1261. struct Expr_Now_c : public ISphExpr
  1262. {
  1263. Expr_Now_c ( int iNow )
  1264. : m_iNow ( iNow )
  1265. {}
  1266. virtual int IntEval ( const CSphMatch & ) const { return m_iNow; }
  1267. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1268. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1269. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1270. {
  1271. EXPR_CLASS_NAME("Expr_Now_c");
  1272. CALC_POD_HASH(m_iNow);
  1273. return CALC_DEP_HASHES();
  1274. }
  1275. private:
  1276. int m_iNow;
  1277. };
  1278. struct Expr_Time_c : public ISphExpr
  1279. {
  1280. bool m_bUTC;
  1281. bool m_bDate;
  1282. explicit Expr_Time_c ( bool bUTC, bool bDate )
  1283. : m_bUTC ( bUTC )
  1284. , m_bDate ( bDate )
  1285. {}
  1286. virtual int IntEval ( const CSphMatch & ) const
  1287. {
  1288. struct tm s; // can't get non-UTC timestamp without mktime
  1289. time_t t = time ( NULL );
  1290. if ( m_bUTC )
  1291. gmtime_r ( &t, &s );
  1292. else
  1293. localtime_r ( &t, &s );
  1294. return (int) mktime ( &s );
  1295. }
  1296. virtual int StringEval ( const CSphMatch &, const BYTE ** ppStr ) const
  1297. {
  1298. CSphString sVal;
  1299. struct tm s;
  1300. time_t t = time ( NULL );
  1301. if ( m_bUTC )
  1302. gmtime_r ( &t, &s );
  1303. else
  1304. localtime_r ( &t, &s );
  1305. if ( m_bDate )
  1306. sVal.SetSprintf ( "%04d-%02d-%02d %02d:%02d:%02d", s.tm_year+1900, s.tm_mon+1, s.tm_mday, s.tm_hour, s.tm_min, s.tm_sec );
  1307. else
  1308. sVal.SetSprintf ( "%02d:%02d:%02d", s.tm_hour, s.tm_min, s.tm_sec );
  1309. int iLength = sVal.Length();
  1310. *ppStr = (const BYTE*) sVal.Leak();
  1311. return iLength;
  1312. }
  1313. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1314. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1315. virtual bool IsStringPtr () const { return true; }
  1316. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  1317. {
  1318. bDisable = true;
  1319. return 0;
  1320. }
  1321. };
  1322. struct Expr_TimeDiff_c : public ISphExpr
  1323. {
  1324. ISphExpr * m_pFirst;
  1325. ISphExpr * m_pSecond;
  1326. Expr_TimeDiff_c ( ISphExpr * pFirst, ISphExpr * pSecond )
  1327. : m_pFirst ( pFirst )
  1328. , m_pSecond ( pSecond )
  1329. {}
  1330. ~Expr_TimeDiff_c()
  1331. {
  1332. SafeRelease ( m_pFirst );
  1333. SafeRelease ( m_pSecond );
  1334. }
  1335. virtual int IntEval ( const CSphMatch & tMatch ) const
  1336. {
  1337. assert ( m_pFirst && m_pSecond );
  1338. return m_pFirst->IntEval ( tMatch )-m_pSecond->IntEval ( tMatch );
  1339. }
  1340. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  1341. {
  1342. int iVal = IntEval ( tMatch );
  1343. CSphString sVal;
  1344. int t = iVal<0 ? -iVal : iVal;
  1345. sVal.SetSprintf ( "%s%02d:%02d:%02d", iVal<0 ? "-" : "", t/60/60, (t/60)%60, t%60 );
  1346. int iLength = sVal.Length();
  1347. *ppStr = (const BYTE*) sVal.Leak();
  1348. return iLength;
  1349. }
  1350. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1351. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1352. virtual bool IsStringPtr () const { return true; }
  1353. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  1354. {
  1355. m_pFirst->Command ( eCmd, pArg );
  1356. m_pSecond->Command ( eCmd, pArg );
  1357. }
  1358. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1359. {
  1360. EXPR_CLASS_NAME("Expr_TimeDiff_c");
  1361. CALC_CHILD_HASH(m_pFirst);
  1362. CALC_CHILD_HASH(m_pSecond);
  1363. return CALC_DEP_HASHES();
  1364. }
  1365. };
  1366. struct Expr_Iterator_c : Expr_JsonField_c
  1367. {
  1368. SphAttr_t * m_pData;
  1369. Expr_Iterator_c ( const CSphAttrLocator & tLocator, int iLocator, CSphVector<ISphExpr*> & dArgs, CSphVector<ESphAttr> & dRetTypes, SphAttr_t * pData )
  1370. : Expr_JsonField_c ( tLocator, iLocator, dArgs, dRetTypes )
  1371. , m_pData ( pData )
  1372. {}
  1373. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  1374. {
  1375. uint64_t uValue = m_pData ? *m_pData : 0;
  1376. const BYTE * p = m_pStrings + ( uValue & 0xffffffff );
  1377. ESphJsonType eType = (ESphJsonType)( uValue >> 32 );
  1378. return DoEval ( eType, p, tMatch );
  1379. }
  1380. };
  1381. struct Expr_ForIn_c : public Expr_JsonFieldConv_c
  1382. {
  1383. ISphExpr * m_pExpr;
  1384. bool m_bStrict;
  1385. bool m_bIndex;
  1386. mutable uint64_t m_uData;
  1387. Expr_ForIn_c ( ISphExpr * pArg, bool bStrict, bool bIndex )
  1388. : Expr_JsonFieldConv_c ( pArg )
  1389. , m_pExpr ( NULL )
  1390. , m_bStrict ( bStrict )
  1391. , m_bIndex ( bIndex )
  1392. {}
  1393. ~Expr_ForIn_c ()
  1394. {
  1395. SafeRelease ( m_pExpr );
  1396. }
  1397. SphAttr_t * GetRef ()
  1398. {
  1399. return (SphAttr_t*)&m_uData;
  1400. }
  1401. void SetExpr ( ISphExpr * pExpr )
  1402. {
  1403. m_pExpr = pExpr;
  1404. }
  1405. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  1406. {
  1407. Expr_JsonFieldConv_c::Command ( eCmd, pArg );
  1408. if ( m_pExpr )
  1409. m_pExpr->Command ( eCmd, pArg );
  1410. }
  1411. bool ExprEval ( int * pResult, const CSphMatch & tMatch, int iIndex, ESphJsonType eType, const BYTE * pVal ) const
  1412. {
  1413. m_uData = ( ( (int64_t)( pVal-m_pStrings ) ) | ( ( (int64_t)eType )<<32 ) );
  1414. bool bMatch = m_pExpr->Eval ( tMatch )!=0;
  1415. *pResult = bMatch ? ( m_bIndex ? iIndex : 1 ) : ( m_bIndex ? -1 : 0 );
  1416. return m_bStrict ? bMatch : !bMatch;
  1417. }
  1418. virtual int IntEval ( const CSphMatch & tMatch ) const
  1419. {
  1420. int iResult = m_bIndex ? -1 : 0;
  1421. if ( !m_pExpr )
  1422. return iResult;
  1423. const BYTE * p = NULL;
  1424. ESphJsonType eJson = GetKey ( &p, tMatch );
  1425. switch ( eJson )
  1426. {
  1427. case JSON_INT32_VECTOR:
  1428. case JSON_INT64_VECTOR:
  1429. case JSON_DOUBLE_VECTOR:
  1430. {
  1431. int iSize = eJson==JSON_INT32_VECTOR ? 4 : 8;
  1432. ESphJsonType eType = eJson==JSON_INT32_VECTOR ? JSON_INT32
  1433. : eJson==JSON_INT64_VECTOR ? JSON_INT64
  1434. : JSON_DOUBLE;
  1435. int iLen = sphJsonUnpackInt ( &p );
  1436. for ( int i=0; i<iLen; i++, p+=iSize )
  1437. if ( !ExprEval ( &iResult, tMatch, i, eType, p ) )
  1438. break;
  1439. break;
  1440. }
  1441. case JSON_STRING_VECTOR:
  1442. {
  1443. sphJsonUnpackInt ( &p );
  1444. int iLen = sphJsonUnpackInt ( &p );
  1445. for ( int i=0;i<iLen;i++ )
  1446. {
  1447. if ( !ExprEval ( &iResult, tMatch, i, JSON_STRING, p ) )
  1448. break;
  1449. sphJsonSkipNode ( JSON_STRING, &p );
  1450. }
  1451. break;
  1452. }
  1453. case JSON_MIXED_VECTOR:
  1454. {
  1455. sphJsonUnpackInt ( &p );
  1456. int iLen = sphJsonUnpackInt ( &p );
  1457. for ( int i=0; i<iLen; i++ )
  1458. {
  1459. ESphJsonType eType = (ESphJsonType)*p++;
  1460. if ( !ExprEval ( &iResult, tMatch, i, eType, p ) )
  1461. break;
  1462. sphJsonSkipNode ( eType, &p );
  1463. }
  1464. break;
  1465. }
  1466. default:
  1467. break;
  1468. }
  1469. return iResult;
  1470. }
  1471. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1472. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1473. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1474. {
  1475. EXPR_CLASS_NAME("Expr_ForIn_c");
  1476. CALC_POD_HASH(m_bStrict);
  1477. CALC_POD_HASH(m_bIndex);
  1478. CALC_CHILD_HASH(m_pExpr);
  1479. return CALC_PARENT_HASH();
  1480. }
  1481. };
  1482. SphStringCmp_fn GetCollationFn ( ESphCollation eCollation )
  1483. {
  1484. switch ( eCollation )
  1485. {
  1486. case SPH_COLLATION_LIBC_CS: return sphCollateLibcCS;
  1487. case SPH_COLLATION_UTF8_GENERAL_CI: return sphCollateUtf8GeneralCI;
  1488. case SPH_COLLATION_BINARY: return sphCollateBinary;
  1489. default: return sphCollateLibcCI;
  1490. }
  1491. }
  1492. struct Expr_StrEq_c : public ISphExpr
  1493. {
  1494. ISphExpr * m_pLeft;
  1495. ISphExpr * m_pRight;
  1496. SphStringCmp_fn m_fnStrCmp;
  1497. Expr_StrEq_c ( ISphExpr * pLeft, ISphExpr * pRight, ESphCollation eCollation )
  1498. : m_pLeft ( pLeft )
  1499. , m_pRight ( pRight )
  1500. {
  1501. m_fnStrCmp = GetCollationFn ( eCollation );
  1502. }
  1503. ~Expr_StrEq_c ()
  1504. {
  1505. SafeRelease ( m_pLeft );
  1506. SafeRelease ( m_pRight );
  1507. }
  1508. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  1509. {
  1510. assert ( m_pLeft && m_pRight );
  1511. m_pLeft->Command ( eCmd, pArg );
  1512. m_pRight->Command ( eCmd, pArg );
  1513. }
  1514. virtual int IntEval ( const CSphMatch & tMatch ) const
  1515. {
  1516. const BYTE * pLeft;
  1517. const BYTE * pRight;
  1518. int iLeft = m_pLeft->StringEval ( tMatch, &pLeft );
  1519. int iRight = m_pRight->StringEval ( tMatch, &pRight );
  1520. CSphString sStr1 ( iLeft ? (const char*)pLeft : "", iLeft );
  1521. CSphString sStr2 ( iRight ? (const char*)pRight : "", iRight );
  1522. bool bEq = m_fnStrCmp ( (const BYTE*)sStr1.cstr(), (const BYTE*)sStr2.cstr(), false )==0;
  1523. if ( m_pLeft->IsStringPtr() ) SafeDeleteArray ( pLeft );
  1524. if ( m_pRight->IsStringPtr() ) SafeDeleteArray ( pRight );
  1525. return (int)bEq;
  1526. }
  1527. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1528. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1529. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1530. {
  1531. EXPR_CLASS_NAME("Expr_StrEq_c");
  1532. CALC_POD_HASH(m_fnStrCmp);
  1533. CALC_CHILD_HASH(m_pLeft);
  1534. CALC_CHILD_HASH(m_pRight);
  1535. return CALC_DEP_HASHES();
  1536. }
  1537. };
  1538. struct Expr_JsonFieldIsNull_c : public Expr_JsonFieldConv_c
  1539. {
  1540. bool m_bEquals;
  1541. explicit Expr_JsonFieldIsNull_c ( ISphExpr * pArg, bool bEquals )
  1542. : Expr_JsonFieldConv_c ( pArg )
  1543. , m_bEquals ( bEquals )
  1544. {}
  1545. virtual int IntEval ( const CSphMatch & tMatch ) const
  1546. {
  1547. const BYTE * pVal = NULL;
  1548. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  1549. return m_bEquals ^ ( eJson!=JSON_EOF && eJson!=JSON_NULL );
  1550. }
  1551. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  1552. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)IntEval ( tMatch ); }
  1553. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  1554. {
  1555. EXPR_CLASS_NAME("Expr_JsonFieldIsNull_c");
  1556. CALC_POD_HASH(m_bEquals);
  1557. return CALC_PARENT_HASH();
  1558. }
  1559. };
  1560. //////////////////////////////////////////////////////////////////////////
  1561. struct Expr_MinTopWeight : public ISphExpr
  1562. {
  1563. int * m_pWeight;
  1564. Expr_MinTopWeight() : m_pWeight ( NULL ) {}
  1565. virtual int IntEval ( const CSphMatch & ) const { return m_pWeight ? *m_pWeight : -INT_MAX; }
  1566. virtual float Eval ( const CSphMatch & ) const { return m_pWeight ? (float)*m_pWeight : -FLT_MAX; }
  1567. virtual int64_t Int64Eval ( const CSphMatch & ) const { return m_pWeight ? *m_pWeight : -LLONG_MAX; }
  1568. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  1569. {
  1570. CSphMatch * pWorst;
  1571. if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
  1572. return;
  1573. if ( static_cast<ISphExtra*>(pArg)->ExtraData ( EXTRA_GET_QUEUE_WORST, (void**)&pWorst ) )
  1574. m_pWeight = &pWorst->m_iWeight;
  1575. }
  1576. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  1577. {
  1578. bDisable = true;
  1579. return 0;
  1580. }
  1581. };
  1582. struct Expr_MinTopSortval : public ISphExpr
  1583. {
  1584. CSphMatch * m_pWorst;
  1585. int m_iSortval;
  1586. Expr_MinTopSortval()
  1587. : m_pWorst ( NULL )
  1588. , m_iSortval ( -1 )
  1589. {}
  1590. virtual float Eval ( const CSphMatch & ) const
  1591. {
  1592. if ( m_pWorst && m_pWorst->m_pDynamic && m_iSortval>=0 )
  1593. return *(float*)( m_pWorst->m_pDynamic + m_iSortval );
  1594. return -FLT_MAX;
  1595. }
  1596. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  1597. {
  1598. if ( eCmd!=SPH_EXPR_SET_EXTRA_DATA )
  1599. return;
  1600. ISphExtra * p = (ISphExtra*)pArg;
  1601. if ( !p->ExtraData ( EXTRA_GET_QUEUE_WORST, (void**)&m_pWorst )
  1602. || !p->ExtraData ( EXTRA_GET_QUEUE_SORTVAL, (void**)&m_iSortval ) )
  1603. {
  1604. m_pWorst = NULL;
  1605. }
  1606. }
  1607. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  1608. {
  1609. bDisable = true;
  1610. return 0;
  1611. }
  1612. };
  1613. struct Expr_Rand_c : public ISphExpr
  1614. {
  1615. ISphExpr * m_pFirst;
  1616. bool m_bConst;
  1617. mutable bool m_bFirstEval;
  1618. mutable uint64_t m_uState;
  1619. explicit Expr_Rand_c ( ISphExpr * pFirst, bool bConst )
  1620. : m_pFirst ( pFirst )
  1621. , m_bConst ( bConst )
  1622. , m_bFirstEval ( true )
  1623. {
  1624. sphAutoSrand ();
  1625. m_uState = ( (uint64_t)sphRand() << 32 ) + sphRand();
  1626. }
  1627. uint64_t XorShift64Star() const
  1628. {
  1629. m_uState ^= m_uState >> 12;
  1630. m_uState ^= m_uState << 25;
  1631. m_uState ^= m_uState >> 27;
  1632. return m_uState * 2685821657736338717ULL;
  1633. }
  1634. virtual float Eval ( const CSphMatch & tMatch ) const
  1635. {
  1636. if ( m_pFirst )
  1637. {
  1638. uint64_t uSeed = (uint64_t)m_pFirst->Int64Eval ( tMatch );
  1639. if ( !m_bConst )
  1640. m_uState = uSeed;
  1641. else if ( m_bFirstEval )
  1642. {
  1643. m_uState = uSeed;
  1644. m_bFirstEval = false;
  1645. }
  1646. }
  1647. return (float)( XorShift64Star() / (double)UINT64_MAX );
  1648. }
  1649. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)Eval ( tMatch ); }
  1650. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t)Eval ( tMatch ); }
  1651. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  1652. {
  1653. bDisable = true;
  1654. return 0;
  1655. }
  1656. };
  1657. //////////////////////////////////////////////////////////////////////////
  // Argument evaluation shorthands for the node declarations below.
  // They expand to a call on the m_pFirst / m_pSecond / m_pThird member and expect
  // a variable named tMatch to be in scope at the point of use.

  // first argument
  #define FIRST			m_pFirst->Eval(tMatch)
  #define INTFIRST		m_pFirst->IntEval(tMatch)
  #define INT64FIRST	m_pFirst->Int64Eval(tMatch)

  // second argument
  #define SECOND		m_pSecond->Eval(tMatch)
  #define INTSECOND		m_pSecond->IntEval(tMatch)
  #define INT64SECOND	m_pSecond->Int64Eval(tMatch)

  // third argument
  #define THIRD			m_pThird->Eval(tMatch)
  #define INTTHIRD		m_pThird->IntEval(tMatch)
  #define INT64THIRD	m_pThird->Int64Eval(tMatch)
  // Opens the definition of a unary node: a struct derived from Expr_Unary_c whose
  // constructor passes the stringified class name and the argument on to the base.
  // The struct body is left open; close it with DECLARE_END().
  #define DECLARE_UNARY_TRAITS(_classname) \
  	struct _classname : public Expr_Unary_c \
  	{ \
  		explicit _classname ( ISphExpr * pFirst ) \
  			: Expr_Unary_c ( #_classname, pFirst ) \
  		{}

  // Closes a struct opened by one of the DECLARE_xxx_TRAITS macros.
  #define DECLARE_END() };

  // Complete unary node that only overrides the float evaluator.
  #define DECLARE_UNARY_FLT(_classname,_expr) \
  	DECLARE_UNARY_TRAITS ( _classname ) \
  		virtual float Eval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr; \
  		} \
  	};

  // Complete unary node with a dedicated expression per result type:
  // _expr for float ( cast to float here ), _expr2 for int, _expr3 for int64.
  #define DECLARE_UNARY_INT(_classname,_expr,_expr2,_expr3) \
  	DECLARE_UNARY_TRAITS ( _classname ) \
  		virtual float Eval ( const CSphMatch & tMatch ) const \
  		{ \
  			return (float)_expr; \
  		} \
  		virtual int IntEval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr2; \
  		} \
  		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr3; \
  		} \
  	};
  // Absolute value of an integer expression.
  // The argument is parenthesized as a whole before it gets negated; the previous
  // form ( -_arg ) negated only the leading operand of a compound argument.
  // NOTE: the argument is still evaluated twice, so it should be free of side effects.
  #define IABS(_arg) ( (_arg)>0 ? (_arg) : -(_arg) )
  1683. DECLARE_UNARY_INT ( Expr_Neg_c, -FIRST, -INTFIRST, -INT64FIRST )
  1684. DECLARE_UNARY_INT ( Expr_Abs_c, fabs(FIRST), IABS(INTFIRST), IABS(INT64FIRST) )
  1685. DECLARE_UNARY_INT ( Expr_Ceil_c, float(ceil(FIRST)), int(ceil(FIRST)), int64_t(ceil(FIRST)) )
  1686. DECLARE_UNARY_INT ( Expr_Floor_c, float(floor(FIRST)), int(floor(FIRST)), int64_t(floor(FIRST)) )
  1687. DECLARE_UNARY_FLT ( Expr_Sin_c, float(sin(FIRST)) )
  1688. DECLARE_UNARY_FLT ( Expr_Cos_c, float(cos(FIRST)) )
  1689. DECLARE_UNARY_FLT ( Expr_Exp_c, float(exp(FIRST)) )
  1690. DECLARE_UNARY_INT ( Expr_NotInt_c, (float)(INTFIRST?0:1), INTFIRST?0:1, INTFIRST?0:1 )
  1691. DECLARE_UNARY_INT ( Expr_NotInt64_c, (float)(INT64FIRST?0:1), INT64FIRST?0:1, INT64FIRST?0:1 )
  1692. DECLARE_UNARY_INT ( Expr_Sint_c, (float)(INTFIRST), INTFIRST, INTFIRST )
  1693. DECLARE_UNARY_TRAITS ( Expr_Ln_c )
  1694. virtual float Eval ( const CSphMatch & tMatch ) const
  1695. {
  1696. float fFirst = m_pFirst->Eval ( tMatch );
  1697. // ideally this would be SQLNULL instead of plain 0.0f
  1698. return fFirst>0.0f ? (float)log ( fFirst ) : 0.0f;
  1699. }
  1700. DECLARE_END()
  1701. DECLARE_UNARY_TRAITS ( Expr_Log2_c )
  1702. virtual float Eval ( const CSphMatch & tMatch ) const
  1703. {
  1704. float fFirst = m_pFirst->Eval ( tMatch );
  1705. // ideally this would be SQLNULL instead of plain 0.0f
  1706. return fFirst>0.0f ? (float)( log ( fFirst )*M_LOG2E ) : 0.0f;
  1707. }
  1708. DECLARE_END()
  1709. DECLARE_UNARY_TRAITS ( Expr_Log10_c )
  1710. virtual float Eval ( const CSphMatch & tMatch ) const
  1711. {
  1712. float fFirst = m_pFirst->Eval ( tMatch );
  1713. // ideally this would be SQLNULL instead of plain 0.0f
  1714. return fFirst>0.0f ? (float)( log ( fFirst )*M_LOG10E ) : 0.0f;
  1715. }
  1716. DECLARE_END()
  1717. DECLARE_UNARY_TRAITS ( Expr_Sqrt_c )
  1718. virtual float Eval ( const CSphMatch & tMatch ) const
  1719. {
  1720. float fFirst = m_pFirst->Eval ( tMatch );
  1721. // ideally this would be SQLNULL instead of plain 0.0f in case of negative argument
  1722. // MEGA optimization: do not call sqrt for 0.0f
  1723. return fFirst>0.0f ? (float)sqrt ( fFirst ) : 0.0f;
  1724. }
  1725. DECLARE_END()
  1726. //////////////////////////////////////////////////////////////////////////
  // Opens the definition of a binary node: a struct derived from Expr_Binary_c whose
  // constructor passes the stringified class name and both arguments on to the base.
  // The struct body is left open; close it with DECLARE_END().
  #define DECLARE_BINARY_TRAITS(_classname) \
  	struct _classname : public Expr_Binary_c \
  	{ \
  		_classname ( ISphExpr * pFirst, ISphExpr * pSecond ) \
  			: Expr_Binary_c ( #_classname, pFirst, pSecond ) \
  		{}

  // Complete binary node that only overrides the float evaluator.
  #define DECLARE_BINARY_FLT(_classname,_expr) \
  	DECLARE_BINARY_TRAITS ( _classname ) \
  		virtual float Eval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr; \
  		} \
  	};

  // Complete binary node with a dedicated expression per result type:
  // _expr for float, _expr2 for int, _expr3 for int64.
  #define DECLARE_BINARY_INT(_classname,_expr,_expr2,_expr3) \
  	DECLARE_BINARY_TRAITS ( _classname ) \
  		virtual float Eval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr; \
  		} \
  		virtual int IntEval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr2; \
  		} \
  		virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const \
  		{ \
  			return _expr3; \
  		} \
  	};

  // Declares three flavors of one operation, named <_classname>Float_c, <_classname>Int_c
  // and <_classname>Int64_c. Every flavor computes in its own type ( _expr, _expr2, _expr3
  // respectively ) and derives the remaining two evaluators from that by a cast.
  #define DECLARE_BINARY_POLY(_classname,_expr,_expr2,_expr3) \
  	DECLARE_BINARY_INT ( _classname##Float_c, \
  		_expr, \
  		(int)Eval(tMatch), \
  		(int64_t)Eval(tMatch ) ) \
  	DECLARE_BINARY_INT ( _classname##Int_c, \
  		(float)IntEval(tMatch), \
  		_expr2, \
  		(int64_t)IntEval(tMatch) ) \
  	DECLARE_BINARY_INT ( _classname##Int64_c, \
  		(float)Int64Eval(tMatch), \
  		(int)Int64Eval(tMatch), \
  		_expr3 )
  1745. #define IFFLT(_expr) ( (_expr) ? 1.0f : 0.0f )
  1746. #define IFINT(_expr) ( (_expr) ? 1 : 0 )
  // arithmetic and bitwise binary operators
  // the int and int64 variants of add/sub/mul operate on values cast to DWORD/uint64_t
  // NOTE(review): presumably so that overflow wraps around instead of being signed-overflow UB -- confirm
  DECLARE_BINARY_INT ( Expr_Add_c, FIRST + SECOND, (DWORD)INTFIRST + (DWORD)INTSECOND, (uint64_t)INT64FIRST + (uint64_t)INT64SECOND )
  DECLARE_BINARY_INT ( Expr_Sub_c, FIRST - SECOND, (DWORD)INTFIRST - (DWORD)INTSECOND, (uint64_t)INT64FIRST - (uint64_t)INT64SECOND )
  DECLARE_BINARY_INT ( Expr_Mul_c, FIRST * SECOND, (DWORD)INTFIRST * (DWORD)INTSECOND, (uint64_t)INT64FIRST * (uint64_t)INT64SECOND )
  // the float variants of the bitwise operators convert both float operands to int first
  DECLARE_BINARY_INT ( Expr_BitAnd_c, (float)(int(FIRST)&int(SECOND)), INTFIRST & INTSECOND, INT64FIRST & INT64SECOND )
  DECLARE_BINARY_INT ( Expr_BitOr_c, (float)(int(FIRST)|int(SECOND)), INTFIRST | INTSECOND, INT64FIRST | INT64SECOND )
  1752. DECLARE_BINARY_INT ( Expr_Mod_c, (float)(int(FIRST)%int(SECOND)), INTFIRST % INTSECOND, INT64FIRST % INT64SECOND )
  1753. DECLARE_BINARY_TRAITS ( Expr_Div_c )
  1754. virtual float Eval ( const CSphMatch & tMatch ) const
  1755. {
  1756. float fSecond = m_pSecond->Eval ( tMatch );
  1757. // ideally this would be SQLNULL instead of plain 0.0f
  1758. return fSecond ? m_pFirst->Eval ( tMatch )/fSecond : 0.0f;
  1759. }
  1760. DECLARE_END()
  1761. DECLARE_BINARY_TRAITS ( Expr_Idiv_c )
  1762. virtual float Eval ( const CSphMatch & tMatch ) const
  1763. {
  1764. int iSecond = int(SECOND);
  1765. // ideally this would be SQLNULL instead of plain 0.0f
  1766. return iSecond ? float(int(FIRST)/iSecond) : 0.0f;
  1767. }
  1768. virtual int IntEval ( const CSphMatch & tMatch ) const
  1769. {
  1770. int iSecond = INTSECOND;
  1771. // ideally this would be SQLNULL instead of plain 0
  1772. return iSecond ? ( INTFIRST / iSecond ) : 0;
  1773. }
  1774. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  1775. {
  1776. int64_t iSecond = INT64SECOND;
  1777. // ideally this would be SQLNULL instead of plain 0
  1778. return iSecond ? ( INT64FIRST / iSecond ) : 0;
  1779. }
  1780. DECLARE_END()
  // comparison operators; DECLARE_BINARY_POLY emits a Float_c, an Int_c and an Int64_c struct per operator
  DECLARE_BINARY_POLY ( Expr_Lt, IFFLT ( FIRST<SECOND ), IFINT ( INTFIRST<INTSECOND ), IFINT ( INT64FIRST<INT64SECOND ) )
  DECLARE_BINARY_POLY ( Expr_Gt, IFFLT ( FIRST>SECOND ), IFINT ( INTFIRST>INTSECOND ), IFINT ( INT64FIRST>INT64SECOND ) )
  DECLARE_BINARY_POLY ( Expr_Lte, IFFLT ( FIRST<=SECOND ), IFINT ( INTFIRST<=INTSECOND ), IFINT ( INT64FIRST<=INT64SECOND ) )
  DECLARE_BINARY_POLY ( Expr_Gte, IFFLT ( FIRST>=SECOND ), IFINT ( INTFIRST>=INTSECOND ), IFINT ( INT64FIRST>=INT64SECOND ) )
  // float (in)equality is decided with an absolute tolerance of 1e-6; the integer variants compare exactly
  DECLARE_BINARY_POLY ( Expr_Eq, IFFLT ( fabs ( FIRST-SECOND )<=1e-6 ), IFINT ( INTFIRST==INTSECOND ), IFINT ( INT64FIRST==INT64SECOND ) )
  DECLARE_BINARY_POLY ( Expr_Ne, IFFLT ( fabs ( FIRST-SECOND )>1e-6 ), IFINT ( INTFIRST!=INTSECOND ), IFINT ( INT64FIRST!=INT64SECOND ) )
  DECLARE_BINARY_INT ( Expr_Min_c, Min ( FIRST, SECOND ), Min ( INTFIRST, INTSECOND ), Min ( INT64FIRST, INT64SECOND ) )
  DECLARE_BINARY_INT ( Expr_Max_c, Max ( FIRST, SECOND ), Max ( INTFIRST, INTSECOND ), Max ( INT64FIRST, INT64SECOND ) )
  DECLARE_BINARY_FLT ( Expr_Pow_c, float ( pow ( FIRST, SECOND ) ) )
  // logical operators; the float variants yield a bool that is implicitly converted to the float return type
  DECLARE_BINARY_POLY ( Expr_And, FIRST!=0.0f && SECOND!=0.0f, IFINT ( INTFIRST && INTSECOND ), IFINT ( INT64FIRST && INT64SECOND ) )
  DECLARE_BINARY_POLY ( Expr_Or, FIRST!=0.0f || SECOND!=0.0f, IFINT ( INTFIRST || INTSECOND ), IFINT ( INT64FIRST || INT64SECOND ) )
  DECLARE_BINARY_FLT ( Expr_Atan2_c, float ( atan2 ( FIRST, SECOND ) ) )
  1793. //////////////////////////////////////////////////////////////////////////
  /// boring base stuff
  /// common base for expressions with three child expressions
  /// stores the three children and a name string, releases the children on destruction,
  /// and forwards commands and hash calculation to all of them
  struct ExprThreeway_c : public ISphExpr
  {
      ISphExpr * m_pFirst;
      ISphExpr * m_pSecond;
      ISphExpr * m_pThird;
      CSphString m_sExprName; ///< name passed to the ctor; fed to EXPR_CLASS_NAME_NOCHECK in GetHash()

      /// stores the child pointers as given; the dtor calls SafeRelease() on each of them
      ExprThreeway_c ( const char * szClassName, ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird )
          : m_pFirst ( pFirst )
          , m_pSecond ( pSecond )
          , m_pThird ( pThird )
          , m_sExprName ( szClassName )
      {}

      virtual ~ExprThreeway_c()
      {
          SafeRelease ( m_pFirst );
          SafeRelease ( m_pSecond );
          SafeRelease ( m_pThird );
      }

      /// passes the command on to all three children, in order
      /// NOTE(review): the children are dereferenced without a NULL check here -- confirm callers never pass NULL
      virtual void Command ( ESphExprCommand eCmd, void * pArg )
      {
          m_pFirst->Command ( eCmd, pArg );
          m_pSecond->Command ( eCmd, pArg );
          m_pThird->Command ( eCmd, pArg );
      }

      /// hash built from the expression name and the three children
      /// the helper macros are defined outside this excerpt
      /// NOTE(review): they presumably rely on the parameter names used here -- confirm before renaming anything
      virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
      {
          EXPR_CLASS_NAME_NOCHECK(m_sExprName.cstr());
          CALC_CHILD_HASH(m_pFirst);
          CALC_CHILD_HASH(m_pSecond);
          CALC_CHILD_HASH(m_pThird);
          return CALC_DEP_HASHES();
      }
  };
  /// complete three-argument expression struct derived from ExprThreeway_c
  /// the ctor passes the stringized struct name and all three args to the base;
  /// the three expressions become the float, int and int64 evaluators respectively
  #define DECLARE_TERNARY(_name,_fltExpr,_intExpr,_int64Expr) \
      struct _name : public ExprThreeway_c \
      { \
          _name ( ISphExpr * pFirst, ISphExpr * pSecond, ISphExpr * pThird ) \
              : ExprThreeway_c ( #_name, pFirst, pSecond, pThird ) {} \
          \
          virtual float Eval ( const CSphMatch & tMatch ) const { return _fltExpr; } \
          virtual int IntEval ( const CSphMatch & tMatch ) const { return _intExpr; } \
          virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return _int64Expr; } \
      };
  // if(cond,a,b): picks the second argument when the first one is non-zero, the third one otherwise
  DECLARE_TERNARY ( Expr_If_c, ( FIRST!=0.0f ) ? SECOND : THIRD, INTFIRST ? INTSECOND : INTTHIRD, INT64FIRST ? INT64SECOND : INT64THIRD )
  // fused a*b+c and a*b*c
  // NOTE(review): the integer variants multiply signed values directly, unlike Expr_Mul_c which casts to DWORD/uint64_t first -- confirm overflow is acceptable here
  DECLARE_TERNARY ( Expr_Madd_c, FIRST*SECOND+THIRD, INTFIRST*INTSECOND + INTTHIRD, INT64FIRST*INT64SECOND + INT64THIRD )
  DECLARE_TERNARY ( Expr_Mul3_c, FIRST*SECOND*THIRD, INTFIRST*INTSECOND*INTTHIRD, INT64FIRST*INT64SECOND*INT64THIRD )
  1841. //////////////////////////////////////////////////////////////////////////
  /// complete unary expression struct that extracts a calendar field from a timestamp, in local time
  /// IntEval() converts INTFIRST to time_t, breaks it down with localtime_r() into the local `struct tm s`
  /// and returns _expr; _expr is expected to refer to that local `s`
  /// Eval() and Int64Eval() just convert the IntEval() result
  #define DECLARE_TIMESTAMP(_classname,_expr) \
      DECLARE_UNARY_TRAITS ( _classname ) \
          virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval(tMatch); } \
          virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval(tMatch); } \
          virtual int IntEval ( const CSphMatch & tMatch ) const \
          { \
              time_t ts = (time_t)INTFIRST; \
              struct tm s; \
              localtime_r ( &ts, &s ); \
              return _expr; \
          } \
      };
  // local-time calendar extractors; tm_mon is zero-based and tm_year counts from 1900, hence the +1 and +1900
  DECLARE_TIMESTAMP ( Expr_Day_c, s.tm_mday )
  DECLARE_TIMESTAMP ( Expr_Month_c, s.tm_mon+1 )
  DECLARE_TIMESTAMP ( Expr_Year_c, s.tm_year+1900 )
  // packed decimal forms: YYYYMM and YYYYMMDD
  DECLARE_TIMESTAMP ( Expr_YearMonth_c, (s.tm_year+1900)*100+s.tm_mon+1 )
  DECLARE_TIMESTAMP ( Expr_YearMonthDay_c, (s.tm_year+1900)*10000+(s.tm_mon+1)*100+s.tm_mday )
  DECLARE_TIMESTAMP ( Expr_Hour_c, s.tm_hour )
  DECLARE_TIMESTAMP ( Expr_Minute_c, s.tm_min )
  DECLARE_TIMESTAMP ( Expr_Second_c, s.tm_sec )
  /// same as DECLARE_TIMESTAMP, except that the timestamp is broken down with gmtime_r(), i.e. in UTC
  /// _expr is expected to refer to the local `struct tm s`
  #define DECLARE_TIMESTAMP_UTC( _classname, _expr ) \
      DECLARE_UNARY_TRAITS ( _classname ) \
          virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval(tMatch); } \
          virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval(tMatch); } \
          virtual int IntEval ( const CSphMatch & tMatch ) const \
          { \
              time_t ts = (time_t)INTFIRST; \
              struct tm s; \
              gmtime_r ( &ts, &s ); \
              return _expr; \
          } \
      };
  // UTC calendar extractors; only the date parts have UTC variants here
  DECLARE_TIMESTAMP_UTC ( Expr_Day_utc_c, s.tm_mday )
  DECLARE_TIMESTAMP_UTC ( Expr_Month_utc_c, s.tm_mon + 1 )
  DECLARE_TIMESTAMP_UTC ( Expr_Year_utc_c, s.tm_year + 1900 )
  // packed decimal forms: YYYYMM and YYYYMMDD
  DECLARE_TIMESTAMP_UTC ( Expr_YearMonth_utc_c, (s.tm_year + 1900) * 100 + s.tm_mon + 1 )
  DECLARE_TIMESTAMP_UTC ( Expr_YearMonthDay_utc_c, (s.tm_year + 1900) * 10000 + (s.tm_mon + 1) * 100 + s.tm_mday )
  extern bool bGroupingInUtc; // defined in searchd.cpp

  /// sets the global flag that makes ExprDay() and its siblings below create the UTC extractors
  /// instead of the local-time ones; expressions that were already created are not affected
  void setGroupingInUtc ( bool b_GroupingInUtc )
  {
      bGroupingInUtc = b_GroupingInUtc;
  }
  1884. Expr_Unary_c * ExprDay ( ISphExpr * pFirst )
  1885. {
  1886. return bGroupingInUtc
  1887. ? (Expr_Unary_c *) new Expr_Day_utc_c ( pFirst )
  1888. : (Expr_Unary_c *) new Expr_Day_c ( pFirst );
  1889. }
  1890. Expr_Unary_c * ExprMonth ( ISphExpr * pFirst )
  1891. {
  1892. return bGroupingInUtc
  1893. ? (Expr_Unary_c *) new Expr_Month_utc_c ( pFirst )
  1894. : (Expr_Unary_c *) new Expr_Month_c ( pFirst );
  1895. }
  1896. Expr_Unary_c * ExprYear ( ISphExpr * pFirst )
  1897. {
  1898. return bGroupingInUtc
  1899. ? (Expr_Unary_c *) new Expr_Year_utc_c ( pFirst )
  1900. : (Expr_Unary_c *) new Expr_Year_c ( pFirst );
  1901. }
  1902. Expr_Unary_c * ExprYearMonth ( ISphExpr * pFirst )
  1903. {
  1904. return bGroupingInUtc
  1905. ? (Expr_Unary_c *) new Expr_YearMonth_utc_c ( pFirst )
  1906. : (Expr_Unary_c *) new Expr_YearMonth_c ( pFirst );
  1907. }
  1908. Expr_Unary_c * ExprYearMonthDay ( ISphExpr * pFirst )
  1909. {
  1910. return bGroupingInUtc
  1911. ? (Expr_Unary_c *) new Expr_YearMonthDay_utc_c ( pFirst )
  1912. : (Expr_Unary_c *) new Expr_YearMonthDay_c ( pFirst );
  1913. }
  1914. //////////////////////////////////////////////////////////////////////////
  1915. // UDF CALL SITE
  1916. //////////////////////////////////////////////////////////////////////////
  1917. void * UdfMalloc ( int iLen )
  1918. {
  1919. return new BYTE [ iLen ];
  1920. }
  1921. /// UDF call site
  1922. struct UdfCall_t
  1923. {
  1924. const PluginUDF_c * m_pUdf;
  1925. SPH_UDF_INIT m_tInit;
  1926. SPH_UDF_ARGS m_tArgs;
  1927. CSphVector<int> m_dArgs2Free; // these args should be freed explicitly
  1928. UdfCall_t()
  1929. {
  1930. m_pUdf = NULL;
  1931. m_tInit.func_data = NULL;
  1932. m_tInit.is_const = false;
  1933. m_tArgs.arg_count = 0;
  1934. m_tArgs.arg_types = NULL;
  1935. m_tArgs.arg_values = NULL;
  1936. m_tArgs.arg_names = NULL;
  1937. m_tArgs.str_lengths = NULL;
  1938. m_tArgs.fn_malloc = UdfMalloc;
  1939. }
  1940. ~UdfCall_t ()
  1941. {
  1942. if ( m_pUdf )
  1943. m_pUdf->Release();
  1944. SafeDeleteArray ( m_tArgs.arg_types );
  1945. SafeDeleteArray ( m_tArgs.arg_values );
  1946. SafeDeleteArray ( m_tArgs.arg_names );
  1947. SafeDeleteArray ( m_tArgs.str_lengths );
  1948. }
  1949. };
  1950. //////////////////////////////////////////////////////////////////////////
  1951. // PARSER INTERNALS
  1952. //////////////////////////////////////////////////////////////////////////
  1953. class ExprParser_t;
  1954. #ifdef CMAKE_GENERATED_GRAMMAR
  1955. #include "bissphinxexpr.h"
  1956. #else
  1957. #include "yysphinxexpr.h"
  1958. #endif
  /// known functions
  /// the values double as indexes into g_dFuncs: FuncHashCheck() dies at startup
  /// unless g_dFuncs[i].m_eFunc==i, so this list and g_dFuncs must be kept in the same order
  enum Func_e
  {
      FUNC_NOW=0,
      FUNC_ABS,
      FUNC_CEIL,
      FUNC_FLOOR,
      FUNC_SIN,
      FUNC_COS,
      FUNC_LN,
      FUNC_LOG2,
      FUNC_LOG10,
      FUNC_EXP,
      FUNC_SQRT,
      FUNC_BIGINT,
      FUNC_SINT,
      FUNC_CRC32,
      FUNC_FIBONACCI,
      FUNC_DAY,
      FUNC_MONTH,
      FUNC_YEAR,
      FUNC_YEARMONTH,
      FUNC_YEARMONTHDAY,
      FUNC_HOUR,
      FUNC_MINUTE,
      FUNC_SECOND,
      FUNC_MIN,
      FUNC_MAX,
      FUNC_POW,
      FUNC_IDIV,
      FUNC_IF,
      FUNC_MADD,
      FUNC_MUL3,
      FUNC_INTERVAL,
      FUNC_IN,
      FUNC_BITDOT,
      FUNC_REMAP,
      FUNC_GEODIST,
      FUNC_EXIST,
      FUNC_POLY2D,
      FUNC_GEOPOLY2D,
      FUNC_CONTAINS,
      FUNC_ZONESPANLIST,
      FUNC_TO_STRING,
      FUNC_RANKFACTORS,
      FUNC_PACKEDFACTORS,
      FUNC_FACTORS,
      FUNC_BM25F,
      FUNC_INTEGER,
      FUNC_DOUBLE,
      FUNC_LENGTH,
      FUNC_LEAST,
      FUNC_GREATEST,
      FUNC_UINT,
      FUNC_CURTIME,
      FUNC_UTC_TIME,
      FUNC_UTC_TIMESTAMP,
      FUNC_TIMEDIFF,
      FUNC_CURRENT_USER,
      FUNC_CONNECTION_ID,
      FUNC_ALL,
      FUNC_ANY,
      FUNC_INDEXOF,
      FUNC_MIN_TOP_WEIGHT,
      FUNC_MIN_TOP_SORTVAL,
      FUNC_ATAN2,
      FUNC_RAND
  };
  /// descriptor of one built-in function; a row of the g_dFuncs table
  struct FuncDesc_t
  {
      const char * m_sName; ///< function name; FuncHashLookup() matches it case-insensitively
      int m_iArgs; ///< argument count; NOTE(review): negative values presumably mean "at least that many" (variadic) -- confirm at the use site
      Func_e m_eFunc; ///< function id; must be equal to the row index, see FuncHashCheck()
      ESphAttr m_eRet; ///< return type; NOTE(review): SPH_ATTR_NONE presumably means "depends on the arguments" -- confirm at the use site
  };
  /// table of built-in functions: { name, argument count, id, return type }
  /// row order must match Func_e, and names must agree with the perfect hash in FuncHashLookup()
  /// both are verified at startup by FuncHashCheck(); after adding a function, regenerate the hash tables
  static FuncDesc_t g_dFuncs[] =
  {
      { "now", 0, FUNC_NOW, SPH_ATTR_INTEGER },
      { "abs", 1, FUNC_ABS, SPH_ATTR_NONE },
      { "ceil", 1, FUNC_CEIL, SPH_ATTR_BIGINT },
      { "floor", 1, FUNC_FLOOR, SPH_ATTR_BIGINT },
      { "sin", 1, FUNC_SIN, SPH_ATTR_FLOAT },
      { "cos", 1, FUNC_COS, SPH_ATTR_FLOAT },
      { "ln", 1, FUNC_LN, SPH_ATTR_FLOAT },
      { "log2", 1, FUNC_LOG2, SPH_ATTR_FLOAT },
      { "log10", 1, FUNC_LOG10, SPH_ATTR_FLOAT },
      { "exp", 1, FUNC_EXP, SPH_ATTR_FLOAT },
      { "sqrt", 1, FUNC_SQRT, SPH_ATTR_FLOAT },
      { "bigint", 1, FUNC_BIGINT, SPH_ATTR_BIGINT }, // type-enforcer special as-if-function
      { "sint", 1, FUNC_SINT, SPH_ATTR_BIGINT }, // type-enforcer special as-if-function
      { "crc32", 1, FUNC_CRC32, SPH_ATTR_INTEGER },
      { "fibonacci", 1, FUNC_FIBONACCI, SPH_ATTR_INTEGER },
      { "day", 1, FUNC_DAY, SPH_ATTR_INTEGER },
      { "month", 1, FUNC_MONTH, SPH_ATTR_INTEGER },
      { "year", 1, FUNC_YEAR, SPH_ATTR_INTEGER },
      { "yearmonth", 1, FUNC_YEARMONTH, SPH_ATTR_INTEGER },
      { "yearmonthday", 1, FUNC_YEARMONTHDAY, SPH_ATTR_INTEGER },
      { "hour", 1, FUNC_HOUR, SPH_ATTR_INTEGER },
      { "minute", 1, FUNC_MINUTE, SPH_ATTR_INTEGER },
      { "second", 1, FUNC_SECOND, SPH_ATTR_INTEGER },
      { "min", 2, FUNC_MIN, SPH_ATTR_NONE },
      { "max", 2, FUNC_MAX, SPH_ATTR_NONE },
      { "pow", 2, FUNC_POW, SPH_ATTR_FLOAT },
      { "idiv", 2, FUNC_IDIV, SPH_ATTR_NONE },
      { "if", 3, FUNC_IF, SPH_ATTR_NONE },
      { "madd", 3, FUNC_MADD, SPH_ATTR_NONE },
      { "mul3", 3, FUNC_MUL3, SPH_ATTR_NONE },
      { "interval", -2, FUNC_INTERVAL, SPH_ATTR_INTEGER },
      { "in", -1, FUNC_IN, SPH_ATTR_INTEGER },
      { "bitdot", -1, FUNC_BITDOT, SPH_ATTR_NONE },
      { "remap", 4, FUNC_REMAP, SPH_ATTR_INTEGER },
      { "geodist", -4, FUNC_GEODIST, SPH_ATTR_FLOAT },
      { "exist", 2, FUNC_EXIST, SPH_ATTR_NONE },
      { "poly2d", -1, FUNC_POLY2D, SPH_ATTR_POLY2D },
      { "geopoly2d", -1, FUNC_GEOPOLY2D, SPH_ATTR_POLY2D },
      { "contains", 3, FUNC_CONTAINS, SPH_ATTR_INTEGER },
      { "zonespanlist", 0, FUNC_ZONESPANLIST, SPH_ATTR_STRINGPTR },
      { "to_string", 1, FUNC_TO_STRING, SPH_ATTR_STRINGPTR },
      { "rankfactors", 0, FUNC_RANKFACTORS, SPH_ATTR_STRINGPTR },
      { "packedfactors", 0, FUNC_PACKEDFACTORS, SPH_ATTR_FACTORS },
      { "factors", 0, FUNC_FACTORS, SPH_ATTR_FACTORS }, // just an alias for PACKEDFACTORS()
      { "bm25f", -2, FUNC_BM25F, SPH_ATTR_FLOAT },
      { "integer", 1, FUNC_INTEGER, SPH_ATTR_BIGINT },
      { "double", 1, FUNC_DOUBLE, SPH_ATTR_FLOAT },
      { "length", 1, FUNC_LENGTH, SPH_ATTR_INTEGER },
      { "least", 1, FUNC_LEAST, SPH_ATTR_STRINGPTR },
      { "greatest", 1, FUNC_GREATEST, SPH_ATTR_STRINGPTR },
      { "uint", 1, FUNC_UINT, SPH_ATTR_INTEGER },
      { "curtime", 0, FUNC_CURTIME, SPH_ATTR_STRINGPTR },
      { "utc_time", 0, FUNC_UTC_TIME, SPH_ATTR_STRINGPTR },
      { "utc_timestamp", 0, FUNC_UTC_TIMESTAMP, SPH_ATTR_STRINGPTR },
      { "timediff", 2, FUNC_TIMEDIFF, SPH_ATTR_STRINGPTR },
      { "current_user", 0, FUNC_CURRENT_USER, SPH_ATTR_INTEGER },
      { "connection_id", 0, FUNC_CONNECTION_ID, SPH_ATTR_INTEGER },
      { "all", -1, FUNC_ALL, SPH_ATTR_INTEGER },
      { "any", -1, FUNC_ANY, SPH_ATTR_INTEGER },
      { "indexof", -1, FUNC_INDEXOF, SPH_ATTR_BIGINT },
      { "min_top_weight", 0, FUNC_MIN_TOP_WEIGHT, SPH_ATTR_INTEGER },
      { "min_top_sortval", 0, FUNC_MIN_TOP_SORTVAL, SPH_ATTR_FLOAT },
      { "atan2", 2, FUNC_ATAN2, SPH_ATTR_FLOAT },
      { "rand", -1, FUNC_RAND, SPH_ATTR_FLOAT }
  };
  // helper to generate input data for gperf
  // run this, run gperf, that will generate a C program
  // copy dAsso from asso_values in that C source
  // modify iHash switch according to that C source, if needed
  // copy dIndexes from the program output
  //
  // normally compiled out; when enabled, it runs during static initialization (via G_HASHGEN),
  // prints the gperf input to stdout and then terminates the process with exit(0)
  #if 0
  int HashGen()
  {
      // gperf declarations section: the struct that describes one keyword
      printf ( "struct func { char *name; int num; };\n%%%%\n" );
      // gperf keywords section: one "name, index" line per g_dFuncs row
      for ( int i=0; i<int( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0] )); i++ )
          printf ( "%s, %d\n", g_dFuncs[i].m_sName, i );
      // trailing C code: a main() that prints the index for every hash slot, or -1 for an empty one, ten per line
      printf ( "%%%%\n" );
      printf ( "void main()\n" );
      printf ( "{\n" );
      printf ( "\tint i;\n" );
      printf ( "\tfor ( i=0; i<=MAX_HASH_VALUE; i++ )\n" );
      printf ( "\t\tprintf ( \"%%d,%%s\", wordlist[i].name[0] ? wordlist[i].num : -1, (i%%10)==9 ? \"\\n\" : \" \" );\n" );
      printf ( "}\n" );
      // reminder of the gperf command line to use
      printf ( "// gperf -Gt 1.p > 1.c\n" );
      exit ( 0 );
      return 0;
  }
  static int G_HASHGEN = HashGen();
  #endif
  2125. // FIXME? can remove this by preprocessing the assoc table
  2126. static inline BYTE FuncHashLower ( BYTE u )
  2127. {
  2128. return ( u>='A' && u<='Z' ) ? ( u | 0x20 ) : u;
  2129. }
  2130. static int FuncHashLookup ( const char * sKey )
  2131. {
  2132. assert ( sKey && sKey[0] );
  2133. static BYTE dAsso[] =
  2134. {
  2135. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2136. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2137. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2138. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2139. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2140. 0, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2141. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2142. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2143. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2144. 109, 109, 109, 109, 109, 60, 109, 25, 25, 0,
  2145. 25, 15, 30, 10, 60, 10, 109, 109, 5, 0,
  2146. 10, 25, 25, 25, 0, 55, 0, 0, 109, 15,
  2147. 60, 20, 0, 109, 109, 109, 109, 109, 109, 109,
  2148. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2149. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2150. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2151. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2152. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2153. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2154. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2155. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2156. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2157. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2158. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2159. 109, 109, 109, 109, 109, 109, 109, 109, 109, 109,
  2160. 109, 109, 109, 109, 109, 109
  2161. };
  2162. const BYTE * s = (const BYTE*) sKey;
  2163. int iHash = strlen ( sKey );
  2164. switch ( iHash )
  2165. {
  2166. default: iHash += dAsso [ FuncHashLower ( s[2] ) ];
  2167. case 2: iHash += dAsso [ FuncHashLower ( s[1] ) ];
  2168. case 1: iHash += dAsso [ FuncHashLower ( s[0] ) ];
  2169. }
  2170. static int dIndexes[] =
  2171. {
  2172. -1, -1, -1, -1, -1, 13, -1, 51, 52, 29,
  2173. -1, -1, 55, 53, -1, -1, -1, 6, 54, -1,
  2174. 33, -1, 31, 23, 50, -1, 21, 45, 30, 2,
  2175. 44, -1, -1, 49, 60, 61, 47, -1, 57, 63,
  2176. 16, 32, 27, 38, 7, 8, 41, 39, 56, 26,
  2177. 48, 11, 59, 0, 28, 62, 46, 34, 58, 37,
  2178. -1, 36, 43, 42, 17, 3, -1, -1, 25, 18,
  2179. -1, -1, 19, 15, 14, -1, 22, -1, 4, 12,
  2180. -1, -1, -1, 5, 10, -1, -1, -1, 24, 20,
  2181. 35, -1, -1, -1, 40, -1, -1, -1, -1, -1,
  2182. -1, -1, -1, 9, -1, -1, -1, -1, 1
  2183. };
  2184. if ( iHash<0 || iHash>=(int)(sizeof(dIndexes)/sizeof(dIndexes[0])) )
  2185. return -1;
  2186. int iFunc = dIndexes[iHash];
  2187. if ( iFunc>=0 && strcasecmp ( g_dFuncs[iFunc].m_sName, sKey )==0 )
  2188. return iFunc;
  2189. return -1;
  2190. }
  2191. static int FuncHashCheck()
  2192. {
  2193. for ( int i=0; i<(int)(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])); i++ )
  2194. {
  2195. CSphString sKey ( g_dFuncs[i].m_sName );
  2196. sKey.ToLower();
  2197. if ( FuncHashLookup ( sKey.cstr() )!=i )
  2198. sphDie ( "INTERNAL ERROR: lookup for %s() failed, rebuild function hash", sKey.cstr() );
  2199. sKey.ToUpper();
  2200. if ( FuncHashLookup ( sKey.cstr() )!=i )
  2201. sphDie ( "INTERNAL ERROR: lookup for %s() failed, rebuild function hash", sKey.cstr() );
  2202. if ( g_dFuncs[i].m_eFunc!=i )
  2203. sphDie ( "INTERNAL ERROR: function hash entry %s() at index %d maps to Func_e entry %d, sync Func_e and g_dFuncs",
  2204. sKey.cstr(), i, g_dFuncs[i].m_eFunc );
  2205. }
  2206. if ( FuncHashLookup("A")!=-1 )
  2207. sphDie ( "INTERNAL ERROR: lookup for A() succeeded, rebuild function hash" );
  2208. return 1;
  2209. }
  2210. static int G_FUNC_HASH_CHECK = FuncHashCheck();
  2211. //////////////////////////////////////////////////////////////////////////
  2212. /// check whether the type is numeric
  2213. static inline bool IsNumeric ( ESphAttr eType )
  2214. {
  2215. return eType==SPH_ATTR_INTEGER || eType==SPH_ATTR_BIGINT || eType==SPH_ATTR_FLOAT;
  2216. }
  2217. /// check for type based on int value
  2218. static inline ESphAttr GetIntType ( int64_t iValue )
  2219. {
  2220. return ( iValue>=(int64_t)INT_MIN && iValue<=(int64_t)INT_MAX ) ? SPH_ATTR_INTEGER : SPH_ATTR_BIGINT;
  2221. }
  2222. /// get the widest numeric type of the two
  2223. static inline ESphAttr WidestType ( ESphAttr a, ESphAttr b )
  2224. {
  2225. assert ( IsNumeric(a) && IsNumeric(b) );
  2226. if ( a==SPH_ATTR_FLOAT || b==SPH_ATTR_FLOAT )
  2227. return SPH_ATTR_FLOAT;
  2228. if ( a==SPH_ATTR_BIGINT || b==SPH_ATTR_BIGINT )
  2229. return SPH_ATTR_BIGINT;
  2230. return SPH_ATTR_INTEGER;
  2231. }
  2232. /// list of constants
  2233. class ConstList_c
  2234. {
  2235. public:
  2236. CSphVector<int64_t> m_dInts; ///< dword/int64 storage
  2237. CSphVector<float> m_dFloats; ///< float storage
  2238. ESphAttr m_eRetType; ///< SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_STRING, or SPH_ATTR_FLOAT
  2239. CSphString m_sExpr; ///< m_sExpr copy for TOK_CONST_STRING evaluation
  2240. public:
  2241. ConstList_c ()
  2242. : m_eRetType ( SPH_ATTR_INTEGER )
  2243. {}
  2244. void Add ( int64_t iValue )
  2245. {
  2246. if ( m_eRetType==SPH_ATTR_FLOAT )
  2247. {
  2248. m_dFloats.Add ( (float)iValue );
  2249. } else
  2250. {
  2251. m_eRetType = WidestType ( m_eRetType, GetIntType ( iValue ) );
  2252. m_dInts.Add ( iValue );
  2253. }
  2254. }
  2255. void Add ( float fValue )
  2256. {
  2257. if ( m_eRetType!=SPH_ATTR_FLOAT )
  2258. {
  2259. assert ( m_dFloats.GetLength()==0 );
  2260. ARRAY_FOREACH ( i, m_dInts )
  2261. m_dFloats.Add ( (float)m_dInts[i] );
  2262. m_dInts.Reset ();
  2263. m_eRetType = SPH_ATTR_FLOAT;
  2264. }
  2265. m_dFloats.Add ( fValue );
  2266. }
  2267. };
  2268. /// {title=2, body=1}
  2269. /// {in=deg, out=mi}
  2270. /// argument to functions like BM25F() and GEODIST()
  2271. class MapArg_c
  2272. {
  2273. public:
  2274. CSphVector<CSphNamedVariant> m_dPairs;
  2275. public:
  2276. void Add ( const char * sKey, const char * sValue, int64_t iValue )
  2277. {
  2278. CSphNamedVariant & t = m_dPairs.Add();
  2279. t.m_sKey = sKey;
  2280. if ( sValue )
  2281. t.m_sValue = sValue;
  2282. else
  2283. t.m_iValue = (int)iValue;
  2284. }
  2285. };
  /// expression tree node
  /// used to build an AST (Abstract Syntax Tree)
  /// which union member is valid depends on m_iToken, see the per-member notes
  /// the constructor does not initialize the union
  struct ExprNode_t
  {
      int m_iToken; ///< token type, including operators
      ESphAttr m_eRetType; ///< result type
      ESphAttr m_eArgType; ///< args type
      CSphAttrLocator m_tLocator; ///< attribute locator, for TOK_ATTR type
      int m_iLocator; ///< index of attribute locator in schema
      union
      {
          int64_t m_iConst; ///< constant value, for TOK_CONST_INT type
          float m_fConst; ///< constant value, for TOK_CONST_FLOAT type
          int m_iFunc; ///< built-in function id, for TOK_FUNC type
          int m_iArgs; ///< args count, for arglist (token==',') type
          ConstList_c * m_pConsts; ///< constants list, for TOK_CONST_LIST type
          MapArg_c * m_pMapArg; ///< map argument (maps name to const or name to expr), for TOK_MAP_ARG type
          const char * m_sIdent; ///< pointer to const char, for TOK_IDENT type
          SphAttr_t * m_pAttr; ///< pointer to 64-bit value, for TOK_ITERATOR type
      };
      int m_iLeft; ///< NOTE(review): presumably the index of the left child node, -1 if none -- confirm
      int m_iRight; ///< NOTE(review): presumably the index of the right child node, -1 if none -- confirm

      /// token 0, no types, and -1 for the locator index and both child links
      ExprNode_t () : m_iToken ( 0 ), m_eRetType ( SPH_ATTR_NONE ), m_eArgType ( SPH_ATTR_NONE ),
          m_iLocator ( -1 ), m_iLeft ( -1 ), m_iRight ( -1 ) {}
  };
  /// element type of ExprParser_t::m_dGatherStack
  /// NOTE(review): presumably a node index plus its left/right child indexes, for non-recursive tree walks -- confirm at the use site
  struct StackNode_t
  {
      int m_iNode;
      int m_iLeft;
      int m_iRight;
  };
  2317. /// expression parser
  2318. class ExprParser_t
  2319. {
  2320. friend int yylex ( YYSTYPE * lvalp, ExprParser_t * pParser );
  2321. friend int yyparse ( ExprParser_t * pParser );
  2322. friend void yyerror ( ExprParser_t * pParser, const char * sMessage );
  2323. public:
  2324. ExprParser_t ( ISphExprHook * pHook, CSphQueryProfile * pProfiler, ESphCollation eCollation )
  2325. : m_pHook ( pHook )
  2326. , m_pProfiler ( pProfiler )
  2327. , m_bHasZonespanlist ( false )
  2328. , m_uPackedFactorFlags ( SPH_FACTOR_DISABLE )
  2329. , m_eEvalStage ( SPH_EVAL_FINAL ) // be default compute as late as possible
  2330. , m_eCollation ( eCollation )
  2331. {
  2332. m_dGatherStack.Reserve ( 64 );
  2333. }
  2334. ~ExprParser_t ();
  2335. ISphExpr * Parse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError );
  2336. protected:
  2337. int m_iParsed; ///< filled by yyparse() at the very end
  2338. CSphString m_sLexerError;
  2339. CSphString m_sParserError;
  2340. CSphString m_sCreateError;
  2341. ISphExprHook * m_pHook;
  2342. CSphQueryProfile * m_pProfiler;
  2343. protected:
  2344. ESphAttr GetWidestRet ( int iLeft, int iRight );
  2345. int AddNodeInt ( int64_t iValue );
  2346. int AddNodeFloat ( float fValue );
  2347. int AddNodeString ( int64_t iValue );
  2348. int AddNodeAttr ( int iTokenType, uint64_t uAttrLocator );
  2349. int AddNodeID ();
  2350. int AddNodeWeight ();
  2351. int AddNodeOp ( int iOp, int iLeft, int iRight );
  2352. int AddNodeFunc ( int iFunc, int iFirst, int iSecond=-1, int iThird=-1, int iFourth=-1 );
  2353. int AddNodeUdf ( int iCall, int iArg );
  2354. int AddNodePF ( int iFunc, int iArg );
  2355. int AddNodeConstlist ( int64_t iValue );
  2356. int AddNodeConstlist ( float iValue );
  2357. void AppendToConstlist ( int iNode, int64_t iValue );
  2358. void AppendToConstlist ( int iNode, float iValue );
  2359. int AddNodeUservar ( int iUservar );
  2360. int AddNodeHookIdent ( int iID );
  2361. int AddNodeHookFunc ( int iID, int iLeft );
  2362. int AddNodeMapArg ( const char * sKey, const char * sValue, int64_t iValue );
  2363. void AppendToMapArg ( int iNode, const char * sKey, const char * sValue, int64_t iValue );
  2364. const char * Attr2Ident ( uint64_t uAttrLoc );
  2365. int AddNodeJsonField ( uint64_t uAttrLocator, int iLeft );
  2366. int AddNodeJsonSubkey ( int64_t iValue );
  2367. int AddNodeDotNumber ( int64_t iValue );
  2368. int AddNodeIdent ( const char * sKey, int iLeft );
  2369. private:
  2370. const char * m_sExpr;
  2371. const char * m_pCur;
  2372. const char * m_pLastTokenStart;
  2373. const ISphSchema * m_pSchema;
  2374. CSphVector<ExprNode_t> m_dNodes;
  2375. CSphVector<CSphString> m_dUservars;
  2376. CSphVector<char*> m_dIdents;
  2377. int m_iConstNow;
  2378. CSphVector<StackNode_t> m_dGatherStack;
  2379. CSphVector<UdfCall_t*> m_dUdfCalls;
  2380. public:
  2381. bool m_bHasZonespanlist;
  2382. DWORD m_uPackedFactorFlags;
  2383. ESphEvalStage m_eEvalStage;
  2384. ESphCollation m_eCollation;
  2385. private:
  2386. int GetToken ( YYSTYPE * lvalp );
  2387. void GatherArgTypes ( int iNode, CSphVector<int> & dTypes );
  2388. void GatherArgNodes ( int iNode, CSphVector<int> & dNodes );
  2389. void GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes );
  2390. template < typename T >
  2391. void GatherArgT ( int iNode, T & FUNCTOR );
  2392. bool CheckForConstSet ( int iArgsNode, int iSkip );
  2393. int ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp );
  2394. template < typename T >
  2395. void WalkTree ( int iRoot, T & FUNCTOR );
  2396. void Optimize ( int iNode );
  2397. void CanonizePass ( int iNode );
  2398. void ConstantFoldPass ( int iNode );
  2399. void VariousOptimizationsPass ( int iNode );
  2400. void Dump ( int iNode );
  2401. ISphExpr * CreateTree ( int iNode );
  2402. ISphExpr * CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
  2403. ISphExpr * CreateInNode ( int iNode );
  2404. ISphExpr * CreateLengthNode ( const ExprNode_t & tNode, ISphExpr * pLeft );
  2405. ISphExpr * CreateGeodistNode ( int iArgs );
  2406. ISphExpr * CreatePFNode ( int iArg );
  2407. ISphExpr * CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs );
  2408. ISphExpr * CreateUdfNode ( int iCall, ISphExpr * pLeft );
  2409. ISphExpr * CreateExistNode ( const ExprNode_t & tNode );
  2410. ISphExpr * CreateContainsNode ( const ExprNode_t & tNode );
  2411. ISphExpr * CreateAggregateNode ( const ExprNode_t & tNode, ESphAggrFunc eFunc, ISphExpr * pLeft );
  2412. ISphExpr * CreateForInNode ( int iNode );
  2413. void FixupIterators ( int iNode, const char * sKey, SphAttr_t * pAttr );
  2414. bool GetError () const { return !( m_sLexerError.IsEmpty() && m_sParserError.IsEmpty() && m_sCreateError.IsEmpty() ); }
  2415. };
  2416. //////////////////////////////////////////////////////////////////////////
  2417. /// parse that numeric constant (e.g. "123", ".03")
  2418. static int ParseNumeric ( YYSTYPE * lvalp, const char ** ppStr )
  2419. {
  2420. assert ( lvalp && ppStr && *ppStr );
  2421. // try float route
  2422. char * pEnd = NULL;
  2423. float fRes = (float) strtod ( *ppStr, &pEnd );
  2424. // try int route
  2425. uint64_t uRes = 0; // unsigned overflow is better than signed overflow
  2426. bool bInt = true;
  2427. for ( const char * p=(*ppStr); p<pEnd; p++ && bInt )
  2428. {
  2429. if ( isdigit(*p) )
  2430. uRes = uRes*10 + (int)( (*p)-'0' ); // FIXME! missing overflow check, missing octal/hex handling
  2431. else
  2432. bInt = false;
  2433. }
  2434. // choose your destiny
  2435. *ppStr = pEnd;
  2436. if ( bInt )
  2437. {
  2438. lvalp->iConst = (int64_t)uRes;
  2439. return TOK_CONST_INT;
  2440. } else
  2441. {
  2442. lvalp->fConst = fRes;
  2443. return TOK_CONST_FLOAT;
  2444. }
  2445. }
  2446. // used to store in 8 bytes in Bison lvalp variable
  2447. static uint64_t sphPackAttrLocator ( const CSphAttrLocator & tLoc, int iLocator )
  2448. {
  2449. assert ( iLocator>=0 && iLocator<=0x7fff );
  2450. uint64_t uIndex = 0;
  2451. uIndex = ( tLoc.m_iBitOffset<<16 ) + tLoc.m_iBitCount + ( (uint64_t)iLocator<<32 );
  2452. if ( tLoc.m_bDynamic )
  2453. uIndex |= ( U64C(1)<<63 );
  2454. return uIndex;
  2455. }
  2456. static void sphUnpackAttrLocator ( uint64_t uIndex, ExprNode_t * pNode )
  2457. {
  2458. assert ( pNode );
  2459. pNode->m_tLocator.m_iBitOffset = (int)( ( uIndex>>16 ) & 0xffff );
  2460. pNode->m_tLocator.m_iBitCount = (int)( uIndex & 0xffff );
  2461. pNode->m_tLocator.m_bDynamic = ( ( uIndex & ( U64C(1)<<63 ) )!=0 );
  2462. pNode->m_iLocator = (int)( ( uIndex>>32 ) & 0x7fff );
  2463. }
  2464. int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
  2465. {
  2466. // check attribute type and width
  2467. const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iAttr );
  2468. int iRes = -1;
  2469. switch ( tCol.m_eAttrType )
  2470. {
  2471. case SPH_ATTR_FLOAT: iRes = TOK_ATTR_FLOAT; break;
  2472. case SPH_ATTR_UINT32SET: iRes = TOK_ATTR_MVA32; break;
  2473. case SPH_ATTR_INT64SET: iRes = TOK_ATTR_MVA64; break;
  2474. case SPH_ATTR_STRING: iRes = TOK_ATTR_STRING; break;
  2475. case SPH_ATTR_JSON: iRes = TOK_ATTR_JSON; break;
  2476. case SPH_ATTR_JSON_FIELD: iRes = TOK_ATTR_JSON; break;
  2477. case SPH_ATTR_FACTORS: iRes = TOK_ATTR_FACTORS; break;
  2478. case SPH_ATTR_INTEGER:
  2479. case SPH_ATTR_TIMESTAMP:
  2480. case SPH_ATTR_BOOL:
  2481. case SPH_ATTR_BIGINT:
  2482. case SPH_ATTR_TOKENCOUNT:
  2483. iRes = tCol.m_tLocator.IsBitfield() ? TOK_ATTR_BITS : TOK_ATTR_INT;
  2484. break;
  2485. default:
  2486. m_sLexerError.SetSprintf ( "attribute '%s' is of unsupported type (type=%d)", sTok, tCol.m_eAttrType );
  2487. return -1;
  2488. }
  2489. lvalp->iAttrLocator = sphPackAttrLocator ( tCol.m_tLocator, iAttr );
  2490. return iRes;
  2491. }
  2492. /// a lexer of my own
  2493. /// returns token id and fills lvalp on success
  2494. /// returns -1 and fills sError on failure
  2495. int ExprParser_t::GetToken ( YYSTYPE * lvalp )
  2496. {
  2497. // skip whitespace, check eof
  2498. while ( isspace ( *m_pCur ) ) m_pCur++;
  2499. m_pLastTokenStart = m_pCur;
  2500. if ( !*m_pCur ) return 0;
  2501. // check for constant
  2502. if ( isdigit ( m_pCur[0] ) )
  2503. return ParseNumeric ( lvalp, &m_pCur );
  2504. // check for field, function, or magic name
  2505. if ( sphIsAttr ( m_pCur[0] )
  2506. || ( m_pCur[0]=='@' && sphIsAttr ( m_pCur[1] ) && !isdigit ( m_pCur[1] ) ) )
  2507. {
  2508. // get token
  2509. const char * pStart = m_pCur++;
  2510. while ( sphIsAttr ( *m_pCur ) ) m_pCur++;
  2511. CSphString sTok;
  2512. sTok.SetBinary ( pStart, m_pCur-pStart );
  2513. CSphString sTokMixedCase = sTok;
  2514. sTok.ToLower ();
  2515. // check for magic name
  2516. if ( sTok=="@id" ) return TOK_ATID;
  2517. if ( sTok=="@weight" ) return TOK_ATWEIGHT;
  2518. if ( sTok=="id" ) return TOK_ID;
  2519. if ( sTok=="weight" ) return TOK_WEIGHT;
  2520. if ( sTok=="groupby" ) return TOK_GROUPBY;
  2521. if ( sTok=="distinct" ) return TOK_DISTINCT;
  2522. if ( sTok=="@geodist" )
  2523. {
  2524. int iGeodist = m_pSchema->GetAttrIndex("@geodist");
  2525. if ( iGeodist==-1 )
  2526. {
  2527. m_sLexerError = "geoanchor is not set, @geodist expression unavailable";
  2528. return -1;
  2529. }
  2530. const CSphAttrLocator & tLoc = m_pSchema->GetAttr ( iGeodist ).m_tLocator;
  2531. lvalp->iAttrLocator = sphPackAttrLocator ( tLoc, iGeodist );
  2532. return TOK_ATTR_FLOAT;
  2533. }
  2534. // check for uservar
  2535. if ( pStart[0]=='@' )
  2536. {
  2537. lvalp->iNode = m_dUservars.GetLength();
  2538. m_dUservars.Add ( sTok );
  2539. return TOK_USERVAR;
  2540. }
  2541. // check for keyword
  2542. if ( sTok=="and" ) { return TOK_AND; }
  2543. if ( sTok=="or" ) { return TOK_OR; }
  2544. if ( sTok=="not" ) { return TOK_NOT; }
  2545. if ( sTok=="div" ) { return TOK_DIV; }
  2546. if ( sTok=="mod" ) { return TOK_MOD; }
  2547. if ( sTok=="for" ) { return TOK_FOR; }
  2548. if ( sTok=="is" ) { return TOK_IS; }
  2549. if ( sTok=="null" ) { return TOK_NULL; }
  2550. // in case someone used 'count' as a name for an attribute
  2551. if ( sTok=="count" )
  2552. {
  2553. int iAttr = m_pSchema->GetAttrIndex ( "count" );
  2554. if ( iAttr>=0 )
  2555. ParseAttr ( iAttr, sTok.cstr(), lvalp );
  2556. return TOK_COUNT;
  2557. }
  2558. // check for attribute
  2559. int iAttr = m_pSchema->GetAttrIndex ( sTok.cstr() );
  2560. if ( iAttr>=0 )
  2561. return ParseAttr ( iAttr, sTok.cstr(), lvalp );
  2562. // hook might replace built-in function
  2563. int iHookFunc = -1;
  2564. if ( m_pHook )
  2565. iHookFunc = m_pHook->IsKnownFunc ( sTok.cstr() );
  2566. // check for function
  2567. int iFunc = FuncHashLookup ( sTok.cstr() );
  2568. if ( iFunc>=0 && iHookFunc==-1 )
  2569. {
  2570. assert ( !strcasecmp ( g_dFuncs[iFunc].m_sName, sTok.cstr() ) );
  2571. lvalp->iFunc = iFunc;
  2572. if ( iFunc==FUNC_IN )
  2573. return TOK_FUNC_IN;
  2574. else if ( iFunc==FUNC_REMAP )
  2575. return TOK_FUNC_REMAP;
  2576. else if ( iFunc==FUNC_PACKEDFACTORS || iFunc==FUNC_FACTORS )
  2577. return TOK_FUNC_PF;
  2578. else if ( iFunc==FUNC_RAND )
  2579. return TOK_FUNC_RAND;
  2580. else return TOK_FUNC;
  2581. }
  2582. // ask hook
  2583. if ( m_pHook )
  2584. {
  2585. int iID = m_pHook->IsKnownIdent ( sTok.cstr() );
  2586. if ( iID>=0 )
  2587. {
  2588. lvalp->iNode = iID;
  2589. return TOK_HOOK_IDENT;
  2590. }
  2591. iID = iHookFunc;
  2592. if ( iID>=0 )
  2593. {
  2594. lvalp->iNode = iID;
  2595. return TOK_HOOK_FUNC;
  2596. }
  2597. }
  2598. // check for UDF
  2599. const PluginUDF_c * pUdf = (const PluginUDF_c *) sphPluginGet ( PLUGIN_FUNCTION, sTok.cstr() );
  2600. if ( pUdf )
  2601. {
  2602. lvalp->iNode = m_dUdfCalls.GetLength();
  2603. m_dUdfCalls.Add ( new UdfCall_t() );
  2604. m_dUdfCalls.Last()->m_pUdf = pUdf;
  2605. return TOK_UDF;
  2606. }
  2607. // arbitrary identifier, then
  2608. m_dIdents.Add ( sTokMixedCase.Leak() );
  2609. lvalp->sIdent = m_dIdents.Last();
  2610. return TOK_IDENT;
  2611. }
  2612. // check for known operators, then
  2613. switch ( *m_pCur )
  2614. {
  2615. case '+':
  2616. case '-':
  2617. case '*':
  2618. case '/':
  2619. case '(':
  2620. case ')':
  2621. case ',':
  2622. case '&':
  2623. case '|':
  2624. case '%':
  2625. case '{':
  2626. case '}':
  2627. case '[':
  2628. case ']':
  2629. case '`':
  2630. return *m_pCur++;
  2631. case '<':
  2632. m_pCur++;
  2633. if ( *m_pCur=='>' ) { m_pCur++; return TOK_NE; }
  2634. if ( *m_pCur=='=' ) { m_pCur++; return TOK_LTE; }
  2635. return '<';
  2636. case '>':
  2637. m_pCur++;
  2638. if ( *m_pCur=='=' ) { m_pCur++; return TOK_GTE; }
  2639. return '>';
  2640. case '=':
  2641. m_pCur++;
  2642. if ( *m_pCur=='=' ) m_pCur++;
  2643. return TOK_EQ;
  2644. // special case for leading dots (float values without leading zero, JSON key names, etc)
  2645. case '.':
  2646. {
  2647. int iBeg = (int)( m_pCur-m_sExpr+1 );
  2648. bool bDigit = isdigit ( m_pCur[1] )!=0;
  2649. // handle dots followed by a digit
  2650. // aka, a float value without leading zero
  2651. if ( bDigit )
  2652. {
  2653. char * pEnd = NULL;
  2654. float fValue = (float) strtod ( m_pCur, &pEnd );
  2655. lvalp->fConst = fValue;
  2656. if ( pEnd && !sphIsAttr(*pEnd) )
  2657. m_pCur = pEnd;
  2658. else // fallback to subkey (e.g. ".1234a")
  2659. bDigit = false;
  2660. }
  2661. // handle dots followed by a non-digit
  2662. // for cases like jsoncol.keyname
  2663. if ( !bDigit )
  2664. {
  2665. m_pCur++;
  2666. while ( isspace ( *m_pCur ) )
  2667. m_pCur++;
  2668. iBeg = (int)( m_pCur-m_sExpr );
  2669. while ( sphIsAttr(*m_pCur) )
  2670. m_pCur++;
  2671. }
  2672. // return packed string after the dot
  2673. int iLen = (int)( m_pCur-m_sExpr ) - iBeg;
  2674. lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
  2675. return bDigit ? TOK_DOT_NUMBER : TOK_SUBKEY;
  2676. }
  2677. case '\'':
  2678. case '"':
  2679. {
  2680. const char cEnd = *m_pCur;
  2681. for ( const char * s = m_pCur+1; *s; s++ )
  2682. {
  2683. if ( *s==cEnd )
  2684. {
  2685. int iBeg = (int)( m_pCur-m_sExpr );
  2686. int iLen = (int)( s-m_sExpr ) - iBeg + 1;
  2687. lvalp->iConst = ( int64_t(iBeg)<<32 ) + iLen;
  2688. m_pCur = s+1;
  2689. return TOK_CONST_STRING;
  2690. } else if ( *s=='\\' )
  2691. {
  2692. s++;
  2693. if ( !*s )
  2694. break;
  2695. }
  2696. }
  2697. m_sLexerError.SetSprintf ( "unterminated string constant near '%s'", m_pCur );
  2698. return -1;
  2699. }
  2700. }
  2701. m_sLexerError.SetSprintf ( "unknown operator '%c' near '%s'", *m_pCur, m_pCur );
  2702. return -1;
  2703. }
  2704. /// is add/sub?
  2705. static inline bool IsAddSub ( const ExprNode_t * pNode )
  2706. {
  2707. return pNode->m_iToken=='+' || pNode->m_iToken=='-';
  2708. }
  2709. /// is unary operator?
  2710. static inline bool IsUnary ( const ExprNode_t * pNode )
  2711. {
  2712. return pNode->m_iToken==TOK_NEG || pNode->m_iToken==TOK_NOT;
  2713. }
  2714. /// is arithmetic?
  2715. static inline bool IsAri ( const ExprNode_t * pNode )
  2716. {
  2717. int iTok = pNode->m_iToken;
  2718. return iTok=='+' || iTok=='-' || iTok=='*' || iTok=='/';
  2719. }
  2720. /// is constant?
  2721. static inline bool IsConst ( const ExprNode_t * pNode )
  2722. {
  2723. return pNode->m_iToken==TOK_CONST_INT || pNode->m_iToken==TOK_CONST_FLOAT;
  2724. }
  2725. /// float value of a constant
  2726. static inline float FloatVal ( const ExprNode_t * pNode )
  2727. {
  2728. assert ( IsConst(pNode) );
  2729. return pNode->m_iToken==TOK_CONST_INT
  2730. ? (float)pNode->m_iConst
  2731. : pNode->m_fConst;
  2732. }
  2733. void ExprParser_t::CanonizePass ( int iNode )
  2734. {
  2735. if ( iNode<0 )
  2736. return;
  2737. CanonizePass ( m_dNodes [ iNode ].m_iLeft );
  2738. CanonizePass ( m_dNodes [ iNode ].m_iRight );
  2739. ExprNode_t * pRoot = &m_dNodes [ iNode ];
  2740. ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
  2741. ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;
  2742. // canonize (expr op const), move const to the left
  2743. if ( IsAri ( pRoot ) && !IsConst ( pLeft ) && IsConst ( pRight ) )
  2744. {
  2745. Swap ( pRoot->m_iLeft, pRoot->m_iRight );
  2746. Swap ( pLeft, pRight );
  2747. // fixup (expr-const) to ((-const)+expr)
  2748. if ( pRoot->m_iToken=='-' )
  2749. {
  2750. pRoot->m_iToken = '+';
  2751. if ( pLeft->m_iToken==TOK_CONST_INT )
  2752. pLeft->m_iConst *= -1;
  2753. else
  2754. pLeft->m_fConst *= -1;
  2755. }
  2756. // fixup (expr/const) to ((1/const)*expr)
  2757. if ( pRoot->m_iToken=='/' )
  2758. {
  2759. pRoot->m_iToken = '*';
  2760. pLeft->m_fConst = 1.0f / FloatVal ( pLeft );
  2761. pLeft->m_iToken = TOK_CONST_FLOAT;
  2762. }
  2763. }
  2764. // promote children constants
  2765. if ( IsAri ( pRoot ) && IsAri ( pLeft ) && IsAddSub ( pLeft )==IsAddSub ( pRoot ) &&
  2766. IsConst ( &m_dNodes [ pLeft->m_iLeft ] ) )
  2767. {
  2768. // ((const op lr) op2 right) gets replaced with (const op (lr op2/op right))
  2769. // constant gets promoted one level up
  2770. int iConst = pLeft->m_iLeft;
  2771. pLeft->m_iLeft = pLeft->m_iRight;
  2772. pLeft->m_iRight = pRoot->m_iRight; // (c op lr) -> (lr ... r)
  2773. switch ( pLeft->m_iToken )
  2774. {
  2775. case '+':
  2776. case '*':
  2777. // (c + lr) op r -> c + (lr op r)
  2778. // (c * lr) op r -> c * (lr op r)
  2779. Swap ( pLeft->m_iToken, pRoot->m_iToken );
  2780. break;
  2781. case '-':
  2782. // (c - lr) + r -> c - (lr - r)
  2783. // (c - lr) - r -> c - (lr + r)
  2784. pLeft->m_iToken = ( pRoot->m_iToken=='+' ? '-' : '+' );
  2785. pRoot->m_iToken = '-';
  2786. break;
  2787. case '/':
  2788. // (c / lr) * r -> c * (r / lr)
  2789. // (c / lr) / r -> c / (r * lr)
  2790. Swap ( pLeft->m_iLeft, pLeft->m_iRight );
  2791. pLeft->m_iToken = ( pRoot->m_iToken=='*' ) ? '/' : '*';
  2792. break;
  2793. default:
  2794. assert ( 0 && "internal error: unhandled op in left-const promotion" );
  2795. }
  2796. pRoot->m_iRight = pRoot->m_iLeft;
  2797. pRoot->m_iLeft = iConst;
  2798. pLeft = &m_dNodes [ pRoot->m_iLeft ];
  2799. pRight = &m_dNodes [ pRoot->m_iRight ];
  2800. }
  2801. // MySQL Workbench fixup
  2802. if ( pRoot->m_iToken==TOK_FUNC && ( pRoot->m_iFunc==FUNC_CURRENT_USER || pRoot->m_iFunc==FUNC_CONNECTION_ID ) )
  2803. {
  2804. pRoot->m_iToken = TOK_CONST_INT;
  2805. pRoot->m_iConst = 0;
  2806. return;
  2807. }
  2808. }
  2809. void ExprParser_t::ConstantFoldPass ( int iNode )
  2810. {
  2811. if ( iNode<0 )
  2812. return;
  2813. ConstantFoldPass ( m_dNodes [ iNode ].m_iLeft );
  2814. ConstantFoldPass ( m_dNodes [ iNode ].m_iRight );
  2815. ExprNode_t * pRoot = &m_dNodes [ iNode ];
  2816. ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
  2817. ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;
  2818. // unary arithmetic expression with constant
  2819. if ( IsUnary ( pRoot ) && IsConst ( pLeft ) )
  2820. {
  2821. if ( pLeft->m_iToken==TOK_CONST_INT )
  2822. {
  2823. switch ( pRoot->m_iToken )
  2824. {
  2825. case TOK_NEG: pRoot->m_iConst = -pLeft->m_iConst; break;
  2826. case TOK_NOT: pRoot->m_iConst = !pLeft->m_iConst; break;
  2827. default: assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
  2828. }
  2829. } else
  2830. {
  2831. switch ( pRoot->m_iToken )
  2832. {
  2833. case TOK_NEG: pRoot->m_fConst = -pLeft->m_fConst; break;
  2834. case TOK_NOT: pRoot->m_fConst = !pLeft->m_fConst; break;
  2835. default: assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
  2836. }
  2837. }
  2838. pRoot->m_iToken = pLeft->m_iToken;
  2839. pRoot->m_iLeft = -1;
  2840. return;
  2841. }
  2842. // arithmetic expression with constants
  2843. if ( IsAri ( pRoot ) )
  2844. {
  2845. assert ( pLeft && pRight );
  2846. // optimize fully-constant expressions
  2847. if ( IsConst ( pLeft ) && IsConst ( pRight ) )
  2848. {
  2849. if ( pLeft->m_iToken==TOK_CONST_INT && pRight->m_iToken==TOK_CONST_INT && pRoot->m_iToken!='/' )
  2850. {
  2851. switch ( pRoot->m_iToken )
  2852. {
  2853. case '+': pRoot->m_iConst = pLeft->m_iConst + pRight->m_iConst; break;
  2854. case '-': pRoot->m_iConst = pLeft->m_iConst - pRight->m_iConst; break;
  2855. case '*': pRoot->m_iConst = pLeft->m_iConst * pRight->m_iConst; break;
  2856. default: assert ( 0 && "internal error: unhandled arithmetic token during const-int optimization" );
  2857. }
  2858. pRoot->m_iToken = TOK_CONST_INT;
  2859. } else
  2860. {
  2861. float fLeft = FloatVal ( pLeft );
  2862. float fRight = FloatVal ( pRight );
  2863. switch ( pRoot->m_iToken )
  2864. {
  2865. case '+': pRoot->m_fConst = fLeft + fRight; break;
  2866. case '-': pRoot->m_fConst = fLeft - fRight; break;
  2867. case '*': pRoot->m_fConst = fLeft * fRight; break;
  2868. case '/': pRoot->m_fConst = fRight ? fLeft / fRight : 0.0f; break;
  2869. default: assert ( 0 && "internal error: unhandled arithmetic token during const-float optimization" );
  2870. }
  2871. pRoot->m_iToken = TOK_CONST_FLOAT;
  2872. }
  2873. pRoot->m_iLeft = -1;
  2874. pRoot->m_iRight = -1;
  2875. return;
  2876. }
  2877. // optimize compatible operations with constants
  2878. if ( IsConst ( pLeft ) && IsAri ( pRight ) && IsAddSub ( pRoot )==IsAddSub ( pRight ) &&
  2879. IsConst ( &m_dNodes [ pRight->m_iLeft ] ) )
  2880. {
  2881. ExprNode_t * pConst = &m_dNodes [ pRight->m_iLeft ];
  2882. ExprNode_t * pExpr = &m_dNodes [ pRight->m_iRight ];
  2883. assert ( !IsConst ( pExpr ) ); // must had been optimized
  2884. // optimize (left op (const op2 expr)) to ((left op const) op*op2 expr)
  2885. if ( IsAddSub ( pRoot ) )
  2886. {
  2887. // fold consts
  2888. int iSign = ( ( pRoot->m_iToken=='+' ) ? 1 : -1 );
  2889. if ( pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
  2890. {
  2891. pLeft->m_iConst += iSign*pConst->m_iConst;
  2892. } else
  2893. {
  2894. pLeft->m_fConst = FloatVal ( pLeft ) + iSign*FloatVal ( pConst );
  2895. pLeft->m_iToken = TOK_CONST_FLOAT;
  2896. }
  2897. // fold ops
  2898. pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '+' : '-';
  2899. } else
  2900. {
  2901. // fols consts
  2902. if ( pRoot->m_iToken=='*' && pLeft->m_iToken==TOK_CONST_INT && pConst->m_iToken==TOK_CONST_INT )
  2903. {
  2904. pLeft->m_iConst *= pConst->m_iConst;
  2905. } else
  2906. {
  2907. if ( pRoot->m_iToken=='*' )
  2908. pLeft->m_fConst = FloatVal ( pLeft ) * FloatVal ( pConst );
  2909. else
  2910. pLeft->m_fConst = FloatVal ( pLeft ) / FloatVal ( pConst );
  2911. pLeft->m_iToken = TOK_CONST_FLOAT;
  2912. }
  2913. // fold ops
  2914. pRoot->m_iToken = ( pRoot->m_iToken==pRight->m_iToken ) ? '*' : '/';
  2915. }
  2916. // promote expr arg
  2917. pRoot->m_iRight = pRight->m_iRight;
  2918. pRight = pExpr;
  2919. }
  2920. }
  2921. // unary function from a constant
  2922. if ( pRoot->m_iToken==TOK_FUNC && g_dFuncs [ pRoot->m_iFunc ].m_iArgs==1 && IsConst ( pLeft ) )
  2923. {
  2924. float fArg = pLeft->m_iToken==TOK_CONST_FLOAT ? pLeft->m_fConst : float ( pLeft->m_iConst );
  2925. switch ( pRoot->m_iFunc )
  2926. {
  2927. case FUNC_ABS:
  2928. pRoot->m_iToken = pLeft->m_iToken;
  2929. pRoot->m_iLeft = -1;
  2930. if ( pLeft->m_iToken==TOK_CONST_INT )
  2931. pRoot->m_iConst = IABS ( pLeft->m_iConst );
  2932. else
  2933. pRoot->m_fConst = (float)fabs ( fArg );
  2934. break;
  2935. case FUNC_CEIL: pRoot->m_iToken = TOK_CONST_INT; pRoot->m_iLeft = -1; pRoot->m_iConst = (int64_t)ceil ( fArg ); break;
  2936. case FUNC_FLOOR: pRoot->m_iToken = TOK_CONST_INT; pRoot->m_iLeft = -1; pRoot->m_iConst = (int64_t)floor ( fArg ); break;
  2937. case FUNC_SIN: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( sin ( fArg) ); break;
  2938. case FUNC_COS: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( cos ( fArg ) ); break;
  2939. case FUNC_LN: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float) log(fArg) : 0.0f; break;
  2940. case FUNC_LOG2: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)( log(fArg)*M_LOG2E ) : 0.0f; break;
  2941. case FUNC_LOG10: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)( log(fArg)*M_LOG10E ) : 0.0f; break;
  2942. case FUNC_EXP: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = float ( exp ( fArg ) ); break;
  2943. case FUNC_SQRT: pRoot->m_iToken = TOK_CONST_FLOAT; pRoot->m_iLeft = -1; pRoot->m_fConst = fArg>0.0f ? (float)sqrt(fArg) : 0.0f; break;
  2944. default: break;
  2945. }
  2946. return;
  2947. }
  2948. }
  2949. void ExprParser_t::VariousOptimizationsPass ( int iNode )
  2950. {
  2951. if ( iNode<0 )
  2952. return;
  2953. VariousOptimizationsPass ( m_dNodes [ iNode ].m_iLeft );
  2954. VariousOptimizationsPass ( m_dNodes [ iNode ].m_iRight );
  2955. ExprNode_t * pRoot = &m_dNodes [ iNode ];
  2956. ExprNode_t * pLeft = ( pRoot->m_iLeft>=0 ) ? &m_dNodes [ pRoot->m_iLeft ] : NULL;
  2957. ExprNode_t * pRight = ( pRoot->m_iRight>=0 ) ? &m_dNodes [ pRoot->m_iRight ] : NULL;
  2958. // madd, mul3
  2959. // FIXME! separate pass for these? otherwise (2+(a*b))+3 won't get const folding
  2960. if ( ( pRoot->m_iToken=='+' || pRoot->m_iToken=='*' ) && ( pLeft->m_iToken=='*' || pRight->m_iToken=='*' ) )
  2961. {
  2962. if ( pLeft->m_iToken!='*' )
  2963. {
  2964. Swap ( pRoot->m_iLeft, pRoot->m_iRight );
  2965. Swap ( pLeft, pRight );
  2966. }
  2967. pLeft->m_iToken = ',';
  2968. int iLeft = pRoot->m_iLeft;
  2969. int iRight = pRoot->m_iRight;
  2970. pRoot->m_iFunc = ( pRoot->m_iToken=='+' ) ? FUNC_MADD : FUNC_MUL3;
  2971. pRoot->m_iToken = TOK_FUNC;
  2972. pRoot->m_iLeft = m_dNodes.GetLength();
  2973. pRoot->m_iRight = -1;
  2974. ExprNode_t & tArgs = m_dNodes.Add(); // invalidates all pointers!
  2975. tArgs.m_iToken = ',';
  2976. tArgs.m_iLeft = iLeft;
  2977. tArgs.m_iRight = iRight;
  2978. return;
  2979. }
  2980. // division by a constant (replace with multiplication by inverse)
  2981. if ( pRoot->m_iToken=='/' && pRight->m_iToken==TOK_CONST_FLOAT )
  2982. {
  2983. pRight->m_fConst = 1.0f / pRight->m_fConst;
  2984. pRoot->m_iToken = '*';
  2985. return;
  2986. }
  2987. // SINT(int-attr)
  2988. if ( pRoot->m_iToken==TOK_FUNC && pRoot->m_iFunc==FUNC_SINT )
  2989. {
  2990. assert ( pLeft );
  2991. if ( pLeft->m_iToken==TOK_ATTR_INT || pLeft->m_iToken==TOK_ATTR_BITS )
  2992. {
  2993. pRoot->m_iToken = TOK_ATTR_SINT;
  2994. pRoot->m_tLocator = pLeft->m_tLocator;
  2995. pRoot->m_iLeft = -1;
  2996. }
  2997. }
  2998. }
  2999. /// optimize subtree
  3000. void ExprParser_t::Optimize ( int iNode )
  3001. {
  3002. CanonizePass ( iNode );
  3003. ConstantFoldPass ( iNode );
  3004. VariousOptimizationsPass ( iNode );
  3005. }
  3006. // debug dump
  3007. void ExprParser_t::Dump ( int iNode )
  3008. {
  3009. if ( iNode<0 )
  3010. return;
  3011. ExprNode_t & tNode = m_dNodes[iNode];
  3012. switch ( tNode.m_iToken )
  3013. {
  3014. case TOK_CONST_INT:
  3015. printf ( INT64_FMT, tNode.m_iConst );
  3016. break;
  3017. case TOK_CONST_FLOAT:
  3018. printf ( "%f", tNode.m_fConst );
  3019. break;
  3020. case TOK_ATTR_INT:
  3021. case TOK_ATTR_SINT:
  3022. printf ( "row[%d]", tNode.m_tLocator.m_iBitOffset/32 );
  3023. break;
  3024. default:
  3025. printf ( "(" );
  3026. Dump ( tNode.m_iLeft );
  3027. printf ( ( tNode.m_iToken<256 ) ? " %c " : " op-%d ", tNode.m_iToken );
  3028. Dump ( tNode.m_iRight );
  3029. printf ( ")" );
  3030. break;
  3031. }
  3032. }
  3033. /// fold arglist into array
  3034. static void FoldArglist ( ISphExpr * pLeft, CSphVector<ISphExpr *> & dArgs )
  3035. {
  3036. if ( !pLeft || !pLeft->IsArglist() )
  3037. {
  3038. dArgs.Add ( pLeft );
  3039. return;
  3040. }
  3041. Expr_Arglist_c * pArgs = (Expr_Arglist_c *)pLeft;
  3042. Swap ( dArgs, pArgs->m_dArgs );
  3043. SafeRelease ( pLeft );
  3044. }
  3045. typedef sphinx_int64_t ( *UdfInt_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
  3046. typedef double ( *UdfDouble_fn ) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
  3047. typedef char * ( *UdfCharptr_fn) ( SPH_UDF_INIT *, SPH_UDF_ARGS *, char * );
  3048. class Expr_Udf_c : public ISphExpr
  3049. {
  3050. public:
  3051. CSphVector<ISphExpr*> m_dArgs;
  3052. CSphVector<int> m_dArgs2Free;
  3053. protected:
  3054. UdfCall_t * m_pCall;
  3055. mutable CSphVector<int64_t> m_dArgvals;
  3056. mutable char m_bError;
  3057. CSphQueryProfile * m_pProfiler;
  3058. const BYTE * m_pStrings;
  3059. public:
  3060. explicit Expr_Udf_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
  3061. : m_pCall ( pCall )
  3062. , m_bError ( 0 )
  3063. , m_pProfiler ( pProfiler )
  3064. , m_pStrings ( NULL )
  3065. {
  3066. SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
  3067. assert ( tArgs.arg_values==NULL );
  3068. tArgs.arg_values = new char * [ tArgs.arg_count ];
  3069. tArgs.str_lengths = new int [ tArgs.arg_count ];
  3070. m_dArgs2Free = pCall->m_dArgs2Free;
  3071. m_dArgvals.Resize ( tArgs.arg_count );
  3072. ARRAY_FOREACH ( i, m_dArgvals )
  3073. tArgs.arg_values[i] = (char*) &m_dArgvals[i];
  3074. }
  3075. ~Expr_Udf_c ()
  3076. {
  3077. if ( m_pCall->m_pUdf->m_fnDeinit )
  3078. m_pCall->m_pUdf->m_fnDeinit ( &m_pCall->m_tInit );
  3079. SafeDelete ( m_pCall );
  3080. ARRAY_FOREACH ( i, m_dArgs )
  3081. SafeRelease ( m_dArgs[i] );
  3082. }
  3083. void FillArgs ( const CSphMatch & tMatch ) const
  3084. {
  3085. int64_t iPacked = 0;
  3086. ESphJsonType eJson = JSON_NULL;
  3087. DWORD uOff = 0;
  3088. CSphVector<BYTE> dTmp;
  3089. // FIXME? a cleaner way to reinterpret?
  3090. SPH_UDF_ARGS & tArgs = m_pCall->m_tArgs;
  3091. ARRAY_FOREACH ( i, m_dArgs )
  3092. {
  3093. switch ( tArgs.arg_types[i] )
  3094. {
  3095. case SPH_UDF_TYPE_UINT32: *(DWORD*)&m_dArgvals[i] = m_dArgs[i]->IntEval ( tMatch ); break;
  3096. case SPH_UDF_TYPE_INT64: m_dArgvals[i] = m_dArgs[i]->Int64Eval ( tMatch ); break;
  3097. case SPH_UDF_TYPE_FLOAT: *(float*)&m_dArgvals[i] = m_dArgs[i]->Eval ( tMatch ); break;
  3098. case SPH_UDF_TYPE_STRING: tArgs.str_lengths[i] = m_dArgs[i]->StringEval ( tMatch, (const BYTE**)&tArgs.arg_values[i] ); break;
  3099. case SPH_UDF_TYPE_UINT32SET: tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
  3100. case SPH_UDF_TYPE_UINT64SET: tArgs.arg_values[i] = (char*) m_dArgs[i]->MvaEval ( tMatch ); break;
  3101. case SPH_UDF_TYPE_FACTORS: tArgs.arg_values[i] = (char*) m_dArgs[i]->FactorEval ( tMatch ); break;
  3102. case SPH_UDF_TYPE_JSON:
  3103. iPacked = m_dArgs[i]->Int64Eval ( tMatch );
  3104. eJson = ESphJsonType ( iPacked>>32 );
  3105. uOff = (DWORD)iPacked;
  3106. if ( !uOff || eJson==JSON_NULL )
  3107. {
  3108. tArgs.arg_values[i] = NULL;
  3109. tArgs.str_lengths[i] = 0;
  3110. } else
  3111. {
  3112. sphJsonFieldFormat ( dTmp, m_pStrings+uOff, eJson, false );
  3113. tArgs.str_lengths[i] = dTmp.GetLength();
  3114. tArgs.arg_values[i] = (char *)dTmp.LeakData();
  3115. }
  3116. break;
  3117. default: assert ( 0 ); m_dArgvals[i] = 0; break;
  3118. }
  3119. }
  3120. }
  3121. void FreeArgs() const
  3122. {
  3123. ARRAY_FOREACH ( i, m_dArgs2Free )
  3124. {
  3125. int iAttr = m_dArgs2Free[i];
  3126. SafeDeleteArray ( m_pCall->m_tArgs.arg_values[iAttr] );
  3127. }
  3128. }
  3129. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  3130. {
  3131. if ( eCmd==SPH_EXPR_GET_UDF )
  3132. {
  3133. *((bool*)pArg) = true;
  3134. return;
  3135. }
  3136. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  3137. m_pStrings = (const BYTE*)pArg;
  3138. ARRAY_FOREACH ( i, m_dArgs )
  3139. m_dArgs[i]->Command ( eCmd, pArg );
  3140. }
  3141. virtual uint64_t GetHash ( const ISphSchema &, uint64_t, bool & bDisable )
  3142. {
  3143. bDisable = true;
  3144. return 0;
  3145. }
  3146. };
  3147. class Expr_UdfInt_c : public Expr_Udf_c
  3148. {
  3149. public:
  3150. explicit Expr_UdfInt_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
  3151. : Expr_Udf_c ( pCall, pProfiler )
  3152. {
  3153. assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_INTEGER || pCall->m_pUdf->m_eRetType==SPH_ATTR_BIGINT );
  3154. }
  3155. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  3156. {
  3157. if ( m_bError )
  3158. return 0;
  3159. CSphScopedProfile tProf ( m_pProfiler, SPH_QSTATE_EVAL_UDF );
  3160. FillArgs ( tMatch );
  3161. UdfInt_fn pFn = (UdfInt_fn) m_pCall->m_pUdf->m_fnFunc;
  3162. int64_t iRes = pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
  3163. FreeArgs();
  3164. return iRes;
  3165. }
  3166. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Int64Eval ( tMatch ); }
  3167. virtual float Eval ( const CSphMatch & tMatch ) const { return (float) Int64Eval ( tMatch ); }
  3168. };
  3169. class Expr_UdfFloat_c : public Expr_Udf_c
  3170. {
  3171. public:
  3172. explicit Expr_UdfFloat_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
  3173. : Expr_Udf_c ( pCall, pProfiler )
  3174. {
  3175. assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_FLOAT );
  3176. }
  3177. virtual float Eval ( const CSphMatch & tMatch ) const
  3178. {
  3179. if ( m_bError )
  3180. return 0;
  3181. CSphScopedProfile tProf ( m_pProfiler, SPH_QSTATE_EVAL_UDF );
  3182. FillArgs ( tMatch );
  3183. UdfDouble_fn pFn = (UdfDouble_fn) m_pCall->m_pUdf->m_fnFunc;
  3184. float fRes = (float) pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError );
  3185. FreeArgs();
  3186. return fRes;
  3187. }
  3188. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int) Eval ( tMatch ); }
  3189. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return (int64_t) Eval ( tMatch ); }
  3190. };
  3191. class Expr_UdfStringptr_c : public Expr_Udf_c
  3192. {
  3193. public:
  3194. explicit Expr_UdfStringptr_c ( UdfCall_t * pCall, CSphQueryProfile * pProfiler )
  3195. : Expr_Udf_c ( pCall, pProfiler )
  3196. {
  3197. assert ( pCall->m_pUdf->m_eRetType==SPH_ATTR_STRINGPTR );
  3198. }
  3199. virtual float Eval ( const CSphMatch & ) const
  3200. {
  3201. assert ( 0 && "internal error: stringptr udf evaluated as float" );
  3202. return 0.0f;
  3203. }
  3204. virtual int IntEval ( const CSphMatch & ) const
  3205. {
  3206. assert ( 0 && "internal error: stringptr udf evaluated as int" );
  3207. return 0;
  3208. }
  3209. virtual int64_t Int64Eval ( const CSphMatch & ) const
  3210. {
  3211. assert ( 0 && "internal error: stringptr udf evaluated as bigint" );
  3212. return 0;
  3213. }
  3214. virtual int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
  3215. {
  3216. if ( m_bError )
  3217. {
  3218. *ppStr = NULL;
  3219. return 0;
  3220. }
  3221. CSphScopedProfile tProf ( m_pProfiler, SPH_QSTATE_EVAL_UDF );
  3222. FillArgs ( tMatch );
  3223. UdfCharptr_fn pFn = (UdfCharptr_fn) m_pCall->m_pUdf->m_fnFunc;
  3224. char * pRes = pFn ( &m_pCall->m_tInit, &m_pCall->m_tArgs, &m_bError ); // owned now!
  3225. *ppStr = (const BYTE*) pRes;
  3226. int iLen = ( pRes ? strlen(pRes) : 0 );
  3227. FreeArgs();
  3228. return iLen;
  3229. }
  3230. virtual bool IsStringPtr() const
  3231. {
  3232. return true;
  3233. }
  3234. };
  3235. ISphExpr * ExprParser_t::CreateUdfNode ( int iCall, ISphExpr * pLeft )
  3236. {
  3237. Expr_Udf_c * pRes = NULL;
  3238. switch ( m_dUdfCalls[iCall]->m_pUdf->m_eRetType )
  3239. {
  3240. case SPH_ATTR_INTEGER:
  3241. case SPH_ATTR_BIGINT:
  3242. pRes = new Expr_UdfInt_c ( m_dUdfCalls[iCall], m_pProfiler );
  3243. break;
  3244. case SPH_ATTR_FLOAT:
  3245. pRes = new Expr_UdfFloat_c ( m_dUdfCalls[iCall], m_pProfiler );
  3246. break;
  3247. case SPH_ATTR_STRINGPTR:
  3248. pRes = new Expr_UdfStringptr_c ( m_dUdfCalls[iCall], m_pProfiler );
  3249. break;
  3250. default:
  3251. m_sCreateError.SetSprintf ( "internal error: unhandled type %d in CreateUdfNode()", m_dUdfCalls[iCall]->m_pUdf->m_eRetType );
  3252. break;
  3253. }
  3254. if ( pRes )
  3255. {
  3256. if ( pLeft )
  3257. FoldArglist ( pLeft, pRes->m_dArgs );
  3258. m_dUdfCalls[iCall] = NULL; // evaluator owns it now
  3259. }
  3260. return pRes;
  3261. }
  3262. ISphExpr * ExprParser_t::CreateExistNode ( const ExprNode_t & tNode )
  3263. {
  3264. assert ( m_dNodes[tNode.m_iLeft].m_iToken==',' );
  3265. int iAttrName = m_dNodes[tNode.m_iLeft].m_iLeft;
  3266. int iAttrDefault = m_dNodes[tNode.m_iLeft].m_iRight;
  3267. assert ( iAttrName>=0 && iAttrName<m_dNodes.GetLength()
  3268. && iAttrDefault>=0 && iAttrDefault<m_dNodes.GetLength() );
  3269. int iNameStart = (int)( m_dNodes[iAttrName].m_iConst>>32 );
  3270. int iNameLen = (int)( m_dNodes[iAttrName].m_iConst & 0xffffffffUL );
  3271. // skip head and tail non attribute name symbols
  3272. while ( m_sExpr[iNameStart]!='\0' && ( m_sExpr[iNameStart]=='\'' || m_sExpr[iNameStart]==' ' ) && iNameLen )
  3273. {
  3274. iNameStart++;
  3275. iNameLen--;
  3276. }
  3277. while ( m_sExpr[iNameStart+iNameLen-1]!='\0'
  3278. && ( m_sExpr[iNameStart+iNameLen-1]=='\'' || m_sExpr[iNameStart+iNameLen-1]==' ' )
  3279. && iNameLen )
  3280. {
  3281. iNameLen--;
  3282. }
  3283. if ( iNameLen<=0 )
  3284. {
  3285. m_sCreateError.SetSprintf ( "first EXIST() argument must be valid string" );
  3286. return NULL;
  3287. }
  3288. assert ( iNameStart>=0 && iNameLen>0 && iNameStart+iNameLen<=(int)strlen ( m_sExpr ) );
  3289. CSphString sAttr ( m_sExpr+iNameStart, iNameLen );
  3290. sphColumnToLowercase ( const_cast<char *>( sAttr.cstr() ) );
  3291. int iLoc = m_pSchema->GetAttrIndex ( sAttr.cstr() );
  3292. if ( iLoc>=0 )
  3293. {
  3294. const CSphColumnInfo & tCol = m_pSchema->GetAttr ( iLoc );
  3295. if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_INT64SET || tCol.m_eAttrType==SPH_ATTR_STRING )
  3296. {
  3297. m_sCreateError = "MVA and STRING in EXIST() prohibited";
  3298. return NULL;
  3299. }
  3300. const CSphAttrLocator & tLoc = tCol.m_tLocator;
  3301. if ( tNode.m_eRetType==SPH_ATTR_FLOAT )
  3302. return new Expr_GetFloat_c ( tLoc, iLoc );
  3303. else
  3304. return new Expr_GetInt_c ( tLoc, iLoc );
  3305. } else
  3306. {
  3307. if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
  3308. return new Expr_GetIntConst_c ( (int)m_dNodes[iAttrDefault].m_iConst );
  3309. else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
  3310. return new Expr_GetInt64Const_c ( m_dNodes[iAttrDefault].m_iConst );
  3311. else
  3312. return new Expr_GetConst_c ( m_dNodes[iAttrDefault].m_fConst );
  3313. }
  3314. }
  3315. //////////////////////////////////////////////////////////////////////////
  3316. class Expr_Contains_c : public ISphExpr
  3317. {
  3318. protected:
  3319. ISphExpr * m_pLat;
  3320. ISphExpr * m_pLon;
  3321. static bool Contains ( float x, float y, int n, const float * p )
  3322. {
  3323. bool bIn = false;
  3324. for ( int ii=0; ii<n; ii+=2 )
  3325. {
  3326. // get that edge
  3327. float ax = p[ii];
  3328. float ay = p[ii+1];
  3329. float bx = ( ii==n-2 ) ? p[0] : p[ii+2];
  3330. float by = ( ii==n-2 ) ? p[1] : p[ii+3];
  3331. // check point vs edge
  3332. float t1 = (x-ax)*(by-ay);
  3333. float t2 = (y-ay)*(bx-ax);
  3334. if ( t1==t2 && !( ax==bx && ay==by ) )
  3335. {
  3336. // so AP and AB are colinear
  3337. // because (AP dot (-AB.y, AB.x)) aka (t1-t2) is 0
  3338. // check (AP dot AB) vs (AB dot AB) then
  3339. float t3 = (x-ax)*(bx-ax) + (y-ay)*(by-ay); // AP dot AP
  3340. float t4 = (bx-ax)*(bx-ax) + (by-ay)*(by-ay); // AB dot AB
  3341. if ( t3>=0 && t3<=t4 )
  3342. return true;
  3343. }
  3344. // count edge crossings
  3345. if ( ( ay>y )!=(by>y) )
  3346. if ( ( t1<t2 ) ^ ( by<ay ) )
  3347. bIn = !bIn;
  3348. }
  3349. return bIn;
  3350. }
  3351. public:
  3352. Expr_Contains_c ( ISphExpr * pLat, ISphExpr * pLon )
  3353. : m_pLat ( pLat )
  3354. , m_pLon ( pLon )
  3355. {}
  3356. ~Expr_Contains_c()
  3357. {
  3358. SafeRelease ( m_pLat );
  3359. SafeRelease ( m_pLon );
  3360. }
  3361. virtual float Eval ( const CSphMatch & tMatch ) const
  3362. {
  3363. return (float)IntEval ( tMatch );
  3364. }
  3365. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  3366. {
  3367. return IntEval ( tMatch );
  3368. }
  3369. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  3370. {
  3371. m_pLat->Command ( eCmd, pArg );
  3372. m_pLon->Command ( eCmd, pArg );
  3373. }
  3374. protected:
  3375. virtual uint64_t CalcHash ( const char * szTag, const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  3376. {
  3377. EXPR_CLASS_NAME_NOCHECK(szTag);
  3378. CALC_CHILD_HASH(m_pLat);
  3379. CALC_CHILD_HASH(m_pLon);
  3380. return CALC_DEP_HASHES();
  3381. }
  3382. // FIXME! implement SetStringPool?
  3383. };
  3384. //////////////////////////////////////////////////////////////////////////
  3385. // GEODISTANCE
  3386. //////////////////////////////////////////////////////////////////////////
  // degrees <-> radians conversion factors
  // the "2" flavors convert degrees into HALF the angle in radians
  static const double PI = 3.14159265358979323846;
  static const double TO_RAD = PI / 180.0;
  static const double TO_RAD2 = PI / 360.0;
  static const double TO_DEG = 180.0 / PI;

  // same factors, precomputed as floats
  static const float TO_RADF = (float)( PI / 180.0 );
  static const float TO_RADF2 = (float)( PI / 360.0 );
  static const float TO_DEGF = (float)( 180.0 / PI );

  // lookup tables for the fast geodistance code, filled by GeodistInit()
  // every table has one extra trailing entry, so that [i+1] is valid for the last cell

  const int GEODIST_TABLE_COS = 1024; // maxerr 0.00063%
  static float g_GeoCos[GEODIST_TABLE_COS+1]; ///< cos(x) table

  const int GEODIST_TABLE_ASIN = 512;
  static float g_GeoAsin[GEODIST_TABLE_ASIN+1]; ///< asin(sqrt(x)) table

  const int GEODIST_TABLE_K = 1024;
  static float g_GeoFlatK[GEODIST_TABLE_K+1][2]; ///< GeodistAdaptive() flat ellipsoid method k1,k2 coeffs table (stored squared)
  3401. void GeodistInit()
  3402. {
  3403. for ( int i=0; i<=GEODIST_TABLE_COS; i++ )
  3404. g_GeoCos[i] = (float)cos ( 2*PI*i/GEODIST_TABLE_COS ); // [0, 2pi] -> [0, COSTABLE]
  3405. for ( int i=0; i<=GEODIST_TABLE_ASIN; i++ )
  3406. g_GeoAsin[i] = (float)asin ( sqrt ( double(i)/GEODIST_TABLE_ASIN ) ); // [0, 1] -> [0, ASINTABLE]
  3407. for ( int i=0; i<=GEODIST_TABLE_K; i++ )
  3408. {
  3409. double x = PI*i/GEODIST_TABLE_K - PI*0.5; // [-pi/2, pi/2] -> [0, KTABLE]
  3410. g_GeoFlatK[i][0] = (float) sqr ( 111132.09 - 566.05*cos ( 2*x ) + 1.20*cos ( 4*x ) );
  3411. g_GeoFlatK[i][1] = (float) sqr ( 111415.13*cos(x) - 94.55*cos ( 3*x ) + 0.12*cos ( 5*x ) );
  3412. }
  3413. }
  3414. inline float GeodistSphereRad ( float lat1, float lon1, float lat2, float lon2 )
  3415. {
  3416. static const double D = 2*6384000;
  3417. double dlat2 = 0.5*( lat1 - lat2 );
  3418. double dlon2 = 0.5*( lon1 - lon2 );
  3419. double a = sqr ( sin(dlat2) ) + cos(lat1)*cos(lat2)*sqr ( sin(dlon2) );
  3420. double c = asin ( Min ( 1.0, sqrt(a) ) );
  3421. return (float)(D*c);
  3422. }
  3423. inline float GeodistSphereDeg ( float lat1, float lon1, float lat2, float lon2 )
  3424. {
  3425. static const double D = 2*6384000;
  3426. double dlat2 = TO_RAD2*( lat1 - lat2 );
  3427. double dlon2 = TO_RAD2*( lon1 - lon2 );
  3428. double a = sqr ( sin(dlat2) ) + cos ( TO_RAD*lat1 )*cos ( TO_RAD*lat2 )*sqr ( sin(dlon2) );
  3429. double c = asin ( Min ( 1.0, sqrt(a) ) );
  3430. return (float)(D*c);
  3431. }
  /// Fold a raw difference between two angles (in degrees) into the [0, 180] range,
  /// ie. compute the shortest angular distance between them.
  ///
  /// @param f  raw difference in degrees, any sign, any magnitude
  /// @return the folded difference; NaN for NaN or infinite input
  static inline float GeodistDegDiff ( float f )
  {
      f = (float)fabs(f);

      // drop the full turns in a single step
      // (subtracting 360 in a loop would spin forever on infinity, and on any value
      // big enough for f-360 to round back to f; fmod() is exact and always returns)
      if ( f>360 )
          f = (float)fmod ( f, 360.0 );

      // going the other way around is shorter
      if ( f>180 )
          f = 360-f;
      return f;
  }
  3441. float GeodistFlatDeg ( float fLat1, float fLon1, float fLat2, float fLon2 )
  3442. {
  3443. double c1 = cos ( TO_RAD2*( fLat1+fLat2 ) );
  3444. double c2 = 2*c1*c1-1; // cos(2*t)
  3445. double c3 = c1*(2*c2-1); // cos(3*t)
  3446. double k1 = 111132.09 - 566.05*c2;
  3447. double k2 = 111415.13*c1 - 94.55*c3;
  3448. float dlat = GeodistDegDiff ( fLat1-fLat2 );
  3449. float dlon = GeodistDegDiff ( fLon1-fLon2 );
  3450. return (float)sqrt ( k1*k1*dlat*dlat + k2*k2*dlon*dlon );
  3451. }
  3452. static inline float GeodistFastCos ( float x )
  3453. {
  3454. float y = (float)(fabs(x)*GEODIST_TABLE_COS/PI/2);
  3455. int i = int(y);
  3456. y -= i;
  3457. i &= ( GEODIST_TABLE_COS-1 );
  3458. return g_GeoCos[i] + ( g_GeoCos[i+1]-g_GeoCos[i] )*y;
  3459. }
  3460. static inline float GeodistFastSin ( float x )
  3461. {
  3462. float y = float(fabs(x)*GEODIST_TABLE_COS/PI/2);
  3463. int i = int(y);
  3464. y -= i;
  3465. i = ( i - GEODIST_TABLE_COS/4 ) & ( GEODIST_TABLE_COS-1 ); // cos(x-pi/2)=sin(x), costable/4=pi/2
  3466. return g_GeoCos[i] + ( g_GeoCos[i+1]-g_GeoCos[i] )*y;
  3467. }
  3468. /// fast implementation of asin(sqrt(x))
  3469. /// max error in floats 0.00369%, in doubles 0.00072%
  3470. static inline float GeodistFastAsinSqrt ( float x )
  3471. {
  3472. if ( x<0.122 )
  3473. {
  3474. // distance under 4546km, Taylor error under 0.00072%
  3475. float y = (float)sqrt(x);
  3476. return y + x*y*0.166666666666666f + x*x*y*0.075f + x*x*x*y*0.044642857142857f;
  3477. }
  3478. if ( x<0.948 )
  3479. {
  3480. // distance under 17083km, 512-entry LUT error under 0.00072%
  3481. x *= GEODIST_TABLE_ASIN;
  3482. int i = int(x);
  3483. return g_GeoAsin[i] + ( g_GeoAsin[i+1] - g_GeoAsin[i] )*( x-i );
  3484. }
  3485. return (float)asin ( sqrt(x) ); // distance over 17083km, just compute honestly
  3486. }
  3487. inline float GeodistAdaptiveDeg ( float lat1, float lon1, float lat2, float lon2 )
  3488. {
  3489. float dlat = GeodistDegDiff ( lat1-lat2 );
  3490. float dlon = GeodistDegDiff ( lon1-lon2 );
  3491. if ( dlon<13 )
  3492. {
  3493. // points are close enough; use flat ellipsoid model
  3494. // interpolate sqr(k1), sqr(k2) coefficients using latitudes midpoint
  3495. float m = ( lat1+lat2+180 )*GEODIST_TABLE_K/360; // [-90, 90] degrees -> [0, KTABLE] indexes
  3496. int i = int(m);
  3497. i &= ( GEODIST_TABLE_K-1 );
  3498. float kk1 = g_GeoFlatK[i][0] + ( g_GeoFlatK[i+1][0] - g_GeoFlatK[i][0] )*( m-i );
  3499. float kk2 = g_GeoFlatK[i][1] + ( g_GeoFlatK[i+1][1] - g_GeoFlatK[i][1] )*( m-i );
  3500. return (float)sqrt ( kk1*dlat*dlat + kk2*dlon*dlon );
  3501. } else
  3502. {
  3503. // points too far away; use haversine
  3504. static const float D = 2*6371000;
  3505. float a = fsqr ( GeodistFastSin ( dlat*TO_RADF2 ) ) + GeodistFastCos ( lat1*TO_RADF ) * GeodistFastCos ( lat2*TO_RADF ) * fsqr ( GeodistFastSin ( dlon*TO_RADF2 ) );
  3506. return (float)( D*GeodistFastAsinSqrt(a) );
  3507. }
  3508. }
  3509. inline float GeodistAdaptiveRad ( float lat1, float lon1, float lat2, float lon2 )
  3510. {
  3511. // cut-paste-optimize, maybe?
  3512. return GeodistAdaptiveDeg ( lat1*TO_DEGF, lon1*TO_DEGF, lat2*TO_DEGF, lon2*TO_DEGF );
  3513. }
  3514. static inline void GeoTesselate ( CSphVector<float> & dIn )
  3515. {
  3516. // 1 minute of latitude, max
  3517. // (it varies from 1842.9 to 1861.57 at 0 to 90 respectively)
  3518. static const float LAT_MINUTE = 1861.57f;
  3519. // 1 minute of longitude in metres, at different latitudes
  3520. static const float LON_MINUTE[] =
  3521. {
  3522. 1855.32f, 1848.31f, 1827.32f, 1792.51f, // 0, 5, 10, 15
  3523. 1744.12f, 1682.50f, 1608.10f, 1521.47f, // 20, 25, 30, 35
  3524. 1423.23f, 1314.11f, 1194.93f, 1066.57f, // 40, 45, 50, 55
  3525. 930.00f, 786.26f, 636.44f, 481.70f, // 60, 65 70, 75
  3526. 323.22f, 162.24f, 0.0f // 80, 85, 90
  3527. };
  3528. // tesselation threshold
  3529. // FIXME! make this configurable?
  3530. static const float TESSELATE_TRESH = 500000.0f; // 500 km, error under 150m or 0.03%
  3531. CSphVector<float> dOut;
  3532. for ( int i=0; i<dIn.GetLength(); i+=2 )
  3533. {
  3534. // add the current vertex in any event
  3535. dOut.Add ( dIn[i] );
  3536. dOut.Add ( dIn[i+1] );
  3537. // get edge lat/lon, convert to radians
  3538. bool bLast = ( i==dIn.GetLength()-2 );
  3539. float fLat1 = dIn[i];
  3540. float fLon1 = dIn[i+1];
  3541. float fLat2 = dIn [ bLast ? 0 : (i+2) ];
  3542. float fLon2 = dIn [ bLast ? 1 : (i+3) ];
  3543. // quick rough geodistance estimation
  3544. float fMinLat = Min ( fLat1, fLat2 );
  3545. int iLatBand = (int) floor ( fabs ( fMinLat ) / 5.0f );
  3546. iLatBand = iLatBand % 18;
  3547. float d = (float) (60.0f*( LAT_MINUTE*fabs ( fLat1-fLat2 ) + LON_MINUTE [ iLatBand ]*fabs ( fLon1-fLon2 ) ) );
  3548. if ( d<=TESSELATE_TRESH )
  3549. continue;
  3550. // convert to radians
  3551. // FIXME! make units configurable
  3552. fLat1 *= TO_RADF;
  3553. fLon1 *= TO_RADF;
  3554. fLat2 *= TO_RADF;
  3555. fLon2 *= TO_RADF;
  3556. // compute precise geodistance
  3557. d = GeodistSphereRad ( fLat1, fLon1, fLat2, fLon2 );
  3558. if ( d<=TESSELATE_TRESH )
  3559. continue;
  3560. int iSegments = (int) ceil ( d / TESSELATE_TRESH );
  3561. // compute arc distance
  3562. // OPTIMIZE! maybe combine with CalcGeodist?
  3563. d = (float)acos ( sin(fLat1)*sin(fLat2) + cos(fLat1)*cos(fLat2)*cos(fLon1-fLon2) );
  3564. const float isd = (float)(1.0f / sin(d));
  3565. const float clat1 = (float)cos(fLat1);
  3566. const float slat1 = (float)sin(fLat1);
  3567. const float clon1 = (float)cos(fLon1);
  3568. const float slon1 = (float)sin(fLon1);
  3569. const float clat2 = (float)cos(fLat2);
  3570. const float slat2 = (float)sin(fLat2);
  3571. const float clon2 = (float)cos(fLon2);
  3572. const float slon2 = (float)sin(fLon2);
  3573. for ( int j=1; j<iSegments; j++ )
  3574. {
  3575. float f = float(j) / float(iSegments); // needed distance fraction
  3576. float a = (float)sin ( (1-f)*d ) * isd;
  3577. float b = (float)sin ( f*d ) * isd;
  3578. float x = a*clat1*clon1 + b*clat2*clon2;
  3579. float y = a*clat1*slon1 + b*clat2*slon2;
  3580. float z = a*slat1 + b*slat2;
  3581. dOut.Add ( (float)( TO_DEG * atan2 ( z, sqrt ( x*x+y*y ) ) ) );
  3582. dOut.Add ( (float)( TO_DEG * atan2 ( y, x ) ) );
  3583. }
  3584. }
  3585. // swap 'em results
  3586. dIn.SwapData ( dOut );
  3587. }
  3588. //////////////////////////////////////////////////////////////////////////
  3589. class Expr_ContainsConstvec_c : public Expr_Contains_c
  3590. {
  3591. protected:
  3592. CSphVector<float> m_dPoly;
  3593. float m_fMinX;
  3594. float m_fMinY;
  3595. float m_fMaxX;
  3596. float m_fMaxY;
  3597. public:
  3598. Expr_ContainsConstvec_c ( ISphExpr * pLat, ISphExpr * pLon, const CSphVector<int> & dNodes, const ExprNode_t * pNodes, bool bGeoTesselate )
  3599. : Expr_Contains_c ( pLat, pLon )
  3600. {
  3601. // copy polygon data
  3602. assert ( dNodes.GetLength()>=6 );
  3603. m_dPoly.Resize ( dNodes.GetLength() );
  3604. ARRAY_FOREACH ( i, dNodes )
  3605. m_dPoly[i] = FloatVal ( &pNodes[dNodes[i]] );
  3606. // handle (huge) geosphere polygons
  3607. if ( bGeoTesselate )
  3608. GeoTesselate ( m_dPoly );
  3609. // compute bbox
  3610. m_fMinX = m_fMaxX = m_dPoly[0];
  3611. for ( int i=2; i<m_dPoly.GetLength(); i+=2 )
  3612. {
  3613. m_fMinX = Min ( m_fMinX, m_dPoly[i] );
  3614. m_fMaxX = Max ( m_fMaxX, m_dPoly[i] );
  3615. }
  3616. m_fMinY = m_fMaxY = m_dPoly[1];
  3617. for ( int i=3; i<m_dPoly.GetLength(); i+=2 )
  3618. {
  3619. m_fMinY = Min ( m_fMinY, m_dPoly[i] );
  3620. m_fMaxY = Max ( m_fMaxY, m_dPoly[i] );
  3621. }
  3622. }
  3623. virtual int IntEval ( const CSphMatch & tMatch ) const
  3624. {
  3625. // eval args, do bbox check
  3626. float fLat = m_pLat->Eval(tMatch);
  3627. if ( fLat<m_fMinX || fLat>m_fMaxX )
  3628. return 0;
  3629. float fLon = m_pLon->Eval(tMatch);
  3630. if ( fLon<m_fMinY || fLon>m_fMaxY )
  3631. return 0;
  3632. // do the polygon check
  3633. return Contains ( fLat, fLon, m_dPoly.GetLength(), m_dPoly.Begin() );
  3634. }
  3635. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  3636. {
  3637. EXPR_CLASS_NAME("Expr_ContainsConstvec_c");
  3638. CALC_POD_HASHES(m_dPoly);
  3639. return CALC_PARENT_HASH();
  3640. }
  3641. };
  3642. class Expr_ContainsExprvec_c : public Expr_Contains_c
  3643. {
  3644. protected:
  3645. mutable CSphVector<float> m_dPoly;
  3646. CSphVector<ISphExpr*> m_dExpr;
  3647. public:
  3648. Expr_ContainsExprvec_c ( ISphExpr * pLat, ISphExpr * pLon, CSphVector<ISphExpr*> & dExprs )
  3649. : Expr_Contains_c ( pLat, pLon )
  3650. {
  3651. m_dExpr.SwapData ( dExprs );
  3652. m_dPoly.Resize ( m_dExpr.GetLength() );
  3653. }
  3654. ~Expr_ContainsExprvec_c()
  3655. {
  3656. ARRAY_FOREACH ( i, m_dExpr )
  3657. SafeRelease ( m_dExpr[i] );
  3658. }
  3659. virtual int IntEval ( const CSphMatch & tMatch ) const
  3660. {
  3661. ARRAY_FOREACH ( i, m_dExpr )
  3662. m_dPoly[i] = m_dExpr[i]->Eval ( tMatch );
  3663. return Contains ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_dPoly.GetLength(), m_dPoly.Begin() );
  3664. }
  3665. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  3666. {
  3667. Expr_Contains_c::Command ( eCmd, pArg );
  3668. ARRAY_FOREACH ( i, m_dExpr )
  3669. m_dExpr[i]->Command ( eCmd, pArg );
  3670. }
  3671. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  3672. {
  3673. EXPR_CLASS_NAME("Expr_ContainsExprvec_c");
  3674. CALC_CHILD_HASHES(m_dExpr);
  3675. return CALC_PARENT_HASH();
  3676. }
  3677. };
  3678. class Expr_ContainsStrattr_c : public Expr_Contains_c
  3679. {
  3680. protected:
  3681. ISphExpr * m_pStr;
  3682. bool m_bGeo;
  3683. public:
  3684. Expr_ContainsStrattr_c ( ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pStr, bool bGeo )
  3685. : Expr_Contains_c (pLat, pLon )
  3686. , m_pStr ( pStr )
  3687. , m_bGeo ( bGeo )
  3688. {}
  3689. ~Expr_ContainsStrattr_c()
  3690. {
  3691. SafeRelease ( m_pStr );
  3692. }
  3693. static void ParsePoly ( const char * p, int iLen, CSphVector<float> & dPoly )
  3694. {
  3695. const char * pMax = p+iLen;
  3696. while ( p<pMax )
  3697. {
  3698. if ( isdigit(p[0]) || ( p+1<pMax && p[0]=='-' && isdigit(p[1]) ) )
  3699. dPoly.Add ( (float)strtod ( p, (char**)&p ) );
  3700. else
  3701. p++;
  3702. }
  3703. }
  3704. virtual int IntEval ( const CSphMatch & tMatch ) const
  3705. {
  3706. const char * pStr;
  3707. assert ( !m_pStr->IsStringPtr() ); // aware of mem leaks caused by some StringEval implementations
  3708. int iLen = m_pStr->StringEval ( tMatch, (const BYTE **)&pStr );
  3709. CSphVector<float> dPoly;
  3710. ParsePoly ( pStr, iLen, dPoly );
  3711. if ( dPoly.GetLength()<6 )
  3712. return 0;
  3713. // OPTIMIZE? add quick bbox check too?
  3714. if ( m_bGeo )
  3715. GeoTesselate ( dPoly );
  3716. return Contains ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), dPoly.GetLength(), dPoly.Begin() );
  3717. }
  3718. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  3719. {
  3720. Expr_Contains_c::Command ( eCmd, pArg );
  3721. m_pStr->Command ( eCmd, pArg );
  3722. }
  3723. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  3724. {
  3725. EXPR_CLASS_NAME("Expr_ContainsStrattr_c");
  3726. CALC_CHILD_HASH(m_pStr);
  3727. return CALC_PARENT_HASH();
  3728. }
  3729. };
  3730. ISphExpr * ExprParser_t::CreateContainsNode ( const ExprNode_t & tNode )
  3731. {
  3732. // get and check them args
  3733. const ExprNode_t & tArglist = m_dNodes [ tNode.m_iLeft ];
  3734. const int iPoly = m_dNodes [ tArglist.m_iLeft ].m_iLeft;
  3735. const int iLat = m_dNodes [ tArglist.m_iLeft ].m_iRight;
  3736. const int iLon = tArglist.m_iRight;
  3737. assert ( IsNumeric ( m_dNodes[iLat].m_eRetType ) );
  3738. assert ( IsNumeric ( m_dNodes[iLat].m_eRetType ) );
  3739. assert ( m_dNodes[iPoly].m_eRetType==SPH_ATTR_POLY2D );
  3740. // create evaluator
  3741. // gotta handle an optimized constant poly case
  3742. CSphVector<int> dPolyArgs;
  3743. GatherArgNodes ( m_dNodes[iPoly].m_iLeft, dPolyArgs );
  3744. bool bGeoTesselate = ( m_dNodes[iPoly].m_iToken==TOK_FUNC && m_dNodes[iPoly].m_iFunc==FUNC_GEOPOLY2D );
  3745. if ( dPolyArgs.GetLength()==1 && m_dNodes[dPolyArgs[0]].m_iToken==TOK_ATTR_STRING )
  3746. return new Expr_ContainsStrattr_c ( CreateTree(iLat), CreateTree(iLon), CreateTree ( dPolyArgs[0] ), bGeoTesselate );
  3747. bool bConst = ARRAY_ALL ( bConst, dPolyArgs, IsConst ( &m_dNodes [ dPolyArgs[_all] ] ) );
  3748. if ( bConst )
  3749. {
  3750. // POLY2D(numeric-consts)
  3751. return new Expr_ContainsConstvec_c ( CreateTree(iLat), CreateTree(iLon), dPolyArgs, m_dNodes.Begin(), bGeoTesselate );
  3752. } else
  3753. {
  3754. // POLY2D(generic-exprs)
  3755. CSphVector<ISphExpr*> dExprs ( dPolyArgs.GetLength() );
  3756. ARRAY_FOREACH ( i, dExprs )
  3757. dExprs[i] = CreateTree ( dPolyArgs[i] );
  3758. return new Expr_ContainsExprvec_c ( CreateTree(iLat), CreateTree(iLon), dExprs );
  3759. }
  3760. }
  3761. class Expr_Remap_c : public ISphExpr
  3762. {
  3763. struct CondValPair_t
  3764. {
  3765. int64_t m_iCond;
  3766. union
  3767. {
  3768. int64_t m_iVal;
  3769. float m_fVal;
  3770. };
  3771. explicit CondValPair_t ( int64_t iCond=0 ) : m_iCond ( iCond ), m_iVal ( 0 ) {}
  3772. bool operator< ( const CondValPair_t & rhs ) const { return m_iCond<rhs.m_iCond; }
  3773. bool operator== ( const CondValPair_t & rhs ) const { return m_iCond==rhs.m_iCond; }
  3774. };
  3775. ISphExpr * m_pCond;
  3776. ISphExpr * m_pVal;
  3777. CSphVector<CondValPair_t> m_dPairs;
  3778. public:
  3779. Expr_Remap_c ( ISphExpr * pCondExpr, ISphExpr * pValExpr, const CSphVector<int64_t> & dConds, const ConstList_c & tVals )
  3780. : m_pCond ( pCondExpr )
  3781. , m_pVal ( pValExpr )
  3782. , m_dPairs ( dConds.GetLength() )
  3783. {
  3784. assert ( pCondExpr && pValExpr );
  3785. assert ( dConds.GetLength() );
  3786. assert ( dConds.GetLength()==tVals.m_dInts.GetLength() ||
  3787. dConds.GetLength()==tVals.m_dFloats.GetLength() );
  3788. if ( tVals.m_dInts.GetLength() )
  3789. ARRAY_FOREACH ( i, m_dPairs )
  3790. {
  3791. m_dPairs[i].m_iCond = dConds[i];
  3792. m_dPairs[i].m_iVal = tVals.m_dInts[i];
  3793. }
  3794. else
  3795. ARRAY_FOREACH ( i, m_dPairs )
  3796. {
  3797. m_dPairs[i].m_iCond = dConds[i];
  3798. m_dPairs[i].m_fVal = tVals.m_dFloats[i];
  3799. }
  3800. m_dPairs.Uniq();
  3801. }
  3802. ~Expr_Remap_c()
  3803. {
  3804. SafeRelease ( m_pCond );
  3805. SafeRelease ( m_pVal );
  3806. }
  3807. virtual float Eval ( const CSphMatch & tMatch ) const
  3808. {
  3809. const CondValPair_t * p = m_dPairs.BinarySearch ( CondValPair_t ( m_pCond->Int64Eval ( tMatch ) ) );
  3810. if ( p )
  3811. return p->m_fVal;
  3812. return m_pVal->Eval ( tMatch );
  3813. }
  3814. virtual int IntEval ( const CSphMatch & tMatch ) const
  3815. {
  3816. return (int)Int64Eval ( tMatch );
  3817. }
  3818. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  3819. {
  3820. const CondValPair_t * p = m_dPairs.BinarySearch ( CondValPair_t ( m_pCond->Int64Eval ( tMatch ) ) );
  3821. if ( p )
  3822. return p->m_iVal;
  3823. return m_pVal->Int64Eval ( tMatch );
  3824. }
  3825. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  3826. {
  3827. m_pCond->Command ( eCmd, pArg );
  3828. m_pVal->Command ( eCmd, pArg );
  3829. }
  3830. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  3831. {
  3832. EXPR_CLASS_NAME("Expr_Remap_c");
  3833. CALC_POD_HASHES(m_dPairs);
  3834. CALC_CHILD_HASH(m_pCond);
  3835. CALC_CHILD_HASH(m_pVal);
  3836. return CALC_DEP_HASHES();
  3837. }
  3838. };
  3839. //////////////////////////////////////////////////////////////////////////
  3840. /// fold nodes subtree into opcodes
  3841. ISphExpr * ExprParser_t::CreateTree ( int iNode )
  3842. {
  3843. if ( iNode<0 || GetError() )
  3844. return NULL;
  3845. const ExprNode_t & tNode = m_dNodes[iNode];
  3846. // avoid spawning argument node in some cases
  3847. bool bSkipLeft = false;
  3848. bool bSkipRight = false;
  3849. if ( tNode.m_iToken==TOK_FUNC )
  3850. {
  3851. switch ( tNode.m_iFunc )
  3852. {
  3853. case FUNC_NOW:
  3854. case FUNC_IN:
  3855. case FUNC_EXIST:
  3856. case FUNC_GEODIST:
  3857. case FUNC_CONTAINS:
  3858. case FUNC_ZONESPANLIST:
  3859. case FUNC_RANKFACTORS:
  3860. case FUNC_PACKEDFACTORS:
  3861. case FUNC_FACTORS:
  3862. case FUNC_BM25F:
  3863. case FUNC_CURTIME:
  3864. case FUNC_UTC_TIME:
  3865. case FUNC_UTC_TIMESTAMP:
  3866. case FUNC_ALL:
  3867. case FUNC_ANY:
  3868. case FUNC_INDEXOF:
  3869. case FUNC_MIN_TOP_WEIGHT:
  3870. case FUNC_MIN_TOP_SORTVAL:
  3871. case FUNC_REMAP:
  3872. bSkipLeft = true;
  3873. bSkipRight = true;
  3874. break;
  3875. default:
  3876. break;
  3877. }
  3878. }
  3879. ISphExpr * pLeft = bSkipLeft ? NULL : CreateTree ( tNode.m_iLeft );
  3880. ISphExpr * pRight = bSkipRight ? NULL : CreateTree ( tNode.m_iRight );
  3881. if ( GetError() )
  3882. {
  3883. SafeRelease ( pLeft );
  3884. SafeRelease ( pRight );
  3885. return NULL;
  3886. }
  3887. #define LOC_SPAWN_POLY(_classname) \
  3888. if ( tNode.m_eArgType==SPH_ATTR_INTEGER ) return new _classname##Int_c ( pLeft, pRight ); \
  3889. else if ( tNode.m_eArgType==SPH_ATTR_BIGINT ) return new _classname##Int64_c ( pLeft, pRight ); \
  3890. else return new _classname##Float_c ( pLeft, pRight );
  3891. int iOp = tNode.m_iToken;
  3892. if ( iOp=='+' || iOp=='-' || iOp=='*' || iOp=='/' || iOp=='&' || iOp=='|' || iOp=='%' || iOp=='<' || iOp=='>'
  3893. || iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE || iOp==TOK_AND || iOp==TOK_OR || iOp==TOK_NOT )
  3894. {
  3895. if ( pLeft && m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD && m_dNodes[tNode.m_iLeft].m_iToken==TOK_ATTR_JSON )
  3896. pLeft = new Expr_JsonFieldConv_c ( pLeft );
  3897. if ( pRight && m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_JSON_FIELD && m_dNodes[tNode.m_iRight].m_iToken==TOK_ATTR_JSON )
  3898. pRight = new Expr_JsonFieldConv_c ( pRight );
  3899. }
  3900. switch ( tNode.m_iToken )
  3901. {
  3902. case TOK_ATTR_INT: return new Expr_GetInt_c ( tNode.m_tLocator, tNode.m_iLocator );
  3903. case TOK_ATTR_BITS: return new Expr_GetBits_c ( tNode.m_tLocator, tNode.m_iLocator );
  3904. case TOK_ATTR_FLOAT: return new Expr_GetFloat_c ( tNode.m_tLocator, tNode.m_iLocator );
  3905. case TOK_ATTR_SINT: return new Expr_GetSint_c ( tNode.m_tLocator, tNode.m_iLocator );
  3906. case TOK_ATTR_STRING: return new Expr_GetString_c ( tNode.m_tLocator, tNode.m_iLocator );
  3907. case TOK_ATTR_MVA64:
  3908. case TOK_ATTR_MVA32: return new Expr_GetMva_c ( tNode.m_tLocator, tNode.m_iLocator );
  3909. case TOK_ATTR_FACTORS: return new Expr_GetFactorsAttr_c ( tNode.m_tLocator, tNode.m_iLocator );
  3910. case TOK_CONST_FLOAT: return new Expr_GetConst_c ( tNode.m_fConst );
  3911. case TOK_CONST_INT:
  3912. if ( tNode.m_eRetType==SPH_ATTR_INTEGER )
  3913. return new Expr_GetIntConst_c ( (int)tNode.m_iConst );
  3914. else if ( tNode.m_eRetType==SPH_ATTR_BIGINT )
  3915. return new Expr_GetInt64Const_c ( tNode.m_iConst );
  3916. else
  3917. return new Expr_GetConst_c ( float(tNode.m_iConst) );
  3918. break;
  3919. case TOK_CONST_STRING:
  3920. return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ), true );
  3921. case TOK_SUBKEY:
  3922. return new Expr_GetStrConst_c ( m_sExpr+(int)( tNode.m_iConst>>32 ), (int)( tNode.m_iConst & 0xffffffffUL ), false );
  3923. case TOK_ID: return new Expr_GetId_c ();
  3924. case TOK_WEIGHT: return new Expr_GetWeight_c ();
  3925. case '+': return new Expr_Add_c ( pLeft, pRight ); break;
  3926. case '-': return new Expr_Sub_c ( pLeft, pRight ); break;
  3927. case '*': return new Expr_Mul_c ( pLeft, pRight ); break;
  3928. case '/': return new Expr_Div_c ( pLeft, pRight ); break;
  3929. case '&': return new Expr_BitAnd_c ( pLeft, pRight ); break;
  3930. case '|': return new Expr_BitOr_c ( pLeft, pRight ); break;
  3931. case '%': return new Expr_Mod_c ( pLeft, pRight ); break;
  3932. case '<': LOC_SPAWN_POLY ( Expr_Lt ); break;
  3933. case '>': LOC_SPAWN_POLY ( Expr_Gt ); break;
  3934. case TOK_LTE: LOC_SPAWN_POLY ( Expr_Lte ); break;
  3935. case TOK_GTE: LOC_SPAWN_POLY ( Expr_Gte ); break;
  3936. case TOK_EQ: if ( ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_STRING ||
  3937. m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_STRINGPTR ) &&
  3938. ( m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRING ||
  3939. m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRINGPTR ) )
  3940. return new Expr_StrEq_c ( pLeft, pRight, m_eCollation );
  3941. else if ( ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD ) &&
  3942. ( m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRING ||
  3943. m_dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_STRINGPTR ) )
  3944. return new Expr_StrEq_c ( pLeft, pRight, m_eCollation );
  3945. LOC_SPAWN_POLY ( Expr_Eq ); break;
  3946. case TOK_NE: LOC_SPAWN_POLY ( Expr_Ne ); break;
  3947. case TOK_AND: LOC_SPAWN_POLY ( Expr_And ); break;
  3948. case TOK_OR: LOC_SPAWN_POLY ( Expr_Or ); break;
  3949. case TOK_NOT:
  3950. if ( tNode.m_eArgType==SPH_ATTR_BIGINT )
  3951. return new Expr_NotInt64_c ( pLeft );
  3952. else
  3953. return new Expr_NotInt_c ( pLeft );
  3954. break;
  3955. case ',':
  3956. if ( pLeft && pRight )
  3957. return new Expr_Arglist_c ( pLeft, pRight );
  3958. break;
  3959. case TOK_NEG: assert ( pRight==NULL ); return new Expr_Neg_c ( pLeft ); break;
  3960. case TOK_FUNC:
  3961. {
  3962. // fold arglist to array
  3963. Func_e eFunc = (Func_e)tNode.m_iFunc;
  3964. assert ( g_dFuncs[tNode.m_iFunc].m_eFunc==eFunc );
  3965. CSphVector<ISphExpr *> dArgs;
  3966. if ( !bSkipLeft )
  3967. FoldArglist ( pLeft, dArgs );
  3968. // spawn proper function
  3969. assert ( tNode.m_iFunc>=0 && tNode.m_iFunc<int(sizeof(g_dFuncs)/sizeof(g_dFuncs[0])) );
  3970. assert (
  3971. ( bSkipLeft ) || // function will handle its arglist,
  3972. ( g_dFuncs[tNode.m_iFunc].m_iArgs>=0 && g_dFuncs[tNode.m_iFunc].m_iArgs==dArgs.GetLength() ) || // arg count matches,
  3973. ( g_dFuncs[tNode.m_iFunc].m_iArgs<0 && -g_dFuncs[tNode.m_iFunc].m_iArgs<=dArgs.GetLength() ) ); // or min vararg count reached
  3974. switch ( eFunc )
  3975. {
  3976. case FUNC_NOW: return new Expr_Now_c(m_iConstNow); break;
  3977. case FUNC_ABS: return new Expr_Abs_c ( dArgs[0] );
  3978. case FUNC_CEIL: return new Expr_Ceil_c ( dArgs[0] );
  3979. case FUNC_FLOOR: return new Expr_Floor_c ( dArgs[0] );
  3980. case FUNC_SIN: return new Expr_Sin_c ( dArgs[0] );
  3981. case FUNC_COS: return new Expr_Cos_c ( dArgs[0] );
  3982. case FUNC_LN: return new Expr_Ln_c ( dArgs[0] );
  3983. case FUNC_LOG2: return new Expr_Log2_c ( dArgs[0] );
  3984. case FUNC_LOG10: return new Expr_Log10_c ( dArgs[0] );
  3985. case FUNC_EXP: return new Expr_Exp_c ( dArgs[0] );
  3986. case FUNC_SQRT: return new Expr_Sqrt_c ( dArgs[0] );
  3987. case FUNC_SINT: return new Expr_Sint_c ( dArgs[0] );
  3988. case FUNC_CRC32: return new Expr_Crc32_c ( dArgs[0] );
  3989. case FUNC_FIBONACCI:return new Expr_Fibonacci_c ( dArgs[0] );
  3990. case FUNC_DAY: return ExprDay ( dArgs[0] );
  3991. case FUNC_MONTH: return ExprMonth ( dArgs[0] );
  3992. case FUNC_YEAR: return ExprYear ( dArgs[0] );
  3993. case FUNC_YEARMONTH: return ExprYearMonth ( dArgs[0] );
  3994. case FUNC_YEARMONTHDAY: return ExprYearMonthDay ( dArgs[0] );
  3995. case FUNC_HOUR: return new Expr_Hour_c ( dArgs[0] );
  3996. case FUNC_MINUTE: return new Expr_Minute_c ( dArgs[0] );
  3997. case FUNC_SECOND: return new Expr_Second_c ( dArgs[0] );
  3998. case FUNC_MIN: return new Expr_Min_c ( dArgs[0], dArgs[1] );
  3999. case FUNC_MAX: return new Expr_Max_c ( dArgs[0], dArgs[1] );
  4000. case FUNC_POW: return new Expr_Pow_c ( dArgs[0], dArgs[1] );
  4001. case FUNC_IDIV: return new Expr_Idiv_c ( dArgs[0], dArgs[1] );
  4002. case FUNC_IF: return new Expr_If_c ( dArgs[0], dArgs[1], dArgs[2] );
  4003. case FUNC_MADD: return new Expr_Madd_c ( dArgs[0], dArgs[1], dArgs[2] );
  4004. case FUNC_MUL3: return new Expr_Mul3_c ( dArgs[0], dArgs[1], dArgs[2] );
  4005. case FUNC_ATAN2: return new Expr_Atan2_c ( dArgs[0], dArgs[1] );
  4006. case FUNC_RAND: return new Expr_Rand_c ( dArgs.GetLength() ? dArgs[0] : NULL, tNode.m_iLeft>=0 ? IsConst ( &m_dNodes[tNode.m_iLeft] ) : false );
  4007. case FUNC_INTERVAL: return CreateIntervalNode ( tNode.m_iLeft, dArgs );
  4008. case FUNC_IN: return CreateInNode ( iNode );
  4009. case FUNC_LENGTH: return CreateLengthNode ( tNode, dArgs[0] );
  4010. case FUNC_BITDOT: return CreateBitdotNode ( tNode.m_iLeft, dArgs );
  4011. case FUNC_REMAP:
  4012. {
  4013. ISphExpr * pCond = CreateTree ( tNode.m_iLeft );
  4014. ISphExpr * pVal = CreateTree ( tNode.m_iRight );
  4015. assert ( pCond && pVal );
  4016. // This is a hack. I know how parser fills m_dNodes and thus know where to find constlists.
  4017. const CSphVector<int64_t> & dConds = m_dNodes [ iNode-2 ].m_pConsts->m_dInts;
  4018. const ConstList_c & tVals = *m_dNodes [ iNode-1 ].m_pConsts;
  4019. return new Expr_Remap_c ( pCond, pVal, dConds, tVals );
  4020. }
  4021. case FUNC_GEODIST: return CreateGeodistNode ( tNode.m_iLeft );
  4022. case FUNC_EXIST: return CreateExistNode ( tNode );
  4023. case FUNC_CONTAINS: return CreateContainsNode ( tNode );
  4024. case FUNC_POLY2D:
  4025. case FUNC_GEOPOLY2D:break; // just make gcc happy
  4026. case FUNC_ZONESPANLIST:
  4027. m_bHasZonespanlist = true;
  4028. m_eEvalStage = SPH_EVAL_PRESORT;
  4029. return new Expr_GetZonespanlist_c ();
  4030. case FUNC_TO_STRING:
  4031. return new Expr_ToString_c ( dArgs[0], m_dNodes [ tNode.m_iLeft ].m_eRetType );
  4032. case FUNC_RANKFACTORS:
  4033. m_eEvalStage = SPH_EVAL_PRESORT;
  4034. return new Expr_GetRankFactors_c();
  4035. case FUNC_PACKEDFACTORS:
  4036. case FUNC_FACTORS:
  4037. return CreatePFNode ( tNode.m_iLeft );
  4038. case FUNC_BM25F:
  4039. {
  4040. m_uPackedFactorFlags |= SPH_FACTOR_ENABLE;
  4041. CSphVector<int> dBM25FArgs;
  4042. GatherArgNodes ( tNode.m_iLeft, dBM25FArgs );
  4043. const ExprNode_t & tLeft = m_dNodes [ dBM25FArgs[0] ];
  4044. const ExprNode_t & tRight = m_dNodes [ dBM25FArgs[1] ];
  4045. float fK1 = tLeft.m_fConst;
  4046. float fB = tRight.m_fConst;
  4047. fK1 = Max ( fK1, 0.001f );
  4048. fB = Min ( Max ( fB, 0.0f ), 1.0f );
  4049. CSphVector<CSphNamedVariant> * pFieldWeights = NULL;
  4050. if ( dBM25FArgs.GetLength()>2 )
  4051. pFieldWeights = &m_dNodes [ dBM25FArgs[2] ].m_pMapArg->m_dPairs;
  4052. return new Expr_BM25F_c ( fK1, fB, pFieldWeights );
  4053. }
  4054. case FUNC_BIGINT:
  4055. case FUNC_INTEGER:
  4056. case FUNC_DOUBLE:
  4057. case FUNC_UINT:
  4058. if ( m_dNodes[tNode.m_iLeft].m_iToken==TOK_ATTR_JSON )
  4059. return new Expr_JsonFieldConv_c ( dArgs[0] );
  4060. return dArgs[0];
  4061. case FUNC_LEAST: return CreateAggregateNode ( tNode, SPH_AGGR_MIN, dArgs[0] );
  4062. case FUNC_GREATEST: return CreateAggregateNode ( tNode, SPH_AGGR_MAX, dArgs[0] );
  4063. case FUNC_CURTIME: return new Expr_Time_c ( false, false ); break;
  4064. case FUNC_UTC_TIME: return new Expr_Time_c ( true, false ); break;
  4065. case FUNC_UTC_TIMESTAMP: return new Expr_Time_c ( true, true ); break;
  4066. case FUNC_TIMEDIFF: return new Expr_TimeDiff_c ( dArgs[0], dArgs[1] ); break;
  4067. case FUNC_ALL:
  4068. case FUNC_ANY:
  4069. case FUNC_INDEXOF:
  4070. return CreateForInNode ( iNode );
  4071. case FUNC_MIN_TOP_WEIGHT:
  4072. m_eEvalStage = SPH_EVAL_PRESORT;
  4073. return new Expr_MinTopWeight();
  4074. break;
  4075. case FUNC_MIN_TOP_SORTVAL:
  4076. m_eEvalStage = SPH_EVAL_PRESORT;
  4077. return new Expr_MinTopSortval();
  4078. break;
  4079. default: // just make gcc happy
  4080. break;
  4081. }
  4082. assert ( 0 && "unhandled function id" );
  4083. break;
  4084. }
  4085. case TOK_UDF: return CreateUdfNode ( tNode.m_iFunc, pLeft ); break;
  4086. case TOK_HOOK_IDENT: return m_pHook->CreateNode ( tNode.m_iFunc, NULL, NULL, m_sCreateError ); break;
  4087. case TOK_HOOK_FUNC: return m_pHook->CreateNode ( tNode.m_iFunc, pLeft, &m_eEvalStage, m_sCreateError ); break;
  4088. case TOK_MAP_ARG:
  4089. // tricky bit
  4090. // data gets moved (!) from node to ISphExpr at this point
  4091. return new Expr_MapArg_c ( tNode.m_pMapArg->m_dPairs );
  4092. break;
  4093. case TOK_ATTR_JSON:
  4094. if ( pLeft && m_dNodes[tNode.m_iLeft].m_iToken==TOK_SUBKEY && !tNode.m_tLocator.m_bDynamic )
  4095. {
  4096. // json key is a single static subkey, switch to fastpath
  4097. return new Expr_JsonFastKey_c ( tNode.m_tLocator, tNode.m_iLocator, pLeft );
  4098. } else
  4099. {
  4100. // json key is a generic expression, use generic catch-all JsonField
  4101. CSphVector<ISphExpr *> dArgs;
  4102. CSphVector<ESphAttr> dTypes;
  4103. if ( pLeft ) // may be NULL (top level array)
  4104. {
  4105. FoldArglist ( pLeft, dArgs );
  4106. GatherArgRetTypes ( tNode.m_iLeft, dTypes );
  4107. }
  4108. return new Expr_JsonField_c ( tNode.m_tLocator, tNode.m_iLocator, dArgs, dTypes );
  4109. }
  4110. break;
  4111. case TOK_ITERATOR:
  4112. {
  4113. // iterator, e.g. handles "x.gid" in SELECT ALL(x.gid=1 FOR x IN json.array)
  4114. CSphVector<ISphExpr *> dArgs;
  4115. CSphVector<ESphAttr> dTypes;
  4116. if ( pLeft )
  4117. {
  4118. FoldArglist ( pLeft, dArgs );
  4119. GatherArgRetTypes ( tNode.m_iLeft, dTypes );
  4120. }
  4121. return new Expr_JsonFieldConv_c ( new Expr_Iterator_c ( tNode.m_tLocator, tNode.m_iLocator, dArgs, dTypes, tNode.m_pAttr ) );
  4122. }
  4123. case TOK_IDENT: m_sCreateError.SetSprintf ( "unknown column: %s", tNode.m_sIdent ); break;
  4124. case TOK_IS_NULL:
  4125. case TOK_IS_NOT_NULL:
  4126. if ( m_dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD )
  4127. return new Expr_JsonFieldIsNull_c ( pLeft, tNode.m_iToken==TOK_IS_NULL );
  4128. else
  4129. return new Expr_GetIntConst_c ( tNode.m_iToken!=TOK_IS_NULL );
  4130. default: assert ( 0 && "unhandled token type" ); break;
  4131. }
  4132. #undef LOC_SPAWN_POLY
  4133. // fire exit
  4134. SafeRelease ( pLeft );
  4135. SafeRelease ( pRight );
  4136. return NULL;
  4137. }
  4138. //////////////////////////////////////////////////////////////////////////
  4139. /// arg-vs-set function (currently, IN or INTERVAL) evaluator traits
  4140. template < typename T >
  4141. class Expr_ArgVsSet_c : public ISphExpr
  4142. {
  4143. public:
  4144. explicit Expr_ArgVsSet_c ( ISphExpr * pArg ) : m_pArg ( pArg ) {}
  4145. ~Expr_ArgVsSet_c () { SafeRelease ( m_pArg ); }
  4146. virtual int IntEval ( const CSphMatch & tMatch ) const = 0;
  4147. virtual float Eval ( const CSphMatch & tMatch ) const { return (float) IntEval ( tMatch ); }
  4148. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const { return IntEval ( tMatch ); }
  4149. virtual void Command ( ESphExprCommand eCmd, void * pArg ) { if ( m_pArg ) m_pArg->Command ( eCmd, pArg ); }
  4150. protected:
  4151. ISphExpr * m_pArg;
  4152. T ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const;
  4153. virtual uint64_t CalcHash ( const char * szTag, const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4154. {
  4155. EXPR_CLASS_NAME_NOCHECK(szTag);
  4156. CALC_CHILD_HASH(m_pArg);
  4157. return CALC_DEP_HASHES();
  4158. }
  4159. };
  4160. template<> int Expr_ArgVsSet_c<int>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
  4161. {
  4162. return pArg->IntEval ( tMatch );
  4163. }
  4164. template<> DWORD Expr_ArgVsSet_c<DWORD>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
  4165. {
  4166. return (DWORD)pArg->IntEval ( tMatch );
  4167. }
  4168. template<> float Expr_ArgVsSet_c<float>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
  4169. {
  4170. return pArg->Eval ( tMatch );
  4171. }
  4172. template<> int64_t Expr_ArgVsSet_c<int64_t>::ExprEval ( ISphExpr * pArg, const CSphMatch & tMatch ) const
  4173. {
  4174. return pArg->Int64Eval ( tMatch );
  4175. }
  4176. /// arg-vs-constant-set
  4177. template < typename T >
  4178. class Expr_ArgVsConstSet_c : public Expr_ArgVsSet_c<T>
  4179. {
  4180. public:
  4181. /// take ownership of arg, pre-evaluate and dismiss turn points
  4182. Expr_ArgVsConstSet_c ( ISphExpr * pArg, CSphVector<ISphExpr *> & dArgs, int iSkip )
  4183. : Expr_ArgVsSet_c<T> ( pArg )
  4184. , m_bFloat ( false )
  4185. {
  4186. CSphMatch tDummy;
  4187. for ( int i=iSkip; i<dArgs.GetLength(); i++ )
  4188. {
  4189. m_dValues.Add ( Expr_ArgVsSet_c<T>::ExprEval ( dArgs[i], tDummy ) );
  4190. SafeRelease ( dArgs[i] );
  4191. }
  4192. CalcValueHash();
  4193. }
  4194. /// take ownership of arg, and copy that constlist
  4195. Expr_ArgVsConstSet_c ( ISphExpr * pArg, ConstList_c * pConsts, bool bKeepFloat )
  4196. : Expr_ArgVsSet_c<T> ( pArg )
  4197. , m_bFloat ( false )
  4198. {
  4199. if ( !pConsts )
  4200. return; // can happen on uservar path
  4201. if ( pConsts->m_eRetType==SPH_ATTR_FLOAT )
  4202. {
  4203. m_dValues.Reserve ( pConsts->m_dFloats.GetLength() );
  4204. if ( !bKeepFloat )
  4205. {
  4206. ARRAY_FOREACH ( i, pConsts->m_dFloats )
  4207. m_dValues.Add ( (T)pConsts->m_dFloats[i] );
  4208. } else
  4209. {
  4210. m_bFloat = true;
  4211. ARRAY_FOREACH ( i, pConsts->m_dFloats )
  4212. m_dValues.Add ( (T) sphF2DW ( pConsts->m_dFloats[i] ) );
  4213. }
  4214. } else
  4215. {
  4216. m_dValues.Reserve ( pConsts->m_dInts.GetLength() );
  4217. ARRAY_FOREACH ( i, pConsts->m_dInts )
  4218. m_dValues.Add ( (T)pConsts->m_dInts[i] );
  4219. }
  4220. CalcValueHash();
  4221. }
  4222. /// take ownership of arg, and copy that uservar
  4223. Expr_ArgVsConstSet_c ( ISphExpr * pArg, UservarIntSet_c * pUservar )
  4224. : Expr_ArgVsSet_c<T> ( pArg )
  4225. , m_bFloat ( false )
  4226. {
  4227. if ( !pUservar )
  4228. return; // can happen on uservar path
  4229. m_dValues.Reserve ( pUservar->GetLength() );
  4230. for ( int i=0; i<pUservar->GetLength(); i++ )
  4231. m_dValues.Add ( (T)*(pUservar->Begin() + i) );
  4232. CalcValueHash();
  4233. }
  4234. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4235. {
  4236. EXPR_CLASS_NAME("Expr_ArgVsConstSet_c");
  4237. return CALC_PARENT_HASH();
  4238. }
  4239. protected:
  4240. CSphVector<T> m_dValues;
  4241. uint64_t m_uValueHash;
  4242. bool m_bFloat;
  4243. void CalcValueHash()
  4244. {
  4245. ARRAY_FOREACH ( i, m_dValues )
  4246. m_uValueHash = sphFNV64 ( &m_dValues[i], sizeof(m_dValues[i]), i ? m_uValueHash : SPH_FNV64_SEED );
  4247. }
  4248. virtual uint64_t CalcHash ( const char * szTag, const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4249. {
  4250. return Expr_ArgVsSet_c<T>::CalcHash ( szTag, tSorterSchema, uPrevHash^m_uValueHash, bDisable );
  4251. }
  4252. };
  4253. //////////////////////////////////////////////////////////////////////////
  4254. /// INTERVAL() evaluator for constant turn point values case
  4255. template < typename T >
  4256. class Expr_IntervalConst_c : public Expr_ArgVsConstSet_c<T>
  4257. {
  4258. public:
  4259. /// take ownership of arg, pre-evaluate and dismiss turn points
  4260. explicit Expr_IntervalConst_c ( CSphVector<ISphExpr *> & dArgs )
  4261. : Expr_ArgVsConstSet_c<T> ( dArgs[0], dArgs, 1 )
  4262. {}
  4263. /// evaluate arg, return interval id
  4264. virtual int IntEval ( const CSphMatch & tMatch ) const
  4265. {
  4266. T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
  4267. ARRAY_FOREACH ( i, this->m_dValues ) // FIXME! OPTIMIZE! perform binary search here
  4268. if ( val<this->m_dValues[i] )
  4269. return i;
  4270. return this->m_dValues.GetLength();
  4271. }
  4272. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4273. {
  4274. EXPR_CLASS_NAME("Expr_IntervalConst_c");
  4275. return Expr_ArgVsConstSet_c<T>::CalcHash ( szClassName, tSorterSchema, uHash, bDisable ); // can't do CALC_PARENT_HASH because of gcc and templates
  4276. }
  4277. };
  4278. /// generic INTERVAL() evaluator
  4279. template < typename T >
  4280. class Expr_Interval_c : public Expr_ArgVsSet_c<T>
  4281. {
  4282. protected:
  4283. CSphVector<ISphExpr *> m_dTurnPoints;
  4284. public:
  4285. /// take ownership of arg and turn points
  4286. explicit Expr_Interval_c ( const CSphVector<ISphExpr *> & dArgs )
  4287. : Expr_ArgVsSet_c<T> ( dArgs[0] )
  4288. {
  4289. for ( int i=1; i<dArgs.GetLength(); i++ )
  4290. m_dTurnPoints.Add ( dArgs[i] );
  4291. }
  4292. /// evaluate arg, return interval id
  4293. virtual int IntEval ( const CSphMatch & tMatch ) const
  4294. {
  4295. T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
  4296. ARRAY_FOREACH ( i, m_dTurnPoints )
  4297. if ( val < Expr_ArgVsSet_c<T>::ExprEval ( m_dTurnPoints[i], tMatch ) )
  4298. return i;
  4299. return m_dTurnPoints.GetLength();
  4300. }
  4301. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4302. {
  4303. Expr_ArgVsSet_c<T>::Command ( eCmd, pArg );
  4304. ARRAY_FOREACH ( i, m_dTurnPoints )
  4305. m_dTurnPoints[i]->Command ( eCmd, pArg );
  4306. }
  4307. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4308. {
  4309. EXPR_CLASS_NAME("Expr_Interval_c");
  4310. CALC_CHILD_HASHES(m_dTurnPoints);
  4311. return Expr_ArgVsSet_c<T>::CalcHash ( szClassName, tSorterSchema, uHash, bDisable ); // can't do CALC_PARENT_HASH because of gcc and templates
  4312. }
  4313. };
  4314. //////////////////////////////////////////////////////////////////////////
  4315. /// IN() evaluator, arbitrary scalar expression vs. constant values
  4316. template < typename T >
  4317. class Expr_In_c : public Expr_ArgVsConstSet_c<T>
  4318. {
  4319. public:
  4320. /// pre-sort values for binary search
  4321. Expr_In_c ( ISphExpr * pArg, ConstList_c * pConsts ) :
  4322. Expr_ArgVsConstSet_c<T> ( pArg, pConsts, false )
  4323. {
  4324. this->m_dValues.Sort();
  4325. }
  4326. /// evaluate arg, check if the value is within set
  4327. virtual int IntEval ( const CSphMatch & tMatch ) const
  4328. {
  4329. T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
  4330. return this->m_dValues.BinarySearch ( val )!=NULL;
  4331. }
  4332. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4333. {
  4334. EXPR_CLASS_NAME("Expr_In_c");
  4335. return Expr_ArgVsConstSet_c<T>::CalcHash ( szClassName, tSorterSchema, uHash, bDisable ); // can't do CALC_PARENT_HASH because of gcc and templates
  4336. }
  4337. };
  4338. /// IN() evaluator, arbitrary scalar expression vs. uservar
  4339. /// (for the sake of evaluator, uservar is a pre-sorted, refcounted external vector)
  4340. class Expr_InUservar_c : public Expr_ArgVsSet_c<int64_t>
  4341. {
  4342. public:
  4343. /// just get hold of args
  4344. explicit Expr_InUservar_c ( ISphExpr * pArg, UservarIntSet_c * pConsts )
  4345. : Expr_ArgVsSet_c<int64_t> ( pArg )
  4346. , m_pConsts ( pConsts ) // no addref, hook should have addref'd (otherwise there'd be a race)
  4347. {
  4348. assert ( m_pConsts );
  4349. m_uHash = sphFNV64 ( m_pConsts->Begin(), m_pConsts->GetLength()*sizeof((*m_pConsts)[0]) );
  4350. }
  4351. /// release the uservar value
  4352. ~Expr_InUservar_c()
  4353. {
  4354. SafeRelease ( m_pConsts );
  4355. }
  4356. /// evaluate arg, check if the value is within set
  4357. virtual int IntEval ( const CSphMatch & tMatch ) const
  4358. {
  4359. int64_t iVal = ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
  4360. return m_pConsts->BinarySearch ( iVal )!=NULL;
  4361. }
  4362. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4363. {
  4364. EXPR_CLASS_NAME("Expr_InUservar_c");
  4365. return CALC_PARENT_HASH_EX(m_uHash);
  4366. }
  4367. protected:
  4368. UservarIntSet_c * m_pConsts;
  4369. uint64_t m_uHash;
  4370. };
  4371. /// IN() evaluator, MVA attribute vs. constant values
  4372. template < bool MVA64 >
  4373. class Expr_MVAIn_c : public Expr_ArgVsConstSet_c<int64_t>, public ExprLocatorTraits_t
  4374. {
  4375. public:
  4376. /// pre-sort values for binary search
  4377. Expr_MVAIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar )
  4378. : Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts, false )
  4379. , ExprLocatorTraits_t ( tLoc, iLocator )
  4380. , m_pMvaPool ( NULL )
  4381. , m_pUservar ( pUservar )
  4382. , m_bArenaProhibit ( false )
  4383. {
  4384. assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
  4385. assert ( !pConsts || !pUservar ); // either constlist or uservar, not both
  4386. this->m_dValues.Sort();
  4387. // consts are handled in Expr_ArgVsConstSet_c, we only need uservars
  4388. if ( pUservar )
  4389. m_uValueHash = sphFNV64 ( pUservar->Begin(), pUservar->GetLength()*sizeof((*pUservar)[0]) );
  4390. }
  4391. ~Expr_MVAIn_c()
  4392. {
  4393. SafeRelease ( m_pUservar );
  4394. }
  4395. int MvaEval ( const DWORD * pMva ) const;
  4396. virtual const DWORD * MvaEval ( const CSphMatch & ) const { assert ( 0 && "not implemented" ); return NULL; }
  4397. /// evaluate arg, check if any values are within set
  4398. virtual int IntEval ( const CSphMatch & tMatch ) const
  4399. {
  4400. const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
  4401. if ( !pMva )
  4402. return 0;
  4403. return MvaEval ( pMva );
  4404. }
  4405. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4406. {
  4407. Expr_ArgVsConstSet_c<int64_t>::Command ( eCmd, pArg );
  4408. ExprLocatorTraits_t::HandleCommand ( eCmd, pArg );
  4409. if ( eCmd==SPH_EXPR_SET_MVA_POOL )
  4410. {
  4411. const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
  4412. assert ( pArg );
  4413. m_pMvaPool = pPool->m_pMva;
  4414. m_bArenaProhibit = pPool->m_bArenaProhibit;
  4415. }
  4416. }
  4417. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4418. {
  4419. EXPR_CLASS_NAME("Expr_MVAIn_c");
  4420. CALC_POD_HASH(m_bArenaProhibit);
  4421. return CALC_DEP_HASHES_EX(m_uValueHash);
  4422. }
  4423. protected:
  4424. const DWORD * m_pMvaPool;
  4425. UservarIntSet_c * m_pUservar;
  4426. bool m_bArenaProhibit;
  4427. };
  4428. template<>
  4429. int Expr_MVAIn_c<false>::MvaEval ( const DWORD * pMva ) const
  4430. {
  4431. // OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
  4432. DWORD uLen = *pMva++;
  4433. const DWORD * pMvaMax = pMva+uLen;
  4434. const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
  4435. const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
  4436. const DWORD * L = pMva;
  4437. const DWORD * R = pMvaMax - 1;
  4438. for ( ; pFilter < pFilterMax; pFilter++ )
  4439. {
  4440. while ( L<=R )
  4441. {
  4442. const DWORD * m = L + (R - L) / 2;
  4443. if ( *pFilter > *m )
  4444. L = m + 1;
  4445. else if ( *pFilter < *m )
  4446. R = m - 1;
  4447. else
  4448. return 1;
  4449. }
  4450. R = pMvaMax - 1;
  4451. }
  4452. return 0;
  4453. }
  4454. template<>
  4455. int Expr_MVAIn_c<true>::MvaEval ( const DWORD * pMva ) const
  4456. {
  4457. // OPTIMIZE! FIXME! factor out a common function with Filter_MVAValues::Eval()
  4458. DWORD uLen = *pMva++;
  4459. assert ( ( uLen%2 )==0 );
  4460. const DWORD * pMvaMax = pMva+uLen;
  4461. const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
  4462. const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
  4463. const int64_t * L = (const int64_t *)pMva;
  4464. const int64_t * R = (const int64_t *)( pMvaMax - 2 );
  4465. for ( ; pFilter < pFilterMax; pFilter++ )
  4466. {
  4467. while ( L<=R )
  4468. {
  4469. const int64_t * pVal = L + (R - L) / 2;
  4470. int64_t iMva = MVA_UPSIZE ( (const DWORD *)pVal );
  4471. if ( *pFilter > iMva )
  4472. L = pVal + 1;
  4473. else if ( *pFilter < iMva )
  4474. R = pVal - 1;
  4475. else
  4476. return 1;
  4477. }
  4478. R = (const int64_t *) ( pMvaMax - 2 );
  4479. }
  4480. return 0;
  4481. }
  4482. /// LENGTH() evaluator for MVAs
  4483. class Expr_MVALength_c : public Expr_WithLocator_c
  4484. {
  4485. public:
  4486. Expr_MVALength_c ( const CSphAttrLocator & tLoc, int iLocator, bool b64 )
  4487. : Expr_WithLocator_c ( tLoc, iLocator )
  4488. , m_b64 ( b64 )
  4489. , m_pMvaPool ( NULL )
  4490. , m_bArenaProhibit ( false )
  4491. {
  4492. assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
  4493. }
  4494. virtual int IntEval ( const CSphMatch & tMatch ) const
  4495. {
  4496. const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
  4497. if ( !pMva )
  4498. return 0;
  4499. return (int)( m_b64 ? *pMva/2 : *pMva );
  4500. }
  4501. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4502. {
  4503. Expr_WithLocator_c::Command ( eCmd, pArg );
  4504. if ( eCmd==SPH_EXPR_SET_MVA_POOL )
  4505. {
  4506. const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
  4507. assert ( pArg );
  4508. m_pMvaPool = pPool->m_pMva;
  4509. m_bArenaProhibit = pPool->m_bArenaProhibit;
  4510. }
  4511. }
  4512. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)IntEval ( tMatch ); }
  4513. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4514. {
  4515. EXPR_CLASS_NAME("Expr_MVALength_c");
  4516. CALC_POD_HASH(m_bArenaProhibit);
  4517. CALC_POD_HASH(m_b64);
  4518. return CALC_DEP_HASHES();
  4519. }
  4520. protected:
  4521. bool m_b64;
  4522. const DWORD * m_pMvaPool;
  4523. bool m_bArenaProhibit;
  4524. };
  4525. /// aggregate functions evaluator for MVA attribute
  4526. template < bool MVA64 >
  4527. class Expr_MVAAggr_c : public Expr_WithLocator_c
  4528. {
  4529. public:
  4530. Expr_MVAAggr_c ( const CSphAttrLocator & tLoc, int iLocator, ESphAggrFunc eFunc )
  4531. : Expr_WithLocator_c ( tLoc, iLocator )
  4532. , m_pMvaPool ( NULL )
  4533. , m_bArenaProhibit ( false )
  4534. , m_eFunc ( eFunc )
  4535. {
  4536. assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
  4537. }
  4538. int64_t MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const;
  4539. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  4540. {
  4541. const DWORD * pMva = tMatch.GetAttrMVA ( m_tLocator, m_pMvaPool, m_bArenaProhibit );
  4542. if ( !pMva )
  4543. return 0;
  4544. return MvaAggr ( pMva, m_eFunc );
  4545. }
  4546. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4547. {
  4548. Expr_WithLocator_c::Command ( eCmd, pArg );
  4549. if ( eCmd==SPH_EXPR_SET_MVA_POOL )
  4550. {
  4551. const PoolPtrs_t * pPool = (const PoolPtrs_t *)pArg;
  4552. assert ( pArg );
  4553. m_pMvaPool = pPool->m_pMva;
  4554. m_bArenaProhibit = pPool->m_bArenaProhibit;
  4555. }
  4556. }
  4557. virtual float Eval ( const CSphMatch & tMatch ) const { return (float)Int64Eval ( tMatch ); }
  4558. virtual int IntEval ( const CSphMatch & tMatch ) const { return (int)Int64Eval ( tMatch ); }
  4559. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4560. {
  4561. EXPR_CLASS_NAME("Expr_MVAAggr_c");
  4562. CALC_POD_HASH(m_bArenaProhibit);
  4563. CALC_POD_HASH(m_eFunc);
  4564. return CALC_DEP_HASHES();
  4565. }
  4566. protected:
  4567. const DWORD * m_pMvaPool;
  4568. bool m_bArenaProhibit;
  4569. ESphAggrFunc m_eFunc;
  4570. };
  4571. template <>
  4572. int64_t Expr_MVAAggr_c<false>::MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const
  4573. {
  4574. DWORD uLen = *pMva++;
  4575. const DWORD * pMvaMax = pMva+uLen;
  4576. const DWORD * L = pMva;
  4577. const DWORD * R = pMvaMax - 1;
  4578. switch ( eFunc )
  4579. {
  4580. case SPH_AGGR_MIN: return *L;
  4581. case SPH_AGGR_MAX: return *R;
  4582. default: return 0;
  4583. }
  4584. }
  4585. template <>
  4586. int64_t Expr_MVAAggr_c<true>::MvaAggr ( const DWORD * pMva, ESphAggrFunc eFunc ) const
  4587. {
  4588. DWORD uLen = *pMva++;
  4589. assert ( ( uLen%2 )==0 );
  4590. const DWORD * pMvaMax = pMva+uLen;
  4591. const int64_t * L = (const int64_t *)pMva;
  4592. const int64_t * R = (const int64_t *)( pMvaMax - 2 );
  4593. switch ( eFunc )
  4594. {
  4595. case SPH_AGGR_MIN: return *L;
  4596. case SPH_AGGR_MAX: return *R;
  4597. default: return 0;
  4598. }
  4599. }
  4600. /// IN() evaluator, JSON array vs. constant values
  4601. class Expr_JsonFieldIn_c : public Expr_ArgVsConstSet_c<int64_t>
  4602. {
  4603. public:
  4604. Expr_JsonFieldIn_c ( ConstList_c * pConsts, ISphExpr * pArg )
  4605. : Expr_ArgVsConstSet_c<int64_t> ( pArg, pConsts, true )
  4606. , m_pStrings ( NULL )
  4607. {
  4608. assert ( pConsts );
  4609. const char * sExpr = pConsts->m_sExpr.cstr();
  4610. int iExprLen = pConsts->m_sExpr.Length();
  4611. const int64_t * pFilter = m_dValues.Begin();
  4612. const int64_t * pFilterMax = pFilter + m_dValues.GetLength();
  4613. for ( const int64_t * pCur=pFilter; pCur<pFilterMax; pCur++ )
  4614. {
  4615. int64_t iVal = *pCur;
  4616. int iOfs = (int)( iVal>>32 );
  4617. int iLen = (int)( iVal & 0xffffffffUL );
  4618. if ( iOfs>0 && iOfs+iLen<=iExprLen )
  4619. {
  4620. CSphString sRes;
  4621. SqlUnescape ( sRes, sExpr + iOfs, iLen );
  4622. m_dHashes.Add ( sphFNV64 ( sRes.cstr(), sRes.Length() ) );
  4623. }
  4624. }
  4625. m_dHashes.Sort();
  4626. }
  4627. Expr_JsonFieldIn_c ( UservarIntSet_c * pUserVar, ISphExpr * pArg )
  4628. : Expr_ArgVsConstSet_c<int64_t> ( pArg, pUserVar )
  4629. , m_pStrings ( NULL )
  4630. {
  4631. assert ( pUserVar );
  4632. m_dHashes.Sort();
  4633. }
  4634. ~Expr_JsonFieldIn_c()
  4635. {
  4636. }
  4637. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4638. {
  4639. Expr_ArgVsConstSet_c<int64_t>::Command ( eCmd, pArg );
  4640. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  4641. m_pStrings = (const BYTE*)pArg;
  4642. }
  4643. /// evaluate arg, check if any values are within set
  4644. virtual int IntEval ( const CSphMatch & tMatch ) const
  4645. {
  4646. const BYTE * pVal = NULL;
  4647. ESphJsonType eJson = GetKey ( &pVal, tMatch );
  4648. int64_t iVal = 0;
  4649. switch ( eJson )
  4650. {
  4651. case JSON_INT32_VECTOR: return ArrayEval<int> ( pVal );
  4652. case JSON_INT64_VECTOR: return ArrayEval<int64_t> ( pVal );
  4653. case JSON_STRING_VECTOR: return StringArrayEval ( pVal, false );
  4654. case JSON_DOUBLE_VECTOR: return ArrayFloatEval ( pVal );
  4655. case JSON_STRING: return StringArrayEval ( pVal, true );
  4656. case JSON_INT32:
  4657. case JSON_INT64:
  4658. iVal = ( eJson==JSON_INT32 ? sphJsonLoadInt ( &pVal ) : sphJsonLoadBigint ( &pVal ) );
  4659. if ( m_bFloat )
  4660. return FloatEval ( (float)iVal );
  4661. else
  4662. return ValueEval ( iVal );
  4663. case JSON_DOUBLE:
  4664. iVal = sphJsonLoadBigint ( &pVal );
  4665. if ( m_bFloat )
  4666. return FloatEval ( sphQW2D ( iVal ) );
  4667. else
  4668. return ValueEval ( iVal );
  4669. case JSON_MIXED_VECTOR:
  4670. {
  4671. const BYTE * p = pVal;
  4672. sphJsonUnpackInt ( &p ); // skip node length
  4673. int iLen = sphJsonUnpackInt ( &p );
  4674. for ( int i=0; i<iLen; i++ )
  4675. {
  4676. ESphJsonType eType = (ESphJsonType)*p++;
  4677. pVal = p;
  4678. int iRes = 0;
  4679. switch (eType)
  4680. {
  4681. case JSON_STRING:
  4682. iRes = StringArrayEval ( pVal, true );
  4683. break;
  4684. case JSON_INT32:
  4685. case JSON_INT64:
  4686. iVal = ( eType==JSON_INT32 ? sphJsonLoadInt ( &pVal ) : sphJsonLoadBigint ( &pVal ) );
  4687. if ( m_bFloat )
  4688. iRes = FloatEval ( (float)iVal );
  4689. else
  4690. iRes = ValueEval ( iVal );
  4691. break;
  4692. case JSON_DOUBLE:
  4693. iVal = sphJsonLoadBigint ( &pVal );
  4694. if ( m_bFloat )
  4695. iRes = FloatEval ( sphQW2D ( iVal ) );
  4696. else
  4697. iRes = ValueEval ( iVal );
  4698. break;
  4699. default: break; // for weird subobjects, just let IN() return false
  4700. }
  4701. if ( iRes )
  4702. return 1;
  4703. sphJsonSkipNode ( eType, &p );
  4704. }
  4705. return 0;
  4706. }
  4707. default: return 0;
  4708. }
  4709. }
  4710. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4711. {
  4712. EXPR_CLASS_NAME("Expr_JsonFieldIn_c");
  4713. return CALC_PARENT_HASH_EX(m_uValueHash);
  4714. }
  4715. protected:
  4716. const BYTE * m_pStrings;
  4717. CSphVector<int64_t> m_dHashes;
  4718. ESphJsonType GetKey ( const BYTE ** ppKey, const CSphMatch & tMatch ) const
  4719. {
  4720. assert ( ppKey );
  4721. if ( !m_pStrings )
  4722. return JSON_EOF;
  4723. uint64_t uValue = m_pArg->Int64Eval ( tMatch );
  4724. *ppKey = m_pStrings + ( uValue & 0xffffffff );
  4725. return (ESphJsonType)( uValue >> 32 );
  4726. }
  4727. int ValueEval ( const int64_t iVal ) const
  4728. {
  4729. const int64_t * pFilter = m_dValues.Begin();
  4730. const int64_t * pFilterMax = pFilter + m_dValues.GetLength();
  4731. for ( ; pFilter<pFilterMax; pFilter++ )
  4732. if ( iVal==*pFilter )
  4733. return 1;
  4734. return 0;
  4735. }
  4736. int FloatEval ( const double fVal ) const
  4737. {
  4738. assert ( m_bFloat );
  4739. const int64_t * pFilter = m_dValues.Begin();
  4740. const int64_t * pFilterMax = pFilter + m_dValues.GetLength();
  4741. for ( ; pFilter<pFilterMax; pFilter++ )
  4742. {
  4743. int64_t iFilterVal = *pFilter;
  4744. double fFilterVal = sphDW2F ( (DWORD)iFilterVal );
  4745. if ( fabs ( fVal - fFilterVal )<=1e-6 )
  4746. return 1;
  4747. }
  4748. return 0;
  4749. }
  4750. // cannot apply MvaEval() on unordered JSON arrays, using linear search
  4751. template <typename T>
  4752. int ArrayEval ( const BYTE * pVal ) const
  4753. {
  4754. const int64_t * pFilter = m_dValues.Begin();
  4755. const int64_t * pFilterMax = pFilter + m_dValues.GetLength();
  4756. int iLen = sphJsonUnpackInt ( &pVal );
  4757. const T * pArray = (const T *)pVal;
  4758. const T * pArrayMax = pArray+iLen;
  4759. for ( ; pFilter<pFilterMax; pFilter++ )
  4760. {
  4761. T iVal = (T)*pFilter;
  4762. for ( const T * m = pArray; m<pArrayMax; m++ )
  4763. if ( iVal==*m )
  4764. return 1;
  4765. }
  4766. return 0;
  4767. }
  4768. int StringArrayEval ( const BYTE * pVal, bool bValueEval ) const
  4769. {
  4770. if ( !bValueEval )
  4771. sphJsonUnpackInt ( &pVal );
  4772. int iCount = bValueEval ? 1 : sphJsonUnpackInt ( &pVal );
  4773. while ( iCount-- )
  4774. {
  4775. int iLen = sphJsonUnpackInt ( &pVal );
  4776. if ( m_dHashes.BinarySearch ( sphFNV64 ( pVal, iLen ) ) )
  4777. return 1;
  4778. pVal += iLen;
  4779. }
  4780. return 0;
  4781. }
  4782. int ArrayFloatEval ( const BYTE * pVal ) const
  4783. {
  4784. const int64_t * pFilter = m_dValues.Begin();
  4785. const int64_t * pFilterMax = pFilter + m_dValues.GetLength();
  4786. int iLen = sphJsonUnpackInt ( &pVal );
  4787. for ( ; pFilter<pFilterMax; pFilter++ )
  4788. {
  4789. int64_t iFilterVal = *pFilter;
  4790. double fFilterVal = ( m_bFloat ? sphDW2F ( (DWORD)iFilterVal ) : iFilterVal );
  4791. const BYTE * p = pVal;
  4792. for ( int i=0; i<iLen; i++ )
  4793. {
  4794. double fStored = sphQW2D ( sphJsonLoadBigint ( &p ) );
  4795. if ( fabs ( fStored - fFilterVal )<=1e-6 )
  4796. return 1;
  4797. }
  4798. }
  4799. return 0;
  4800. }
  4801. };
  4802. class Expr_StrIn_c : public Expr_ArgVsConstSet_c<int64_t>, public ExprLocatorTraits_t
  4803. {
  4804. protected:
  4805. const BYTE * m_pStrings;
  4806. UservarIntSet_c * m_pUservar;
  4807. CSphVector<CSphString> m_dStringValues;
  4808. SphStringCmp_fn m_fnStrCmp;
  4809. public:
  4810. Expr_StrIn_c ( const CSphAttrLocator & tLoc, int iLocator, ConstList_c * pConsts, UservarIntSet_c * pUservar, ESphCollation eCollation )
  4811. : Expr_ArgVsConstSet_c<int64_t> ( NULL, pConsts, false )
  4812. , ExprLocatorTraits_t ( tLoc, iLocator )
  4813. , m_pStrings ( NULL )
  4814. , m_pUservar ( pUservar )
  4815. {
  4816. assert ( tLoc.m_iBitOffset>=0 && tLoc.m_iBitCount>0 );
  4817. assert ( !pConsts || !pUservar );
  4818. m_fnStrCmp = GetCollationFn ( eCollation );
  4819. const char * sExpr = pConsts->m_sExpr.cstr();
  4820. int iExprLen = pConsts->m_sExpr.Length();
  4821. const int64_t * pFilter = m_pUservar ? m_pUservar->Begin() : m_dValues.Begin();
  4822. const int64_t * pFilterMax = pFilter + ( m_pUservar ? m_pUservar->GetLength() : m_dValues.GetLength() );
  4823. for ( const int64_t * pCur=pFilter; pCur<pFilterMax; pCur++ )
  4824. {
  4825. int64_t iVal = *pCur;
  4826. int iOfs = (int)( iVal>>32 );
  4827. int iLen = (int)( iVal & 0xffffffffUL );
  4828. if ( iOfs>0 && iOfs+iLen<=iExprLen )
  4829. {
  4830. CSphString sRes;
  4831. SqlUnescape ( sRes, sExpr + iOfs, iLen );
  4832. m_dStringValues.Add ( sRes );
  4833. }
  4834. }
  4835. // consts are handled in Expr_ArgVsConstSet_c, we only need uservars
  4836. if ( m_pUservar )
  4837. m_uValueHash = sphFNV64 ( pFilter, (pFilterMax-pFilter)*sizeof(*pFilter) );
  4838. }
  4839. ~Expr_StrIn_c()
  4840. {
  4841. SafeRelease ( m_pUservar );
  4842. }
  4843. virtual int IntEval ( const CSphMatch & tMatch ) const
  4844. {
  4845. const BYTE * pVal;
  4846. SphAttr_t iOfs = tMatch.GetAttr ( m_tLocator );
  4847. if ( iOfs<=0 )
  4848. return 0;
  4849. int iLen = sphUnpackStr ( m_pStrings + iOfs, &pVal );
  4850. CSphString sValue ( (const char*)pVal, iLen );
  4851. const BYTE * pStr = (const BYTE*)sValue.cstr();
  4852. ARRAY_FOREACH ( i, m_dStringValues )
  4853. if ( m_fnStrCmp ( pStr, (const BYTE*)m_dStringValues[i].cstr(), false )==0 )
  4854. return 1;
  4855. return 0;
  4856. }
  4857. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4858. {
  4859. Expr_ArgVsConstSet_c<int64_t>::Command ( eCmd, pArg );
  4860. ExprLocatorTraits_t::HandleCommand ( eCmd, pArg );
  4861. if ( eCmd==SPH_EXPR_SET_STRING_POOL )
  4862. m_pStrings = (const BYTE*)pArg;
  4863. }
  4864. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4865. {
  4866. EXPR_CLASS_NAME("Expr_StrIn_c");
  4867. CALC_POD_HASH(m_fnStrCmp);
  4868. return CALC_PARENT_HASH_EX(m_uValueHash);
  4869. }
  4870. };
  4871. //////////////////////////////////////////////////////////////////////////
  4872. /// generic BITDOT() evaluator
  4873. /// first argument is a bit mask and the rest ones are bit weights
  4874. /// function returns sum of bits multiplied by their weights
  4875. /// BITDOT(5, 11, 33, 55) => 1*11 + 0*33 + 1*55 = 66
  4876. /// BITDOT(4, 11, 33, 55) => 0*11 + 0*33 + 1*55 = 55
  4877. template < typename T >
  4878. class Expr_Bitdot_c : public Expr_ArgVsSet_c<T>
  4879. {
  4880. public:
  4881. /// take ownership of arg and turn points
  4882. explicit Expr_Bitdot_c ( const CSphVector<ISphExpr *> & dArgs )
  4883. : Expr_ArgVsSet_c<T> ( dArgs[0] )
  4884. {
  4885. for ( int i=1; i<dArgs.GetLength(); i++ )
  4886. m_dBitWeights.Add ( dArgs[i] );
  4887. }
  4888. virtual float Eval ( const CSphMatch & tMatch ) const
  4889. {
  4890. return (float) DoEval ( tMatch );
  4891. }
  4892. virtual int IntEval ( const CSphMatch & tMatch ) const
  4893. {
  4894. return (int) DoEval ( tMatch );
  4895. }
  4896. virtual int64_t Int64Eval ( const CSphMatch & tMatch ) const
  4897. {
  4898. return (int64_t) DoEval ( tMatch );
  4899. }
  4900. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4901. {
  4902. Expr_ArgVsSet_c<T>::Command ( eCmd, pArg );
  4903. ARRAY_FOREACH ( i, m_dBitWeights )
  4904. m_dBitWeights[i]->Command ( eCmd, pArg );
  4905. }
  4906. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4907. {
  4908. EXPR_CLASS_NAME("Expr_Bitdot_c");
  4909. CALC_CHILD_HASHES(m_dBitWeights);
  4910. return Expr_ArgVsSet_c<T>::CalcHash ( szClassName, tSorterSchema, uHash, bDisable ); // can't do CALC_PARENT_HASH because of gcc and templates
  4911. }
  4912. protected:
  4913. CSphVector<ISphExpr *> m_dBitWeights;
  4914. /// generic evaluate
  4915. virtual T DoEval ( const CSphMatch & tMatch ) const
  4916. {
  4917. int64_t uArg = this->m_pArg->Int64Eval ( tMatch ); // 'this' fixes gcc braindamage
  4918. T tRes = 0;
  4919. int iBit = 0;
  4920. while ( uArg && iBit<m_dBitWeights.GetLength() )
  4921. {
  4922. if ( uArg & 1 )
  4923. tRes += Expr_ArgVsSet_c<T>::ExprEval ( m_dBitWeights[iBit], tMatch );
  4924. uArg >>= 1;
  4925. iBit++;
  4926. }
  4927. return tRes;
  4928. }
  4929. };
  4930. //////////////////////////////////////////////////////////////////////////
  4931. enum GeoFunc_e
  4932. {
  4933. GEO_HAVERSINE,
  4934. GEO_ADAPTIVE
  4935. };
  4936. typedef float (*Geofunc_fn)( float, float, float, float );
  4937. static Geofunc_fn GeodistFn ( GeoFunc_e eFunc, bool bDeg )
  4938. {
  4939. switch ( 2*eFunc+bDeg )
  4940. {
  4941. case 2*GEO_HAVERSINE: return &GeodistSphereRad;
  4942. case 2*GEO_HAVERSINE+1: return &GeodistSphereDeg;
  4943. case 2*GEO_ADAPTIVE: return &GeodistAdaptiveRad;
  4944. case 2*GEO_ADAPTIVE+1: return &GeodistAdaptiveDeg;
  4945. }
  4946. return NULL;
  4947. }
  4948. static float Geodist ( GeoFunc_e eFunc, bool bDeg, float lat1, float lon1, float lat2, float lon2 )
  4949. {
  4950. return GeodistFn ( eFunc, bDeg ) ( lat1, lon1, lat2, lon2 );
  4951. }
  4952. /// geodist() - attr point, constant anchor
  4953. class Expr_GeodistAttrConst_c : public ISphExpr
  4954. {
  4955. public:
  4956. Expr_GeodistAttrConst_c ( Geofunc_fn pFunc, float fOut, CSphAttrLocator tLat, CSphAttrLocator tLon, float fAnchorLat, float fAnchorLon, int iLat, int iLon )
  4957. : m_pFunc ( pFunc )
  4958. , m_fOut ( fOut )
  4959. , m_tLat ( tLat )
  4960. , m_tLon ( tLon )
  4961. , m_fAnchorLat ( fAnchorLat )
  4962. , m_fAnchorLon ( fAnchorLon )
  4963. , m_iLat ( iLat )
  4964. , m_iLon ( iLon )
  4965. {}
  4966. virtual float Eval ( const CSphMatch & tMatch ) const
  4967. {
  4968. return m_fOut*m_pFunc ( tMatch.GetAttrFloat ( m_tLat ), tMatch.GetAttrFloat ( m_tLon ), m_fAnchorLat, m_fAnchorLon );
  4969. }
  4970. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  4971. {
  4972. if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
  4973. {
  4974. static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLat );
  4975. static_cast < CSphVector<int>* > ( pArg )->Add ( m_iLon );
  4976. }
  4977. }
  4978. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4979. {
  4980. EXPR_CLASS_NAME("Expr_GeodistAttrConst_c");
  4981. CALC_POD_HASH(m_fAnchorLat);
  4982. CALC_POD_HASH(m_fAnchorLon);
  4983. CALC_POD_HASH(m_fOut);
  4984. CALC_POD_HASH(m_pFunc);
  4985. return CALC_DEP_HASHES();
  4986. }
  4987. private:
  4988. Geofunc_fn m_pFunc;
  4989. float m_fOut;
  4990. CSphAttrLocator m_tLat;
  4991. CSphAttrLocator m_tLon;
  4992. float m_fAnchorLat;
  4993. float m_fAnchorLon;
  4994. int m_iLat;
  4995. int m_iLon;
  4996. };
  4997. /// geodist() - expr point, constant anchor
  4998. class Expr_GeodistConst_c: public ISphExpr
  4999. {
  5000. public:
  5001. Expr_GeodistConst_c ( Geofunc_fn pFunc, float fOut, ISphExpr * pLat, ISphExpr * pLon, float fAnchorLat, float fAnchorLon )
  5002. : m_pFunc ( pFunc )
  5003. , m_fOut ( fOut )
  5004. , m_pLat ( pLat )
  5005. , m_pLon ( pLon )
  5006. , m_fAnchorLat ( fAnchorLat )
  5007. , m_fAnchorLon ( fAnchorLon )
  5008. {}
  5009. ~Expr_GeodistConst_c ()
  5010. {
  5011. SafeRelease ( m_pLon );
  5012. SafeRelease ( m_pLat );
  5013. }
  5014. virtual float Eval ( const CSphMatch & tMatch ) const
  5015. {
  5016. return m_fOut*m_pFunc ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_fAnchorLat, m_fAnchorLon );
  5017. }
  5018. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  5019. {
  5020. m_pLat->Command ( eCmd, pArg );
  5021. m_pLon->Command ( eCmd, pArg );
  5022. }
  5023. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  5024. {
  5025. EXPR_CLASS_NAME("Expr_GeodistConst_c");
  5026. CALC_POD_HASH(m_fAnchorLat);
  5027. CALC_POD_HASH(m_fAnchorLon);
  5028. CALC_POD_HASH(m_fOut);
  5029. CALC_POD_HASH(m_pFunc);
  5030. CALC_CHILD_HASH(m_pLat);
  5031. CALC_CHILD_HASH(m_pLon);
  5032. return CALC_DEP_HASHES();
  5033. }
  5034. private:
  5035. Geofunc_fn m_pFunc;
  5036. float m_fOut;
  5037. ISphExpr * m_pLat;
  5038. ISphExpr * m_pLon;
  5039. float m_fAnchorLat;
  5040. float m_fAnchorLon;
  5041. };
  5042. /// geodist() - expr point, expr anchor
  5043. class Expr_Geodist_c: public ISphExpr
  5044. {
  5045. public:
  5046. Expr_Geodist_c ( Geofunc_fn pFunc, float fOut, ISphExpr * pLat, ISphExpr * pLon, ISphExpr * pAnchorLat, ISphExpr * pAnchorLon )
  5047. : m_pFunc ( pFunc )
  5048. , m_fOut ( fOut )
  5049. , m_pLat ( pLat )
  5050. , m_pLon ( pLon )
  5051. , m_pAnchorLat ( pAnchorLat )
  5052. , m_pAnchorLon ( pAnchorLon )
  5053. {}
  5054. ~Expr_Geodist_c ()
  5055. {
  5056. SafeRelease ( m_pAnchorLon );
  5057. SafeRelease ( m_pAnchorLat );
  5058. SafeRelease ( m_pLon );
  5059. SafeRelease ( m_pLat );
  5060. }
  5061. virtual float Eval ( const CSphMatch & tMatch ) const
  5062. {
  5063. return m_fOut*m_pFunc ( m_pLat->Eval(tMatch), m_pLon->Eval(tMatch), m_pAnchorLat->Eval(tMatch), m_pAnchorLon->Eval(tMatch) );
  5064. }
  5065. virtual void Command ( ESphExprCommand eCmd, void * pArg )
  5066. {
  5067. m_pLat->Command ( eCmd, pArg );
  5068. m_pLon->Command ( eCmd, pArg );
  5069. m_pAnchorLat->Command ( eCmd, pArg );
  5070. m_pAnchorLon->Command ( eCmd, pArg );
  5071. }
  5072. virtual uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  5073. {
  5074. EXPR_CLASS_NAME("Expr_Geodist_c");
  5075. CALC_POD_HASH(m_fOut);
  5076. CALC_POD_HASH(m_pFunc);
  5077. CALC_CHILD_HASH(m_pLat);
  5078. CALC_CHILD_HASH(m_pLon);
  5079. CALC_CHILD_HASH(m_pAnchorLat);
  5080. CALC_CHILD_HASH(m_pAnchorLon);
  5081. return CALC_DEP_HASHES();
  5082. }
  5083. private:
  5084. Geofunc_fn m_pFunc;
  5085. float m_fOut;
  5086. ISphExpr * m_pLat;
  5087. ISphExpr * m_pLon;
  5088. ISphExpr * m_pAnchorLat;
  5089. ISphExpr * m_pAnchorLon;
  5090. };
  5091. //////////////////////////////////////////////////////////////////////////
  5092. struct GatherArgTypes_t : ISphNoncopyable
  5093. {
  5094. CSphVector<int> & m_dTypes;
  5095. explicit GatherArgTypes_t ( CSphVector<int> & dTypes )
  5096. : m_dTypes ( dTypes )
  5097. {}
  5098. void Collect ( int , const ExprNode_t & tNode )
  5099. {
  5100. m_dTypes.Add ( tNode.m_iToken );
  5101. }
  5102. };
  5103. void ExprParser_t::GatherArgTypes ( int iNode, CSphVector<int> & dTypes )
  5104. {
  5105. GatherArgTypes_t tCollector ( dTypes );
  5106. GatherArgT ( iNode, tCollector );
  5107. }
  5108. struct GatherArgNodes_t : ISphNoncopyable
  5109. {
  5110. CSphVector<int> & m_dNodes;
  5111. explicit GatherArgNodes_t ( CSphVector<int> & dNodes )
  5112. : m_dNodes ( dNodes )
  5113. {}
  5114. void Collect ( int iNode, const ExprNode_t & )
  5115. {
  5116. m_dNodes.Add ( iNode );
  5117. }
  5118. };
  5119. void ExprParser_t::GatherArgNodes ( int iNode, CSphVector<int> & dNodes )
  5120. {
  5121. GatherArgNodes_t tCollector ( dNodes );
  5122. GatherArgT ( iNode, tCollector );
  5123. }
  5124. struct GatherArgReturnTypes_t : ISphNoncopyable
  5125. {
  5126. CSphVector<ESphAttr> & m_dTypes;
  5127. explicit GatherArgReturnTypes_t ( CSphVector<ESphAttr> & dTypes )
  5128. : m_dTypes ( dTypes )
  5129. {}
  5130. void Collect ( int , const ExprNode_t & tNode )
  5131. {
  5132. m_dTypes.Add ( tNode.m_eRetType );
  5133. }
  5134. };
  5135. void ExprParser_t::GatherArgRetTypes ( int iNode, CSphVector<ESphAttr> & dTypes )
  5136. {
  5137. GatherArgReturnTypes_t tCollector ( dTypes );
  5138. GatherArgT ( iNode, tCollector );
  5139. }
  5140. template < typename T >
  5141. void ExprParser_t::GatherArgT ( int iNode, T & FUNCTOR )
  5142. {
  5143. if ( iNode<0 )
  5144. return;
  5145. m_dGatherStack.Resize ( 0 );
  5146. StackNode_t & tInitial = m_dGatherStack.Add();
  5147. const ExprNode_t & tNode = m_dNodes[iNode];
  5148. tInitial.m_iNode = iNode;
  5149. tInitial.m_iLeft = tNode.m_iLeft;
  5150. tInitial.m_iRight = tNode.m_iRight;
  5151. while ( m_dGatherStack.GetLength()>0 )
  5152. {
  5153. StackNode_t & tCur = m_dGatherStack.Last();
  5154. const ExprNode_t & tCurExprNode = m_dNodes[tCur.m_iNode];
  5155. if ( tCurExprNode.m_iToken!=',' )
  5156. {
  5157. FUNCTOR.Collect ( tCur.m_iNode, tCurExprNode );
  5158. m_dGatherStack.Pop();
  5159. continue;
  5160. }
  5161. if ( tCur.m_iLeft==-1 && tCur.m_iRight==-1 )
  5162. {
  5163. m_dGatherStack.Pop();
  5164. continue;
  5165. }
  5166. int iChild = -1;
  5167. if ( tCur.m_iLeft>=0 )
  5168. {
  5169. iChild = tCur.m_iLeft;
  5170. tCur.m_iLeft = -1;
  5171. } else if ( tCur.m_iRight>=0 )
  5172. {
  5173. iChild = tCur.m_iRight;
  5174. tCur.m_iRight = -1;
  5175. }
  5176. assert ( iChild>=0 );
  5177. const ExprNode_t & tChild = m_dNodes[iChild];
  5178. StackNode_t & tNext = m_dGatherStack.Add();
  5179. tNext.m_iNode = iChild;
  5180. tNext.m_iLeft = tChild.m_iLeft;
  5181. tNext.m_iRight = tChild.m_iRight;
  5182. }
  5183. }
  5184. bool ExprParser_t::CheckForConstSet ( int iArgsNode, int iSkip )
  5185. {
  5186. CSphVector<int> dTypes;
  5187. GatherArgTypes ( iArgsNode, dTypes );
  5188. for ( int i=iSkip; i<dTypes.GetLength(); i++ )
  5189. if ( dTypes[i]!=TOK_CONST_INT && dTypes[i]!=TOK_CONST_FLOAT && dTypes[i]!=TOK_MAP_ARG )
  5190. return false;
  5191. return true;
  5192. }
  5193. template < typename T >
  5194. void ExprParser_t::WalkTree ( int iRoot, T & FUNCTOR )
  5195. {
  5196. if ( iRoot>=0 )
  5197. {
  5198. const ExprNode_t & tNode = m_dNodes[iRoot];
  5199. FUNCTOR.Enter ( tNode, m_dNodes );
  5200. WalkTree ( tNode.m_iLeft, FUNCTOR );
  5201. WalkTree ( tNode.m_iRight, FUNCTOR );
  5202. FUNCTOR.Exit ( tNode );
  5203. }
  5204. }
  5205. ISphExpr * ExprParser_t::CreateIntervalNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
  5206. {
  5207. assert ( dArgs.GetLength()>=2 );
  5208. CSphVector<ESphAttr> dTypes;
  5209. GatherArgRetTypes ( iArgsNode, dTypes );
  5210. // force type conversion, where possible
  5211. if ( dTypes[0]==SPH_ATTR_JSON_FIELD )
  5212. dArgs[0] = new Expr_JsonFieldConv_c ( dArgs[0] );
  5213. bool bConst = CheckForConstSet ( iArgsNode, 1 );
  5214. ESphAttr eAttrType = m_dNodes[iArgsNode].m_eArgType;
  5215. if ( bConst )
  5216. {
  5217. switch ( eAttrType )
  5218. {
  5219. case SPH_ATTR_INTEGER: return new Expr_IntervalConst_c<int> ( dArgs ); break;
  5220. case SPH_ATTR_BIGINT: return new Expr_IntervalConst_c<int64_t> ( dArgs ); break;
  5221. default: return new Expr_IntervalConst_c<float> ( dArgs ); break;
  5222. }
  5223. } else
  5224. {
  5225. switch ( eAttrType )
  5226. {
  5227. case SPH_ATTR_INTEGER: return new Expr_Interval_c<int> ( dArgs ); break;
  5228. case SPH_ATTR_BIGINT: return new Expr_Interval_c<int64_t> ( dArgs ); break;
  5229. default: return new Expr_Interval_c<float> ( dArgs ); break;
  5230. }
  5231. }
  5232. #if !USE_WINDOWS
  5233. return NULL;
  5234. #endif
  5235. }
  5236. ISphExpr * ExprParser_t::CreateInNode ( int iNode )
  5237. {
  5238. const ExprNode_t & tLeft = m_dNodes[m_dNodes[iNode].m_iLeft];
  5239. const ExprNode_t & tRight = m_dNodes[m_dNodes[iNode].m_iRight];
  5240. switch ( tRight.m_iToken )
  5241. {
  5242. // create IN(arg,constlist)
  5243. case TOK_CONST_LIST:
  5244. switch ( tLeft.m_iToken )
  5245. {
  5246. case TOK_ATTR_MVA32:
  5247. return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
  5248. case TOK_ATTR_MVA64:
  5249. return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL );
  5250. case TOK_ATTR_STRING:
  5251. return new Expr_StrIn_c ( tLeft.m_tLocator, tLeft.m_iLocator, tRight.m_pConsts, NULL, m_eCollation );
  5252. case TOK_ATTR_JSON:
  5253. return new Expr_JsonFieldIn_c ( tRight.m_pConsts, CreateTree ( m_dNodes [ iNode ].m_iLeft ) );
  5254. default:
  5255. {
  5256. ISphExpr * pArg = CreateTree ( m_dNodes[iNode].m_iLeft );
  5257. switch ( WidestType ( tLeft.m_eRetType, tRight.m_pConsts->m_eRetType ) )
  5258. {
  5259. case SPH_ATTR_INTEGER: return new Expr_In_c<int> ( pArg, tRight.m_pConsts ); break;
  5260. case SPH_ATTR_BIGINT: return new Expr_In_c<int64_t> ( pArg, tRight.m_pConsts ); break;
  5261. default: return new Expr_In_c<float> ( pArg, tRight.m_pConsts ); break;
  5262. }
  5263. }
  5264. }
  5265. break;
  5266. // create IN(arg,uservar)
  5267. case TOK_USERVAR:
  5268. {
  5269. if ( !g_pUservarsHook )
  5270. {
  5271. m_sCreateError.SetSprintf ( "internal error: no uservars hook" );
  5272. return NULL;
  5273. }
  5274. UservarIntSet_c * pUservar = g_pUservarsHook ( m_dUservars[(int)tRight.m_iConst] );
  5275. if ( !pUservar )
  5276. {
  5277. m_sCreateError.SetSprintf ( "undefined user variable '%s'", m_dUservars[(int)tRight.m_iConst].cstr() );
  5278. return NULL;
  5279. }
  5280. switch ( tLeft.m_iToken )
  5281. {
  5282. case TOK_ATTR_MVA32:
  5283. return new Expr_MVAIn_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
  5284. case TOK_ATTR_MVA64:
  5285. return new Expr_MVAIn_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar );
  5286. case TOK_ATTR_STRING:
  5287. return new Expr_StrIn_c ( tLeft.m_tLocator, tLeft.m_iLocator, NULL, pUservar, m_eCollation );
  5288. case TOK_ATTR_JSON:
  5289. return new Expr_JsonFieldIn_c ( pUservar, CreateTree ( m_dNodes[iNode].m_iLeft ) );
  5290. default:
  5291. return new Expr_InUservar_c ( CreateTree ( m_dNodes[iNode].m_iLeft ), pUservar );
  5292. }
  5293. break;
  5294. }
  5295. // oops, unhandled case
  5296. default:
  5297. m_sCreateError = "IN() arguments must be constants (except the 1st one)";
  5298. return NULL;
  5299. }
  5300. }
  5301. ISphExpr * ExprParser_t::CreateLengthNode ( const ExprNode_t & tNode, ISphExpr * pLeft )
  5302. {
  5303. const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
  5304. switch ( tLeft.m_iToken )
  5305. {
  5306. case TOK_FUNC:
  5307. return new Expr_StrLength_c ( pLeft );
  5308. case TOK_ATTR_MVA32:
  5309. case TOK_ATTR_MVA64:
  5310. return new Expr_MVALength_c ( tLeft.m_tLocator, tLeft.m_iLocator, tLeft.m_iToken==TOK_ATTR_MVA64 );
  5311. case TOK_ATTR_JSON:
  5312. return new Expr_JsonFieldLength_c ( pLeft );
  5313. default:
  5314. m_sCreateError = "LENGTH() argument must be MVA or JSON field";
  5315. return NULL;
  5316. }
  5317. }
  5318. ISphExpr * ExprParser_t::CreateGeodistNode ( int iArgs )
  5319. {
  5320. CSphVector<int> dArgs;
  5321. GatherArgNodes ( iArgs, dArgs );
  5322. assert ( dArgs.GetLength()==4 || dArgs.GetLength()==5 );
  5323. float fOut = 1.0f; // result scale, defaults to out=meters
  5324. bool bDeg = false; // arg units, defaults to in=radians
  5325. GeoFunc_e eMethod = GEO_ADAPTIVE; // geodist function to use, defaults to adaptive
  5326. if ( dArgs.GetLength()==5 )
  5327. {
  5328. assert ( m_dNodes [ dArgs[4] ].m_eRetType==SPH_ATTR_MAPARG );
  5329. CSphVector<CSphNamedVariant> & dOpts = m_dNodes [ dArgs[4] ].m_pMapArg->m_dPairs;
  5330. // FIXME! handle errors in options somehow?
  5331. ARRAY_FOREACH ( i, dOpts )
  5332. {
  5333. const CSphNamedVariant & t = dOpts[i];
  5334. if ( t.m_sKey=="in" )
  5335. {
  5336. if ( t.m_sValue=="deg" || t.m_sValue=="degrees" )
  5337. bDeg = true;
  5338. else if ( t.m_sValue=="rad" || t.m_sValue=="radians" )
  5339. bDeg = false;
  5340. } else if ( t.m_sKey=="out" )
  5341. {
  5342. if ( t.m_sValue=="km" || t.m_sValue=="kilometers" )
  5343. fOut = 1.0f / 1000.0f;
  5344. else if ( t.m_sValue=="mi" || t.m_sValue=="miles" )
  5345. fOut = 1.0f / 1609.34f;
  5346. else if ( t.m_sValue=="ft" || t.m_sValue=="feet" )
  5347. fOut = 1.0f / 0.3048f;
  5348. else if ( t.m_sValue=="m" || t.m_sValue=="meters" )
  5349. fOut = 1.0f;
  5350. } else if ( t.m_sKey=="method" )
  5351. {
  5352. if ( t.m_sValue=="haversine" )
  5353. eMethod = GEO_HAVERSINE;
  5354. else if ( t.m_sValue=="adaptive" )
  5355. eMethod = GEO_ADAPTIVE;
  5356. }
  5357. }
  5358. }
  5359. bool bConst1 = ( IsConst ( &m_dNodes[dArgs[0]] ) && IsConst ( &m_dNodes[dArgs[1]] ) );
  5360. bool bConst2 = ( IsConst ( &m_dNodes[dArgs[2]] ) && IsConst ( &m_dNodes[dArgs[3]] ) );
  5361. if ( bConst1 && bConst2 )
  5362. {
  5363. float t[4];
  5364. for ( int i=0; i<4; i++ )
  5365. t[i] = FloatVal ( &m_dNodes[dArgs[i]] );
  5366. return new Expr_GetConst_c ( fOut*Geodist ( eMethod, bDeg, t[0], t[1], t[2], t[3] ) );
  5367. }
  5368. if ( bConst1 )
  5369. {
  5370. Swap ( dArgs[0], dArgs[2] );
  5371. Swap ( dArgs[1], dArgs[3] );
  5372. Swap ( bConst1, bConst2 );
  5373. }
  5374. if ( bConst2 )
  5375. {
  5376. // constant anchor
  5377. if ( m_dNodes[dArgs[0]].m_iToken==TOK_ATTR_FLOAT && m_dNodes[dArgs[1]].m_iToken==TOK_ATTR_FLOAT )
  5378. {
  5379. // attr point
  5380. return new Expr_GeodistAttrConst_c ( GeodistFn ( eMethod, bDeg ), fOut,
  5381. m_dNodes[dArgs[0]].m_tLocator, m_dNodes[dArgs[1]].m_tLocator,
  5382. FloatVal ( &m_dNodes[dArgs[2]] ), FloatVal ( &m_dNodes[dArgs[3]] ),
  5383. m_dNodes[dArgs[0]].m_iLocator, m_dNodes[dArgs[1]].m_iLocator );
  5384. } else
  5385. {
  5386. // expr point
  5387. return new Expr_GeodistConst_c ( GeodistFn ( eMethod, bDeg ), fOut,
  5388. CreateTree ( dArgs[0] ), CreateTree ( dArgs[1] ),
  5389. FloatVal ( &m_dNodes[dArgs[2]] ), FloatVal ( &m_dNodes[dArgs[3]] ) );
  5390. }
  5391. }
  5392. // four expressions
  5393. CSphVector<ISphExpr *> dExpr;
  5394. FoldArglist ( CreateTree ( iArgs ), dExpr );
  5395. assert ( dExpr.GetLength()==4 );
  5396. return new Expr_Geodist_c ( GeodistFn ( eMethod, bDeg ), fOut, dExpr[0], dExpr[1], dExpr[2], dExpr[3] );
  5397. }
  5398. ISphExpr * ExprParser_t::CreatePFNode ( int iArg )
  5399. {
  5400. m_eEvalStage = SPH_EVAL_FINAL;
  5401. DWORD uNodeFactorFlags = SPH_FACTOR_ENABLE | SPH_FACTOR_CALC_ATC;
  5402. CSphVector<int> dArgs;
  5403. GatherArgNodes ( iArg, dArgs );
  5404. assert ( dArgs.GetLength()==0 || dArgs.GetLength()==1 );
  5405. bool bNoATC = false;
  5406. bool bJsonOut = false;
  5407. if ( dArgs.GetLength()==1 )
  5408. {
  5409. assert ( m_dNodes[dArgs[0]].m_eRetType==SPH_ATTR_MAPARG );
  5410. CSphVector<CSphNamedVariant> & dOpts = m_dNodes[dArgs[0]].m_pMapArg->m_dPairs;
  5411. ARRAY_FOREACH ( i, dOpts )
  5412. {
  5413. if ( dOpts[i].m_sKey=="no_atc" && dOpts[i].m_iValue>0)
  5414. bNoATC = true;
  5415. else if ( dOpts[i].m_sKey=="json" && dOpts[i].m_iValue>0 )
  5416. bJsonOut = true;
  5417. }
  5418. }
  5419. if ( bNoATC )
  5420. uNodeFactorFlags &= ~SPH_FACTOR_CALC_ATC;
  5421. if ( bJsonOut )
  5422. uNodeFactorFlags |= SPH_FACTOR_JSON_OUT;
  5423. m_uPackedFactorFlags |= uNodeFactorFlags;
  5424. return new Expr_GetPackedFactors_c();
  5425. }
  5426. ISphExpr * ExprParser_t::CreateBitdotNode ( int iArgsNode, CSphVector<ISphExpr *> & dArgs )
  5427. {
  5428. assert ( dArgs.GetLength()>=1 );
  5429. ESphAttr eAttrType = m_dNodes[iArgsNode].m_eRetType;
  5430. switch ( eAttrType )
  5431. {
  5432. case SPH_ATTR_INTEGER: return new Expr_Bitdot_c<int> ( dArgs ); break;
  5433. case SPH_ATTR_BIGINT: return new Expr_Bitdot_c<int64_t> ( dArgs ); break;
  5434. default: return new Expr_Bitdot_c<float> ( dArgs ); break;
  5435. }
  5436. }
  5437. ISphExpr * ExprParser_t::CreateAggregateNode ( const ExprNode_t & tNode, ESphAggrFunc eFunc, ISphExpr * pLeft )
  5438. {
  5439. const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
  5440. switch ( tLeft.m_iToken )
  5441. {
  5442. case TOK_ATTR_JSON: return new Expr_JsonFieldAggr_c ( pLeft, eFunc );
  5443. case TOK_ATTR_MVA32: return new Expr_MVAAggr_c<false> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
  5444. case TOK_ATTR_MVA64: return new Expr_MVAAggr_c<true> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
  5445. default: return NULL;
  5446. }
  5447. }
  5448. void ExprParser_t::FixupIterators ( int iNode, const char * sKey, SphAttr_t * pAttr )
  5449. {
  5450. if ( iNode==-1 )
  5451. return;
  5452. ExprNode_t & tNode = m_dNodes[iNode];
  5453. if ( tNode.m_iToken==TOK_IDENT && !strcmp ( sKey, tNode.m_sIdent ) )
  5454. {
  5455. tNode.m_iToken = TOK_ITERATOR;
  5456. tNode.m_pAttr = pAttr;
  5457. }
  5458. FixupIterators ( tNode.m_iLeft, sKey, pAttr );
  5459. FixupIterators ( tNode.m_iRight, sKey, pAttr );
  5460. }
  5461. ISphExpr * ExprParser_t::CreateForInNode ( int iNode )
  5462. {
  5463. ExprNode_t & tNode = m_dNodes[iNode];
  5464. int iFunc = tNode.m_iFunc;
  5465. int iExprNode = tNode.m_iLeft;
  5466. int iNameNode = tNode.m_iRight;
  5467. int iDataNode = m_dNodes[iNameNode].m_iLeft;
  5468. Expr_ForIn_c * pFunc = new Expr_ForIn_c ( CreateTree ( iDataNode ), iFunc==FUNC_ALL, iFunc==FUNC_INDEXOF );
  5469. FixupIterators ( iExprNode, m_dNodes[iNameNode].m_sIdent, pFunc->GetRef() );
  5470. pFunc->SetExpr ( CreateTree ( iExprNode ) );
  5471. return pFunc;
  5472. }
  5473. //////////////////////////////////////////////////////////////////////////
  5474. int yylex ( YYSTYPE * lvalp, ExprParser_t * pParser )
  5475. {
  5476. return pParser->GetToken ( lvalp );
  5477. }
  5478. void yyerror ( ExprParser_t * pParser, const char * sMessage )
  5479. {
  5480. pParser->m_sParserError.SetSprintf ( "Sphinx expr: %s near '%s'", sMessage, pParser->m_pLastTokenStart );
  5481. }
  5482. #if USE_WINDOWS
  5483. #pragma warning(push,1)
  5484. #endif
  5485. #ifdef CMAKE_GENERATED_GRAMMAR
  5486. #include "bissphinxexpr.c"
  5487. #else
  5488. #include "yysphinxexpr.c"
  5489. #endif
  5490. #if USE_WINDOWS
  5491. #pragma warning(pop)
  5492. #endif
  5493. //////////////////////////////////////////////////////////////////////////
  5494. ExprParser_t::~ExprParser_t ()
  5495. {
  5496. // i kinda own those things
  5497. ARRAY_FOREACH ( i, m_dNodes )
  5498. {
  5499. if ( m_dNodes[i].m_iToken==TOK_CONST_LIST )
  5500. SafeDelete ( m_dNodes[i].m_pConsts );
  5501. if ( m_dNodes[i].m_iToken==TOK_MAP_ARG )
  5502. SafeDelete ( m_dNodes[i].m_pMapArg );
  5503. }
  5504. // free any UDF calls that weren't taken over
  5505. ARRAY_FOREACH ( i, m_dUdfCalls )
  5506. SafeDelete ( m_dUdfCalls[i] );
  5507. // free temp map arguments storage
  5508. ARRAY_FOREACH ( i, m_dIdents )
  5509. SafeDeleteArray ( m_dIdents[i] );
  5510. }
  5511. ESphAttr ExprParser_t::GetWidestRet ( int iLeft, int iRight )
  5512. {
  5513. ESphAttr uLeftType = ( iLeft<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iLeft].m_eRetType;
  5514. ESphAttr uRightType = ( iRight<0 ) ? SPH_ATTR_INTEGER : m_dNodes[iRight].m_eRetType;
  5515. ESphAttr uRes = SPH_ATTR_FLOAT; // default is float
  5516. if ( ( uLeftType==SPH_ATTR_INTEGER || uLeftType==SPH_ATTR_BIGINT ) &&
  5517. ( uRightType==SPH_ATTR_INTEGER || uRightType==SPH_ATTR_BIGINT ) )
  5518. {
  5519. // both types are integer (int32 or int64), compute in integers
  5520. uRes = ( uLeftType==SPH_ATTR_INTEGER && uRightType==SPH_ATTR_INTEGER )
  5521. ? SPH_ATTR_INTEGER
  5522. : SPH_ATTR_BIGINT;
  5523. }
  5524. // if json vs numeric then return numeric type (for the autoconversion)
  5525. if ( uLeftType==SPH_ATTR_JSON_FIELD && IsNumeric ( uRightType ) )
  5526. uRes = uRightType;
  5527. else if ( uRightType==SPH_ATTR_JSON_FIELD && IsNumeric ( uLeftType ) )
  5528. uRes = uLeftType;
  5529. return uRes;
  5530. }
  5531. int ExprParser_t::AddNodeInt ( int64_t iValue )
  5532. {
  5533. ExprNode_t & tNode = m_dNodes.Add ();
  5534. tNode.m_iToken = TOK_CONST_INT;
  5535. tNode.m_eRetType = GetIntType ( iValue );
  5536. tNode.m_iConst = iValue;
  5537. return m_dNodes.GetLength()-1;
  5538. }
  5539. int ExprParser_t::AddNodeFloat ( float fValue )
  5540. {
  5541. ExprNode_t & tNode = m_dNodes.Add ();
  5542. tNode.m_iToken = TOK_CONST_FLOAT;
  5543. tNode.m_eRetType = SPH_ATTR_FLOAT;
  5544. tNode.m_fConst = fValue;
  5545. return m_dNodes.GetLength()-1;
  5546. }
  5547. int ExprParser_t::AddNodeString ( int64_t iValue )
  5548. {
  5549. ExprNode_t & tNode = m_dNodes.Add ();
  5550. tNode.m_iToken = TOK_CONST_STRING;
  5551. tNode.m_eRetType = SPH_ATTR_STRING;
  5552. tNode.m_iConst = iValue;
  5553. return m_dNodes.GetLength()-1;
  5554. }
  5555. int ExprParser_t::AddNodeAttr ( int iTokenType, uint64_t uAttrLocator )
  5556. {
  5557. assert ( iTokenType==TOK_ATTR_INT || iTokenType==TOK_ATTR_BITS || iTokenType==TOK_ATTR_FLOAT
  5558. || iTokenType==TOK_ATTR_MVA32 || iTokenType==TOK_ATTR_MVA64 || iTokenType==TOK_ATTR_STRING
  5559. || iTokenType==TOK_ATTR_FACTORS || iTokenType==TOK_ATTR_JSON );
  5560. ExprNode_t & tNode = m_dNodes.Add ();
  5561. tNode.m_iToken = iTokenType;
  5562. sphUnpackAttrLocator ( uAttrLocator, &tNode );
  5563. if ( iTokenType==TOK_ATTR_FLOAT ) tNode.m_eRetType = SPH_ATTR_FLOAT;
  5564. else if ( iTokenType==TOK_ATTR_MVA32 ) tNode.m_eRetType = SPH_ATTR_UINT32SET;
  5565. else if ( iTokenType==TOK_ATTR_MVA64 ) tNode.m_eRetType = SPH_ATTR_INT64SET;
  5566. else if ( iTokenType==TOK_ATTR_STRING ) tNode.m_eRetType = SPH_ATTR_STRING;
  5567. else if ( iTokenType==TOK_ATTR_FACTORS ) tNode.m_eRetType = SPH_ATTR_FACTORS;
  5568. else if ( iTokenType==TOK_ATTR_JSON ) tNode.m_eRetType = SPH_ATTR_JSON_FIELD;
  5569. else if ( tNode.m_tLocator.m_iBitCount>32 ) tNode.m_eRetType = SPH_ATTR_BIGINT;
  5570. else tNode.m_eRetType = SPH_ATTR_INTEGER;
  5571. return m_dNodes.GetLength()-1;
  5572. }
  5573. int ExprParser_t::AddNodeID ()
  5574. {
  5575. ExprNode_t & tNode = m_dNodes.Add ();
  5576. tNode.m_iToken = TOK_ID;
  5577. tNode.m_eRetType = USE_64BIT ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER;
  5578. return m_dNodes.GetLength()-1;
  5579. }
  5580. int ExprParser_t::AddNodeWeight ()
  5581. {
  5582. ExprNode_t & tNode = m_dNodes.Add ();
  5583. tNode.m_iToken = TOK_WEIGHT;
  5584. tNode.m_eRetType = SPH_ATTR_BIGINT;
  5585. return m_dNodes.GetLength()-1;
  5586. }
  5587. int ExprParser_t::AddNodeOp ( int iOp, int iLeft, int iRight )
  5588. {
  5589. ExprNode_t & tNode = m_dNodes.Add ();
  5590. tNode.m_iToken = iOp;
  5591. // deduce type
  5592. tNode.m_eRetType = SPH_ATTR_FLOAT; // default to float
  5593. if ( iOp==TOK_NEG )
  5594. {
  5595. // NEG just inherits the type
  5596. tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
  5597. tNode.m_eRetType = tNode.m_eArgType;
  5598. } else if ( iOp==TOK_NOT )
  5599. {
  5600. // NOT result is integer, and its argument must be integer
  5601. tNode.m_eArgType = m_dNodes[iLeft].m_eRetType;
  5602. tNode.m_eRetType = SPH_ATTR_INTEGER;
  5603. if (!( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
  5604. {
  5605. m_sParserError.SetSprintf ( "NOT argument must be integer" );
  5606. return -1;
  5607. }
  5608. } else if ( iOp==TOK_LTE || iOp==TOK_GTE || iOp==TOK_EQ || iOp==TOK_NE
  5609. || iOp=='<' || iOp=='>' || iOp==TOK_AND || iOp==TOK_OR
  5610. || iOp=='+' || iOp=='-' || iOp=='*' || iOp==','
  5611. || iOp=='&' || iOp=='|' || iOp=='%'
  5612. || iOp==TOK_IS_NULL || iOp==TOK_IS_NOT_NULL )
  5613. {
  5614. tNode.m_eArgType = GetWidestRet ( iLeft, iRight );
  5615. // arithmetical operations return arg type, logical return int
  5616. tNode.m_eRetType = ( iOp=='+' || iOp=='-' || iOp=='*' || iOp==',' || iOp=='&' || iOp=='|' || iOp=='%' )
  5617. ? tNode.m_eArgType
  5618. : SPH_ATTR_INTEGER;
  5619. // both logical and bitwise AND/OR can only be over ints
  5620. if ( ( iOp==TOK_AND || iOp==TOK_OR || iOp=='&' || iOp=='|' )
  5621. && !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
  5622. {
  5623. m_sParserError.SetSprintf ( "%s arguments must be integer", ( iOp==TOK_AND || iOp=='&' ) ? "AND" : "OR" );
  5624. return -1;
  5625. }
  5626. // MOD can only be over ints
  5627. if ( iOp=='%'
  5628. && !( tNode.m_eArgType==SPH_ATTR_INTEGER || tNode.m_eArgType==SPH_ATTR_BIGINT ))
  5629. {
  5630. m_sParserError.SetSprintf ( "MOD arguments must be integer" );
  5631. return -1;
  5632. }
  5633. } else
  5634. {
  5635. // check for unknown op
  5636. assert ( iOp=='/' && "unknown op in AddNodeOp() type deducer" );
  5637. }
  5638. tNode.m_iArgs = 0;
  5639. if ( iOp==',' )
  5640. {
  5641. if ( iLeft>=0 ) tNode.m_iArgs += ( m_dNodes[iLeft].m_iToken==',' ) ? m_dNodes[iLeft].m_iArgs : 1;
  5642. if ( iRight>=0 ) tNode.m_iArgs += ( m_dNodes[iRight].m_iToken==',' ) ? m_dNodes[iRight].m_iArgs : 1;
  5643. }
  5644. // argument type conversion for functions like INDEXOF(), ALL() and ANY()
  5645. // we need no conversion for operands of comma!
  5646. if ( iOp!=',' && iLeft>=0 && iRight>=0 )
  5647. {
  5648. if ( m_dNodes[iRight].m_eRetType==SPH_ATTR_STRING && m_dNodes[iLeft].m_iToken==TOK_IDENT )
  5649. m_dNodes[iLeft].m_eRetType = SPH_ATTR_STRING;
  5650. else if ( m_dNodes[iLeft].m_eRetType==SPH_ATTR_STRING && m_dNodes[iRight].m_iToken==TOK_IDENT )
  5651. m_dNodes[iRight].m_eRetType = SPH_ATTR_STRING;
  5652. }
  5653. tNode.m_iLeft = iLeft;
  5654. tNode.m_iRight = iRight;
  5655. return m_dNodes.GetLength()-1;
  5656. }
  5657. int ExprParser_t::AddNodeFunc ( int iFunc, int iFirst, int iSecond, int iThird, int iFourth )
  5658. {
  5659. // regular case, iFirst is entire arglist, iSecond is -1
  5660. // special case for IN(), iFirst is arg, iSecond is constlist
  5661. // special case for REMAP(), iFirst and iSecond are expressions, iThird and iFourth are constlists
  5662. assert ( iFunc>=0 && iFunc< int ( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0]) ) );
  5663. Func_e eFunc = (Func_e)iFunc;
  5664. assert ( g_dFuncs [ iFunc ].m_eFunc==eFunc );
  5665. const char * sFuncName = g_dFuncs [ iFunc ].m_sName;
  5666. // check args count
  5667. if ( iSecond<0 || eFunc==FUNC_IN )
  5668. {
  5669. int iExpectedArgc = g_dFuncs [ iFunc ].m_iArgs;
  5670. int iArgc = 0;
  5671. if ( iFirst>=0 )
  5672. iArgc = ( m_dNodes [ iFirst ].m_iToken==',' ) ? m_dNodes [ iFirst ].m_iArgs : 1;
  5673. if ( eFunc==FUNC_RAND )
  5674. {
  5675. if ( iArgc>1 )
  5676. {
  5677. m_sParserError.SetSprintf ( "%s() called with %d args, either 0 or 1 args expected", sFuncName, iArgc );
  5678. return -1;
  5679. }
  5680. } else if ( iExpectedArgc<0 )
  5681. {
  5682. if ( iArgc<-iExpectedArgc )
  5683. {
  5684. m_sParserError.SetSprintf ( "%s() called with %d args, at least %d args expected", sFuncName, iArgc, -iExpectedArgc );
  5685. return -1;
  5686. }
  5687. } else if ( iArgc!=iExpectedArgc )
  5688. {
  5689. m_sParserError.SetSprintf ( "%s() called with %d args, %d args expected", sFuncName, iArgc, iExpectedArgc );
  5690. return -1;
  5691. }
  5692. }
  5693. // check arg types
  5694. //
  5695. // check for string args
  5696. // most builtin functions take numeric args only
  5697. bool bGotString = false, bGotMva = false;
  5698. CSphVector<ESphAttr> dRetTypes;
  5699. if ( iSecond<0 )
  5700. {
  5701. GatherArgRetTypes ( iFirst, dRetTypes );
  5702. ARRAY_FOREACH ( i, dRetTypes )
  5703. {
  5704. bGotString |= ( dRetTypes[i]==SPH_ATTR_STRING );
  5705. bGotMva |= ( dRetTypes[i]==SPH_ATTR_UINT32SET || dRetTypes[i]==SPH_ATTR_INT64SET );
  5706. }
  5707. }
  5708. if ( bGotString && !( eFunc==FUNC_CRC32 || eFunc==FUNC_EXIST || eFunc==FUNC_POLY2D || eFunc==FUNC_GEOPOLY2D ) )
  5709. {
  5710. m_sParserError.SetSprintf ( "%s() arguments can not be string", sFuncName );
  5711. return -1;
  5712. }
  5713. if ( bGotMva && !( eFunc==FUNC_IN || eFunc==FUNC_TO_STRING || eFunc==FUNC_LENGTH || eFunc==FUNC_LEAST || eFunc==FUNC_GREATEST ) )
  5714. {
  5715. m_sParserError.SetSprintf ( "%s() arguments can not be MVA", sFuncName );
  5716. return -1;
  5717. }
  5718. // check that first BITDOT arg is integer or bigint
  5719. if ( eFunc==FUNC_BITDOT )
  5720. {
  5721. int iLeftmost = iFirst;
  5722. while ( m_dNodes [ iLeftmost ].m_iToken==',' )
  5723. iLeftmost = m_dNodes [ iLeftmost ].m_iLeft;
  5724. ESphAttr eArg = m_dNodes [ iLeftmost ].m_eRetType;
  5725. if ( eArg!=SPH_ATTR_INTEGER && eArg!=SPH_ATTR_BIGINT )
  5726. {
  5727. m_sParserError.SetSprintf ( "first BITDOT() argument must be integer" );
  5728. return -1;
  5729. }
  5730. }
  5731. if ( eFunc==FUNC_EXIST )
  5732. {
  5733. int iExistLeft = m_dNodes [ iFirst ].m_iLeft;
  5734. int iExistRight = m_dNodes [ iFirst ].m_iRight;
  5735. bool bIsLeftGood = ( m_dNodes [ iExistLeft ].m_eRetType==SPH_ATTR_STRING );
  5736. ESphAttr eRight = m_dNodes [ iExistRight ].m_eRetType;
  5737. bool bIsRightGood = ( eRight==SPH_ATTR_INTEGER || eRight==SPH_ATTR_TIMESTAMP || eRight==SPH_ATTR_BOOL
  5738. || eRight==SPH_ATTR_FLOAT || eRight==SPH_ATTR_BIGINT );
  5739. if ( !bIsLeftGood || !bIsRightGood )
  5740. {
  5741. if ( bIsRightGood )
  5742. m_sParserError.SetSprintf ( "first EXIST() argument must be string" );
  5743. else
  5744. m_sParserError.SetSprintf ( "ill-formed EXIST" );
  5745. return -1;
  5746. }
  5747. }
  5748. // check that first SINT or timestamp family arg is integer
  5749. if ( eFunc==FUNC_SINT || eFunc==FUNC_DAY || eFunc==FUNC_MONTH || eFunc==FUNC_YEAR || eFunc==FUNC_YEARMONTH || eFunc==FUNC_YEARMONTHDAY
  5750. || eFunc==FUNC_FIBONACCI || eFunc==FUNC_HOUR || eFunc==FUNC_MINUTE || eFunc==FUNC_SECOND )
  5751. {
  5752. assert ( iFirst>=0 );
  5753. if ( m_dNodes [ iFirst ].m_eRetType!=SPH_ATTR_INTEGER )
  5754. {
  5755. m_sParserError.SetSprintf ( "%s() argument must be integer", sFuncName );
  5756. return -1;
  5757. }
  5758. }
  5759. // check that CONTAINS args are poly, float, float
  5760. if ( eFunc==FUNC_CONTAINS )
  5761. {
  5762. assert ( dRetTypes.GetLength()==3 );
  5763. if ( dRetTypes[0]!=SPH_ATTR_POLY2D )
  5764. {
  5765. m_sParserError.SetSprintf ( "1st CONTAINS() argument must be a 2D polygon (see POLY2D)" );
  5766. return -1;
  5767. }
  5768. if ( !IsNumeric ( dRetTypes[1] ) || !IsNumeric ( dRetTypes[2] ) )
  5769. {
  5770. m_sParserError.SetSprintf ( "2nd and 3rd CONTAINS() arguments must be numeric" );
  5771. return -1;
  5772. }
  5773. }
  5774. // check POLY2D args
  5775. if ( eFunc==FUNC_POLY2D || eFunc==FUNC_GEOPOLY2D )
  5776. {
  5777. if ( dRetTypes.GetLength()==1 )
  5778. {
  5779. // handle 1 arg version, POLY2D(string-attr)
  5780. if ( dRetTypes[0]!=SPH_ATTR_STRING )
  5781. {
  5782. m_sParserError.SetSprintf ( "%s() argument must be a string attribute", sFuncName );
  5783. return -1;
  5784. }
  5785. } else if ( dRetTypes.GetLength()<6 )
  5786. {
  5787. // handle 2..5 arg versions, invalid
  5788. m_sParserError.SetSprintf ( "bad %s() argument count, must be either 1 (string) or 6+ (x/y pairs list)", sFuncName );
  5789. return -1;
  5790. } else
  5791. {
  5792. // handle 6+ arg version, POLY2D(xy-list)
  5793. if ( dRetTypes.GetLength() & 1 )
  5794. {
  5795. m_sParserError.SetSprintf ( "bad %s() argument count, must be even", sFuncName );
  5796. return -1;
  5797. }
  5798. ARRAY_FOREACH ( i, dRetTypes )
  5799. if ( !IsNumeric ( dRetTypes[i] ) )
  5800. {
  5801. m_sParserError.SetSprintf ( "%s() argument %d must be numeric", sFuncName, 1+i );
  5802. return -1;
  5803. }
  5804. }
  5805. }
  5806. // check that BM25F args are float, float [, {file_name=weight}]
  5807. if ( eFunc==FUNC_BM25F )
  5808. {
  5809. if ( dRetTypes.GetLength()>3 )
  5810. {
  5811. m_sParserError.SetSprintf ( "%s() called with %d args, at most 3 args expected", sFuncName, dRetTypes.GetLength() );
  5812. return -1;
  5813. }
  5814. if ( dRetTypes[0]!=SPH_ATTR_FLOAT || dRetTypes[1]!=SPH_ATTR_FLOAT )
  5815. {
  5816. m_sParserError.SetSprintf ( "%s() arguments 1,2 must be numeric", sFuncName );
  5817. return -1;
  5818. }
  5819. if ( dRetTypes.GetLength()==3 && dRetTypes[2]!=SPH_ATTR_MAPARG )
  5820. {
  5821. m_sParserError.SetSprintf ( "%s() argument 3 must be map", sFuncName );
  5822. return -1;
  5823. }
  5824. }
  5825. // check GEODIST args count, and that optional arg 5 is a map argument
  5826. if ( eFunc==FUNC_GEODIST )
  5827. {
  5828. if ( dRetTypes.GetLength()>5 )
  5829. {
  5830. m_sParserError.SetSprintf ( "%s() called with %d args, at most 5 args expected", sFuncName, dRetTypes.GetLength() );
  5831. return -1;
  5832. }
  5833. if ( dRetTypes.GetLength()==5 && dRetTypes[4]!=SPH_ATTR_MAPARG )
  5834. {
  5835. m_sParserError.SetSprintf ( "%s() argument 5 must be map", sFuncName );
  5836. return -1;
  5837. }
  5838. }
  5839. // check REMAP(expr, expr, (constlist), (constlist)) args
  5840. if ( eFunc==FUNC_REMAP )
  5841. {
  5842. if ( m_dNodes [ iFirst ].m_iToken==TOK_IDENT )
  5843. {
  5844. m_sParserError.SetSprintf ( "%s() incorrect first argument (not integer?)", sFuncName );
  5845. return 1;
  5846. }
  5847. if ( m_dNodes [ iSecond ].m_iToken==TOK_IDENT )
  5848. {
  5849. m_sParserError.SetSprintf ( "%s() incorrect second argument (not integer/float?)", sFuncName );
  5850. return 1;
  5851. }
  5852. ESphAttr eFirstRet = m_dNodes [ iFirst ].m_eRetType;
  5853. ESphAttr eSecondRet = m_dNodes [ iSecond ].m_eRetType;
  5854. if ( eFirstRet!=SPH_ATTR_INTEGER && eFirstRet!=SPH_ATTR_BIGINT )
  5855. {
  5856. m_sParserError.SetSprintf ( "%s() first argument should result in integer value", sFuncName );
  5857. return -1;
  5858. }
  5859. if ( eSecondRet!=SPH_ATTR_INTEGER && eSecondRet!=SPH_ATTR_BIGINT && eSecondRet!=SPH_ATTR_FLOAT )
  5860. {
  5861. m_sParserError.SetSprintf ( "%s() second argument should result in integer or float value", sFuncName );
  5862. return -1;
  5863. }
  5864. ConstList_c & tThirdList = *m_dNodes [ iThird ].m_pConsts;
  5865. ConstList_c & tFourthList = *m_dNodes [ iFourth ].m_pConsts;
  5866. if ( tThirdList.m_dInts.GetLength()==0 )
  5867. {
  5868. m_sParserError.SetSprintf ( "%s() first constlist should consist of integer values", sFuncName );
  5869. return -1;
  5870. }
  5871. if ( tThirdList.m_dInts.GetLength()!=tFourthList.m_dInts.GetLength() &&
  5872. tThirdList.m_dInts.GetLength()!=tFourthList.m_dFloats.GetLength() )
  5873. {
  5874. m_sParserError.SetSprintf ( "%s() both constlists should have the same length", sFuncName );
  5875. return -1;
  5876. }
  5877. if ( eSecondRet==SPH_ATTR_FLOAT && tFourthList.m_dFloats.GetLength()==0 )
  5878. {
  5879. m_sParserError.SetSprintf ( "%s() second argument results in float value and thus fourth argument should be a list of floats", sFuncName );
  5880. return -1;
  5881. }
  5882. if ( eSecondRet!=SPH_ATTR_FLOAT && tFourthList.m_dInts.GetLength()==0 )
  5883. {
  5884. m_sParserError.SetSprintf ( "%s() second argument results in integer value and thus fourth argument should be a list of integers", sFuncName );
  5885. return -1;
  5886. }
  5887. }
  5888. if ( eFunc==FUNC_RAND )
  5889. {
  5890. if ( iFirst>=0 && !IsNumeric ( m_dNodes [ iFirst ].m_eRetType ) )
  5891. {
  5892. m_sParserError.SetSprintf ( "%s() argument must be numeric", sFuncName );
  5893. return -1;
  5894. }
  5895. }
  5896. // do add
  5897. ExprNode_t & tNode = m_dNodes.Add ();
  5898. tNode.m_iToken = TOK_FUNC;
  5899. tNode.m_iFunc = iFunc;
  5900. tNode.m_iLeft = iFirst;
  5901. tNode.m_iRight = iSecond;
  5902. tNode.m_eArgType = ( iFirst>=0 ) ? m_dNodes [ iFirst ].m_eRetType : SPH_ATTR_INTEGER;
  5903. tNode.m_eRetType = g_dFuncs [ iFunc ].m_eRet;
  5904. // fixup return type in a few special cases
  5905. if ( eFunc==FUNC_MIN || eFunc==FUNC_MAX || eFunc==FUNC_MADD || eFunc==FUNC_MUL3 || eFunc==FUNC_ABS || eFunc==FUNC_IDIV )
  5906. tNode.m_eRetType = tNode.m_eArgType;
  5907. if ( eFunc==FUNC_EXIST )
  5908. {
  5909. int iExistRight = m_dNodes [ iFirst ].m_iRight;
  5910. ESphAttr eType = m_dNodes [ iExistRight ].m_eRetType;
  5911. tNode.m_eArgType = eType;
  5912. tNode.m_eRetType = eType;
  5913. }
  5914. if ( eFunc==FUNC_BIGINT && tNode.m_eRetType==SPH_ATTR_FLOAT )
  5915. tNode.m_eRetType = SPH_ATTR_FLOAT; // enforce if we can; FIXME! silently ignores BIGINT() on floats; should warn or raise an error
  5916. if ( eFunc==FUNC_IF || eFunc==FUNC_BITDOT )
  5917. tNode.m_eRetType = GetWidestRet ( iFirst, iSecond );
  5918. // fixup MVA return type according to the leftmost argument
  5919. if ( eFunc==FUNC_GREATEST || eFunc==FUNC_LEAST )
  5920. {
  5921. int iLeftmost = iFirst;
  5922. while ( m_dNodes [ iLeftmost ].m_iToken==',' )
  5923. iLeftmost = m_dNodes [ iLeftmost ].m_iLeft;
  5924. ESphAttr eArg = m_dNodes [ iLeftmost ].m_eRetType;
  5925. if ( eArg==SPH_ATTR_INT64SET )
  5926. tNode.m_eRetType = SPH_ATTR_BIGINT;
  5927. if ( eArg==SPH_ATTR_UINT32SET )
  5928. tNode.m_eRetType = SPH_ATTR_INTEGER;
  5929. }
  5930. if ( eFunc==FUNC_REMAP )
  5931. {
  5932. // function return type depends on second expression
  5933. tNode.m_eRetType = m_dNodes [ iSecond ].m_eRetType;
  5934. }
  5935. // all ok
  5936. assert ( tNode.m_eRetType!=SPH_ATTR_NONE );
  5937. return m_dNodes.GetLength()-1;
  5938. }
  5939. int ExprParser_t::AddNodeUdf ( int iCall, int iArg )
  5940. {
  5941. UdfCall_t * pCall = m_dUdfCalls[iCall];
  5942. SPH_UDF_INIT & tInit = pCall->m_tInit;
  5943. SPH_UDF_ARGS & tArgs = pCall->m_tArgs;
  5944. // initialize UDF right here, at AST creation stage
  5945. // just because it's easy to gather arg types here
  5946. if ( iArg>=0 )
  5947. {
  5948. // gather arg types
  5949. CSphVector<DWORD> dArgTypes;
  5950. int iCur = iArg;
  5951. while ( iCur>=0 )
  5952. {
  5953. if ( m_dNodes[iCur].m_iToken!=',' )
  5954. {
  5955. const ExprNode_t & tNode = m_dNodes[iCur];
  5956. if ( tNode.m_iToken==TOK_FUNC && ( tNode.m_iFunc==FUNC_PACKEDFACTORS || tNode.m_iFunc==FUNC_RANKFACTORS || tNode.m_iFunc==FUNC_FACTORS ) )
  5957. pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
  5958. if ( tNode.m_eRetType==SPH_ATTR_JSON || tNode.m_eRetType==SPH_ATTR_JSON_FIELD )
  5959. pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
  5960. dArgTypes.Add ( tNode.m_eRetType );
  5961. break;
  5962. }
  5963. int iRight = m_dNodes[iCur].m_iRight;
  5964. if ( iRight>=0 )
  5965. {
  5966. const ExprNode_t & tNode = m_dNodes[iRight];
  5967. assert ( tNode.m_iToken!=',' );
  5968. if ( tNode.m_iToken==TOK_FUNC && ( tNode.m_iFunc==FUNC_PACKEDFACTORS || tNode.m_iFunc==FUNC_RANKFACTORS || tNode.m_iFunc==FUNC_FACTORS) )
  5969. pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
  5970. if ( tNode.m_eRetType==SPH_ATTR_JSON || tNode.m_eRetType==SPH_ATTR_JSON_FIELD )
  5971. pCall->m_dArgs2Free.Add ( dArgTypes.GetLength() );
  5972. dArgTypes.Add ( tNode.m_eRetType );
  5973. }
  5974. iCur = m_dNodes[iCur].m_iLeft;
  5975. }
  5976. assert ( dArgTypes.GetLength() );
  5977. tArgs.arg_count = dArgTypes.GetLength();
  5978. tArgs.arg_types = new sphinx_udf_argtype [ tArgs.arg_count ];
  5979. // we gathered internal type ids in right-to-left order
  5980. // reverse and remap
  5981. // FIXME! eliminate remap, maybe?
  5982. ARRAY_FOREACH ( i, dArgTypes )
  5983. {
  5984. sphinx_udf_argtype & eRes = tArgs.arg_types [ tArgs.arg_count-1-i ];
  5985. switch ( dArgTypes[i] )
  5986. {
  5987. case SPH_ATTR_INTEGER:
  5988. case SPH_ATTR_TIMESTAMP:
  5989. case SPH_ATTR_BOOL:
  5990. eRes = SPH_UDF_TYPE_UINT32;
  5991. break;
  5992. case SPH_ATTR_FLOAT:
  5993. eRes = SPH_UDF_TYPE_FLOAT;
  5994. break;
  5995. case SPH_ATTR_BIGINT:
  5996. eRes = SPH_UDF_TYPE_INT64;
  5997. break;
  5998. case SPH_ATTR_STRING:
  5999. eRes = SPH_UDF_TYPE_STRING;
  6000. break;
  6001. case SPH_ATTR_UINT32SET:
  6002. eRes = SPH_UDF_TYPE_UINT32SET;
  6003. break;
  6004. case SPH_ATTR_INT64SET:
  6005. eRes = SPH_UDF_TYPE_UINT64SET;
  6006. break;
  6007. case SPH_ATTR_FACTORS:
  6008. eRes = SPH_UDF_TYPE_FACTORS;
  6009. break;
  6010. case SPH_ATTR_JSON_FIELD:
  6011. eRes = SPH_UDF_TYPE_JSON;
  6012. break;
  6013. default:
  6014. m_sParserError.SetSprintf ( "internal error: unmapped UDF argument type (arg=%d, type=%d)", i, dArgTypes[i] );
  6015. return -1;
  6016. }
  6017. }
  6018. ARRAY_FOREACH ( i, pCall->m_dArgs2Free )
  6019. pCall->m_dArgs2Free[i] = tArgs.arg_count - 1 - pCall->m_dArgs2Free[i];
  6020. }
  6021. // init
  6022. if ( pCall->m_pUdf->m_fnInit )
  6023. {
  6024. char sError [ SPH_UDF_ERROR_LEN ];
  6025. if ( pCall->m_pUdf->m_fnInit ( &tInit, &tArgs, sError ) )
  6026. {
  6027. m_sParserError = sError;
  6028. return -1;
  6029. }
  6030. }
  6031. // do add
  6032. ExprNode_t & tNode = m_dNodes.Add ();
  6033. tNode.m_iToken = TOK_UDF;
  6034. tNode.m_iFunc = iCall;
  6035. tNode.m_iLeft = iArg;
  6036. tNode.m_iRight = -1;
  6037. // deduce type
  6038. tNode.m_eArgType = ( iArg>=0 ) ? m_dNodes[iArg].m_eRetType : SPH_ATTR_INTEGER;
  6039. tNode.m_eRetType = pCall->m_pUdf->m_eRetType;
  6040. return m_dNodes.GetLength()-1;
  6041. }
  6042. int ExprParser_t::AddNodePF ( int iFunc, int iArg )
  6043. {
  6044. assert ( iFunc>=0 && iFunc< int ( sizeof ( g_dFuncs )/sizeof ( g_dFuncs[0]) ) );
  6045. const char * sFuncName = g_dFuncs [ iFunc ].m_sName;
  6046. CSphVector<ESphAttr> dRetTypes;
  6047. GatherArgRetTypes ( iArg, dRetTypes );
  6048. assert ( dRetTypes.GetLength()==0 || dRetTypes.GetLength()==1 );
  6049. if ( dRetTypes.GetLength()==1 && dRetTypes[0]!=SPH_ATTR_MAPARG )
  6050. {
  6051. m_sParserError.SetSprintf ( "%s() argument must be a map", sFuncName );
  6052. return -1;
  6053. }
  6054. ExprNode_t & tNode = m_dNodes.Add ();
  6055. tNode.m_iToken = TOK_FUNC;
  6056. tNode.m_iFunc = iFunc;
  6057. tNode.m_iLeft = iArg;
  6058. tNode.m_iRight = -1;
  6059. tNode.m_eArgType = SPH_ATTR_MAPARG;
  6060. tNode.m_eRetType = g_dFuncs[iFunc].m_eRet;
  6061. return m_dNodes.GetLength()-1;
  6062. }
  6063. int ExprParser_t::AddNodeConstlist ( int64_t iValue )
  6064. {
  6065. ExprNode_t & tNode = m_dNodes.Add();
  6066. tNode.m_iToken = TOK_CONST_LIST;
  6067. tNode.m_pConsts = new ConstList_c();
  6068. tNode.m_pConsts->Add ( iValue );
  6069. tNode.m_pConsts->m_sExpr = m_sExpr;
  6070. return m_dNodes.GetLength()-1;
  6071. }
  6072. int ExprParser_t::AddNodeConstlist ( float iValue )
  6073. {
  6074. ExprNode_t & tNode = m_dNodes.Add();
  6075. tNode.m_iToken = TOK_CONST_LIST;
  6076. tNode.m_pConsts = new ConstList_c();
  6077. tNode.m_pConsts->Add ( iValue );
  6078. return m_dNodes.GetLength()-1;
  6079. }
  6080. void ExprParser_t::AppendToConstlist ( int iNode, int64_t iValue )
  6081. {
  6082. m_dNodes[iNode].m_pConsts->Add ( iValue );
  6083. }
  6084. void ExprParser_t::AppendToConstlist ( int iNode, float iValue )
  6085. {
  6086. m_dNodes[iNode].m_pConsts->Add ( iValue );
  6087. }
  6088. int ExprParser_t::AddNodeUservar ( int iUservar )
  6089. {
  6090. ExprNode_t & tNode = m_dNodes.Add();
  6091. tNode.m_iToken = TOK_USERVAR;
  6092. tNode.m_iConst = iUservar;
  6093. return m_dNodes.GetLength()-1;
  6094. }
  6095. int ExprParser_t::AddNodeHookIdent ( int iID )
  6096. {
  6097. ExprNode_t & tNode = m_dNodes.Add();
  6098. tNode.m_iToken = TOK_HOOK_IDENT;
  6099. tNode.m_iFunc = iID;
  6100. tNode.m_eRetType = m_pHook->GetIdentType ( iID );
  6101. return m_dNodes.GetLength()-1;
  6102. }
  6103. int ExprParser_t::AddNodeHookFunc ( int iID, int iLeft )
  6104. {
  6105. CSphVector<ESphAttr> dArgTypes;
  6106. GatherArgRetTypes ( iLeft, dArgTypes );
  6107. ESphAttr eRet = m_pHook->GetReturnType ( iID, dArgTypes, CheckForConstSet ( iLeft, 0 ), m_sParserError );
  6108. if ( eRet==SPH_ATTR_NONE )
  6109. return -1;
  6110. ExprNode_t & tNode = m_dNodes.Add();
  6111. tNode.m_iToken = TOK_HOOK_FUNC;
  6112. tNode.m_iFunc = iID;
  6113. tNode.m_iLeft = iLeft;
  6114. tNode.m_iRight = -1;
  6115. // deduce type
  6116. tNode.m_eArgType = ( iLeft>=0 ) ? m_dNodes[iLeft].m_eRetType : SPH_ATTR_INTEGER;
  6117. tNode.m_eRetType = eRet;
  6118. return m_dNodes.GetLength()-1;
  6119. }
  6120. int ExprParser_t::AddNodeMapArg ( const char * sKey, const char * sValue, int64_t iValue )
  6121. {
  6122. ExprNode_t & tNode = m_dNodes.Add();
  6123. tNode.m_iToken = TOK_MAP_ARG;
  6124. tNode.m_pMapArg = new MapArg_c();
  6125. tNode.m_pMapArg->Add ( sKey, sValue, iValue );
  6126. tNode.m_eRetType = SPH_ATTR_MAPARG;
  6127. return m_dNodes.GetLength()-1;
  6128. }
  6129. void ExprParser_t::AppendToMapArg ( int iNode, const char * sKey, const char * sValue, int64_t iValue )
  6130. {
  6131. m_dNodes[iNode].m_pMapArg->Add ( sKey, sValue, iValue );
  6132. }
  6133. const char * ExprParser_t::Attr2Ident ( uint64_t uAttrLoc )
  6134. {
  6135. ExprNode_t tAttr;
  6136. sphUnpackAttrLocator ( uAttrLoc, &tAttr );
  6137. CSphString sIdent;
  6138. sIdent = m_pSchema->GetAttr ( tAttr.m_iLocator ).m_sName;
  6139. m_dIdents.Add ( sIdent.Leak() );
  6140. return m_dIdents.Last();
  6141. }
  6142. int ExprParser_t::AddNodeJsonField ( uint64_t uAttrLocator, int iLeft )
  6143. {
  6144. int iNode = AddNodeAttr ( TOK_ATTR_JSON, uAttrLocator );
  6145. m_dNodes[iNode].m_iLeft = iLeft;
  6146. return m_dNodes.GetLength()-1;
  6147. }
  6148. int ExprParser_t::AddNodeJsonSubkey ( int64_t iValue )
  6149. {
  6150. ExprNode_t & tNode = m_dNodes.Add ();
  6151. tNode.m_iToken = TOK_SUBKEY;
  6152. tNode.m_eRetType = SPH_ATTR_STRING;
  6153. tNode.m_iConst = iValue;
  6154. return m_dNodes.GetLength()-1;
  6155. }
  6156. int ExprParser_t::AddNodeDotNumber ( int64_t iValue )
  6157. {
  6158. ExprNode_t & tNode = m_dNodes.Add ();
  6159. tNode.m_iToken = TOK_CONST_FLOAT;
  6160. tNode.m_eRetType = SPH_ATTR_FLOAT;
  6161. const char * pCur = m_sExpr + (int)( iValue>>32 );
  6162. tNode.m_fConst = (float) strtod ( pCur-1, NULL );
  6163. return m_dNodes.GetLength()-1;
  6164. }
  6165. int ExprParser_t::AddNodeIdent ( const char * sKey, int iLeft )
  6166. {
  6167. ExprNode_t & tNode = m_dNodes.Add ();
  6168. tNode.m_sIdent = sKey;
  6169. tNode.m_iLeft = iLeft;
  6170. tNode.m_iToken = TOK_IDENT;
  6171. tNode.m_eRetType = SPH_ATTR_JSON_FIELD;
  6172. return m_dNodes.GetLength()-1;
  6173. }
  6174. //////////////////////////////////////////////////////////////////////////
  6175. // performs simple semantic analysis
  6176. // checks operand types for some arithmetic operators
  6177. struct TypeCheck_fn
  6178. {
  6179. CSphString m_sError;
  6180. void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & dNodes )
  6181. {
  6182. if ( !m_sError.IsEmpty() )
  6183. return;
  6184. bool bNumberOp = tNode.m_iToken=='+' || tNode.m_iToken=='-' || tNode.m_iToken=='*' || tNode.m_iToken=='/';
  6185. if ( bNumberOp )
  6186. {
  6187. bool bLeftNumeric = tNode.m_iLeft==-1 ? false : IsNumericNode ( dNodes[tNode.m_iLeft] );
  6188. bool bRightNumeric = tNode.m_iRight==-1 ? false : IsNumericNode ( dNodes[tNode.m_iRight] );
  6189. // if json vs numeric then let it pass (for the autoconversion)
  6190. if ( ( bLeftNumeric && dNodes[tNode.m_iRight].m_eRetType==SPH_ATTR_JSON_FIELD )
  6191. || ( bRightNumeric && dNodes[tNode.m_iLeft].m_eRetType==SPH_ATTR_JSON_FIELD ) )
  6192. return;
  6193. if ( !bLeftNumeric || !bRightNumeric )
  6194. {
  6195. m_sError = "numeric operation applied to non-numeric operands";
  6196. return;
  6197. }
  6198. }
  6199. if ( tNode.m_iToken==TOK_EQ )
  6200. {
  6201. // string equal must work with string columns only
  6202. ESphAttr eLeftRet = tNode.m_iLeft==-1 ? SPH_ATTR_NONE : dNodes[tNode.m_iLeft].m_eRetType;
  6203. ESphAttr eRightRet = tNode.m_iRight==-1 ? SPH_ATTR_NONE : dNodes[tNode.m_iRight].m_eRetType;
  6204. bool bLeftStr = ( eLeftRet==SPH_ATTR_STRING || eLeftRet==SPH_ATTR_STRINGPTR || eLeftRet==SPH_ATTR_JSON_FIELD );
  6205. bool bRightStr = ( eRightRet==SPH_ATTR_STRING || eRightRet==SPH_ATTR_STRINGPTR || eRightRet==SPH_ATTR_JSON_FIELD );
  6206. if ( bLeftStr!=bRightStr && eLeftRet!=SPH_ATTR_JSON_FIELD && eRightRet!=SPH_ATTR_JSON_FIELD )
  6207. {
  6208. m_sError = "equal operation applied to part string operands";
  6209. return;
  6210. }
  6211. }
  6212. }
  6213. void Exit ( const ExprNode_t & )
  6214. {}
  6215. bool IsNumericNode ( const ExprNode_t & tNode )
  6216. {
  6217. return tNode.m_eRetType==SPH_ATTR_INTEGER || tNode.m_eRetType==SPH_ATTR_BOOL || tNode.m_eRetType==SPH_ATTR_FLOAT ||
  6218. tNode.m_eRetType==SPH_ATTR_BIGINT || tNode.m_eRetType==SPH_ATTR_TOKENCOUNT || tNode.m_eRetType==SPH_ATTR_TIMESTAMP;
  6219. }
  6220. };
  6221. // checks whether we have a WEIGHT() in expression
  6222. struct WeightCheck_fn
  6223. {
  6224. bool * m_pRes;
  6225. explicit WeightCheck_fn ( bool * pRes )
  6226. : m_pRes ( pRes )
  6227. {
  6228. assert ( m_pRes );
  6229. *m_pRes = false;
  6230. }
  6231. void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & )
  6232. {
  6233. if ( tNode.m_iToken==TOK_WEIGHT )
  6234. *m_pRes = true;
  6235. }
  6236. void Exit ( const ExprNode_t & )
  6237. {}
  6238. };
  6239. // checks whether expression has functions defined not in this file like
  6240. // searchd-level function or ranker-level functions
  6241. struct HookCheck_fn
  6242. {
  6243. ISphExprHook * m_pHook;
  6244. explicit HookCheck_fn ( ISphExprHook * pHook )
  6245. : m_pHook ( pHook )
  6246. {}
  6247. void Enter ( const ExprNode_t & tNode, const CSphVector<ExprNode_t> & )
  6248. {
  6249. if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
  6250. m_pHook->CheckEnter ( tNode.m_iFunc );
  6251. }
  6252. void Exit ( const ExprNode_t & tNode )
  6253. {
  6254. if ( tNode.m_iToken==TOK_HOOK_IDENT || tNode.m_iToken==TOK_HOOK_FUNC )
  6255. m_pHook->CheckExit ( tNode.m_iFunc );
  6256. }
  6257. };
  6258. ISphExpr * ExprParser_t::Parse ( const char * sExpr, const ISphSchema & tSchema,
  6259. ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError )
  6260. {
  6261. m_sLexerError = "";
  6262. m_sParserError = "";
  6263. m_sCreateError = "";
  6264. // setup lexer
  6265. m_sExpr = sExpr;
  6266. m_pCur = sExpr;
  6267. m_pSchema = &tSchema;
  6268. // setup constant functions
  6269. m_iConstNow = (int) time ( NULL );
  6270. // build abstract syntax tree
  6271. m_iParsed = -1;
  6272. yyparse ( this );
  6273. // handle errors
  6274. if ( m_iParsed<0 || !m_sLexerError.IsEmpty() || !m_sParserError.IsEmpty() )
  6275. {
  6276. sError = !m_sLexerError.IsEmpty() ? m_sLexerError : m_sParserError;
  6277. if ( sError.IsEmpty() ) sError = "general parsing error";
  6278. return NULL;
  6279. }
  6280. // deduce return type
  6281. ESphAttr eAttrType = m_dNodes[m_iParsed].m_eRetType;
  6282. // Check expression stack to fit for mutual recursive function calls.
  6283. // This check is an approximation, because different compilers with
  6284. // different settings produce code which requires different stack size.
  6285. if ( m_dNodes.GetLength()>100 )
  6286. {
  6287. CSphVector<int> dNodes;
  6288. dNodes.Reserve ( m_dNodes.GetLength()/2 );
  6289. int iMaxHeight = 1;
  6290. int iHeight = 1;
  6291. dNodes.Add ( m_iParsed );
  6292. while ( dNodes.GetLength() )
  6293. {
  6294. const ExprNode_t & tExpr = m_dNodes[dNodes.Pop()];
  6295. iHeight += ( tExpr.m_iLeft>=0 || tExpr.m_iRight>=0 ? 1 : -1 );
  6296. iMaxHeight = Max ( iMaxHeight, iHeight );
  6297. if ( tExpr.m_iRight>=0 )
  6298. dNodes.Add ( tExpr.m_iRight );
  6299. if ( tExpr.m_iLeft>=0 )
  6300. dNodes.Add ( tExpr.m_iLeft );
  6301. }
  6302. #define SPH_EXPRNODE_STACK_SIZE 160
  6303. int64_t iExprStack = sphGetStackUsed() + iMaxHeight*SPH_EXPRNODE_STACK_SIZE;
  6304. if ( g_iThreadStackSize<=iExprStack )
  6305. {
  6306. sError.SetSprintf ( "query too complex, not enough stack (thread_stack=%dK or higher required)",
  6307. (int)( ( iExprStack + 1024 - ( iExprStack%1024 ) ) / 1024 ) );
  6308. return NULL;
  6309. }
  6310. }
  6311. // perform optimizations (tree transformations)
  6312. Optimize ( m_iParsed );
  6313. #if 0
  6314. Dump ( m_iParsed );
  6315. fflush ( stdout );
  6316. #endif
  6317. // simple semantic analysis
  6318. TypeCheck_fn tTypeChecker;
  6319. WalkTree ( m_iParsed, tTypeChecker );
  6320. if ( !tTypeChecker.m_sError.IsEmpty() )
  6321. {
  6322. sError.Swap ( tTypeChecker.m_sError );
  6323. return NULL;
  6324. }
  6325. // create evaluator
  6326. ISphExpr * pRes = CreateTree ( m_iParsed );
  6327. if ( !m_sCreateError.IsEmpty() )
  6328. {
  6329. sError = m_sCreateError;
  6330. SafeRelease ( pRes );
  6331. } else if ( !pRes )
  6332. {
  6333. sError.SetSprintf ( "empty expression" );
  6334. }
  6335. if ( pAttrType )
  6336. *pAttrType = eAttrType;
  6337. if ( pUsesWeight )
  6338. {
  6339. WeightCheck_fn tWeightFunctor ( pUsesWeight );
  6340. WalkTree ( m_iParsed, tWeightFunctor );
  6341. }
  6342. if ( m_pHook )
  6343. {
  6344. HookCheck_fn tHookFunctor ( m_pHook );
  6345. WalkTree ( m_iParsed, tHookFunctor );
  6346. }
  6347. return pRes;
  6348. }
  6349. //////////////////////////////////////////////////////////////////////////
  6350. // PUBLIC STUFF
  6351. //////////////////////////////////////////////////////////////////////////
  6352. /// parser entry point
  6353. ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight,
  6354. CSphString & sError, CSphQueryProfile * pProfiler, ESphCollation eCollation, ISphExprHook * pHook, bool * pZonespanlist, DWORD * pPackedFactorsFlags, ESphEvalStage * pEvalStage )
  6355. {
  6356. // parse into opcodes
  6357. ExprParser_t tParser ( pHook, pProfiler, eCollation );
  6358. ISphExpr * pRes = tParser.Parse ( sExpr, tSchema, pAttrType, pUsesWeight, sError );
  6359. if ( pZonespanlist )
  6360. *pZonespanlist = tParser.m_bHasZonespanlist;
  6361. if ( pEvalStage )
  6362. *pEvalStage = tParser.m_eEvalStage;
  6363. if ( pPackedFactorsFlags )
  6364. *pPackedFactorsFlags = tParser.m_uPackedFactorFlags;
  6365. return pRes;
  6366. }
  6367. /// json type autoconversion
  6368. ISphExpr * sphJsonFieldConv ( ISphExpr * pExpr )
  6369. {
  6370. return new Expr_JsonFieldConv_c ( pExpr );
  6371. }
  6372. //
  6373. // $Id$
  6374. //