sphinxsort.cpp 227 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
7727872797280728172827283728472857286728772887289729072917292729372947295729672977298729973007301730273037304730573067307730873097310731173127313731473157316731773187319732073217322732373247325732673277328732973307331733273337334733573367337733873397340734173427343734473457346734773487349735073517352735373547355735673577358735973607361736273637364736573667367736873697370737173727373737473757376737773787379738073817382738373847385738673877388738973907391739273937394739573967397739873997400740174027403740474057406740774087409741074117412741374147415741674177418741974207421742274237424742574267427742874297430743174327433743474357436743774387439744074417442744374447445744674477448744974507451745274537454745574567457745874597460746174627463746474657466746774687469747074717472747374747475747674777478747974807481748274837484748574867487
  1. //
  2. // Copyright (c) 2017-2024, Manticore Software LTD (https://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org/
  11. //
  12. #include "sphinxsort.h"
  13. #include "sphinxint.h"
  14. #include "sphinxjson.h"
  15. #include "attribute.h"
  16. #include "collation.h"
  17. #include "memio.h"
  18. #include "columnargrouper.h"
  19. #include "columnarexpr.h"
  20. #include "exprtraits.h"
  21. #include "columnarsort.h"
  22. #include "sortcomp.h"
  23. #include "conversion.h"
  24. #include "docstore.h"
  25. #include "schema/rset.h"
  26. #include "aggregate.h"
  27. #include "distinct.h"
  28. #include "netreceive_ql.h"
  29. #include "knnmisc.h"
  30. #include <ctime>
  31. #include <cmath>
  32. #if !_WIN32
  33. #include <unistd.h>
  34. #include <sys/time.h>
  35. #endif
// process-wide default for accurate aggregation; set via SetAccurateAggregationDefault()
static bool g_bAccurateAggregation = false;
// process-wide default threshold for distinct-value estimation; set via SetDistinctThreshDefault()
static int g_iDistinctThresh = 3500;
/// set the process-wide default for accurate aggregation
void SetAccurateAggregationDefault ( bool bEnabled )
{
	g_bAccurateAggregation = bEnabled;
}
/// fetch the process-wide default for accurate aggregation
bool GetAccurateAggregationDefault()
{
	return g_bAccurateAggregation;
}
/// set the process-wide default distinct-value threshold
void SetDistinctThreshDefault ( int iThresh )
{
	g_iDistinctThresh = iThresh;
}
/// fetch the process-wide default distinct-value threshold
int GetDistinctThreshDefault()
{
	return g_iDistinctThresh;
}
  54. void sphFixupLocator ( CSphAttrLocator & tLocator, const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
  55. {
  56. // first time schema setup?
  57. if ( !pOldSchema )
  58. return;
  59. if ( tLocator.m_iBlobAttrId==-1 && tLocator.m_iBitCount==-1 )
  60. return;
  61. assert ( pNewSchema );
  62. for ( int i = 0; i < pOldSchema->GetAttrsCount(); i++ )
  63. {
  64. const CSphColumnInfo & tAttr = pOldSchema->GetAttr(i);
  65. if ( tLocator==tAttr.m_tLocator )
  66. {
  67. const CSphColumnInfo * pAttrInNewSchema = pNewSchema->GetAttr ( tAttr.m_sName.cstr() );
  68. if ( pAttrInNewSchema )
  69. {
  70. tLocator = pAttrInNewSchema->m_tLocator;
  71. return;
  72. }
  73. }
  74. }
  75. }
  76. namespace {
  77. const char g_sIntAttrPrefix[] = "@int_attr_";
  78. const char g_sIntJsonPrefix[] = "@groupbystr";
  79. template <typename FN>
  80. void FnSortGetStringRemap ( const ISphSchema & tDstSchema, const ISphSchema & tSrcSchema, FN fnProcess )
  81. {
  82. for ( int i = 0; i<tDstSchema.GetAttrsCount (); ++i )
  83. {
  84. const CSphColumnInfo & tDst = tDstSchema.GetAttr ( i );
  85. // remap only static strings
  86. if ( tDst.m_eAttrType==SPH_ATTR_STRINGPTR || !IsSortStringInternal ( tDst.m_sName ) )
  87. continue;
  88. auto iSrcCol = tSrcSchema.GetAttrIndex ( tDst.m_sName.cstr ()+sizeof ( g_sIntAttrPrefix )-1 );
  89. if ( iSrcCol!=-1 ) // skip internal attributes received from agents
  90. fnProcess ( iSrcCol, i );
  91. }
  92. }
  93. } // unnamed (static) namespace
  94. int GetStringRemapCount ( const ISphSchema & tDstSchema, const ISphSchema & tSrcSchema )
  95. {
  96. int iMaps = 0;
  97. FnSortGetStringRemap ( tDstSchema, tSrcSchema, [&iMaps] ( int, int ) { ++iMaps; } );
  98. return iMaps;
  99. }
  100. //////////////////////////////////////////////////////////////////////////
/// helper that rebuilds a schema for standalone result sets:
/// copies attributes from an old schema into a new one, converting
/// plain/columnar attribute types into their ptr counterparts
class TransformedSchemaBuilder_c
{
public:
			TransformedSchemaBuilder_c ( const ISphSchema & tOldSchema, CSphSchema & tNewSchema );

	/// copy the named attribute from the old schema into the new one (no-op if absent)
	void	AddAttr ( const CSphString & sName );

private:
	const ISphSchema &	m_tOldSchema;	// source schema (not owned)
	CSphSchema &		m_tNewSchema;	// target schema being built (not owned)

	// turn a columnar attribute into an expression-backed one (see definition for details)
	void	ReplaceColumnarAttrWithExpression ( CSphColumnInfo & tAttr, int iLocator );
};
  111. TransformedSchemaBuilder_c::TransformedSchemaBuilder_c ( const ISphSchema & tOldSchema, CSphSchema & tNewSchema )
  112. : m_tOldSchema ( tOldSchema )
  113. , m_tNewSchema ( tNewSchema )
  114. {}
  115. void TransformedSchemaBuilder_c::AddAttr ( const CSphString & sName )
  116. {
  117. const CSphColumnInfo * pAttr = m_tOldSchema.GetAttr ( sName.cstr() );
  118. if ( !pAttr )
  119. return;
  120. CSphColumnInfo tAttr = *pAttr;
  121. tAttr.m_tLocator.Reset();
  122. if ( tAttr.m_iIndex==-1 )
  123. tAttr.m_iIndex = m_tOldSchema.GetAttrIndexOriginal ( tAttr.m_sName.cstr() );
  124. // check if new columnar attributes were added (that were not in the select list originally)
  125. if ( tAttr.IsColumnar() )
  126. ReplaceColumnarAttrWithExpression ( tAttr, m_tNewSchema.GetAttrsCount() );
  127. tAttr.m_eAttrType = sphPlainAttrToPtrAttr ( tAttr.m_eAttrType );
  128. m_tNewSchema.AddAttr ( tAttr, true );
  129. }
/// convert a columnar attribute into an expression-backed one
/// @param tAttr     attribute patched in place (flags, type and expression change)
/// @param iLocator  index the attribute will occupy in the new schema
///                  (currently unused in this body - TODO confirm against callers)
void TransformedSchemaBuilder_c::ReplaceColumnarAttrWithExpression ( CSphColumnInfo & tAttr, int iLocator )
{
	assert ( tAttr.IsColumnar() );
	assert ( !tAttr.m_pExpr );

	// temporarily add attr to new schema
	// when result set is finalized, corresponding columnar expression (will be spawned later)
	// will be evaluated and put into the match
	// and this expression will be used to fetch that value
	tAttr.m_uAttrFlags &= ~CSphColumnInfo::ATTR_COLUMNAR;
	tAttr.m_eAttrType = sphPlainAttrToPtrAttr ( tAttr.m_eAttrType );
	m_tNewSchema.AddAttr ( tAttr, true );

	// parse expression as if it is not columnar; the attribute must already be
	// present in m_tNewSchema at this point so the parser can resolve the name
	CSphString sError;
	ExprParseArgs_t tExprArgs;
	tAttr.m_pExpr = sphExprParse ( tAttr.m_sName.cstr(), m_tNewSchema, sError, tExprArgs );
	assert ( tAttr.m_pExpr );

	// now remove it from schema (it will be added later with the supplied expression)
	m_tNewSchema.RemoveAttr( tAttr.m_sName.cstr(), true );
}
  149. //////////////////////////////////////////////////////////////////////////
/// match processor that converts matches from an old schema layout to a new one;
/// builds a per-attribute "recipe" once, then applies it to every match
class MatchesToNewSchema_c : public MatchProcessor_i
{
public:
	MatchesToNewSchema_c ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema, GetBlobPoolFromMatch_fn fnGetBlobPool, GetColumnarFromMatch_fn fnGetColumnar );

	// performs actual processing according created plan
	void Process ( CSphMatch * pMatch ) final { ProcessMatch(pMatch); }
	void Process ( VecTraits_T<CSphMatch *> & dMatches ) final { dMatches.for_each ( [this]( CSphMatch * pMatch ){ ProcessMatch(pMatch); } ); }

	// row-id ordering only matters when at least one action evaluates an expression
	bool ProcessInRowIdOrder() const final { return m_dActions.any_of ( []( const MapAction_t & i ){ return i.IsExprEval(); } ); }

private:
	/// one step of the conversion recipe: how to fill a single target attribute
	struct MapAction_t
	{
		// what is to do with current position
		enum Action_e
		{
			SETZERO,		// set default (0)
			COPY,			// copy as is (plain attribute)
			COPYBLOB,		// deep copy (unpack/pack) the blob
			COPYJSONFIELD,	// json field (packed blob with type)
			EVALEXPR_INT,	// evaluate the expression for the recently added int attribute
			EVALEXPR_BIGINT,// evaluate the expression for the recently added bigint attribute
			EVALEXPR_STR,	// evaluate the expression for the recently added string attribute
			EVALEXPR_MVA	// evaluate the expression for the recently added mva attribute
		};

		const CSphAttrLocator *	m_pFrom;	// source locator (old schema); null for SETZERO
		const CSphAttrLocator *	m_pTo;		// target locator (new schema)
		ISphExprRefPtr_c		m_pExpr;	// expression used by the EVALEXPR_* actions
		Action_e				m_eAction;

		// last columnar storage the expression was bound to;
		// cached to avoid re-creating columnar iterators on every match
		mutable columnar::Columnar_i * m_pPrevColumnar = nullptr;

		bool IsExprEval() const
		{
			return m_eAction==EVALEXPR_INT || m_eAction==EVALEXPR_BIGINT || m_eAction==EVALEXPR_STR || m_eAction==EVALEXPR_MVA;
		}
	};

	int				m_iDynamicSize;		// target dynamic size, from schema
	CSphVector<MapAction_t>	m_dActions;	// the recipe
	CSphVector<std::pair<CSphAttrLocator, CSphAttrLocator>> m_dRemapCmp;	// remap @int_attr_ATTR -> ATTR
	CSphVector<int>	m_dDataPtrAttrs;	// orphaned attrs we have to free before swap to new attr
	GetBlobPoolFromMatch_fn	m_fnGetBlobPool;	// provides base for pool copying
	GetColumnarFromMatch_fn	m_fnGetColumnar;	// columnar storage getter

	// choose the action for one (old attr, new attr) pair
	static void SetupAction ( const CSphColumnInfo & tOld, const CSphColumnInfo & tNew, const ISphSchema * pOldSchema, MapAction_t & tAction );

	inline void ProcessMatch ( CSphMatch * pMatch );
	inline static void PerformAction ( const MapAction_t & tAction, CSphMatch * pMatch, CSphMatch & tResult, const BYTE * pBlobPool, columnar::Columnar_i * pColumnar );
};
/// build the conversion recipe: exactly one action per new-schema attribute
MatchesToNewSchema_c::MatchesToNewSchema_c ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema, GetBlobPoolFromMatch_fn fnGetBlobPool, GetColumnarFromMatch_fn fnGetColumnar )
	: m_iDynamicSize ( pNewSchema->GetDynamicSize () )
	, m_fnGetBlobPool ( std::move ( fnGetBlobPool ) )
	, m_fnGetColumnar ( std::move ( fnGetColumnar ) )
{
	assert ( pOldSchema && pNewSchema );

	// prepare transforming recipe

	// initial state: set all new columns to be reset by default
	for ( int i = 0; i<pNewSchema->GetAttrsCount(); ++i )
		m_dActions.Add ( { nullptr, &pNewSchema->GetAttr(i).m_tLocator, nullptr, MapAction_t::SETZERO } );

	// add mapping from old to new according to column type
	for ( int i = 0; i<pOldSchema->GetAttrsCount(); ++i )
	{
		const CSphColumnInfo & tOld = pOldSchema->GetAttr(i);
		auto iNewIdx = pNewSchema->GetAttrIndex ( tOld.m_sName.cstr () );
		if ( iNewIdx == -1 )
		{
			// dataptr present in old, but not in the new - mark it for releasing
			if ( sphIsDataPtrAttr ( tOld.m_eAttrType ) && tOld.m_tLocator.m_bDynamic )
				m_dDataPtrAttrs.Add( tOld.m_tLocator.m_iBitOffset >> ROWITEM_SHIFT );
			continue;
		}

		const CSphColumnInfo & tNew = pNewSchema->GetAttr(iNewIdx);
		auto & tAction = m_dActions[iNewIdx];
		SetupAction ( tOld, tNew, pOldSchema, tAction );
	}

	// need to update @int_attr_ locator to use new schema
	// no need to pass pOldSchema as we remap only new schema pointers
	// also need to update group sorter keypart to be str_ptr in caller code SetSchema
	FnSortGetStringRemap ( *pNewSchema, *pNewSchema, [this, pNewSchema] ( int iSrc, int iDst )
	{
		m_dRemapCmp.Add ( { pNewSchema->GetAttr(iSrc).m_tLocator, pNewSchema->GetAttr(iDst).m_tLocator } );
	} );
}
  227. void MatchesToNewSchema_c::SetupAction ( const CSphColumnInfo & tOld, const CSphColumnInfo & tNew, const ISphSchema * pOldSchema, MapAction_t & tAction )
  228. {
  229. tAction.m_pFrom = &tOld.m_tLocator;
  230. // columnar attr replaced by an expression
  231. // we now need to create an expression that fetches data from columnar storage
  232. if ( tOld.IsColumnar() && tNew.m_pExpr )
  233. {
  234. CSphString sError;
  235. ExprParseArgs_t tExprArgs;
  236. tAction.m_pExpr = sphExprParse ( tOld.m_sName.cstr(), *pOldSchema, sError, tExprArgs );
  237. assert ( tAction.m_pExpr );
  238. switch ( tNew.m_eAttrType )
  239. {
  240. case SPH_ATTR_STRINGPTR: tAction.m_eAction = MapAction_t::EVALEXPR_STR; break;
  241. case SPH_ATTR_BIGINT: tAction.m_eAction = MapAction_t::EVALEXPR_BIGINT; break;
  242. case SPH_ATTR_UINT32SET_PTR:
  243. case SPH_ATTR_INT64SET_PTR:
  244. case SPH_ATTR_FLOAT_VECTOR_PTR: tAction.m_eAction = MapAction_t::EVALEXPR_MVA; break;
  245. default: tAction.m_eAction = MapAction_t::EVALEXPR_INT; break;
  246. }
  247. return;
  248. }
  249. // same type - just copy attr as is
  250. if ( tOld.m_eAttrType==tNew.m_eAttrType )
  251. {
  252. tAction.m_eAction = MapAction_t::COPY;
  253. return;
  254. }
  255. assert ( !sphIsDataPtrAttr ( tOld.m_eAttrType ) && sphIsDataPtrAttr ( tNew.m_eAttrType ) );
  256. if ( tOld.m_eAttrType==SPH_ATTR_JSON_FIELD )
  257. tAction.m_eAction = MapAction_t::COPYJSONFIELD;
  258. else
  259. tAction.m_eAction = MapAction_t::COPYBLOB;
  260. }
  261. void MatchesToNewSchema_c::ProcessMatch ( CSphMatch * pMatch )
  262. {
  263. CSphMatch tResult;
  264. tResult.Reset ( m_iDynamicSize );
  265. const BYTE * pBlobPool = m_fnGetBlobPool(pMatch);
  266. columnar::Columnar_i * pColumnar = m_fnGetColumnar(pMatch);
  267. for ( const auto & tAction : m_dActions )
  268. PerformAction ( tAction, pMatch, tResult, pBlobPool, pColumnar );
  269. // remap comparator attributes
  270. for ( const auto & tRemap : m_dRemapCmp )
  271. tResult.SetAttr ( tRemap.second, tResult.GetAttr ( tRemap.first ) );
  272. // free original orphaned pointers
  273. CSphSchemaHelper::FreeDataSpecial ( *pMatch, m_dDataPtrAttrs );
  274. Swap ( pMatch->m_pDynamic, tResult.m_pDynamic );
  275. pMatch->m_pStatic = nullptr;
  276. }
/// execute one recipe step: compute and store a single target attribute of tResult
inline void MatchesToNewSchema_c::PerformAction ( const MapAction_t & tAction, CSphMatch * pMatch, CSphMatch & tResult, const BYTE * pBlobPool, columnar::Columnar_i * pColumnar )
{
	// try to minimize columnar switches inside the expression as this leads to recreating iterators
	if ( tAction.IsExprEval() && pColumnar!=tAction.m_pPrevColumnar )
	{
		tAction.m_pExpr->Command ( SPH_EXPR_SET_COLUMNAR, (void*)pColumnar );
		tAction.m_pPrevColumnar = pColumnar;
	}

	SphAttr_t uValue = 0;
	switch ( tAction.m_eAction )
	{
	case MapAction_t::SETZERO:
		break;

	case MapAction_t::COPY:
		uValue = pMatch->GetAttr ( *tAction.m_pFrom );
		break;

	case MapAction_t::COPYBLOB:
		{
			// unpack the pooled blob and repack it as a standalone ptr attribute
			auto dBlob = sphGetBlobAttr ( *pMatch, *tAction.m_pFrom, pBlobPool );
			uValue = (SphAttr_t) sphPackPtrAttr ( dBlob );
		}
		break;

	case MapAction_t::COPYJSONFIELD:
		{
			// packed value encodes both type and blob-pool offset
			SphAttr_t uPacked = pMatch->GetAttr ( *tAction.m_pFrom );
			const BYTE * pStr = uPacked ? pBlobPool+sphJsonUnpackOffset ( uPacked ) : nullptr;
			ESphJsonType eJson = sphJsonUnpackType ( uPacked );
			if ( pStr && eJson!=JSON_NULL )
			{
				int iLengthBytes = sphJsonNodeSize ( eJson, pStr );
				BYTE * pData = nullptr;
				uValue = (SphAttr_t) sphPackPtrAttr ( iLengthBytes+1, &pData );

				// store field type before the field
				*pData = (BYTE) eJson;
				memcpy ( pData+1, pStr, iLengthBytes );
			}
		}
		break;

	case MapAction_t::EVALEXPR_INT:
		uValue = (SphAttr_t)tAction.m_pExpr->IntEval(*pMatch);
		break;

	case MapAction_t::EVALEXPR_BIGINT:
		uValue = (SphAttr_t)tAction.m_pExpr->Int64Eval(*pMatch);
		break;

	case MapAction_t::EVALEXPR_STR:
		// StringEvalPacked already returns a packed ptr attribute
		uValue = (SphAttr_t)tAction.m_pExpr->StringEvalPacked(*pMatch);
		break;

	case MapAction_t::EVALEXPR_MVA:
		uValue = (SphAttr_t)tAction.m_pExpr->Int64Eval(*pMatch);
		break;

	default:
		assert(false && "Unknown state");
	}

	tResult.SetAttr ( *tAction.m_pTo, uValue );
}
  332. //////////////////////////////////////////////////////////////////////////
/// common base for match sorters: owns the sorter schema, comparator state,
/// and push/pop bookkeeping shared by all concrete queue implementations
class MatchSorter_c : public ISphMatchSorter
{
public:
	void				SetState ( const CSphMatchComparatorState & tState ) override;
	const CSphMatchComparatorState & GetState() const override { return m_tState; }
	void				SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) override;
	const ISphSchema *	GetSchema() const override { return ( ISphSchema *) m_pSchema; }
	void				SetColumnar ( columnar::Columnar_i * pColumnar ) override { m_pColumnar = pColumnar; }
	int64_t				GetTotalCount() const override { return m_iTotal; }
	void				CloneTo ( ISphMatchSorter * pTrg ) const override;
	bool				CanBeCloned() const override;
	void				SetFilteredAttrs ( const sph::StringSet & hAttrs, bool bAddDocid ) override;
	void				TransformPooled2StandalonePtrs ( GetBlobPoolFromMatch_fn fnBlobPoolFromMatch, GetColumnarFromMatch_fn fnGetColumnarFromMatch, bool bFinalizeSorters ) override;
	void				SetRandom ( bool bRandom ) override { m_bRandomize = bRandom; }
	bool				IsRandom() const override { return m_bRandomize; }
	int					GetMatchCapacity() const override { return m_iMatchCapacity; }
	RowTagged_t					GetJustPushed() const override { return m_tJustPushed; }
	VecTraits_T<RowTagged_t>	GetJustPopped() const override { return m_dJustPopped; }

protected:
	SharedPtr_t<ISphSchema>		m_pSchema;			///< sorter schema (adds dynamic attributes on top of index schema)
	CSphMatchComparatorState	m_tState;			///< protected to set m_iNow automatically on SetState() calls
	StrVec_t					m_dTransformed;		///< attr names kept on standalone transform (filled by SetFilteredAttrs)
	columnar::Columnar_i *		m_pColumnar = nullptr;	///< current columnar storage, if any
	bool						m_bRandomize = false;	///< randomization flag (see SetRandom/IsRandom)
	int64_t						m_iTotal = 0;			///< total matches pushed, including rejected ones
	int							m_iMatchCapacity = 0;	///< max matches this sorter can hold
	RowTagged_t					m_tJustPushed;			///< row accepted by the last push
	CSphTightVector<RowTagged_t> m_dJustPopped;			///< rows evicted by the last push
};
  362. void MatchSorter_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
  363. {
  364. assert ( pSchema );
  365. m_tState.FixupLocators ( m_pSchema, pSchema, bRemapCmp );
  366. m_pSchema = pSchema;
  367. }
  368. void MatchSorter_c::SetState ( const CSphMatchComparatorState & tState )
  369. {
  370. m_tState = tState;
  371. m_tState.m_iNow = (DWORD) time ( nullptr );
  372. }
  373. void MatchSorter_c::CloneTo ( ISphMatchSorter * pTrg ) const
  374. {
  375. assert ( pTrg );
  376. pTrg->SetRandom(m_bRandomize);
  377. pTrg->SetState(m_tState);
  378. pTrg->SetSchema ( m_pSchema->CloneMe(), false );
  379. }
  380. bool MatchSorter_c::CanBeCloned() const
  381. {
  382. if ( !m_pSchema )
  383. return true;
  384. bool bGotStatefulUDF = false;
  385. for ( int i = 0; i < m_pSchema->GetAttrsCount() && !bGotStatefulUDF; i++ )
  386. {
  387. auto & pExpr = m_pSchema->GetAttr(i).m_pExpr;
  388. if ( pExpr )
  389. pExpr->Command ( SPH_EXPR_GET_STATEFUL_UDF, &bGotStatefulUDF );
  390. }
  391. return !bGotStatefulUDF;
  392. }
  393. void MatchSorter_c::SetFilteredAttrs ( const sph::StringSet & hAttrs, bool bAddDocid )
  394. {
  395. assert ( m_pSchema );
  396. m_dTransformed.Reserve ( hAttrs.GetLength() );
  397. if ( bAddDocid && !hAttrs[sphGetDocidName()] )
  398. m_dTransformed.Add ( sphGetDocidName() );
  399. for ( auto & tName : hAttrs )
  400. {
  401. const CSphColumnInfo * pCol = m_pSchema->GetAttr ( tName.first.cstr() );
  402. if ( pCol )
  403. m_dTransformed.Add ( pCol->m_sName );
  404. }
  405. }
/// switch this sorter to a standalone schema: pooled (blob/columnar) attributes
/// become self-contained data-ptr attributes, so stored matches no longer
/// reference the index's blob pool or columnar storage
void MatchSorter_c::TransformPooled2StandalonePtrs ( GetBlobPoolFromMatch_fn fnBlobPoolFromMatch, GetColumnarFromMatch_fn fnGetColumnarFromMatch, bool bFinalizeSorters )
{
	auto * pOldSchema = GetSchema();
	assert ( pOldSchema );

	// create new standalone schema (from old, or from filtered)
	auto * pNewSchema = new CSphSchema ( "standalone" );
	for ( int i = 0; i<pOldSchema->GetFieldsCount (); ++i )
		pNewSchema->AddField ( pOldSchema->GetField(i) );

	TransformedSchemaBuilder_c tBuilder ( *pOldSchema, *pNewSchema );
	if ( m_dTransformed.IsEmpty() )
	{
		// keep id as the first attribute
		const CSphColumnInfo* pId = pOldSchema->GetAttr ( sphGetDocidName() );
		if ( pId )
			tBuilder.AddAttr ( sphGetDocidName() );

		// add the rest
		for ( int i = 0; i<pOldSchema->GetAttrsCount (); i++ )
		{
			const CSphColumnInfo & tAttr = pOldSchema->GetAttr(i);
			if ( tAttr.m_sName!=sphGetDocidName() )
				tBuilder.AddAttr ( tAttr.m_sName );
		}
	}
	else
	{
		// keep id as the first attribute, then the rest.
		m_dTransformed.any_of ( [&tBuilder] ( const auto& sName ) { auto bID = ( sName==sphGetDocidName() ); if ( bID ) tBuilder.AddAttr(sName); return bID; } );
		m_dTransformed.for_each ( [&tBuilder] ( const auto& sName ) { if ( sName!=sphGetDocidName() ) tBuilder.AddAttr(sName); } );
	}

	// expressions copied into the new schema still carry old-schema locators; fix them up
	for ( int i = 0; i <pNewSchema->GetAttrsCount(); ++i )
	{
		auto & pExpr = pNewSchema->GetAttr(i).m_pExpr;
		if ( pExpr )
			pExpr->FixupLocator ( pOldSchema, pNewSchema );
	}

	// convert every stored match to the new layout, then adopt the new schema
	MatchesToNewSchema_c fnFinal ( pOldSchema, pNewSchema, std::move ( fnBlobPoolFromMatch ), std::move ( fnGetColumnarFromMatch ) );
	Finalize ( fnFinal, false, bFinalizeSorters );
	SetSchema ( pNewSchema, true );
}
  445. //////////////////////////////////////////////////////////////////////////
/// match-sorting priority queue traits: preallocated match storage plus an
/// index vector (m_dIData) so that reordering moves ints, not whole matches
class CSphMatchQueueTraits : public MatchSorter_c, ISphNoncopyable
{
protected:
	int							m_iSize;	// size of internal struct we can operate
	CSphFixedVector<CSphMatch>	m_dData;	// preallocated match storage
	CSphTightVector<int>		m_dIData;	// indexes into m_pData, to avoid extra moving of matches themselves

public:
	/// ctor
	explicit CSphMatchQueueTraits ( int iSize )
		: m_iSize ( iSize )
		, m_dData { iSize }
	{
		assert ( iSize>0 );
		m_iMatchCapacity = iSize;
		m_dIData.Resize ( iSize );
		m_tState.m_iNow = (DWORD) time ( nullptr );

		// fill the index vector with identity ids once; the trailing Resize(0)
		// keeps those values in the underlying storage so Add() can reuse them
		ARRAY_FOREACH ( i, m_dIData )
			m_dIData[i] = i;
		m_dIData.Resize ( 0 );
	}

	/// dtor make FreeDataPtrs here, then ResetDynamic also get called on m_dData d-tr.
	~CSphMatchQueueTraits () override
	{
		if ( m_pSchema )
			m_dData.Apply ( [this] ( CSphMatch& tMatch ) { m_pSchema->FreeDataPtrs ( tMatch ); } );
	}

public:
	int GetLength() override { return Used(); }

	// helper: swap the queue guts with another instance (same size expected)
	void SwapMatchQueueTraits ( CSphMatchQueueTraits& rhs )
	{
		// ISphMatchSorter
		::Swap ( m_iTotal, rhs.m_iTotal );

		// CSphMatchQueueTraits
		m_dData.SwapData ( rhs.m_dData );
		m_dIData.SwapData ( rhs.m_dIData );
		assert ( m_iSize==rhs.m_iSize );
	}

	const VecTraits_T<CSphMatch>& GetMatches() const { return m_dData; }

protected:
	// match behind the last index entry
	CSphMatch * Last () const
	{
		return &m_dData[m_dIData.Last ()];
	}

	// match behind the iElem-th index entry
	CSphMatch & Get ( int iElem ) const
	{
		return m_dData[m_dIData[iElem]];
	}

	CSphMatch & Add ()
	{
		// proper ids at m_dIData already set at constructor
		// they will be same during life-span - that is why Add used like anti-Pop
		int iLast = m_dIData.Add();
		return m_dData[iLast];
	}

	int Used() const
	{
		return m_dIData.GetLength();
	}

	bool IsEmpty() const
	{
		return m_dIData.IsEmpty();
	}

	void ResetAfterFlatten()
	{
		m_dIData.Resize(0);
	}

	// reset dynamic data of the first iMaxUsed matches; returns -1 as the new high-water mark
	int ResetDynamic ( int iMaxUsed )
	{
		for ( int i=0; i<iMaxUsed; i++ )
			m_dData[i].ResetDynamic();
		return -1;
	}

	// free data-ptrs and reset dynamic data of the first iMaxUsed matches; returns -1
	int ResetDynamicFreeData ( int iMaxUsed )
	{
		for ( int i=0; i<iMaxUsed; i++ )
		{
			m_pSchema->FreeDataPtrs ( m_dData[i] );
			m_dData[i].ResetDynamic();
		}
		return -1;
	}
};
  530. //////////////////////////////////////////////////////////////////////////
  531. // SORTING QUEUES
  532. //////////////////////////////////////////////////////////////////////////
  533. template < typename COMP >
  534. struct InvCompareIndex_fn
  535. {
  536. const VecTraits_T<CSphMatch>& m_dBase;
  537. const CSphMatchComparatorState & m_tState;
  538. explicit InvCompareIndex_fn ( const CSphMatchQueueTraits & tBase )
  539. : m_dBase ( tBase.GetMatches() )
  540. , m_tState ( tBase.GetState() )
  541. {}
  542. bool IsLess ( int a, int b ) const // inverts COMP::IsLess
  543. {
  544. return COMP::IsLess ( m_dBase[b], m_dBase[a], m_tState );
  545. }
  546. };
// per-instance diagnostic logging helpers for the heap match queue
// (LOG_LEVEL_DIAG is false, so KMQ logging is compiled out by default)
#define LOG_COMPONENT_KMQ __LINE__ << " *(" << this << ") "
#define LOG_LEVEL_DIAG false
#define KMQ LOC(DIAG,KMQ)
/// heap sorter
/// plain binary heap based PQ: keeps the best m_iSize matches, with the
/// worst of them at the heap root for O(log N) replacement on push
template < typename COMP, bool NOTIFICATIONS >
class CSphMatchQueue final : public CSphMatchQueueTraits
{
	using MYTYPE = CSphMatchQueue<COMP, NOTIFICATIONS>;
	LOC_ADD;

public:
	/// ctor
	explicit CSphMatchQueue ( int iSize )
		: CSphMatchQueueTraits ( iSize )
		, m_fnComp ( *this )
	{
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve(1);
	}

	bool IsGroupby () const final { return false; }

	// heap root holds the worst kept match (or null when empty)
	const CSphMatch * GetWorst() const final { return m_dIData.IsEmpty() ? nullptr : Root(); }

	bool Push ( const CSphMatch & tEntry ) final { return PushT ( tEntry, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); }); }

	// batch push; rows with invalid rowid only bump the total counter
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
	{
		for ( auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushT ( i, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); } );
			else
				m_iTotal++;
	}

	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }

	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) final
	{
		KMQ << "flatten";
		assert ( !IsEmpty() );
		int iReadyMatches = Used();

		// popping the root yields matches from worst to best,
		// so the output is filled backwards to end up best-first
		pTo += iReadyMatches;
		while ( !IsEmpty() )
		{
			--pTo;
//			m_pSchema->FreeDataPtrs ( *pTo );
			PopAndProcess_T ( [pTo] ( CSphMatch & tRoot ) { Swap ( *pTo, tRoot ); return true; } );
		}
		m_iTotal = 0;
		return iReadyMatches;
	}

	/// finalize, perform final sort/cut as needed
	void Finalize ( MatchProcessor_i & tProcessor, bool bCallProcessInResultSetOrder, bool bFinalizeMatches ) final
	{
		KMQ << "finalize";
		if ( !GetLength() )
			return;

		if ( bCallProcessInResultSetOrder )
			m_dIData.Sort ( m_fnComp );

		if ( tProcessor.ProcessInRowIdOrder() )
		{
			CSphFixedVector<int> dSorted ( m_dIData.GetLength() );
			memcpy ( dSorted.Begin(), m_dIData.Begin(), m_dIData.GetLength()*sizeof(m_dIData[0]) );

			// sort by tag, rowid. minimize columnar switches inside expressions and minimize seeks inside columnar iterators
			dSorted.Sort ( Lesser ( [this] ( int l, int r )
				{
					int iTagL = m_dData[l].m_iTag;
					int iTagR = m_dData[r].m_iTag;
					if ( iTagL!=iTagR )
						return iTagL < iTagR;

					return m_dData[l].m_tRowID < m_dData[r].m_tRowID;
				}
			) );

			CSphFixedVector<CSphMatch *> dMatchPtrs ( dSorted.GetLength() );
			ARRAY_FOREACH ( i, dSorted )
				dMatchPtrs[i] = &m_dData[dSorted[i]];

			tProcessor.Process(dMatchPtrs);
		}
		else
		{
			for ( auto iMatch : m_dIData )
				tProcessor.Process ( &m_dData[iMatch] );
		}
	}

	// fixme! test
	ISphMatchSorter * Clone () const final
	{
		auto pClone = new MYTYPE ( m_iSize );
		CloneTo ( pClone );
		return pClone;
	}

	// FIXME! test CSphMatchQueue
	// move all matches into pRhs; swap wholesale when pRhs is empty
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		KMQ << "moveto";
//		m_dLogger.Print ();

		auto& dRhs = *(MYTYPE *) pRhs;
		if ( IsEmpty() )
			return; // no matches, nothing to do.

//		dRhs.m_dLogger.Print ();
		// install into virgin sorter - no need to do something; just swap
		if ( dRhs.IsEmpty() )
		{
			SwapMatchQueueTraits ( dRhs );
			return;
		}

		// work as in non-ordered finalize call, but we not need to
		// clone the matches, may just move them instead.

		// total need special care: just add two values and don't rely
		// on result of moving, since it will be wrong
		auto iTotal = dRhs.m_iTotal;
		for ( auto i : m_dIData )
			dRhs.PushT ( m_dData[i], [] ( CSphMatch & tTrg, CSphMatch & tMatch ) { Swap ( tTrg, tMatch ); } );

		dRhs.m_iTotal = m_iTotal + iTotal;
	}

	void SetMerge ( bool bMerge ) final {}

private:
	InvCompareIndex_fn<COMP> m_fnComp;	// inverted comparator: worst match floats to the heap root

	CSphMatch * Root() const
	{
		return &m_dData [ m_dIData.First() ];
	}

	/// generic add entry to the queue
	template <typename MATCH, typename PUSHER>
	bool PushT ( MATCH && tEntry, PUSHER && PUSH )
	{
		++m_iTotal;

		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}

		if ( Used()==m_iSize )
		{
			// if it's worse that current min, reject it, else pop off current min
			if ( COMP::IsLess ( tEntry, *Root(), m_tState ) )
				return true;
			else
				PopAndProcess_T ( [] ( const CSphMatch & ) { return false; } );
		}

		// do add
		PUSH ( Add(), std::forward<MATCH> ( tEntry ));

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( *Last() );

		int iEntry = Used()-1;

		// shift up if needed, so that worst (lesser) ones float to the top
		while ( iEntry )
		{
			int iParent = ( iEntry-1 ) / 2;
			if ( !m_fnComp.IsLess ( m_dIData[iParent], m_dIData[iEntry] ) )
				break;

			// entry is less than parent, should float to the top
			Swap ( m_dIData[iEntry], m_dIData[iParent] );
			iEntry = iParent;
		}
		return true;
	}

	/// remove root (ie. top priority) entry
	template<typename POPPER>
	void PopAndProcess_T ( POPPER && fnProcess )
	{
		assert ( !IsEmpty() );

		// detach the last index entry and move it into the root slot
		auto& iJustRemoved = m_dIData.Pop();
		if ( !IsEmpty() ) // for empty just popped is the root
			Swap ( m_dIData.First (), iJustRemoved );

		// fnProcess returning false means the match was NOT consumed; free it here
		if ( !fnProcess ( m_dData[iJustRemoved] ) )
		{
			// make the last entry my new root
			if constexpr ( NOTIFICATIONS )
			{
				if ( m_dJustPopped.IsEmpty () )
					m_dJustPopped.Add ( RowTagged_t ( m_dData[iJustRemoved] ) );
				else
					m_dJustPopped[0] = RowTagged_t ( m_dData[iJustRemoved] );
			}

			m_pSchema->FreeDataPtrs ( m_dData[iJustRemoved] );
		}

		// sift down if needed
		int iEntry = 0;
		auto iUsed = Used();
		while (true)
		{
			// select child
			int iChild = (iEntry*2) + 1;
			if ( iChild>=iUsed )
				break;

			// select smallest child
			if ( iChild+1<iUsed )
				if ( m_fnComp.IsLess ( m_dIData[iChild], m_dIData[iChild+1] ) )
					++iChild;

			// if smallest child is less than entry, do float it to the top
			if ( m_fnComp.IsLess ( m_dIData[iEntry], m_dIData[iChild] ) )
			{
				Swap ( m_dIData[iChild], m_dIData[iEntry] );
				iEntry = iChild;
				continue;
			}
			break;
		}
	}
};
// per-instance diagnostic logging helpers for the k-buffer sorter (compiled out by default)
#define LOG_COMPONENT_KBF __LINE__ << " *(" << this << ") "
#define KBF LOC(DIAG,KBF)
  746. //////////////////////////////////////////////////////////////////////////
  747. /// K-buffer (generalized double buffer) sorter
  748. /// faster worst-case but slower average-case than the heap sorter
  749. /// invoked with select ... OPTION sort_method=kbuffer
  750. template < typename COMP, bool NOTIFICATIONS >
  751. class CSphKbufferMatchQueue : public CSphMatchQueueTraits
  752. {
  753. using MYTYPE = CSphKbufferMatchQueue<COMP, NOTIFICATIONS>;
  754. InvCompareIndex_fn<COMP> m_dComp;
  755. LOC_ADD;
  756. public:
  757. /// ctor
/// ctor; allocates COEFF times the requested limit as the double buffer
explicit CSphKbufferMatchQueue ( int iSize )
	: CSphMatchQueueTraits ( iSize*COEFF )
	, m_dComp ( *this )
{
	m_iSize /= COEFF;	// m_iSize is the user-visible limit; m_dData holds m_iSize*COEFF slots
	if constexpr ( NOTIFICATIONS )
		m_dJustPopped.Reserve ( m_iSize*(COEFF-1) );
}
bool IsGroupby () const final { return false; }

// report at most m_iSize matches even when the buffer holds more
int GetLength () final { return Min ( Used(), m_iSize ); }

bool Push ( const CSphMatch & tEntry ) override { return PushT ( tEntry, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); }); }

// batch push; rows with invalid rowid only bump the total counter
void Push ( const VecTraits_T<const CSphMatch> & dMatches ) override
{
	for ( const auto & i : dMatches )
		if ( i.m_tRowID!=INVALID_ROWID )
			PushT ( i, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); } );
		else
			m_iTotal++;
}

bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch * pTo ) final
{
	KBF << "Flatten";
	FinalizeMatches ();	// sort-and-cut down to the best m_iSize
	auto iReadyMatches = Used();

	for ( auto iMatch : m_dIData )
	{
		KBF << "fltn " << m_dData[iMatch].m_iTag << ":" << m_dData[iMatch].m_tRowID;
		Swap ( *pTo, m_dData[iMatch] );
		++pTo;
	}

	m_iMaxUsed = ResetDynamic ( m_iMaxUsed );

	// clean up for the next work session
	m_pWorst = nullptr;
	m_iTotal = 0;
	m_bFinalized = false;
	m_dIData.Resize(0);

	return iReadyMatches;
}
/// finalize, perform final sort/cut as needed
void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final
{
	KBF << "Finalize";
	if ( IsEmpty() )
		return;

	if ( bFinalizeMatches )
		FinalizeMatches();

	// run the processor over whatever the index vector currently holds
	for ( auto iMatch : m_dIData )
		tProcessor.Process ( &m_dData[iMatch] );
}
// create an empty sorter of the same type and size; state copied via CloneTo
ISphMatchSorter* Clone() const final
{
	auto pClone = new MYTYPE ( m_iSize );
	CloneTo ( pClone );
	return pClone;
}
// FIXME! test CSphKbufferMatchQueue
// FIXME! need to deal with justpushed/justpopped any other way!
// move all matches into pRhs; swap wholesale when pRhs is empty
void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
{
	auto& dRhs = *(CSphKbufferMatchQueue<COMP, NOTIFICATIONS>*) pRhs;
	if ( IsEmpty () )
		return;

	// install into an empty sorter: plain swap plus state copy is enough
	if ( dRhs.IsEmpty () )
	{
		SwapMatchQueueTraits (dRhs);
		dRhs.m_pWorst = m_pWorst;
		dRhs.m_bFinalized = m_bFinalized;
		return;
	}

	FinalizeMatches();

	// both are non-empty - need to process.
	// work as finalize call, but don't clone the matches; move them instead.
	// total need special care!
	auto iTotal = dRhs.m_iTotal;
	for ( auto iMatch : m_dIData )
	{
		dRhs.PushT ( m_dData[iMatch],
			[] ( CSphMatch & tTrg, CSphMatch & tMatch ) {
				Swap ( tTrg, tMatch );
			});
	}

	dRhs.m_iTotal = m_iTotal + iTotal;
}
void SetMerge ( bool bMerge ) final {}

protected:
	CSphMatch *			m_pWorst = nullptr;		// current rejection threshold match, when known
	bool				m_bFinalized = false;	// true once matches are sorted-and-cut
	int					m_iMaxUsed = -1;		// high-water mark of used slots (for dynamic cleanup)
	static const int	COEFF = 4;				// over-allocation factor of the double buffer

private:
void SortMatches () // sort from best to worst
{
	m_dIData.Sort ( m_dComp );
}

// notify (when enabled) and release one match's data-ptr attributes
void FreeMatch ( int iMatch )
{
	if constexpr ( NOTIFICATIONS )
		m_dJustPopped.Add ( RowTagged_t ( m_dData[iMatch] ) );
	m_pSchema->FreeDataPtrs ( m_dData[iMatch] );
}
// drop everything beyond the first m_iSize index entries
void CutTail()
{
	if ( Used()<=m_iSize)
		return;

	m_iMaxUsed = Max ( m_iMaxUsed, this->m_dIData.GetLength () ); // memorize it for free dynamics later.
	m_dIData.Slice ( m_iSize ).Apply ( [this] ( int iMatch ) { FreeMatch ( iMatch ); } );
	m_dIData.Resize ( m_iSize );
}
  868. // conception: we have array of N*COEFF elems.
  869. // We need only N the best elements from it (rest have to be disposed).
  870. // direct way: rsort, then take first N elems.
  871. // this way: rearrange array by performing one pass of quick sort
  872. // if we have exactly N elems left hand from pivot - we're done.
  873. // otherwise repeat rearranging only to right or left part until the target achieved.
	// quickselect-style rearrangement: loop Hoare partition passes until exactly
	// the best m_iSize elements sit left of the split point (see comment above)
	void BinaryPartition ()
	{
		int iPivot = m_dIData[m_iSize / COEFF+1];
		int iMaxIndex = m_iSize-1; // done when the split lands exactly after the best m_iSize elems
		int a=0;
		int b=Used()-1;
		while (true)
		{
			// one Hoare partition pass over [a..b] around iPivot
			int i=a;
			int j=b;
			while (i<=j)
			{
				while (m_dComp.IsLess (m_dIData[i],iPivot)) ++i;
				while (m_dComp.IsLess (iPivot, m_dIData[j])) --j;
				if ( i<=j ) ::Swap( m_dIData[i++], m_dIData[j--]);
			}
			if ( iMaxIndex == j )
				break;
			if ( iMaxIndex < j)
				b = j; // too many elems acquired; continue with left part
			else
				a = i; // too less elems acquired; continue with right part
			iPivot = m_dIData[( a * ( COEFF-1 )+b ) / COEFF];
		}
	}
	/// keep only the best m_iSize matches: partial quickselect, then free the tail
	void RepartitionMatches ()
	{
		assert ( Used ()>m_iSize );
		BinaryPartition ();
		CutTail();
	}
	/// bring the queue to its final state: at most m_iSize matches, sorted best-to-worst (idempotent)
	void FinalizeMatches ()
	{
		if ( m_bFinalized )
			return;
		m_bFinalized = true;
		if ( Used ()>m_iSize )
			RepartitionMatches();
		SortMatches();
	}
	// generic push entry (add it some way to the queue clone or swap PUSHER depends on)
	// returns true always: the k-buffer itself never signals cut-off
	template<typename MATCH, typename PUSHER>
	FORCE_INLINE bool PushT ( MATCH && tEntry, PUSHER && PUSH )
	{
		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}
		// quick early rejection checks
		++m_iTotal;
		if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
			return true; // worse than the current cut-off: counted, but not stored
		// quick check passed
		// fill the data, back to front
		m_bFinalized = false;
		PUSH ( Add(), std::forward<MATCH> ( tEntry ));
		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( *Last() );
		// do the initial sort once
		if ( m_iTotal==m_iSize )
		{
			assert ( Used()==m_iSize && !m_pWorst );
			SortMatches();
			m_pWorst = Last(); // from now on, early rejection above becomes active
			m_bFinalized = true;
			return true;
		}
		if ( Used ()<m_iSize*COEFF )
			return true;
		// do the sort/cut when the K-buffer is full
		assert ( Used ()==m_iSize*COEFF );
		RepartitionMatches();
		SortMatches ();
		m_pWorst = Last ();
		m_bFinalized = true;
		return true;
	}
  952. };
  953. //////////////////////////////////////////////////////////////////////////
  954. /// collect list of matched DOCIDs in aside compressed blob
  955. /// (mainly used to collect docs in `DELETE... WHERE` statement)
class CollectQueue_c final : public MatchSorter_c, ISphNoncopyable
{
	using BASE = MatchSorter_c;
public:
	CollectQueue_c ( int iSize, CSphVector<BYTE>& dCollectedValues );
	bool IsGroupby () const final { return false; }
	int GetLength () final { return 0; } // that ensures, flatten() will never called;
	bool Push ( const CSphMatch& tEntry ) final { return PushMatch(tEntry); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
	{
		for ( const auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushMatch(i);
	}
	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; } // grouped pushes are not expected here
	int Flatten ( CSphMatch * ) final { return 0; }
	void Finalize ( MatchProcessor_i &, bool, bool ) final;
	bool CanBeCloned() const final { return false; } // writes to a single external blob - cloning is meaningless
	ISphMatchSorter * Clone () const final { return nullptr; }
	void MoveTo ( ISphMatchSorter *, bool ) final {}
	void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final;
	bool IsCutoffDisabled() const final { return true; }
	void SetMerge ( bool bMerge ) final {}
private:
	DocID_t m_iLastID;						// last docid written; base for delta encoding
	int m_iMaxMatches;						// buffered docid cap before an intermediate flush
	CSphVector<DocID_t> m_dUnsortedDocs;	// docids collected since the last flush
	MemoryWriter_c m_tWriter;				// zips docid deltas into the caller-owned blob
	bool m_bDocIdDynamic = false;			// whether docid lives in the dynamic part of a match
	inline bool PushMatch ( const CSphMatch & tEntry );
	inline void ProcessPushed();
};
/// \param iSize max docids to buffer before an intermediate sort/flush
/// \param dCollectedValues destination blob receiving zipped docid deltas
CollectQueue_c::CollectQueue_c ( int iSize, CSphVector<BYTE>& dCollectedValues )
	: m_iLastID ( 0 )
	, m_iMaxMatches ( iSize )
	, m_tWriter ( dCollectedValues )
{}
/// sort/uniq already collected and store them to writer
void CollectQueue_c::ProcessPushed()
{
	m_dUnsortedDocs.Uniq(); // sorts and dedupes in place
	for ( auto& iCurId : m_dUnsortedDocs )
		m_tWriter.ZipOffset ( iCurId - std::exchange ( m_iLastID, iCurId ) ); // zip the delta to the previous id
	m_dUnsortedDocs.Resize ( 0 );
}
/// buffer one docid; flush the batch when the cap is reached AND the vector is about to reallocate
bool CollectQueue_c::PushMatch ( const CSphMatch & tEntry )
{
	if ( m_dUnsortedDocs.GetLength() >= m_iMaxMatches && m_dUnsortedDocs.GetLength() == m_dUnsortedDocs.GetLimit() )
		ProcessPushed();
	m_dUnsortedDocs.Add ( sphGetDocID ( m_bDocIdDynamic ? tEntry.m_pDynamic : tEntry.m_pStatic ) );
	return true;
}
/// final update pass
void CollectQueue_c::Finalize ( MatchProcessor_i&, bool, bool )
{
	ProcessPushed();	// flush whatever is still buffered
	m_iLastID = 0;		// reset the delta base, in case the queue is reused
}
void CollectQueue_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	BASE::SetSchema ( pSchema, bRemapCmp );
	// cache whether docid lives in the dynamic or static part of matches
	const CSphColumnInfo * pDocId = pSchema->GetAttr ( sphGetDocidName() );
	assert(pDocId);
	m_bDocIdDynamic = pDocId->m_tLocator.m_bDynamic;
}
  1021. //////////////////////////////////////////////////////////////////////////
  1022. void SendSqlSchema ( const ISphSchema& tSchema, RowBuffer_i* pRows, const VecTraits_T<int>& dOrder )
  1023. {
  1024. int iCount = 0;
  1025. for ( int i = 0; i < tSchema.GetAttrsCount(); ++i )
  1026. if ( !sphIsInternalAttr ( tSchema.GetAttr ( i ) ) )
  1027. ++iCount;
  1028. assert ( iCount == dOrder.GetLength() );
  1029. pRows->HeadBegin ( iCount );
  1030. for ( int i : dOrder )
  1031. {
  1032. const CSphColumnInfo& tCol = tSchema.GetAttr ( i );
  1033. if ( sphIsInternalAttr ( tCol ) )
  1034. continue;
  1035. pRows->HeadColumn ( tCol.m_sName.cstr(), ESphAttr2MysqlColumn ( tCol.m_eAttrType ) );
  1036. }
  1037. pRows->HeadEnd ( false, 0 );
  1038. }
/// format one match as an SQL row and stream it out; dOrder defines column order.
/// \param bDynamicDocid controls how POSTLIMIT string expressions see the match (see STRINGPTR case)
void SendSqlMatch ( const ISphSchema& tSchema, RowBuffer_i* pRows, CSphMatch& tMatch, const BYTE* pBlobPool, const VecTraits_T<int>& dOrder, bool bDynamicDocid )
{
	auto& dRows = *pRows;
	for ( int i : dOrder )
	{
		const CSphColumnInfo& dAttr = tSchema.GetAttr ( i );
		if ( sphIsInternalAttr ( dAttr ) )
			continue;
		CSphAttrLocator tLoc = dAttr.m_tLocator;
		ESphAttr eAttrType = dAttr.m_eAttrType;
		switch ( eAttrType )
		{
		case SPH_ATTR_STRING:
			// plain blob-pool string
			dRows.PutArray ( sphGetBlobAttr ( tMatch, tLoc, pBlobPool ) );
			break;
		case SPH_ATTR_STRINGPTR:
		{
			const BYTE* pStr = nullptr;
			if ( dAttr.m_eStage == SPH_EVAL_POSTLIMIT )
			{
				// postlimit values are not precalculated - evaluate the expression now
				if ( bDynamicDocid )
				{
					dAttr.m_pExpr->StringEval ( tMatch, &pStr );
				} else
				{
					// NOTE(review): temporarily hides the dynamic part so the expression
					// reads from static storage - confirm against the docstore eval path
					auto pDynamic = tMatch.m_pDynamic;
					if ( tMatch.m_pStatic )
						tMatch.m_pDynamic = nullptr;
					dAttr.m_pExpr->StringEval ( tMatch, &pStr );
					tMatch.m_pDynamic = pDynamic;
				}
				dRows.PutString ( (const char*)pStr );
				SafeDeleteArray ( pStr ); // StringEval allocated it; we own the buffer here
			} else {
				pStr = (const BYTE*)tMatch.GetAttr ( tLoc );
				auto dString = sphUnpackPtrAttr ( pStr );
				dRows.PutArray ( dString );
			}
		}
		break;
		case SPH_ATTR_INTEGER:
		case SPH_ATTR_TIMESTAMP:
		case SPH_ATTR_BOOL:
			dRows.PutNumAsString ( (DWORD)tMatch.GetAttr ( tLoc ) );
			break;
		case SPH_ATTR_BIGINT:
			dRows.PutNumAsString ( tMatch.GetAttr ( tLoc ) );
			break;
		case SPH_ATTR_UINT64:
			dRows.PutNumAsString ( (uint64_t)tMatch.GetAttr ( tLoc ) );
			break;
		case SPH_ATTR_FLOAT:
			dRows.PutFloatAsString ( tMatch.GetAttrFloat ( tLoc ) );
			break;
		case SPH_ATTR_DOUBLE:
			dRows.PutDoubleAsString ( tMatch.GetAttrDouble ( tLoc ) );
			break;
		case SPH_ATTR_INT64SET:
		case SPH_ATTR_UINT32SET:
		{
			// MVA from the blob pool, rendered as a comma-separated string
			StringBuilder_c dStr;
			auto dMVA = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
			sphMVA2Str ( dMVA, eAttrType == SPH_ATTR_INT64SET, dStr );
			dRows.PutArray ( dStr, false );
			break;
		}
		case SPH_ATTR_INT64SET_PTR:
		case SPH_ATTR_UINT32SET_PTR:
		{
			// MVA stored as a packed ptr attribute
			StringBuilder_c dStr;
			sphPackedMVA2Str ( (const BYTE*)tMatch.GetAttr ( tLoc ), eAttrType == SPH_ATTR_INT64SET_PTR, dStr );
			dRows.PutArray ( dStr, false );
			break;
		}
		case SPH_ATTR_FLOAT_VECTOR:
		{
			StringBuilder_c dStr;
			auto dFloatVec = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
			sphFloatVec2Str ( dFloatVec, dStr );
			dRows.PutArray ( dStr, false );
		}
		break;
		case SPH_ATTR_FLOAT_VECTOR_PTR:
		{
			StringBuilder_c dStr;
			sphPackedFloatVec2Str ( (const BYTE*)tMatch.GetAttr(tLoc), dStr );
			dRows.PutArray ( dStr, false );
		}
		break;
		case SPH_ATTR_JSON:
		{
			// format JSON from the blob pool; empty blob sends an empty string
			auto pJson = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
			JsonEscapedBuilder sTmp;
			if ( pJson.second )
				sphJsonFormat ( sTmp, pJson.first );
			dRows.PutArray ( sTmp );
		}
		break;
		case SPH_ATTR_JSON_PTR:
		{
			auto* pString = (const BYTE*)tMatch.GetAttr ( tLoc );
			JsonEscapedBuilder sTmp;
			if ( pString )
			{
				auto dJson = sphUnpackPtrAttr ( pString );
				sphJsonFormat ( sTmp, dJson.first );
			}
			dRows.PutArray ( sTmp );
		}
		break;
		case SPH_ATTR_FACTORS:
		case SPH_ATTR_FACTORS_JSON:
		case SPH_ATTR_JSON_FIELD:
		case SPH_ATTR_JSON_FIELD_PTR:
			assert ( false ); // index schema never contain such column
			break;
		default:
			// unknown type: emit a one-char '-' placeholder cell
			dRows.Add ( 1 );
			dRows.Add ( '-' );
			break;
		}
	}
	if ( !dRows.Commit() )
		session::SetKilled ( true ); // client gone / output overflow - cancel the query
}
  1164. /// stream out matches
  1165. class DirectSqlQueue_c final : public MatchSorter_c, ISphNoncopyable
  1166. {
  1167. using BASE = MatchSorter_c;
  1168. public:
  1169. DirectSqlQueue_c ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, StrVec_t dColumns );
  1170. ~DirectSqlQueue_c() override;
  1171. bool IsGroupby () const final { return false; }
  1172. int GetLength () final { return 0; } // that ensures, flatten() will never called;
  1173. bool Push ( const CSphMatch& tEntry ) final { return PushMatch(const_cast<CSphMatch&>(tEntry)); }
  1174. void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
  1175. {
  1176. for ( const auto & i : dMatches )
  1177. if ( i.m_tRowID!=INVALID_ROWID )
  1178. PushMatch(const_cast<CSphMatch&>(i));
  1179. }
  1180. bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }
  1181. int Flatten ( CSphMatch * ) final { return 0; }
  1182. void Finalize ( MatchProcessor_i &, bool, bool ) final;
  1183. bool CanBeCloned() const final { return false; }
  1184. ISphMatchSorter * Clone () const final { return nullptr; }
  1185. void MoveTo ( ISphMatchSorter *, bool ) final {}
  1186. void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final;
  1187. bool IsCutoffDisabled() const final { return true; }
  1188. void SetMerge ( bool bMerge ) final {}
  1189. void SetBlobPool ( const BYTE* pBlobPool ) final
  1190. {
  1191. m_pBlobPool = pBlobPool;
  1192. MakeCtx();
  1193. }
  1194. void SetColumnar ( columnar::Columnar_i* pColumnar ) final
  1195. {
  1196. m_pColumnar = pColumnar;
  1197. MakeCtx();
  1198. }
  1199. private:
  1200. bool m_bSchemaSent = false;
  1201. int64_t m_iDocs = 0;
  1202. RowBuffer_i* m_pOutput;
  1203. const BYTE* m_pBlobPool = nullptr;
  1204. columnar::Columnar_i* m_pColumnar = nullptr;
  1205. CSphVector<ISphExpr*> m_dDocstores;
  1206. CSphVector<ISphExpr*> m_dFinals;
  1207. void ** m_ppOpaque1 = nullptr;
  1208. void ** m_ppOpaque2 = nullptr;
  1209. void * m_pCurDocstore = nullptr;
  1210. void * m_pCurDocstoreReader = nullptr;
  1211. CSphQuery m_dFake;
  1212. CSphQueryContext m_dCtx;
  1213. StrVec_t m_dColumns;
  1214. CSphVector<int> m_dOrder;
  1215. bool m_bDynamicDocid;
  1216. bool m_bNotYetFinalized = true;
  1217. inline bool PushMatch ( CSphMatch & tEntry );
  1218. void SendSchemaOnce();
  1219. void FinalizeOnce();
  1220. void MakeCtx();
  1221. };
/// \param pOutput target SQL row buffer (not owned)
/// \param ppOpaque1 indirect docstore-reader pointer, dereferenced on every push
/// \param ppOpaque2 indirect docstore pointer, dereferenced on every push
/// \param dColumns requested output column names, in client order
DirectSqlQueue_c::DirectSqlQueue_c ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, StrVec_t dColumns )
	: m_pOutput ( pOutput )
	, m_ppOpaque1 ( ppOpaque1 )
	, m_ppOpaque2 ( ppOpaque2 )
	, m_dCtx (m_dFake)
	, m_dColumns ( std::move ( dColumns ) )
{}
DirectSqlQueue_c::~DirectSqlQueue_c()
{
	FinalizeOnce(); // ensure head+EOF are sent even if Finalize() was never called
}
/// lazily resolve column order, classify expressions by eval stage, and send the head row (once)
void DirectSqlQueue_c::SendSchemaOnce()
{
	if ( m_bSchemaSent )
		return;
	assert ( !m_iDocs ); // must run before the first row is streamed
	// translate requested column names into schema indexes (unknown names are dropped)
	for ( const auto& sColumn : m_dColumns )
	{
		auto iIdx = m_pSchema->GetAttrIndex ( sColumn.cstr() );
		if ( iIdx >= 0 )
			m_dOrder.Add ( iIdx );
	}
	for ( int i = 0; i < m_pSchema->GetAttrsCount(); ++i )
	{
		auto& tCol = const_cast< CSphColumnInfo &>(m_pSchema->GetAttr ( i ));
		if ( tCol.m_sName == sphGetDocidName() )
			m_bDynamicDocid = tCol.m_tLocator.m_bDynamic; // cache docid storage kind
		if ( !tCol.m_pExpr )
			continue;
		// split expressions into the two stages PushMatch() has to drive
		switch ( tCol.m_eStage )
		{
		case SPH_EVAL_FINAL : m_dFinals.Add ( tCol.m_pExpr ); break;
		case SPH_EVAL_POSTLIMIT: m_dDocstores.Add ( tCol.m_pExpr ); break;
		default:
			sphWarning ("Unknown stage in SendSchema(): %d", tCol.m_eStage);
		}
	}
	SendSqlSchema ( *m_pSchema, m_pOutput, m_dOrder );
	m_bSchemaSent = true;
}
/// (re)build the calc context binding FINAL-stage expressions to the current blob pool / columnar
void DirectSqlQueue_c::MakeCtx()
{
	// NOTE(review): tFakeMeta/tFakeSchemas are temporaries; this assumes SetupCalc
	// copies what it needs and keeps no references to them - confirm in CSphQueryContext
	CSphQueryResultMeta tFakeMeta;
	CSphVector<const ISphSchema*> tFakeSchemas;
	m_dCtx.SetupCalc ( tFakeMeta, *m_pSchema, *m_pSchema, m_pBlobPool, m_pColumnar, tFakeSchemas );
}
/// evaluate final-stage expressions for one match and stream it straight to the client
bool DirectSqlQueue_c::PushMatch ( CSphMatch & tEntry )
{
	SendSchemaOnce();
	++m_iDocs;
	// rebind docstore-by-docid sessions when the underlying docstore changed
	if ( m_ppOpaque1 )
	{
		auto pDocstoreReader = *m_ppOpaque1;
		if ( pDocstoreReader!=std::exchange (m_pCurDocstore, pDocstoreReader) && pDocstoreReader )
		{
			DocstoreSession_c::InfoDocID_t tSessionInfo;
			tSessionInfo.m_pDocstore = (const DocstoreReader_i *)pDocstoreReader;
			tSessionInfo.m_iSessionId = -1;
			// value is copied; no leak of pointer to local here.
			m_dDocstores.for_each ( [&tSessionInfo] ( ISphExpr* pExpr ) { pExpr->Command ( SPH_EXPR_SET_DOCSTORE_DOCID, &tSessionInfo ); } );
		}
	}
	// same dance for docstore-by-rowid sessions of the FINAL-stage expressions
	if ( m_ppOpaque2 )
	{
		auto pDocstore = *m_ppOpaque2;
		if ( pDocstore != std::exchange ( m_pCurDocstoreReader, pDocstore ) && pDocstore )
		{
			DocstoreSession_c::InfoRowID_t tSessionInfo;
			tSessionInfo.m_pDocstore = (Docstore_i*)pDocstore;
			tSessionInfo.m_iSessionId = -1;
			// value is copied; no leak of pointer to local here.
			m_dFinals.for_each ( [&tSessionInfo] ( ISphExpr* pExpr ) { pExpr->Command ( SPH_EXPR_SET_DOCSTORE_ROWID, &tSessionInfo ); } );
		}
	}
	m_dCtx.CalcFinal(tEntry); // run FINAL-stage expressions on this match
	SendSqlMatch ( *m_pSchema, m_pOutput, tEntry, m_pBlobPool, m_dOrder, m_bDynamicDocid );
	return true;
}
/// final update pass
void DirectSqlQueue_c::Finalize ( MatchProcessor_i&, bool, bool bFinalizeMatches )
{
	if ( !bFinalizeMatches )
		return; // intermediate call; keep the stream open
	FinalizeOnce();
}
  1307. void DirectSqlQueue_c::FinalizeOnce ()
  1308. {
  1309. if ( !std::exchange ( m_bNotYetFinalized, false ) )
  1310. return;
  1311. SendSchemaOnce();
  1312. m_pOutput->Eof();
  1313. }
void DirectSqlQueue_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	// nothing extra here; schema-dependent setup is deferred to SendSchemaOnce()
	BASE::SetSchema ( pSchema, bRemapCmp );
}
  1318. //////////////////////////////////////////////////////////////////////////
  1319. // SORTING+GROUPING QUEUE
  1320. //////////////////////////////////////////////////////////////////////////
  1321. static bool IsCount ( const CSphString & s )
  1322. {
  1323. return s=="@count" || s=="count(*)";
  1324. }
  1325. static bool IsGroupby ( const CSphString & s )
  1326. {
  1327. return s=="@groupby"
  1328. || s=="@distinct"
  1329. || s=="groupby()"
  1330. || IsSortJsonInternal(s);
  1331. }
  1332. bool IsGroupbyMagic ( const CSphString & s )
  1333. {
  1334. return IsGroupby ( s ) || IsCount ( s );
  1335. }
  1336. /// groupers
// GROUPER_BEGIN(_name) ... GROUPER_END declare a CSphGrouper subclass; the code
// between the two macros becomes the body of KeyFromValue()
#define GROUPER_BEGIN(_name) \
	class _name : public CSphGrouper \
	{ \
	protected: \
		CSphAttrLocator m_tLocator; \
	public: \
		explicit _name ( const CSphAttrLocator & tLoc ) : m_tLocator ( tLoc ) {} \
		void GetLocator ( CSphAttrLocator & tOut ) const override { tOut = m_tLocator; } \
		ESphAttr GetResultType() const override { return m_tLocator.m_iBitCount>8*(int)sizeof(DWORD) ? SPH_ATTR_BIGINT : SPH_ATTR_INTEGER; } \
		SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const override { return KeyFromValue ( tMatch.GetAttr ( m_tLocator ) ); } \
		void MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const override { assert(0); } \
		CSphGrouper * Clone() const override { return new _name (m_tLocator); } \
		SphGroupKey_t KeyFromValue ( SphAttr_t uValue ) const override \
		{
// NOLINT
#define GROUPER_END \
		} \
	};

// same as GROUPER_BEGIN, with the timestamp pre-split into calendar parts (local time)
#define GROUPER_BEGIN_SPLIT(_name) \
	GROUPER_BEGIN(_name) \
	time_t tStamp = (time_t)uValue; \
	struct tm tSplit; \
	localtime_r ( &tStamp, &tSplit );

// identity grouper: the attribute value itself is the group key
GROUPER_BEGIN ( CSphGrouperAttr )
	return uValue;
GROUPER_END

// key is YYYYMMDD (local time)
GROUPER_BEGIN_SPLIT ( CSphGrouperDay )
	return (tSplit.tm_year+1900)*10000 + (1+tSplit.tm_mon)*100 + tSplit.tm_mday;
GROUPER_END

// key is YYYY*1000 + day-of-year of the preceding Sunday (local time)
GROUPER_BEGIN_SPLIT ( CSphGrouperWeek )
	int iPrevSunday = (1+tSplit.tm_yday) - tSplit.tm_wday; // prev Sunday day of year, base 1
	int iYear = tSplit.tm_year+1900;
	if ( iPrevSunday<=0 ) // check if we crossed year boundary
	{
		// adjust day and year
		iPrevSunday += 365;
		iYear--;
		// adjust for leap years
		if ( iYear%4==0 && ( iYear%100!=0 || iYear%400==0 ) )
			iPrevSunday++;
	}
	return iYear*1000 + iPrevSunday;
GROUPER_END

// key is YYYYMM (local time)
GROUPER_BEGIN_SPLIT ( CSphGrouperMonth )
	return (tSplit.tm_year+1900)*100 + (1+tSplit.tm_mon);
GROUPER_END

// key is YYYY (local time)
GROUPER_BEGIN_SPLIT ( CSphGrouperYear )
	return (tSplit.tm_year+1900);
GROUPER_END

// same family as above, but splitting the timestamp in UTC (gmtime_r)
#define GROUPER_BEGIN_SPLIT_UTC( _name ) \
	GROUPER_BEGIN(_name) \
	time_t tStamp = (time_t)uValue; \
	struct tm tSplit; \
	gmtime_r ( &tStamp, &tSplit );

GROUPER_BEGIN_SPLIT_UTC ( CSphGrouperDayUtc )
	return (tSplit.tm_year + 1900) * 10000 + (1 + tSplit.tm_mon) * 100 + tSplit.tm_mday;
GROUPER_END

GROUPER_BEGIN_SPLIT_UTC ( CSphGrouperWeekUtc )
	int iPrevSunday = (1 + tSplit.tm_yday) - tSplit.tm_wday; // prev Sunday day of year, base 1
	int iYear = tSplit.tm_year + 1900;
	if ( iPrevSunday<=0 ) // check if we crossed year boundary
	{
		// adjust day and year
		iPrevSunday += 365;
		iYear--;
		// adjust for leap years
		if ( iYear % 4==0 && (iYear % 100!=0 || iYear % 400==0) )
			iPrevSunday++;
	}
	return iYear * 1000 + iPrevSunday;
GROUPER_END

GROUPER_BEGIN_SPLIT_UTC ( CSphGrouperMonthUtc )
	return (tSplit.tm_year + 1900) * 100 + (1 + tSplit.tm_mon);
GROUPER_END

GROUPER_BEGIN_SPLIT_UTC ( CSphGrouperYearUtc )
	return (tSplit.tm_year + 1900);
GROUPER_END
// when true, the date/time groupers are spawned in their UTC flavor (see get*Grouper below)
static bool g_bSortGroupingInUtc = false;
void SetGroupingInUtcSort ( bool bGroupingInUtc )
{
	g_bSortGroupingInUtc = bGroupingInUtc;
}
  1419. static CSphGrouper * getDayGrouper ( const CSphAttrLocator &tLoc )
  1420. {
  1421. return g_bSortGroupingInUtc
  1422. ? (CSphGrouper *) new CSphGrouperDayUtc ( tLoc )
  1423. : (CSphGrouper *) new CSphGrouperDay ( tLoc );
  1424. }
  1425. static CSphGrouper * getWeekGrouper ( const CSphAttrLocator &tLoc )
  1426. {
  1427. return g_bSortGroupingInUtc
  1428. ? (CSphGrouper *) new CSphGrouperWeekUtc ( tLoc )
  1429. : (CSphGrouper *) new CSphGrouperWeek ( tLoc );
  1430. }
  1431. static CSphGrouper * getMonthGrouper ( const CSphAttrLocator &tLoc )
  1432. {
  1433. return g_bSortGroupingInUtc
  1434. ? (CSphGrouper *) new CSphGrouperMonthUtc ( tLoc )
  1435. : (CSphGrouper *) new CSphGrouperMonth ( tLoc );
  1436. }
  1437. static CSphGrouper * getYearGrouper ( const CSphAttrLocator &tLoc )
  1438. {
  1439. return g_bSortGroupingInUtc
  1440. ? (CSphGrouper *) new CSphGrouperYearUtc ( tLoc )
  1441. : (CSphGrouper *) new CSphGrouperYear ( tLoc );
  1442. }
/// groups by PRED::Hash of a string attribute's blob content
template <class PRED>
class CSphGrouperString final : public CSphGrouperAttr, public PRED
{
public:
	explicit CSphGrouperString ( const CSphAttrLocator & tLoc )
		: CSphGrouperAttr ( tLoc )
	{}
	ESphAttr GetResultType () const override
	{
		return SPH_ATTR_BIGINT; // the key is a 64-bit hash
	}
	SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const override
	{
		auto dBlobAttr = tMatch.FetchAttrData ( m_tLocator, GetBlobPool() );
		if ( IsEmpty ( dBlobAttr ) )
			return 0; // all empty strings share group key 0
		return PRED::Hash ( dBlobAttr.first,dBlobAttr.second );
	}
	CSphGrouper * Clone () const final
	{
		return new CSphGrouperString ( m_tLocator );
	}
};
#if __has_include( <charconv>)
#include <charconv>
#else
// fallback integer-to-string for toolchains without <charconv>:
// writes digits backwards into the tail of sBuf (32 bytes, NUL-terminated)
// and returns a pointer to the first character (not necessarily sBuf itself)
template<typename T>
inline static char* FormatInt ( char sBuf[32], T v )
{
	// v = -v below would overflow on the two's-complement minimums; special-case them
	if ( sizeof ( T ) == 4 && v == INT_MIN )
		return strncpy ( sBuf, "-2147483648", 32 );
	if ( sizeof ( T ) == 8 && v == LLONG_MIN )
		return strncpy ( sBuf, "-9223372036854775808", 32 );
	bool s = ( v < 0 );
	if ( s )
		v = -v;
	char* p = sBuf + 31;
	*p = 0;
	do
	{
		*--p = '0' + char ( v % 10 );
		v /= 10;
	} while ( v );
	if ( s )
		*--p = '-';
	return p;
}
#endif
/// lookup JSON key, group by looked up value (used in CSphKBufferJsonGroupSorter)
class CSphGrouperJsonField final : public CSphGrouper
{
public:
	CSphGrouperJsonField ( const CSphAttrLocator & tLoc, ISphExpr * pExpr )
		: m_tLocator ( tLoc )
		, m_pExpr ( pExpr )
	{
		SafeAddRef ( pExpr ); // share ownership of the JSON lookup expression
	}
	void SetBlobPool ( const BYTE * pBlobPool ) final
	{
		// propagate the pool to the expression too, since it reads the JSON blob
		CSphGrouper::SetBlobPool ( pBlobPool );
		if ( m_pExpr )
			m_pExpr->Command ( SPH_EXPR_SET_BLOB_POOL, (void*)pBlobPool );
	}
	void GetLocator ( CSphAttrLocator & tOut ) const final
	{
		tOut = m_tLocator;
	}
	ESphAttr GetResultType () const final
	{
		return SPH_ATTR_BIGINT;
	}
	SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const final
	{
		if ( !m_pExpr )
			return SphGroupKey_t();
		return m_pExpr->Int64Eval ( tMatch );
	}
	void MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const final { assert(0); }
	SphGroupKey_t KeyFromValue ( SphAttr_t ) const final { assert(0); return SphGroupKey_t(); }
	CSphGrouper * Clone() const final { return new CSphGrouperJsonField (*this); }
protected:
	// clone ctor: copies the locator, deep-clones the expression
	CSphGrouperJsonField ( const CSphGrouperJsonField& rhs )
		: m_tLocator ( rhs.m_tLocator )
		, m_pExpr ( SafeClone ( rhs.m_pExpr ) )
	{}
	CSphAttrLocator m_tLocator;
	ISphExprRefPtr_c m_pExpr;
};
/// groups by PRED::Hash of a string expression's evaluated result
template <class PRED>
class GrouperStringExpr_T final : public CSphGrouper, public PRED
{
public:
	explicit GrouperStringExpr_T ( ISphExpr * pExpr )
		: m_pExpr ( pExpr )
	{
		assert(m_pExpr);
		SafeAddRef(pExpr);
	}
	void GetLocator ( CSphAttrLocator & tOut ) const final {} // no single locator: the key comes from the expression
	ESphAttr GetResultType () const final { return SPH_ATTR_BIGINT; }
	SphGroupKey_t KeyFromValue ( SphAttr_t ) const final { assert(0); return SphGroupKey_t(); }
	void MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const final { assert(0); }
	SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const final
	{
		assert ( !m_pExpr->IsDataPtrAttr() ); // expression must not hand us buffer ownership
		const BYTE * pStr = nullptr;
		int iLen = m_pExpr->StringEval ( tMatch, &pStr );
		if ( !iLen )
			return 0; // empty results share group key 0
		return PRED::Hash ( pStr, iLen );
	}
	CSphGrouper * Clone() const final { return new GrouperStringExpr_T(*this); }
	void SetColumnar ( const columnar::Columnar_i * pColumnar ) final { m_pExpr->Command ( SPH_EXPR_SET_COLUMNAR, (void*)pColumnar ); }
protected:
	// clone ctor: deep-clones the expression
	GrouperStringExpr_T (const GrouperStringExpr_T& rhs)
		: m_pExpr { SafeClone ( rhs.m_pExpr ) }
	{}
	ISphExprRefPtr_c m_pExpr;
};
// explode a raw MVA blob into individual values and feed each one to fnAdd
// (values may sit unaligned inside the blob, hence sphUnalignedRead)
template <typename MVA, typename ADDER>
static void AddGroupedMVA ( ADDER && fnAdd, const ByteBlob_t& dRawMVA )
{
	VecTraits_T<MVA> dMvas {dRawMVA};
	for ( auto & tValue : dMvas )
		fnAdd ( sphUnalignedRead(tValue) );
}
  1570. template <typename PUSH>
  1571. static bool PushJsonField ( int64_t iValue, const BYTE * pBlobPool, PUSH && fnPush )
  1572. {
  1573. int iLen;
  1574. char szBuf[32];
  1575. SphGroupKey_t uGroupKey;
  1576. ESphJsonType eJson = sphJsonUnpackType ( iValue );
  1577. const BYTE * pValue = pBlobPool + sphJsonUnpackOffset ( iValue );
  1578. switch ( eJson )
  1579. {
  1580. case JSON_ROOT:
  1581. {
  1582. iLen = sphJsonNodeSize ( JSON_ROOT, pValue );
  1583. bool bEmpty = iLen==5; // mask and JSON_EOF
  1584. uGroupKey = bEmpty ? 0 : sphFNV64 ( pValue, iLen );
  1585. return fnPush ( bEmpty ? nullptr : &iValue, uGroupKey );
  1586. }
  1587. case JSON_STRING:
  1588. case JSON_OBJECT:
  1589. case JSON_MIXED_VECTOR:
  1590. iLen = sphJsonUnpackInt ( &pValue );
  1591. uGroupKey = ( iLen==1 && eJson!=JSON_STRING ) ? 0 : sphFNV64 ( pValue, iLen );
  1592. return fnPush ( ( iLen==1 && eJson!=JSON_STRING ) ? nullptr : &iValue, uGroupKey );
  1593. case JSON_STRING_VECTOR:
  1594. {
  1595. bool bRes = false;
  1596. sphJsonUnpackInt ( &pValue );
  1597. iLen = sphJsonUnpackInt ( &pValue );
  1598. for ( int i=0;i<iLen;i++ )
  1599. {
  1600. int64_t iNewValue = sphJsonPackTypeOffset ( JSON_STRING, pValue-pBlobPool );
  1601. int iStrLen = sphJsonUnpackInt ( &pValue );
  1602. uGroupKey = sphFNV64 ( pValue, iStrLen );
  1603. bRes |= fnPush ( &iNewValue, uGroupKey );
  1604. pValue += iStrLen;
  1605. }
  1606. return bRes;
  1607. }
  1608. case JSON_INT32:
  1609. #if __has_include( <charconv>)
  1610. *std::to_chars ( szBuf, szBuf + 32, (int)sphGetDword ( pValue ) ).ptr = '\0';
  1611. return fnPush ( &iValue, sphFNV64 ( szBuf ) );
  1612. #else
  1613. return fnPush ( &iValue, sphFNV64 ( (BYTE*)FormatInt ( szBuf, (int)sphGetDword ( pValue ) ) ) );
  1614. #endif
  1615. case JSON_INT64:
  1616. #if __has_include( <charconv>)
  1617. *std::to_chars ( szBuf, szBuf + 32, sphJsonLoadBigint ( &pValue ) ).ptr = '\0';
  1618. return fnPush ( &iValue, sphFNV64 ( szBuf ) );
  1619. #else
  1620. return fnPush ( &iValue, sphFNV64 ( (BYTE*)FormatInt ( szBuf, (int)sphJsonLoadBigint ( &pValue ) ) ) );
  1621. #endif
  1622. case JSON_DOUBLE:
  1623. snprintf ( szBuf, sizeof(szBuf), "%f", sphQW2D ( sphJsonLoadBigint ( &pValue ) ) );
  1624. return fnPush ( &iValue, sphFNV64 ( (const BYTE*)szBuf ) );
  1625. case JSON_INT32_VECTOR:
  1626. {
  1627. bool bRes = false;
  1628. iLen = sphJsonUnpackInt ( &pValue );
  1629. auto p = (const int*)pValue;
  1630. for ( int i=0;i<iLen;i++ )
  1631. {
  1632. int64_t iPacked = sphJsonPackTypeOffset ( JSON_INT32, (const BYTE*)p-pBlobPool );
  1633. uGroupKey = *p++;
  1634. bRes |= fnPush ( &iPacked, uGroupKey );
  1635. }
  1636. return bRes;
  1637. }
  1638. case JSON_INT64_VECTOR:
  1639. case JSON_DOUBLE_VECTOR:
  1640. {
  1641. bool bRes = false;
  1642. iLen = sphJsonUnpackInt ( &pValue );
  1643. auto p = (const int64_t*)pValue;
  1644. ESphJsonType eType = eJson==JSON_INT64_VECTOR ? JSON_INT64 : JSON_DOUBLE;
  1645. for ( int i=0;i<iLen;i++ )
  1646. {
  1647. int64_t iPacked = sphJsonPackTypeOffset ( eType, (const BYTE*)p-pBlobPool );
  1648. uGroupKey = *p++;
  1649. bRes |= fnPush ( &iPacked, uGroupKey );
  1650. }
  1651. return bRes;
  1652. }
  1653. case JSON_TRUE:
  1654. case JSON_FALSE:
  1655. uGroupKey = eJson;
  1656. return fnPush ( &iValue, uGroupKey );
  1657. default:
  1658. uGroupKey = 0;
  1659. iValue = 0;
  1660. return fnPush ( &iValue, uGroupKey );
  1661. }
  1662. }
  1663. template<typename T>
  1664. void FetchMVAKeys ( CSphVector<SphGroupKey_t> & dKeys, const CSphMatch & tMatch, const CSphAttrLocator & tLocator, const BYTE * pBlobPool )
  1665. {
  1666. dKeys.Resize(0);
  1667. if ( !pBlobPool )
  1668. return;
  1669. int iLengthBytes = 0;
  1670. const BYTE * pMva = sphGetBlobAttr ( tMatch, tLocator, pBlobPool, iLengthBytes );
  1671. int iNumValues = iLengthBytes / sizeof(T);
  1672. const T * pValues = (const T*)pMva;
  1673. dKeys.Resize(iNumValues);
  1674. for ( int i = 0; i<iNumValues; i++ )
  1675. dKeys[i] = (SphGroupKey_t)pValues[i];
  1676. }
/// composite grouper: the key is one combined FNV64 hash over several attributes
template <class PRED, bool HAVE_COLUMNAR>
class CSphGrouperMulti final: public CSphGrouper, public PRED
{
	using MYTYPE = CSphGrouperMulti<PRED,HAVE_COLUMNAR>;
public:
	CSphGrouperMulti ( const CSphVector<CSphColumnInfo> & dAttrs, VecRefPtrs_t<ISphExpr *> dJsonKeys, ESphCollation eCollation );
	SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const final;
	void SetBlobPool ( const BYTE * pBlobPool ) final;
	void SetColumnar ( const columnar::Columnar_i * pColumnar ) final;
	CSphGrouper * Clone() const final;
	void MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const final;
	SphGroupKey_t KeyFromValue ( SphAttr_t ) const final { assert(0); return SphGroupKey_t(); } // single-value keys make no sense here
	void GetLocator ( CSphAttrLocator & ) const final { assert(0); } // no single locator either
	ESphAttr GetResultType() const final { return SPH_ATTR_BIGINT; }
	bool IsMultiValue() const final;
private:
	CSphVector<CSphColumnInfo> m_dAttrs;		// attributes to combine, in order
	VecRefPtrs_t<ISphExpr *> m_dJsonKeys;		// per-attribute JSON key expressions (parallel to m_dAttrs)
	ESphCollation m_eCollation = SPH_COLLATION_DEFAULT;
	CSphVector<CSphRefcountedPtr<CSphGrouper>> m_dSingleKeyGroupers;	// pre-spawned per-attr groupers (HAVE_COLUMNAR only)
	CSphVector<CSphRefcountedPtr<CSphGrouper>> m_dMultiKeyGroupers;
	SphGroupKey_t FetchStringKey ( const CSphMatch & tMatch, const CSphAttrLocator & tLocator, SphGroupKey_t tPrevKey ) const;
	void SpawnColumnarGroupers();
};
/// dJsonKeys is taken by value and moved in; it must be parallel to dAttrs
template <class PRED, bool HAVE_COLUMNAR>
CSphGrouperMulti<PRED,HAVE_COLUMNAR>::CSphGrouperMulti ( const CSphVector<CSphColumnInfo> & dAttrs, VecRefPtrs_t<ISphExpr *> dJsonKeys, ESphCollation eCollation )
	: m_dAttrs ( dAttrs )
	, m_dJsonKeys ( std::move(dJsonKeys) )
	, m_eCollation ( eCollation )
{
	assert ( dAttrs.GetLength()>1 );	// a single attribute would not need a composite grouper
	assert ( dAttrs.GetLength()==m_dJsonKeys.GetLength() );
	if constexpr ( HAVE_COLUMNAR )
		SpawnColumnarGroupers();
}
  1712. template <class PRED, bool HAVE_COLUMNAR>
  1713. SphGroupKey_t CSphGrouperMulti<PRED,HAVE_COLUMNAR>::KeyFromMatch ( const CSphMatch & tMatch ) const
  1714. {
  1715. auto tKey = ( SphGroupKey_t ) SPH_FNV64_SEED;
  1716. for ( int i=0; i<m_dAttrs.GetLength(); i++ )
  1717. {
  1718. if constexpr ( HAVE_COLUMNAR )
  1719. {
  1720. if ( m_dSingleKeyGroupers[i] )
  1721. {
  1722. // use pre-spawned grouper
  1723. SphGroupKey_t tColumnarKey = m_dSingleKeyGroupers[i]->KeyFromMatch(tMatch);
  1724. tKey = ( SphGroupKey_t ) sphFNV64 ( tColumnarKey, tKey );
  1725. continue;
  1726. }
  1727. }
  1728. switch ( m_dAttrs[i].m_eAttrType )
  1729. {
  1730. case SPH_ATTR_STRING:
  1731. case SPH_ATTR_STRINGPTR:
  1732. tKey = FetchStringKey ( tMatch, m_dAttrs[i].m_tLocator, tKey );
  1733. break;
  1734. default:
  1735. {
  1736. SphAttr_t tAttr = tMatch.GetAttr ( m_dAttrs[i].m_tLocator );
  1737. tKey = ( SphGroupKey_t ) sphFNV64 ( tAttr, tKey );
  1738. }
  1739. break;
  1740. }
  1741. }
  1742. return tKey;
  1743. }
  1744. template <class PRED, bool HAVE_COLUMNAR>
  1745. void CSphGrouperMulti<PRED, HAVE_COLUMNAR>::SetBlobPool ( const BYTE * pBlobPool )
  1746. {
  1747. CSphGrouper::SetBlobPool ( pBlobPool );
  1748. for ( auto & i : m_dJsonKeys )
  1749. if ( i )
  1750. i->Command ( SPH_EXPR_SET_BLOB_POOL, (void*)pBlobPool );
  1751. }
  1752. template <class PRED, bool HAVE_COLUMNAR>
  1753. void CSphGrouperMulti<PRED,HAVE_COLUMNAR>::SetColumnar ( const columnar::Columnar_i * pColumnar )
  1754. {
  1755. CSphGrouper::SetColumnar ( pColumnar );
  1756. for ( auto & i : m_dSingleKeyGroupers )
  1757. if ( i )
  1758. i->SetColumnar ( pColumnar );
  1759. for ( auto & i : m_dMultiKeyGroupers )
  1760. if ( i )
  1761. i->SetColumnar ( pColumnar );
  1762. }
  1763. template <class PRED, bool HAVE_COLUMNAR>
  1764. CSphGrouper * CSphGrouperMulti<PRED,HAVE_COLUMNAR>::Clone() const
  1765. {
  1766. VecRefPtrs_t<ISphExpr *> dJsonKeys;
  1767. m_dJsonKeys.for_each ( [&dJsonKeys] ( ISphExpr * p ) { dJsonKeys.Add ( SafeClone ( p ) ); } );
  1768. return new MYTYPE ( m_dAttrs, std::move(dJsonKeys), m_eCollation );
  1769. }
  1770. template <class PRED, bool HAVE_COLUMNAR>
  1771. void CSphGrouperMulti<PRED,HAVE_COLUMNAR>::MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const
  1772. {
  1773. dKeys.Resize(0);
  1774. CSphFixedVector<CSphVector<SphGroupKey_t>> dAllKeys { m_dAttrs.GetLength() };
  1775. for ( int i=0; i<m_dAttrs.GetLength(); i++ )
  1776. {
  1777. auto & dCurKeys = dAllKeys[i];
  1778. if constexpr ( HAVE_COLUMNAR )
  1779. {
  1780. if ( m_dMultiKeyGroupers[i] )
  1781. {
  1782. // use pre-spawned grouper
  1783. m_dMultiKeyGroupers[i]->MultipleKeysFromMatch ( tMatch, dCurKeys );
  1784. continue;
  1785. }
  1786. }
  1787. switch ( m_dAttrs[i].m_eAttrType )
  1788. {
  1789. case SPH_ATTR_UINT32SET:
  1790. FetchMVAKeys<DWORD> ( dCurKeys, tMatch, m_dAttrs[i].m_tLocator, GetBlobPool() );
  1791. break;
  1792. case SPH_ATTR_INT64SET:
  1793. FetchMVAKeys<int64_t> ( dCurKeys, tMatch, m_dAttrs[i].m_tLocator, GetBlobPool() );
  1794. break;
  1795. case SPH_ATTR_JSON:
  1796. PushJsonField ( m_dJsonKeys[i]->Int64Eval(tMatch), m_pBlobPool, [&dCurKeys]( SphAttr_t * pAttr, SphGroupKey_t uMatchGroupKey ){ dCurKeys.Add(uMatchGroupKey); return true; } );
  1797. break;
  1798. case SPH_ATTR_JSON_FIELD:
  1799. {
  1800. assert ( m_dAttrs[i].m_pExpr );
  1801. PushJsonField ( m_dAttrs[i].m_pExpr->Int64Eval ( tMatch ), m_pBlobPool, [&dCurKeys]( SphAttr_t * pAttr, SphGroupKey_t uMatchGroupKey )
  1802. {
  1803. dCurKeys.Add ( uMatchGroupKey );
  1804. return true;
  1805. });
  1806. }
  1807. break;
  1808. case SPH_ATTR_STRING:
  1809. case SPH_ATTR_STRINGPTR:
  1810. {
  1811. SphGroupKey_t tStringKey = FetchStringKey ( tMatch, m_dAttrs[i].m_tLocator, SPH_FNV64_SEED );
  1812. if ( tStringKey!=(SphGroupKey_t)SPH_FNV64_SEED )
  1813. dAllKeys[i].Add ( tStringKey );
  1814. }
  1815. break;
  1816. default:
  1817. dAllKeys[i].Add ( tMatch.GetAttr ( m_dAttrs[i].m_tLocator ) );
  1818. break;
  1819. }
  1820. }
  1821. CSphFixedVector<int> dIndexes { m_dAttrs.GetLength() };
  1822. dIndexes.ZeroVec();
  1823. do
  1824. {
  1825. auto tKey = ( SphGroupKey_t ) SPH_FNV64_SEED;
  1826. ARRAY_FOREACH ( i, dAllKeys )
  1827. if ( dAllKeys[i].GetLength() )
  1828. tKey = (SphGroupKey_t)sphFNV64 ( dAllKeys[i][dIndexes[i]], tKey );
  1829. dKeys.Add(tKey);
  1830. }
  1831. while ( NextSet ( dIndexes, dAllKeys ) );
  1832. }
  1833. template <class PRED, bool HAVE_COLUMNAR>
  1834. bool CSphGrouperMulti<PRED,HAVE_COLUMNAR>::IsMultiValue() const
  1835. {
  1836. return m_dAttrs.any_of ( []( auto & tAttr ){ return tAttr.m_eAttrType==SPH_ATTR_JSON || tAttr.m_eAttrType==SPH_ATTR_JSON_FIELD || tAttr.m_eAttrType==SPH_ATTR_UINT32SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET; } );
  1837. }
  1838. template <class PRED, bool HAVE_COLUMNAR>
  1839. SphGroupKey_t CSphGrouperMulti<PRED,HAVE_COLUMNAR>::FetchStringKey ( const CSphMatch & tMatch, const CSphAttrLocator & tLocator, SphGroupKey_t tPrevKey ) const
  1840. {
  1841. ByteBlob_t tData = tMatch.FetchAttrData ( tLocator, GetBlobPool() );
  1842. if ( !tData.first || !tData.second )
  1843. return tPrevKey;
  1844. return PRED::Hash ( tData.first, tData.second, tPrevKey );
  1845. }
  1846. template <class PRED, bool HAVE_COLUMNAR>
  1847. void CSphGrouperMulti<PRED,HAVE_COLUMNAR>::SpawnColumnarGroupers()
  1848. {
  1849. m_dSingleKeyGroupers.Resize ( m_dAttrs.GetLength() );
  1850. m_dMultiKeyGroupers.Resize ( m_dAttrs.GetLength() );
  1851. ARRAY_FOREACH ( i, m_dAttrs )
  1852. {
  1853. const auto & tAttr = m_dAttrs[i];
  1854. if ( !tAttr.IsColumnar() && !tAttr.IsColumnarExpr() )
  1855. continue;
  1856. switch ( tAttr.m_eAttrType )
  1857. {
  1858. case SPH_ATTR_STRING:
  1859. case SPH_ATTR_STRINGPTR:
  1860. m_dSingleKeyGroupers[i] = CreateGrouperColumnarString ( tAttr, m_eCollation );
  1861. break;
  1862. case SPH_ATTR_UINT32SET:
  1863. case SPH_ATTR_UINT32SET_PTR:
  1864. case SPH_ATTR_INT64SET:
  1865. case SPH_ATTR_INT64SET_PTR:
  1866. m_dMultiKeyGroupers[i] = CreateGrouperColumnarMVA(tAttr);
  1867. break;
  1868. default:
  1869. m_dSingleKeyGroupers[i] = CreateGrouperColumnarInt(tAttr);
  1870. break;
  1871. }
  1872. }
  1873. }
  1874. /////////////////////////////////////////////////////////////////////////////
/// grouper over a single MVA attribute; always yields multiple keys per match
template<typename T>
class GrouperMVA_T : public CSphGrouper
{
public:
	explicit GrouperMVA_T ( const CSphAttrLocator & tLocator ) : m_tLocator ( tLocator ) {}

	// single-key entry points are not meaningful for an MVA grouper
	SphGroupKey_t KeyFromValue ( SphAttr_t ) const override { assert(0); return SphGroupKey_t(); }
	SphGroupKey_t KeyFromMatch ( const CSphMatch & tMatch ) const override { assert(0); return SphGroupKey_t(); }

	void MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const override;
	void GetLocator ( CSphAttrLocator & tOut ) const override { tOut = m_tLocator; }
	ESphAttr GetResultType () const override;	// specialized per value type (DWORD/int64_t) below
	CSphGrouper * Clone() const override { return new GrouperMVA_T ( m_tLocator ); }
	bool IsMultiValue() const override { return true; }

private:
	CSphAttrLocator m_tLocator;
};
// 32-bit MVA values group into a 32-bit integer result attribute
template<>
ESphAttr GrouperMVA_T<DWORD>::GetResultType() const
{
	return SPH_ATTR_INTEGER;
}
// 64-bit MVA values group into a bigint result attribute
template<>
ESphAttr GrouperMVA_T<int64_t>::GetResultType() const
{
	return SPH_ATTR_BIGINT;
}
/// expand the MVA blob into one group key per stored value
template<typename T>
void GrouperMVA_T<T>::MultipleKeysFromMatch ( const CSphMatch & tMatch, CSphVector<SphGroupKey_t> & dKeys ) const
{
	FetchMVAKeys<T> ( dKeys, tMatch, m_tLocator, GetBlobPool() );
}
/// common base for COUNT(DISTINCT ...) key fetchers: holds the attribute locator and blob pool
class DistinctFetcher_c : public DistinctFetcher_i
{
public:
	explicit DistinctFetcher_c ( const CSphAttrLocator & tLocator ) : m_tLocator(tLocator) {}
	void SetColumnar ( const columnar::Columnar_i * pColumnar ) override {}	// no-op: these fetchers read row-wise data only
	void SetBlobPool ( const BYTE * pBlobPool ) override { m_pBlobPool = pBlobPool; }
	void FixupLocators ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override { sphFixupLocator ( m_tLocator, pOldSchema, pNewSchema ); }

protected:
	CSphAttrLocator m_tLocator;		// locator of the attribute we count distinct values of
	const BYTE * m_pBlobPool = nullptr;	// blob pool for string/MVA/JSON attrs; set via SetBlobPool
};
/// base for single-key (plain attribute) distinct fetchers; multi-key access is a programming error
class DistinctFetcherPlain_c : public DistinctFetcher_c
{
	using DistinctFetcher_c::DistinctFetcher_c;

public:
	void GetKeys ( const CSphMatch & tMatch, CSphVector<SphAttr_t> & dKeys ) const override { assert ( 0 && " Requesting multiple keys from plain distinct fetcher" ); }
	bool IsMultiValue() const override { return false; }
};
/// base for multi-key (MVA/JSON) distinct fetchers; single-key access is a programming error
class DistinctFetcherMulti_c : public DistinctFetcher_c
{
	using DistinctFetcher_c::DistinctFetcher_c;

public:
	SphAttr_t GetKey ( const CSphMatch & tMatch ) const override { assert ( 0 && " Requesting single keys from multi distinct fetcher" ); return 0; }
	bool IsMultiValue() const override { return true; }
};
/// distinct fetcher for plain integer-like attributes: the raw attribute value is the key
class DistinctFetcherInt_c : public DistinctFetcherPlain_c
{
	using DistinctFetcherPlain_c::DistinctFetcherPlain_c;

public:
	SphAttr_t GetKey ( const CSphMatch & tMatch ) const override { return tMatch.GetAttr(m_tLocator); }
	DistinctFetcher_i * Clone() const override { return new DistinctFetcherInt_c(m_tLocator); }
};
/// distinct fetcher for string attributes: the key is an FNV64 hash of the string blob
class DistinctFetcherString_c : public DistinctFetcherPlain_c
{
	using DistinctFetcherPlain_c::DistinctFetcherPlain_c;

public:
	SphAttr_t GetKey ( const CSphMatch & tMatch ) const override;
	DistinctFetcher_i * Clone() const override { return new DistinctFetcherString_c(m_tLocator); }
};
  1944. SphAttr_t DistinctFetcherString_c::GetKey ( const CSphMatch & tMatch ) const
  1945. {
  1946. auto dBlob = tMatch.FetchAttrData ( m_tLocator, m_pBlobPool );
  1947. return (SphAttr_t) sphFNV64 ( dBlob );
  1948. }
/// distinct fetcher for JSON fields: one key per value pushed by PushJsonField
class DistinctFetcherJsonField_c : public DistinctFetcherMulti_c
{
	using DistinctFetcherMulti_c::DistinctFetcherMulti_c;

public:
	void GetKeys ( const CSphMatch & tMatch, CSphVector<SphAttr_t> & dKeys ) const override;
	DistinctFetcher_i * Clone() const override { return new DistinctFetcherJsonField_c(m_tLocator); }
};
/// collect one distinct key per JSON field value
void DistinctFetcherJsonField_c::GetKeys ( const CSphMatch & tMatch, CSphVector<SphAttr_t> & dKeys ) const
{
	dKeys.Resize(0);
	PushJsonField ( tMatch.GetAttr(m_tLocator), m_pBlobPool, [&dKeys]( SphAttr_t * pAttr, SphGroupKey_t uGroupKey )
		{
			// zero keys are skipped — presumably the "no value" sentinel; confirm against PushJsonField
			if ( uGroupKey )
				dKeys.Add(uGroupKey);
			return true;	// keep iterating over remaining field values
		} );
}
/// distinct fetcher for MVA attributes: one key per stored value
template<typename T>
class DistinctFetcherMva_T : public DistinctFetcherMulti_c
{
	using DistinctFetcherMulti_c::DistinctFetcherMulti_c;

public:
	void GetKeys ( const CSphMatch & tMatch, CSphVector<SphAttr_t> & dKeys ) const override;
	DistinctFetcher_i * Clone() const override { return new DistinctFetcherMva_T(m_tLocator); }
};
/// collect one distinct key per MVA value via the AddGroupedMVA helper
template<typename T>
void DistinctFetcherMva_T<T>::GetKeys ( const CSphMatch & tMatch, CSphVector<SphAttr_t> & dKeys ) const
{
	dKeys.Resize(0);
	AddGroupedMVA<T> ( [&dKeys]( SphAttr_t tAttr ){ dKeys.Add(tAttr); }, tMatch.FetchAttrData ( m_tLocator, m_pBlobPool ) );
}
/// factory: pick the distinct-key fetcher matching the attribute type
/// NOTE(review): sName is currently unused here — kept for interface compatibility?
static DistinctFetcher_i * CreateDistinctFetcher ( const CSphString & sName, const CSphAttrLocator & tLocator, ESphAttr eType )
{
	// fixme! what about json?
	switch ( eType )
	{
	case SPH_ATTR_STRING:
	case SPH_ATTR_STRINGPTR:	return new DistinctFetcherString_c(tLocator);
	case SPH_ATTR_JSON_FIELD:	return new DistinctFetcherJsonField_c(tLocator);
	case SPH_ATTR_UINT32SET:
	case SPH_ATTR_UINT32SET_PTR: return new DistinctFetcherMva_T<DWORD>(tLocator);
	case SPH_ATTR_INT64SET:
	case SPH_ATTR_INT64SET_PTR:	return new DistinctFetcherMva_T<int64_t>(tLocator);
	default:					return new DistinctFetcherInt_c(tLocator);	// plain integer-like attrs
	}
}
  1995. /////////////////////////////////////////////////////////////////////////////
/// group sorting functor
/// wraps COMPGROUP's match comparator for index-based sorting over a shared match array
template < typename COMPGROUP >
struct GroupSorter_fn : public CSphMatchComparatorState, public MatchSortAccessor_t
{
	const VecTraits_T<CSphMatch> & m_dBase;		// the match storage that indexes refer into

	explicit GroupSorter_fn ( const CSphMatchQueueTraits& dBase )
		: m_dBase ( dBase.GetMatches() )
	{}

	// NOTE: arguments are deliberately passed swapped (b before a) to COMPGROUP::IsLess,
	// i.e. this compares in inverse order
	FORCE_INLINE bool IsLess ( int a, int b ) const
	{
		return COMPGROUP::IsLess ( m_dBase[b], m_dBase[a], *this );
	}
};
/// additional group-by sorter settings
struct CSphGroupSorterSettings
{
	CSphAttrLocator m_tLocGroupby;		///< locator for @groupby
	CSphAttrLocator m_tLocCount;		///< locator for @count
	CSphAttrLocator m_tLocDistinct;		///< locator for @distinct
	CSphAttrLocator m_tLocGroupbyStr;	///< locator for @groupbystr

	bool m_bDistinct = false;			///< whether we need distinct
	CSphRefcountedPtr<CSphGrouper> m_pGrouper;	///< group key calculator
	CSphRefcountedPtr<DistinctFetcher_i> m_pDistinctFetcher;	///< distinct-key extractor (used when m_bDistinct)
	bool m_bImplicit = false;			///< for queries with aggregate functions but without group by clause
	SharedPtr_t<ISphFilter> m_pAggrFilterTrait;	///< aggregate filter that got owned by grouper
	bool m_bJson = false;				///< whether we're grouping by Json attribute
	int m_iMaxMatches = 0;
	bool m_bGrouped = false;			///< are we going to push already grouped matches to it?
	int m_iDistinctAccuracy = 16;		///< HyperLogLog accuracy. 0 means "don't use HLL"

	/// rewire the magic-attribute locators (and the distinct fetcher) from one schema to another
	void FixupLocators ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
	{
		sphFixupLocator ( m_tLocGroupby, pOldSchema, pNewSchema );
		sphFixupLocator ( m_tLocCount, pOldSchema, pNewSchema );
		sphFixupLocator ( m_tLocDistinct, pOldSchema, pNewSchema );
		sphFixupLocator ( m_tLocGroupbyStr, pOldSchema, pNewSchema );

		if ( m_pDistinctFetcher )
			m_pDistinctFetcher->FixupLocators ( pOldSchema, pNewSchema );
	}

	/// derive the HLL accuracy from the distinct threshold; iThresh==0 disables HLL
	void SetupDistinctAccuracy ( int iThresh )
	{
		if ( !iThresh )
		{
			m_iDistinctAccuracy = 0;
			return;
		}

		// scale threshold by the hash load factor to estimate the actual table size
		iThresh = int ( float(iThresh) / OpenHashTable_T<int,int>::GetLoadFactor() ) + 1;
		m_iDistinctAccuracy = iThresh ? sphLog2(iThresh) + 4 : 0;
		// clamp the resulting accuracy to [14..18]
		m_iDistinctAccuracy = Min ( m_iDistinctAccuracy, 18 );
		m_iDistinctAccuracy = Max ( m_iDistinctAccuracy, 14 );
	}
};
/// helper that clones matches while keeping/moving the group-by part (aggregates, group_concat)
/// separate from the plain attribute part
struct MatchCloner_t
{
private:
	CSphFixedVector<CSphRowitem> m_dRowBuf { 0 };	// scratch copy of a match's dynamic row
	CSphVector<CSphAttrLocator> m_dAttrsGrp; // locators for grouping attrs (@groupby, @count, @distinct, etc.)
	CSphVector<CSphAttrLocator> m_dAttrsPtr; // locators for group_concat attrs
	CSphVector<int> m_dMyPtrRows; // rowids matching m_dAttrsPtr. i.e. grpconcat ptr result I own
	CSphVector<int> m_dOtherPtrRows; // rest rowids NOT matching m_dAttrsPtr. i.e. other ptr results
	const CSphSchemaHelper * m_pSchema = nullptr;
	bool m_bPtrRowsCommited = false; // readiness of m_dMyPtrRows and m_dOtherPtrRows

public:
	void SetSchema ( const ISphSchema * pSchema )
	{
		m_pSchema = (const CSphSchemaHelper *) pSchema; /// lazy hack
		m_dRowBuf.Reset ( m_pSchema->GetDynamicSize() );
	}

	// clone plain part (incl. pointers) from src to dst
	// keep group part (aggregates, group_concat) of dst intact
	// it assumes that tDst m_pDynamic contains correct data, or wiped away.
	void CloneKeepingAggrs ( CSphMatch & tDst, const CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );

		// memorize old dynamic first
		memcpy ( m_dRowBuf.Begin(), tDst.m_pDynamic, m_dRowBuf.GetLengthBytes() );
		m_pSchema->CloneMatchSpecial ( tDst, tSrc, m_dOtherPtrRows );
		/*
			FreeDataSpecial ( tDst, m_dOtherPtrRows );
			pDst->Combine ( *pSrc, GetDynamicSize () );
			CopyPtrsSpecial ( tDst, tSrc, m_dOtherPtrRows );
		*/

		// restore back group-by attributes
		for ( auto & tAttrGrp : m_dAttrsGrp )
			tDst.SetAttr ( tAttrGrp, sphGetRowAttr ( m_dRowBuf.Begin(), tAttrGrp ) );

		// restore back group_concat attribute(s)
		for ( auto & tAttrPtr : m_dAttrsPtr )
			tDst.SetAttr ( tAttrPtr, sphGetRowAttr ( m_dRowBuf.Begin (), tAttrPtr ) );
	}

	// clone plain part (incl. pointers) from src to dst
	// group part (aggregates, group_concat) is not copied
	void CloneWithoutAggrs ( CSphMatch & tDst, const CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );

		m_pSchema->CloneMatchSpecial ( tDst, tSrc, m_dOtherPtrRows );
		/*
			FreeDataSpecial ( tDst, m_dOtherPtrRows );
			pDst->Combine ( *pSrc, GetDynamicSize () );
			CopyPtrsSpecial ( tDst, tSrc, m_dOtherPtrRows );
		*/
	}

	// just write group part (aggregates, group_concat) without cloning
	// assumes tDst has allocated m_pDynamic. Fixme! look to #881 again...
	void CopyAggrs ( CSphMatch & tDst, const CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );
		assert ( &tDst!=&tSrc );
		assert ( tDst.m_pDynamic );

		for ( auto & dAttrGrp : m_dAttrsGrp )
			tDst.SetAttr ( dAttrGrp, tSrc.GetAttr ( dAttrGrp ));

		CSphSchemaHelper::FreeDataSpecial ( tDst, m_dMyPtrRows );
		CSphSchemaHelper::CopyPtrsSpecial ( tDst, tSrc, m_dMyPtrRows );
	}

	// copy group part (aggregates)
	// move group_concat part without reallocating
	void MoveAggrs ( CSphMatch & tDst, CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );
		assert ( &tDst!=&tSrc );
		assert ( tDst.m_pDynamic );

		for ( auto & dAttrGrp : m_dAttrsGrp )
			tDst.SetAttr ( dAttrGrp, tSrc.GetAttr ( dAttrGrp ));

		CSphSchemaHelper::MovePtrsSpecial( tDst, tSrc, m_dMyPtrRows );
	}

	// register a plain (non-pointer) group-by attribute
	inline void AddRaw ( const CSphAttrLocator& tLoc )
	{
		m_dAttrsGrp.Add ( tLoc );
	}

	// register a pointer-typed (group_concat) attribute
	inline void AddPtr ( const CSphAttrLocator &tLoc )
	{
		m_dAttrsPtr.Add ( tLoc );
	}

	inline void ResetAttrs()
	{
		m_dAttrsGrp.Resize ( 0 );
		m_dAttrsPtr.Resize ( 0 );
	}

	// (re)fill m_dMyPtrRows and m_dOtherPtrRows from m_dAttrsPtr
	inline void CommitPtrs ()
	{
		assert ( m_pSchema );
		static const int SIZE_OF_ROW = 8 * sizeof ( CSphRowitem );

		if ( m_bPtrRowsCommited )
			m_dMyPtrRows.Resize(0);

		for ( const CSphAttrLocator &tLoc : m_dAttrsPtr )
			m_dMyPtrRows.Add ( tLoc.m_iBitOffset / SIZE_OF_ROW );

		m_dOtherPtrRows = m_pSchema->SubsetPtrs ( m_dMyPtrRows );

#ifndef NDEBUG
		// sanitize check
		m_dMyPtrRows = m_pSchema->SubsetPtrs ( m_dOtherPtrRows );
		assert ( m_dMyPtrRows.GetLength ()==m_dAttrsPtr.GetLength () );
#endif
		m_bPtrRowsCommited = true;
	}
};
/// shared base for group-by sorters: owns aggregate functions and the match pre-group cloner
class BaseGroupSorter_c : public BlobPool_c, protected CSphGroupSorterSettings
{
	using BASE = CSphGroupSorterSettings;

public:
	FWD_BASECTOR( BaseGroupSorter_c )
	~BaseGroupSorter_c() override { ResetAggregates(); }

protected:
	MatchCloner_t m_tPregroup;					// clones matches keeping/moving the group-by part
	CSphVector<AggrFunc_i *> m_dAggregates;		// owned aggregate functions (deleted in ResetAggregates)

	void SetColumnar ( columnar::Columnar_i * pColumnar )
	{
		for ( auto i : m_dAggregates )
			i->SetColumnar(pColumnar);
	}

	/// schema, aggregates setup
	template <int DISTINCT>
	inline void SetupBaseGrouper ( ISphSchema * pSchema, CSphVector<AggrFunc_i *> * pAvgs = nullptr )
	{
		m_tPregroup.ResetAttrs();
		ResetAggregates();
		m_tPregroup.SetSchema ( pSchema );
		m_tPregroup.AddRaw ( m_tLocGroupby ); // @groupby
		m_tPregroup.AddRaw ( m_tLocCount ); // @count
		if constexpr ( DISTINCT )
			m_tPregroup.AddRaw ( m_tLocDistinct ); // @distinct

		// extract aggregates
		for ( int i = 0; i<pSchema->GetAttrsCount (); ++i )
		{
			const CSphColumnInfo &tAttr = pSchema->GetAttr ( i );

			if ( tAttr.m_eAggrFunc==SPH_AGGR_NONE
				|| IsGroupbyMagic ( tAttr.m_sName ) // @count, @groupby, @groupbystr, @distinct, count(*), groupby()
				|| IsSortStringInternal ( tAttr.m_sName.cstr () ) )
				continue;

			switch ( tAttr.m_eAggrFunc )
			{
			case SPH_AGGR_SUM: m_dAggregates.Add ( CreateAggrSum(tAttr) ); break;
			case SPH_AGGR_AVG:
				m_dAggregates.Add ( CreateAggrAvg ( tAttr, m_tLocCount ) );
				// store avg to calculate these attributes prior to groups sort
				if ( pAvgs )
					pAvgs->Add ( m_dAggregates.Last() );
				break;
			case SPH_AGGR_MIN: m_dAggregates.Add ( CreateAggrMin(tAttr) ); break;
			case SPH_AGGR_MAX: m_dAggregates.Add ( CreateAggrMax(tAttr) ); break;
			case SPH_AGGR_CAT:
				m_dAggregates.Add ( CreateAggrConcat(tAttr) );
				m_tPregroup.AddPtr ( tAttr.m_tLocator );	// group_concat results are pointer attrs
				break;
			default: assert ( 0 && "internal error: unhandled aggregate function" );
				break;
			}

			// every aggregate except group_concat keeps its value in a plain row attr
			if ( tAttr.m_eAggrFunc!=SPH_AGGR_CAT )
				m_tPregroup.AddRaw ( tAttr.m_tLocator );
		}
		m_tPregroup.CommitPtrs();
	}

	// HAVING filtering
	bool EvalHAVING ( const CSphMatch& tMatch )
	{
		return !m_pAggrFilterTrait || m_pAggrFilterTrait->Eval ( tMatch );
	}

	void AggrUpdate ( CSphMatch & tDst, const CSphMatch & tSrc, bool bGrouped, bool bMerge = false )
	{
		for ( auto * pAggregate : this->m_dAggregates )
			pAggregate->Update ( tDst, tSrc, bGrouped, bMerge );
	}

	void AggrSetup ( CSphMatch & tDst, const CSphMatch & tSrc, bool bMerge = false )
	{
		for ( auto * pAggregate : this->m_dAggregates )
			pAggregate->Setup ( tDst, tSrc, bMerge );
	}

	void AggrUngroup ( CSphMatch & tMatch )
	{
		for ( auto * pAggregate : this->m_dAggregates )
			pAggregate->Ungroup ( tMatch );
	}

private:
	// delete owned aggregate functions and clear the list
	void ResetAggregates()
	{
		for ( auto & pAggregate : m_dAggregates )
			SafeDelete ( pAggregate );

		m_dAggregates.Resize(0);
	}
};
/// comparator wrapper used to order matches inside a single group
/// holds a reference on the underlying virtual comparator
class SubGroupSorter_fn : public ISphNoncopyable
{
	const VecTraits_T<CSphMatch> & m_dBase;			// the match storage that indexes refer into
	const CSphMatchComparatorState& m_tState;
	const ISphMatchComparator * m_pComp;			// refcounted: AddRef in ctor, Release in dtor

public:
	SubGroupSorter_fn ( const CSphMatchQueueTraits & dBase, const ISphMatchComparator * pC )
		: m_dBase ( dBase.GetMatches () )
		, m_tState ( dBase.GetState() )
		, m_pComp ( pC )
	{
		assert ( m_pComp );
		m_pComp->AddRef();
	}

	~SubGroupSorter_fn()
	{
		m_pComp->Release();
	}

	const ISphMatchComparator * GetComparator() const
	{
		return m_pComp;
	}

	// note the swapped arguments: "a greater than b" == "b less than a"
	bool MatchIsGreater ( const CSphMatch & a, const CSphMatch & b ) const
	{
		return m_pComp->VirtualIsLess ( b, a, m_tState );
	}

	// inverse order, i.e. work as IsGreater
	bool IsLess ( int a, int b ) const
	{
		return m_pComp->VirtualIsLess ( m_dBase[b], m_dBase[a], m_tState );
	}
};
/// match sorter with k-buffering and group-by - common part
/// UNIQ is the distinct-counting container; DISTINCT selects the counting mode
/// (0 = none, 1 = single-key fetcher, otherwise multi-key); NOTIFICATIONS enables popped-row tracking
template<typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS>
class KBufferGroupSorter_T : public CSphMatchQueueTraits, protected BaseGroupSorter_c
{
	using MYTYPE = KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTINCT,NOTIFICATIONS>;
	using BASE = CSphMatchQueueTraits;

public:
	KBufferGroupSorter_T ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
		: CSphMatchQueueTraits ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )	// k-buffer: allocate GROUPBY_FACTOR times the limit
		, BaseGroupSorter_c ( tSettings )
		, m_eGroupBy ( pQuery->m_eGroupFunc )
		, m_iLimit ( tSettings.m_iMaxMatches )
		, m_tGroupSorter (*this)
		, m_tSubSorter ( *this, pComp )
	{
		assert ( GROUPBY_FACTOR>1 );
		assert ( !DISTINCT || tSettings.m_pDistinctFetcher );
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve ( m_iSize );

		m_pGrouper = tSettings.m_pGrouper;
		m_pDistinctFetcher = tSettings.m_pDistinctFetcher;
		m_tUniq.SetAccuracy ( tSettings.m_iDistinctAccuracy );
	}

	/// schema setup
	void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final
	{
		// on a re-set, rewire locators to the new schema and drop old aggregates first
		if ( m_pSchema )
		{
			FixupLocators ( m_pSchema, pSchema );
			m_tGroupSorter.FixupLocators ( m_pSchema, pSchema, bRemapCmp );
			m_tPregroup.ResetAttrs ();
			m_dAggregates.Apply ( [] ( AggrFunc_i * pAggr ) { SafeDelete ( pAggr ); } );
			m_dAggregates.Resize ( 0 );
			m_dAvgs.Resize ( 0 );
		}

		BASE::SetSchema ( pSchema, bRemapCmp );
		SetupBaseGrouper<DISTINCT> ( pSchema, &m_dAvgs );
	}

	/// check if this sorter does groupby
	bool IsGroupby () const final
	{
		return true;
	}

	/// set blob pool pointer (for string+groupby sorters)
	void SetBlobPool ( const BYTE * pBlobPool ) final
	{
		BlobPool_c::SetBlobPool ( pBlobPool );
		m_pGrouper->SetBlobPool ( pBlobPool );
		if ( m_pDistinctFetcher )
			m_pDistinctFetcher->SetBlobPool(pBlobPool);
	}

	void SetColumnar ( columnar::Columnar_i * pColumnar ) final
	{
		CSphMatchQueueTraits::SetColumnar(pColumnar);
		BaseGroupSorter_c::SetColumnar(pColumnar);
		m_pGrouper->SetColumnar(pColumnar);
		if ( m_pDistinctFetcher )
			m_pDistinctFetcher->SetColumnar(pColumnar);
	}

	/// get entries count
	int GetLength () override
	{
		return Min ( Used(), m_iLimit );
	}

	/// set group comparator state
	void SetGroupState ( const CSphMatchComparatorState & tState ) final
	{
		m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;

		// FIXME! manual bitwise copying.. yuck
		for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; ++i )
		{
			m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
			m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
		}
		m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
		m_tGroupSorter.m_iNow = tState.m_iNow;

		// check whether we sort by distinct
		if constexpr ( DISTINCT )
		{
			const CSphColumnInfo * pDistinct = m_pSchema->GetAttr("@distinct");
			assert(pDistinct);
			for ( const auto & tLocator : m_tGroupSorter.m_tLocator )
				if ( tLocator==pDistinct->m_tLocator )
				{
					m_bSortByDistinct = true;
					break;
				}
		}
	}

	// distinct counting state cannot be safely shared across clones
	bool CanBeCloned() const final { return !DISTINCT && BASE::CanBeCloned(); }

protected:
	ESphGroupBy m_eGroupBy; ///< group-by function
	int m_iLimit; ///< max matches to be retrieved
	UNIQ m_tUniq;			///< distinct (group key, value) accumulator
	bool m_bSortByDistinct = false;
	GroupSorter_fn<COMPGROUP> m_tGroupSorter;	///< orders groups
	SubGroupSorter_fn m_tSubSorter;				///< orders matches inside a group
	CSphVector<AggrFunc_i *> m_dAvgs;			///< avg() aggregates; finalized before sorting groups (not owned; subset of m_dAggregates)
	bool m_bAvgFinal = false;
	CSphVector<SphAttr_t> m_dDistinctKeys;		///< scratch buffer for multi-key distinct fetchers
	static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)

	/// finalize distinct counters
	template <typename FIND>
	void Distinct ( FIND&& fnFind )
	{
		m_tUniq.Sort ();
		SphGroupKey_t uGroup;
		for ( int iCount = m_tUniq.CountStart ( uGroup ); iCount; iCount = m_tUniq.CountNext ( uGroup ) )
		{
			CSphMatch * pMatch = fnFind ( uGroup );
			if ( pMatch )
				pMatch->SetAttr ( m_tLocDistinct, iCount );
		}
	}

	// non-template wrapper so clones can call SetupBaseGrouper with this class's DISTINCT
	inline void SetupBaseGrouperWrp ( ISphSchema * pSchema, CSphVector<AggrFunc_i *> * pAvgs )
	{
		SetupBaseGrouper<DISTINCT> ( pSchema, pAvgs );
	}

	void CloneKBufferGroupSorter ( MYTYPE* pClone ) const
	{
		// basic clone
		BASE::CloneTo ( pClone );

		// actions from SetGroupState
		pClone->m_bSortByDistinct = m_bSortByDistinct;
		pClone->m_tGroupSorter.m_fnStrCmp = m_tGroupSorter.m_fnStrCmp;
		for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
		{
			pClone->m_tGroupSorter.m_eKeypart[i] = m_tGroupSorter.m_eKeypart[i];
			pClone->m_tGroupSorter.m_tLocator[i] = m_tGroupSorter.m_tLocator[i];
		}
		pClone->m_tGroupSorter.m_uAttrDesc = m_tGroupSorter.m_uAttrDesc;
		pClone->m_tGroupSorter.m_iNow = m_tGroupSorter.m_iNow;

		// complete SetSchema
		pClone->m_dAvgs.Resize ( 0 );
		pClone->SetupBaseGrouperWrp ( pClone->m_pSchema, &pClone->m_dAvgs );

		// m_pGrouper also need to be cloned (otherwise SetBlobPool will cause races)
		if ( m_pGrouper )
			pClone->m_pGrouper = m_pGrouper->Clone ();

		if ( m_pDistinctFetcher )
			pClone->m_pDistinctFetcher = m_pDistinctFetcher->Clone ();
	}

	template<typename SORTER> SORTER * CloneSorterT () const
	{
		// synthesize a minimal query carrying just the fields the ctor reads
		CSphQuery dFoo;
		dFoo.m_iMaxMatches = m_iLimit;
		dFoo.m_eGroupFunc = m_eGroupBy;
		auto pClone = new SORTER ( m_tSubSorter.GetComparator (), &dFoo, *this );
		CloneKBufferGroupSorter ( pClone );
		return pClone;
	}

	CSphVector<AggrFunc_i *> GetAggregatesWithoutAvgs() const
	{
		CSphVector<AggrFunc_i *> dAggrs;
		if ( m_dAggregates.GetLength ()!=m_dAvgs.GetLength ())
		{
			dAggrs = m_dAggregates;
			for ( auto * pAvg : this->m_dAvgs )
				dAggrs.RemoveValue ( pAvg );
		}
		return dAggrs;
	}

	FORCE_INLINE void FreeMatchPtrs ( int iMatch, bool bNotify=true )
	{
		if ( NOTIFICATIONS && bNotify )
			m_dJustPopped.Add ( RowTagged_t ( m_dData[iMatch] ) );
		m_pSchema->FreeDataPtrs ( m_dData[iMatch] );

		// on final pass we totally wipe match.
		// That is need, since otherwise such 'garbage' matches with non-null m_pDynamic
		// will be targeted in d-tr with FreeDataPtrs with possible another(!) schema
		if ( !bNotify )
			m_dData[iMatch].ResetDynamic ();
	}

	template <bool GROUPED>
	FORCE_INLINE void UpdateDistinct ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey )
	{
		// already-grouped entries carry their accumulated distinct count in @distinct
		int iCount = 1;
		if constexpr ( GROUPED )
			iCount = (int)tEntry.GetAttr ( m_tLocDistinct );

		assert(m_pDistinctFetcher);
		if constexpr ( DISTINCT==1 )
			m_tUniq.Add ( {uGroupKey, m_pDistinctFetcher->GetKey(tEntry), iCount} );
		else
		{
			m_pDistinctFetcher->GetKeys ( tEntry, this->m_dDistinctKeys );
			for ( auto i : this->m_dDistinctKeys )
				m_tUniq.Add ( {uGroupKey, i, iCount} );
		}
	}

	void RemoveDistinct ( VecTraits_T<SphGroupKey_t>& dRemove )
	{
		// sort and compact
		if ( !m_bSortByDistinct )
			m_tUniq.Sort ();
		m_tUniq.Compact ( dRemove );
	}
};
/// match sorter with k-buffering and group-by
/// invoking by select ... group by ... where only plain attributes (i.e. NO mva, NO jsons)
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class CSphKBufferGroupSorter : public KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTINCT,NOTIFICATIONS>
{
	using MYTYPE = CSphKBufferGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
	bool m_bMatchesFinalized = false;	// true after FinalizeMatches(): groups cut to limit, avgs finalized, sorted
	int m_iMaxUsed = -1;				// high-water mark of touched backend matches; used to reset their dynamics later

protected:
	// group key -> head match of that group, for O(1) lookup on push
	OpenHashTableFastClear_T <SphGroupKey_t, CSphMatch *> m_hGroup2Match;

	// since we inherit from template, we need to write boring 'using' block
	using KBufferGroupSorter = KBufferGroupSorter_T<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS>;
	using KBufferGroupSorter::m_eGroupBy;
	using KBufferGroupSorter::m_pGrouper;
	using KBufferGroupSorter::m_iLimit;
	using KBufferGroupSorter::m_tUniq;
	using KBufferGroupSorter::m_bSortByDistinct;
	using KBufferGroupSorter::m_tGroupSorter;
	using KBufferGroupSorter::m_tSubSorter;
	using KBufferGroupSorter::m_dAvgs;
	using KBufferGroupSorter::GROUPBY_FACTOR;
	using KBufferGroupSorter::GetAggregatesWithoutAvgs;
	using KBufferGroupSorter::Distinct;
	using KBufferGroupSorter::UpdateDistinct;
	using KBufferGroupSorter::RemoveDistinct;
	using KBufferGroupSorter::FreeMatchPtrs;
	using KBufferGroupSorter::m_bAvgFinal;
	using CSphGroupSorterSettings::m_tLocGroupby;
	using CSphGroupSorterSettings::m_tLocCount;
	using CSphGroupSorterSettings::m_tLocDistinct;
	using BaseGroupSorter_c::EvalHAVING;
	using BaseGroupSorter_c::AggrSetup;
	using BaseGroupSorter_c::AggrUpdate;
	using BaseGroupSorter_c::AggrUngroup;
	using CSphMatchQueueTraits::m_iSize;
	using CSphMatchQueueTraits::m_dData;
	using CSphMatchQueueTraits::Get;
	using CSphMatchQueueTraits::Add;
	using CSphMatchQueueTraits::Used;
	using CSphMatchQueueTraits::ResetAfterFlatten;
	using CSphMatchQueueTraits::ResetDynamic;
	using CSphMatchQueueTraits::ResetDynamicFreeData;
	using MatchSorter_c::m_iTotal;
	using MatchSorter_c::m_tJustPushed;
	using MatchSorter_c::m_dJustPopped;
	using MatchSorter_c::m_pSchema;

public:
	/// ctor
	CSphKBufferGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
		: KBufferGroupSorter ( pComp, pQuery, tSettings )
		, m_hGroup2Match ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )
	{}

	bool Push ( const CSphMatch & tEntry ) override { return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false, false, true, nullptr ); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) override { assert ( 0 && "Not supported in grouping"); }
	bool PushGrouped ( const CSphMatch & tEntry, bool ) override { return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), false, false, true, nullptr ); }
	ISphMatchSorter * Clone() const override { return this->template CloneSorterT<MYTYPE>(); }

	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) override
	{
		FinalizeMatches();
		auto dAggrs = GetAggregatesWithoutAvgs();
		const CSphMatch * pBegin = pTo;

		for ( auto iMatch : this->m_dIData )
		{
			CSphMatch & tMatch = m_dData[iMatch];
			if constexpr ( HAS_AGGREGATES )
				dAggrs.Apply ( [&tMatch] ( AggrFunc_i * pAggr ) { pAggr->Finalize ( tMatch ); } );

			// groups rejected by HAVING are dropped and fully wiped
			if ( !EvalHAVING ( tMatch ))
			{
				FreeMatchPtrs ( iMatch, false );
				continue;
			}

			Swap ( *pTo, tMatch );
			++pTo;
		}

		m_iTotal = 0;
		m_bMatchesFinalized = false;
		if constexpr ( DISTINCT )
			m_tUniq.Reset();

		ResetAfterFlatten ();
		m_iMaxUsed = ResetDynamic ( m_iMaxUsed );
		return int ( pTo-pBegin );
	}

	/// move all collected groups into pRhs sorter, merging duplicate groups; leaves this sorter empty
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		if ( !Used () )
			return;

		auto& dRhs = *(MYTYPE *) pRhs;
		if ( dRhs.IsEmpty () )
		{
			// rhs is empty: adopt our whole state instead of pushing match-by-match
			CSphMatchQueueTraits::SwapMatchQueueTraits ( dRhs );
			dRhs.m_hGroup2Match = std::move ( m_hGroup2Match );
			dRhs.m_bMatchesFinalized = m_bMatchesFinalized;
			dRhs.m_iMaxUsed = m_iMaxUsed;
			if ( !m_bMatchesFinalized && bCopyMeta )
				dRhs.m_tUniq = std::move(m_tUniq);
			m_iMaxUsed = -1;
			return;
		}

		bool bUniqUpdated = false;
		if ( !m_bMatchesFinalized && bCopyMeta )
		{
			// can not move m_tUniq into dRhs as move invalidates m_tUniq then breaks FinalizeMatches
			m_tUniq.CopyTo ( dRhs.m_tUniq );
			bUniqUpdated = true;
		}

		// if we're copying meta (uniq counters), we don't need distinct calculation right now
		// we can do it later after all sorters are merged
		FinalizeMatches ( !bCopyMeta );

		// matches in dRhs are using a new (standalone) schema
		// however, some supposedly unused matches still have old schema
		// they were not cleared immediately for performance reasons
		// we need to do that now
		for ( int i = dRhs.m_dIData.GetLength(); i < dRhs.m_dData.GetLength(); i++ )
		{
			int iId = *(dRhs.m_dIData.Begin()+i);
			dRhs.m_dData[iId].ResetDynamic();
		}

		dRhs.m_bUpdateDistinct = !bUniqUpdated;
		dRhs.SetMerge(true);

		// just push in heap order
		// since we have grouped matches, it is not always possible to move them,
		// so use plain push instead
		for ( auto iMatch : this->m_dIData )
			dRhs.PushGrouped ( m_dData[iMatch], false );

		dRhs.m_bUpdateDistinct = true;
		dRhs.SetMerge(false);

		// once we're done copying, cleanup
		m_iMaxUsed = ResetDynamicFreeData ( m_iMaxUsed );
	}

	/// run tProcessor over all collected groups; optionally finalize them first
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) override
	{
		if ( !Used() )
			return;

		if ( bFinalizeMatches )
			FinalizeMatches();
		else if constexpr ( DISTINCT )
		{
			// if we are not finalizing matches, we are using global sorters
			// let's try to remove dupes while we are processing data in separate threads
			// so that the main thread will have fewer data to work with
			m_tUniq.Sort();
			VecTraits_T<SphGroupKey_t> dStub;
			m_tUniq.Compact(dStub);
		}

		// just evaluate in heap order
		for ( auto iMatch : this->m_dIData )
			tProcessor.Process ( &m_dData[iMatch] );

		if constexpr ( DISTINCT )
		{
			// need to clean up matches NOT from m_dIData with current schema
			// as after schema change data_ptr attributes will have garbage in ptr part for matches not processed by tProcessor
			// and global sorters have different clean up code path that do not handle this garbage as usual sorters do
			if ( this->m_dIData.GetLength()!=m_iMaxUsed )
			{
				for ( int i=0; i<m_iMaxUsed; i++ )
				{
					CSphMatch & tMatch = m_dData[i];
					if ( !tMatch.m_pStatic ) // clean up match that was in m_dIData set
						continue;

					m_pSchema->FreeDataPtrs ( tMatch );
					tMatch.ResetDynamic ();
				}
			}
		}
	}

	// flag that matches are being merged from another sorter (set by MoveTo)
	void SetMerge ( bool bMerge ) override { m_bMerge = bMerge; }

protected:
	/// merge tEntry into an already-hashed group; always returns false (it is a dupe by definition)
	template <bool GROUPED>
	bool PushIntoExistingGroup( CSphMatch & tGroup, const CSphMatch & tEntry, SphGroupKey_t uGroupKey, SphAttr_t * pAttr )
	{
		assert ( tGroup.GetAttr ( m_tLocGroupby )==uGroupKey );
		assert ( tGroup.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );

		auto & tLocCount = m_tLocCount;
		if constexpr ( GROUPED )
			tGroup.AddCounterAttr ( tLocCount, tEntry );	// entry is itself a group: add its @count
		else
			tGroup.AddCounterScalar ( tLocCount, 1 );		// plain match: @count += 1

		if constexpr ( HAS_AGGREGATES )
			AggrUpdate ( tGroup, tEntry, GROUPED, m_bMerge );

		// if new entry is more relevant, update from it
		if ( m_tSubSorter.MatchIsGreater ( tEntry, tGroup ) )
		{
			if constexpr ( NOTIFICATIONS )
			{
				m_tJustPushed = RowTagged_t ( tEntry );
				this->m_dJustPopped.Add ( RowTagged_t ( tGroup ) );
			}

			// clone the low part of the match
			this->m_tPregroup.CloneKeepingAggrs ( tGroup, tEntry );
			if ( pAttr )
				UpdateGroupbyStr ( tGroup, pAttr );
		}

		// submit actual distinct value
		if ( DISTINCT && m_bUpdateDistinct )
			KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tEntry, uGroupKey );

		return false; // since it is a dupe
	}

	/// add entry to the queue
	template <bool GROUPED>
	FORCE_INLINE bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, [[maybe_unused]] bool bNewSet, [[maybe_unused]] bool bTailFinalized, bool bClearNotify, SphAttr_t * pAttr )
	{
		if constexpr ( NOTIFICATIONS )
		{
			if ( bClearNotify )
			{
				m_tJustPushed = RowTagged_t();
				this->m_dJustPopped.Resize ( 0 );
			}
		}

		auto & tLocCount = m_tLocCount;

		m_bMatchesFinalized = false;
		if ( HAS_AGGREGATES && m_bAvgFinal )
			CalcAvg ( Avg_e::UNGROUP );	// undo avg finalization done by an earlier finalize pass

		// if this group is already hashed, we only need to update the corresponding match
		CSphMatch ** ppMatch = m_hGroup2Match.Find ( uGroupKey );
		if ( ppMatch )
		{
			CSphMatch * pMatch = (*ppMatch);
			assert ( pMatch );
			assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
			return PushIntoExistingGroup<GROUPED> ( *pMatch, tEntry, uGroupKey, pAttr );
		}

		// submit actual distinct value
		if constexpr ( DISTINCT )
			KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tEntry, uGroupKey );

		// if we're full, let's cut off some worst groups
		if ( Used()==m_iSize )
			CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );

		// do add
		assert ( Used()<m_iSize );
		CSphMatch & tNew = Add();
		m_pSchema->CloneMatch ( tNew, tEntry );

		if constexpr ( HAS_AGGREGATES )
			AggrSetup ( tNew, tEntry, m_bMerge );

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( tNew );

		if constexpr ( GROUPED )
		{
			if constexpr ( HAS_AGGREGATES )
				AggrUngroup(tNew);
		}
		else
		{
			// a plain (ungrouped) match becomes a new group of one
			tNew.SetAttr ( m_tLocGroupby, uGroupKey );
			tNew.SetAttr ( tLocCount, 1 );
			if ( DISTINCT && m_bUpdateDistinct )
				tNew.SetAttr ( m_tLocDistinct, 0 );

			if ( pAttr )
				UpdateGroupbyStr ( tNew, pAttr );
		}

		m_hGroup2Match.Add ( uGroupKey, &tNew );
		++m_iTotal;
		return true;
	}

private:
	enum class Avg_e { FINALIZE, UNGROUP };
	bool m_bUpdateDistinct = true;	// whether pushes should feed the uniq (distinct) storage
	bool m_bMerge = false;			// true while merging matches from another sorter (see SetMerge)
	CSphVector<SphGroupKey_t> m_dRemove;	// scratch list of group keys to purge from uniq storage

	/// finalize or ungroup avg aggregates over all collected groups
	void CalcAvg ( Avg_e eGroup )
	{
		if ( m_dAvgs.IsEmpty() )
			return;

		m_bAvgFinal = ( eGroup==Avg_e::FINALIZE );

		if ( eGroup==Avg_e::FINALIZE )
			for ( auto i : this->m_dIData )
				m_dAvgs.Apply( [this,i] ( AggrFunc_i * pAvg ) { pAvg->Finalize ( m_dData[i] ); } );
		else
			for ( auto i : this->m_dIData )
				m_dAvgs.Apply ( [this,i] ( AggrFunc_i * pAvg ) { pAvg->Ungroup ( m_dData[i] ); } );
	}

	/// finalize counted distinct values
	void CountDistinct ()
	{
		Distinct ( [this] ( SphGroupKey_t uGroup )->CSphMatch *
			{
				auto ppMatch = m_hGroup2Match.Find ( uGroup );
				return ppMatch ? *ppMatch : nullptr;
			});
	}

	// make final order before finalize/flatten call
	void FinalizeMatches ( bool bCountDistinct=true )
	{
		if ( m_bMatchesFinalized )
			return;

		m_bMatchesFinalized = true;

		if ( Used() > m_iLimit )
			CutWorst ( m_iLimit, true );	// cut also sorts and (maybe) counts distinct
		else
		{
			if ( DISTINCT && bCountDistinct )
				CountDistinct();

			CalcAvg ( Avg_e::FINALIZE );
			SortGroups();
		}
	}

	/// refill the group->match hash from the (possibly reordered) index vector
	void RebuildHash ()
	{
		for ( auto iMatch : this->m_dIData ) {
			auto & tMatch = m_dData[iMatch];
			m_hGroup2Match.Add ( tMatch.GetAttr ( m_tLocGroupby ), &tMatch );
		}
	}

	/// cut worst N groups off the buffer tail, and maybe sort the best part
	void CutWorst ( int iBound, bool bFinalize=false )
	{
		// prepare to partition - finalize distinct, avgs to provide smooth sorting
		if ( DISTINCT && m_bSortByDistinct )
			CountDistinct ();

		CalcAvg ( Avg_e::FINALIZE );

		// relocate best matches to the low part (up to the iBound)
		BinaryPartition (iBound);

		// take worst matches and free them (distinct stuff, data ptrs)
		auto dWorst = this->m_dIData.Slice ( iBound );

		if constexpr ( DISTINCT )
		{
			m_dRemove.Resize(0);
			for ( auto iMatch : dWorst )
				m_dRemove.Add ( m_dData[iMatch].GetAttr ( m_tLocGroupby ));
			RemoveDistinct ( m_dRemove );
		}

		dWorst.Apply ( [this,bFinalize] ( int iMatch ) { FreeMatchPtrs ( iMatch, !bFinalize ); } );

		m_iMaxUsed = Max ( m_iMaxUsed, this->m_dIData.GetLength() ); // memorize it for free dynamics later.
		this->m_dIData.Resize ( iBound );
		m_hGroup2Match.Clear();

		if ( bFinalize )
		{
			SortGroups();
			if ( DISTINCT && !m_bSortByDistinct ) // since they haven't counted at the top
			{
				RebuildHash(); // distinct uses m_hGroup2Match
				CountDistinct();
			}
		} else
		{
			// we've called CalcAvg ( Avg_e::FINALIZE ) before partitioning groups
			// now we can undo this calculation for the rest apart from thrown away
			// on finalize (sorting) cut we don't need to ungroup here
			CalcAvg ( Avg_e::UNGROUP );
			RebuildHash();
		}
	}

	/// sort groups buffer
	void SortGroups ()
	{
		this->m_dIData.Sort ( m_tGroupSorter );
	}

	// update @groupbystr value, if available
	void UpdateGroupbyStr ( CSphMatch& tMatch, const SphAttr_t * pAttr )
	{
		if ( this->m_tLocGroupbyStr.m_bDynamic )
			tMatch.SetAttr ( this->m_tLocGroupbyStr, *pAttr );
	}

	// lazy resort matches so that best are located up to iBound
	// (Hoare-style bidirectional partitioning repeated until exactly
	// iBound best elems end up in the low part of the index vector)
	void BinaryPartition ( int iBound )
	{
		float COEFF = Max ( 1.0f, float(Used()) / iBound );
		int iPivot = this->m_dIData[ int(iBound/COEFF) ];
		--iBound;

		int a=0;
		int b=Used()-1;
		while (true)
		{
			int i=a;
			int j=b;
			while (i<=j)
			{
				while (m_tGroupSorter.IsLess (this->m_dIData[i],iPivot)) ++i;
				while (m_tGroupSorter.IsLess (iPivot, this->m_dIData[j])) --j;
				if ( i<=j ) ::Swap( this->m_dIData[i++], this->m_dIData[j--]);
			}
			if ( iBound == j )
				break;

			if ( iBound < j)
				b = j; // too many elems acquired; continue with left part
			else
				a = i; // too few elems acquired; continue with right part
			int iPivotIndex = int ( ( a * ( COEFF-1 )+b ) / COEFF );
			iPivot = this->m_dIData[iPivotIndex];
		}
	}
};
// local diagnostic logging plumbing for the N-group sorter (disabled by default: LOG_LEVEL_DIAG=false)
#define LOG_COMPONENT_NG __FILE__ << ":" << __LINE__ << " -"
#define LOG_LEVEL_DIAG false
#define DBG LOC(DIAG,NG)
  2852. /// match sorter with k-buffering and N-best group-by
  2853. /* Trick explanation
  2854. *
  2855. * Here we keep several grouped matches, but each one is not a single match, but a group.
  2856. * On the backend we have solid vector of real matches. They are allocated once and freed, and never moved around.
  2857. * To work with them, we have vector of indexes, so that each index points to corresponding match in the backend.
  2858. * So when performing moving operations (sort, etc.) we actually change indexes and never move matches themselves.
  2859. *
  2860. * Say, when user pushes matches with weights of 5,2,3,1,4,6, and we then sort them, we will have the following relations:
  2861. *
  2862. * m5 m2 m3 m1 m4 m6 // backend, placed in natural order as they come here
  2863. * 1 2 3 4 5 6 // original indexes, just points directly to backend matches.
  2864. *
  2865. * After, say, sort by asc matches weights, only index vector modified and became this:
  2866. *
  2867. * 4 2 3 5 1 6 // reading match[i[k]] for k in 0..5 will return matches in weight ascending order.
  2868. *
  2869. * When grouping we collect several matches together and sort them.
  2870. * Say, if one group contains matches m1, m2, m5, m6 and second - m4, m3, we have to keep 2 sets of matches in hash:
  2871. *
  2872. * h1: m1 m2 m5 m6
  2873. * h2: m4 m3
  2874. *
  2875. * How to store that sequences?
  2876. *
  2877. * Well, we can do it directly, set by set, keeping heads in hash:
  2878. * m1 m2 m5 m6 m4 m3, heads 1, 5
  2879. *
  2880. * going to indirection indexes we have sequence
  2881. * 4 2 1 6 5 3, hash 1, 4
  2882. *
  2883. * That looks ok, but since sets can dynamically change, it is hard to insert more into existing group.
  2884. * That is like insertion into the middle of vector.
  2885. *
  2886. * Let's try to make a list (chain). Don't care about in-group ordering, just keep things chained.
  2887. * To make things easier, ring the list (connect tail back to head), and store pos of one of the elems in the hash
  2888. * (since it is ring - that is not important which exactly, just to have something to glue).
  2889. *
  2890. * m5 -> 1 heads 1
  2891. * m2 -> 2, 1 heads 2
  2892. * m3 -> 2, 1, 3, heads 2, 3
  2893. * m1 -> 2, 4, 3, 1, heads 4, 3
  2894. * m4 -> 2, 4, 5, 1, 3, heads 4, 5
  2895. * m6 -> 2, 4, 5, 6, 3, 1 heads 6, 5
  2896. *
  2897. * On insert, we store old head into new elem, and new elem into the place of old head.
 * One remaining thing here is the indirect ref by position. I.e. we assume that the index at position 6 points to the match at position 6.
  2899. * However, we can notice, that since it is ring, left elem of 6-th points to it directly by number 6.
  2900. * So we can just shift heads back by one position - and that's all, indirect assumption no more necessary.
  2901. * Final sequence will be this one:
  2902. * m5 m2 m3 m1 m4 m6 - matches in their natural order
  2903. * 2, 4, 5, 6, 3, 1 - indirection vec. 4, 3. - heads of groups.
  2904. *
  2905. * Iteration: take 1-st group with head 4:
  2906. * 6->1->2->4*. Each num is both index of the link, and index of backend match. So, matches here are:
 * m6 m5 m2 m1, and we can resort them as necessary (indirectly). Voila!
  2908. *
  2909. * On deletion item goes to freelist.
  2910. * Allocation of an elem is separate task, it is achieved by linear allocation (first), and by freelist (when filled).
  2911. *
  2912. */
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class CSphKBufferNGroupSorter : public KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTINCT,NOTIFICATIONS>
{
	using MYTYPE = CSphKBufferNGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS,HAS_AGGREGATES>;

protected:
	// since we inherit from template, we need to write boring 'using' block
	using KBufferGroupSorter = KBufferGroupSorter_T<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS>;
	using KBufferGroupSorter::m_eGroupBy;
	using KBufferGroupSorter::m_pGrouper;
	using KBufferGroupSorter::m_iLimit;
	using KBufferGroupSorter::m_tUniq;
	using KBufferGroupSorter::m_bSortByDistinct;
	using KBufferGroupSorter::m_tGroupSorter;
	using KBufferGroupSorter::m_tSubSorter;
	using KBufferGroupSorter::m_dAvgs;
	using KBufferGroupSorter::GROUPBY_FACTOR;
	using KBufferGroupSorter::GetAggregatesWithoutAvgs;
	using KBufferGroupSorter::Distinct;
	using KBufferGroupSorter::FreeMatchPtrs;
	using KBufferGroupSorter::UpdateDistinct;
	using KBufferGroupSorter::RemoveDistinct;
	using KBufferGroupSorter::m_bAvgFinal;
	using CSphGroupSorterSettings::m_tLocGroupby;
	using CSphGroupSorterSettings::m_tLocCount;
	using CSphGroupSorterSettings::m_tLocDistinct;
	// using CSphGroupSorterSettings::m_tLocGroupbyStr; // check! unimplemented?
	using BaseGroupSorter_c::EvalHAVING;
	using BaseGroupSorter_c::AggrUpdate;
	using BaseGroupSorter_c::AggrUngroup;
	using CSphMatchQueueTraits::m_iSize;
	using CSphMatchQueueTraits::m_dData;
	using MatchSorter_c::m_iTotal;
	using MatchSorter_c::m_tJustPushed;
	using MatchSorter_c::m_pSchema;

public:
	/// ctor; per-group limit is clamped to the overall limit
	CSphKBufferNGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
		: KBufferGroupSorter ( pComp, pQuery, tSettings )
		, m_hGroup2Index ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )
		, m_iGLimit ( Min ( pQuery->m_iGroupbyLimit, m_iLimit ) )
	{
#ifndef NDEBUG
		DBG << "Created iruns = " << m_iruns << " ipushed = " << m_ipushed;
#endif
		this->m_dIData.Resize ( m_iSize ); // m_iLimit * GROUPBY_FACTOR
	}
	// clamp per-group limit to the overall limit
	inline void SetGLimit ( int iGLimit ) { m_iGLimit = Min ( iGLimit, m_iLimit ); }
	// reported length never exceeds the overall limit, even if more matches are buffered
	int GetLength() override { return Min ( m_iUsed, m_iLimit ); }
	bool Push ( const CSphMatch & tEntry ) override { return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false, false, true, nullptr ); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
	bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override { return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), bNewSet, false, true, nullptr ); }
	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) override
	{
		if ( !GetLength() )
			return 0;

		if ( !m_bFinalized )
		{
			FinalizeChains ();
			PrepareForExport ();
			CountDistinct ();
		}

		auto fnSwap = [&pTo] ( CSphMatch & tSrc ) { // the writer
			Swap ( *pTo, tSrc );
			++pTo;
		};

		const CSphMatch * pBegin = pTo;
		for ( auto iHead : m_dFinalizedHeads )
		{
			CSphMatch & tGroupHead = m_dData[iHead];

			// groups rejected by HAVING are deleted whole (head + tail chain)
			if ( !EvalHAVING ( tGroupHead ))
			{
				DeleteChain ( iHead, false );
				continue;
			}

			fnSwap ( tGroupHead ); // move top group match
			for ( int i=this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
				fnSwap ( m_dData[i] ); // move tail matches (walk the ring until back at head)
		}

		// final clean up before possible next pass
		m_uLastGroupKey = -1;
		m_iFree = 0;
		m_iUsed = 0;
		m_bFinalized = false;
		m_iStorageSolidFrom = 0;
		m_iTotal = 0;
		m_dFinalizedHeads.Reset ();
		m_hGroup2Index.Clear();

		if constexpr ( DISTINCT )
			m_tUniq.Reset();

		return int ( pTo-pBegin );
	}
	/// run tProcessor over buffered matches; optionally finalize chains first
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) override
	{
		if ( !GetLength() )
			return;

		if ( bFinalizeMatches )
		{
			if ( !m_bFinalized )
			{
				FinalizeChains();
				PrepareForExport();
				CountDistinct();
			}
			ProcessData ( tProcessor, m_dFinalizedHeads );
		}
		else
		{
			ProcessData ( tProcessor, GetAllHeads() );

			if constexpr ( DISTINCT )
			{
				// if we are not finalizing matches, we are using global sorters
				// let's try to remove dupes while we are processing data in separate threads
				// so that the main thread will have fewer data to work with
				m_tUniq.Sort();
				VecTraits_T<SphGroupKey_t> dStub;
				m_tUniq.Compact(dStub);
			}
		}
	}
  3032. // TODO! TEST!
  3033. ISphMatchSorter * Clone () const override
  3034. {
  3035. auto* pClone = this->template CloneSorterT<MYTYPE>();
  3036. pClone->SetGLimit (m_iGLimit);
  3037. return pClone;
  3038. }
	/// move all groups (heads + tail chains) into pRhs sorter, merging duplicates
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
#ifndef NDEBUG
		DBG << " MoveTo " << pRhs << " iRuns:iPushed - " << m_iruns << " " << m_ipushed;
#endif
		auto& dRhs = *(MYTYPE *) pRhs;
		if ( !dRhs.m_iTotal )
		{
			// rhs is empty: adopt our whole state instead of pushing match-by-match
			DBG << " Rhs is empty, adopt! ";
			CSphMatchQueueTraits::SwapMatchQueueTraits ( dRhs );
			dRhs.m_hGroup2Index = std::move ( m_hGroup2Index );
			::Swap ( m_uLastGroupKey, dRhs.m_uLastGroupKey );
			::Swap ( m_iFree, dRhs.m_iFree );
			::Swap ( m_iUsed, dRhs.m_iUsed );
			::Swap ( m_bFinalized, dRhs.m_bFinalized );
			m_dFinalizedHeads.SwapData ( dRhs.m_dFinalizedHeads );
			::Swap ( m_iStorageSolidFrom, dRhs.m_iStorageSolidFrom );
#ifndef NDEBUG
			::Swap ( m_iruns, dRhs.m_iruns );
			::Swap ( m_ipushed, dRhs.m_ipushed );
			LOC_SWAP(dRhs);
#endif
			if ( !m_bFinalized && bCopyMeta )
				dRhs.m_tUniq = std::move(m_tUniq);
			return;
		}

		bool bUniqUpdated = false;
		if ( !m_bFinalized && bCopyMeta )
		{
			// can not move m_tUniq into dRhs as that would invalidate our own uniq storage
			m_tUniq.CopyTo ( dRhs.m_tUniq );
			bUniqUpdated = true;
		}

		if ( !m_bFinalized )
		{
			FinalizeChains();
//			PrepareForExport(); // for moving we not need fine-finaled matches; just cleaned is enough
			CountDistinct();
		}

		dRhs.m_bUpdateDistinct = !bUniqUpdated;
		dRhs.SetMerge(true);

		auto iTotal = dRhs.m_iTotal;
		for ( auto iHead : m_dFinalizedHeads )
		{
			auto uGroupKey = m_dData[iHead].GetAttr ( m_tLocGroupby );

			// have to set bNewSet to true
			// as need to fallthrough at PushAlreadyHashed and update count and aggregates values for head match
			// even uGroupKey match already exists
			dRhs.template PushEx<true> ( m_dData[iHead], uGroupKey, true, true, true, nullptr );
			for ( int i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
				dRhs.template PushEx<false> ( m_dData[i], uGroupKey, false, true, true, nullptr );

			DeleteChain ( iHead, false );
		}

		dRhs.m_bUpdateDistinct = true;
		dRhs.SetMerge(false);
		dRhs.m_iTotal = m_iTotal+iTotal;
	}
	// flag that matches are being merged from another sorter (set by MoveTo)
	void SetMerge ( bool bMerge ) override { m_bMerge = bMerge; }
protected:
	int m_iStorageSolidFrom = 0; // edge from which storage is not yet touched and need no chaining freelist
	OpenHashTable_T<SphGroupKey_t, int> m_hGroup2Index; // used to quickly locate group for incoming match
	int m_iGLimit; ///< limit per one group
	SphGroupKey_t m_uLastGroupKey = -1; ///< helps to determine in pushEx whether the new subgroup started
	int m_iFree = 0; ///< current insertion point
	int m_iUsed = 0; ///< number of currently allocated backend matches

	// final cached data valid when everything is finalized
	bool m_bFinalized = false; // helper to avoid double work
	CSphVector<int> m_dFinalizedHeads; /// < sorted finalized heads
	int m_iLastGroupCutoff; /// < cutoff edge of last group to fit limit

#ifndef NDEBUG
	int m_iruns = 0; ///< helpers for conditional breakpoints on debug
	int m_ipushed = 0;
#endif
	LOC_ADD;
	/*
	 * Every match according to uGroupKey came to own subset.
	 * Head match of each group stored in the hash to quickly locate on next pushes
	 * It hold all calculated stuff from aggregates/group_concat until finalization.
	 */
	template <bool GROUPED>
	bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bNewSet, bool bTailFinalized, bool bClearNotify, [[maybe_unused]] SphAttr_t * pAttr )
	{
#ifndef NDEBUG
		++m_ipushed;
		DBG << "PushEx: tag" << tEntry.m_iTag << ",g" << uGroupKey << ": pushed" << m_ipushed
			<< " g" << GROUPED << " n" << bNewSet;
#endif
		if constexpr ( NOTIFICATIONS )
		{
			if ( bClearNotify )
			{
				m_tJustPushed = RowTagged_t();
				this->m_dJustPopped.Resize ( 0 );
			}
		}

		this->m_bFinalized = false;
		if ( HAS_AGGREGATES && m_bAvgFinal )
			CalcAvg ( Avg_e::UNGROUP );	// undo avg finalization done by an earlier finalize pass

		// place elem into the set
		auto iNew = AllocateMatch ();
		CSphMatch & tNew = m_dData[iNew];

		// if such group already hashed
		int * pGroupIdx = m_hGroup2Index.Find ( uGroupKey );
		if ( pGroupIdx )
			return PushAlreadyHashed<GROUPED> ( pGroupIdx, iNew, tEntry, uGroupKey, bNewSet, bTailFinalized );

		// match came from MoveTo of another sorter, it is tail, and it has no group here (m.b. it is already
		// deleted during finalization as one of worst). Just discard the whole group in the case.
		if ( bTailFinalized && !GROUPED )
		{
			DeallocateMatch ( iNew );
			return false;
		}

		m_pSchema->CloneMatch ( tNew, tEntry ); // fixme! check if essential data cloned
		//	else
		//		this->m_tPregroup.CloneWithoutAggrs ( tNew, tEntry );
		//		this->m_tPregroup.CopyAggrs ( tNew, tEntry );

		// submit actual distinct value in all cases
		if ( DISTINCT && m_bUpdateDistinct )
			KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tNew, uGroupKey );

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( tNew );

		this->m_dIData[iNew] = iNew; // new head - points to self (0-ring)
		Verify ( m_hGroup2Index.Add ( uGroupKey, iNew ));
		++m_iTotal;

		if constexpr ( GROUPED )
		{
			m_uLastGroupKey = uGroupKey;

			if constexpr ( HAS_AGGREGATES )
				AggrUngroup ( m_dData[iNew] );
		} else
		{
			// a plain (ungrouped) match becomes a new group of one
			tNew.SetAttr ( m_tLocGroupby, uGroupKey );
			tNew.SetAttr ( m_tLocCount, 1 );
			if constexpr ( DISTINCT )
				tNew.SetAttr ( m_tLocDistinct, 0 );
		}

		return true;
	}
private:
	bool m_bUpdateDistinct = true;	// whether pushes should feed the uniq (distinct) storage
	bool m_bMerge = false;			// true while merging matches from another sorter (see SetMerge)

	// surely give place for a match (do vacuum-cleaning, if there is no place)
	inline int AllocateMatch ()
	{
		auto iPlace = TryAllocateMatch ();
		if ( iPlace<0 )
		{
			// buffer full: vacuum and retry (must succeed after cleaning)
			VacuumClean ();
			iPlace = TryAllocateMatch ();
		}
		assert ( iPlace>=0 && iPlace<m_iSize );
		DBG << "allocated: " << iPlace;
		return iPlace;
	}
	// return match and free it's dataptrs; slot goes back to the freelist
	FORCE_INLINE void FreeMatch ( int iElem, bool bNotify ) // fixme! intersects with parent by name
	{
		FreeMatchPtrs ( iElem, bNotify );
		DeallocateMatch ( iElem );
	}
  3198. inline int TryAllocateMatch ()
  3199. {
  3200. if ( m_iUsed==m_iSize )
  3201. return -1; // no more place..
  3202. ++m_iUsed;
  3203. auto iElem = m_iFree;
  3204. if ( iElem<m_iStorageSolidFrom )
  3205. m_iFree = this->m_dIData[iElem];
  3206. else {
  3207. ++m_iFree;
  3208. m_iStorageSolidFrom = m_iFree;
  3209. }
  3210. return iElem;
  3211. }
  3212. inline void DeallocateMatch (int iElem)
  3213. {
  3214. --m_iUsed;
  3215. this->m_dIData[iElem] = m_iFree; // put to chain
  3216. m_iFree = iElem;
  3217. assert ( m_iFree >=0 );
  3218. }
  3219. // return length of the matches chain (-1 terminated)
  3220. int ChainLen ( int iPos ) const
  3221. {
  3222. int iChainLen = 1;
  3223. for ( int i = this->m_dIData[iPos]; i!=iPos; i = this->m_dIData[i] )
  3224. ++iChainLen;
  3225. return iChainLen;
  3226. }
  3227. // add new match into the chain. Aggregates are relaxed and not managed till finalize
  3228. /*
  3229. * chain of the matches is actually ring of integers. Each one points to the coherent
  3230. * match in the storage, and simultaneously next member of the ring.
  3231. * We can iterate over the chain starting from the head and looking until same index met again.
  3232. */
  3233. void AddToChain ( int iNew, const CSphMatch & tEntry, int iHead )
  3234. {
  3235. CSphMatch & tNew = m_dData[iNew];
  3236. this->m_tPregroup.CloneWithoutAggrs ( tNew, tEntry );
  3237. if constexpr ( NOTIFICATIONS )
  3238. m_tJustPushed = RowTagged_t ( tNew );
  3239. // put after the head
  3240. auto iPrevChain = this->m_dIData[iHead];
  3241. this->m_dIData[iNew] = iPrevChain;
  3242. this->m_dIData[iHead] = iNew;
  3243. }
	// add entry to existing group
	/*
	 * If group is not full, and new match is less than head, it will replace the head.
	 * calculated stuff will be moved and adopted by this new replacement.
	 * If group is full, and new match is less than head, it will be early rejected.
	 * In all other cases new match will be inserted into the group right after head
	 */
	// always returns false: the group already exists, so this push is a dupe from the caller's standpoint
	template <bool GROUPED>
	bool PushAlreadyHashed ( int * pHead, int iNew, const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bNewSet, bool bTailFinalized )
	{
		int & iHead = *pHead;
		assert ( m_dData[iHead].GetAttr ( m_tLocGroupby )==uGroupKey );
		assert ( m_dData[iHead].m_pDynamic[-1]==tEntry.m_pDynamic[-1] );

		DBG << "existing " << m_dData[iHead].m_iTag << "," << uGroupKey
			<< " m_pDynamic: " << m_dData[iHead].m_pDynamic;

		// check if we need to push the match at all
		if ( m_tSubSorter.MatchIsGreater ( tEntry, m_dData[iHead] ) )
			AddToChain ( iNew, tEntry, iHead ); // always add; bad will be filtered later in gc
		else if ( ChainLen ( iHead )>=m_iGLimit ) // less than worst, drop it
			DeallocateMatch ( iNew );
		else
		{
			// group not yet full and the match is less than the head:
			// the new match becomes the head and adopts the accumulated aggregates
			AddToChain ( iNew, tEntry, iHead );
			this->m_tPregroup.MoveAggrs ( m_dData[iNew], m_dData[iHead] );
			*pHead = iNew;
		}

		// note: iHead is a reference to *pHead, so after a head replacement above
		// tHeadMatch already refers to the NEW head
		auto & tHeadMatch = m_dData[iHead];
		// submit actual distinct value in all cases
		if ( DISTINCT && m_bUpdateDistinct )
			KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tEntry, uGroupKey );

		// update group-wide counters
		auto & tLocCount = m_tLocCount;
		if constexpr ( GROUPED )
		{
			// it's already grouped match
			// sum grouped matches count
			// NOTE(review): count is added only once per incoming set (on bNewSet or a key change) —
			// presumably to avoid double-counting the same pre-grouped set; confirm against callers
			if ( bNewSet || uGroupKey!=m_uLastGroupKey )
			{
				tHeadMatch.AddCounterAttr ( tLocCount, tEntry );
				m_uLastGroupKey = uGroupKey;
				bNewSet = true;
			}
		} else if ( !bTailFinalized )
		{
			// it's a simple match
			// increase grouped matches count
			// (skipped for finalized tails moved in from another sorter — see bTailFinalized in the caller)
			tHeadMatch.AddCounterScalar ( tLocCount, 1 );
			bNewSet = true;
		}

		// update aggregates (only when this push actually contributed to the group)
		if constexpr ( HAS_AGGREGATES )
		{
			if ( bNewSet )
				AggrUpdate ( tHeadMatch, tEntry, GROUPED, m_bMerge );
		}

		// since it is dupe (i.e. such group is already pushed) - return false;
		return false;
	}
	// state of avg() aggregates: FINALIZE - final values, UNGROUP - back to accumulation form
	enum class Avg_e { FINALIZE, UNGROUP };

	// apply Finalize() or Ungroup() of every avg aggregate to every group head match
	void CalcAvg ( Avg_e eGroup )
	{
		if ( this->m_dAvgs.IsEmpty() )
			return;

		m_bAvgFinal = ( eGroup==Avg_e::FINALIZE ); // remember current representation

		int64_t i = 0;
		if ( eGroup==Avg_e::FINALIZE )
			// produce final avg values in each head (e.g. before sorting by them)
			for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
				m_dAvgs.Apply ( [this, &tData] ( AggrFunc_i * pAvg ) {
					pAvg->Finalize ( m_dData[*tData.second] );
				});
		else
			// revert heads to the accumulation form so more matches can be folded in
			for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
				m_dAvgs.Apply ( [this, &tData] ( AggrFunc_i * pAvg ) {
					pAvg->Ungroup ( m_dData[*tData.second] );
				});
	}
	// quickselect-style partition: rearrange dData so that the first iBound elements
	// are the best ones (per m_tSubSorter); no full sort is performed
	void BinaryPartitionTail ( VecTraits_T<int>& dData, int iBound )
	{
		--iBound;
		int iPivot = dData[iBound];
		int a = 0;
		int b = dData.GetLength ()-1;
		while (true) {
			// Hoare-style partition of [a..b] around iPivot
			int i = a;
			int j = b;
			while (i<=j) {
				while ( m_tSubSorter.IsLess ( dData[i], iPivot )) ++i;
				while ( m_tSubSorter.IsLess ( iPivot, dData[j] )) --j;
				if ( i<=j ) ::Swap ( dData[i++], dData[j--] );
			}
			// done once the split point lands exactly on the requested bound
			if ( iBound==j )
				break;
			if ( iBound<j )
				b = j; // too many elems acquired; continue with left part
			else
				a = i; // too few elems acquired; continue with right part
			iPivot = dData[( a+b ) / 2];
		}
	}
  3343. CSphVector<int> GetAllHeads()
  3344. {
  3345. CSphVector<int> dAllHeads;
  3346. dAllHeads.Reserve ( m_hGroup2Index.GetLength ());
  3347. int64_t i = 0;
  3348. for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
  3349. dAllHeads.Add ( *tData.second );
  3350. return dAllHeads;
  3351. }
	// free place for new matches
	void VacuumClean()
	{
		// target occupancy: half of the over-allocated buffer
		auto iLimit = m_iLimit * GROUPBY_FACTOR / 2;

		// first try to cut out too long tails (each chain trimmed to m_iGLimit)
		int iSize = 0;
		int64_t i = 0;
		for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
			iSize += VacuumTail ( tData.second, m_iGLimit );

		// if we reached the limit now - bail, no need to free more.
		if ( iSize<=iLimit )
			return;

		// if we're here, just vacuuming tails wasn't effective enough and some deeper cleaning necessary
		SortThenVacuumWorstHeads ( iLimit );
	}
	// final pass before iface finalize/flatten - cut worst, sort everything
	void FinalizeChains()
	{
		if ( m_bFinalized )
			return;
		m_bFinalized = true;

		// sort each chain and trim it to m_iGLimit
		int64_t i = 0;
		for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
			VacuumTail ( tData.second, m_iGLimit, Stage_e::FINAL );

		// Continue by cut out whole groups
		SortThenVacuumWorstHeads ( m_iLimit, Stage_e::FINAL ); // false since it is already sorted

		// also free matches in the chain were cleared with FreeDataPtrs, but *now* we also need to free their dynamics
		// otherwise in d-tr FreDataPtr on non-zero dynamics will be called again with probably another schema and crash
		// FIXME!!! need to keep and restore all members changed by TryAllocateMatch - it'd be better to rewrite code to pass state into TryAllocateMatch or use common code
		// trick: drain the allocator to visit every free slot, reset its dynamic part,
		// then restore the allocator state saved beforehand
		auto iFree = m_iFree;
		auto iUsed = m_iUsed;
		auto iSSFrom = m_iStorageSolidFrom;
		for ( auto iElem = TryAllocateMatch (); iElem>=0; iElem = TryAllocateMatch () )
			m_dData[iElem].ResetDynamic ();
		m_iFree = iFree;
		m_iUsed = iUsed;
		m_iStorageSolidFrom = iSSFrom;
	}
	/*
	 * Here we
	 * 1) Cut off very last head if it would exceed the limit.
	 * 1) Copy all calculated stuff (aggr attributes) from head match to every other match of a group
	 * 2) Sort group in decreasing order, and then shift the ring ahead to 1 match.
	 * That is necessary since head is worst match, and next after it is the best one (since just sorted)
	 * Since it is ring, by moving ahead we will have 1-st match the best, last - the worst.
	 */
	void PrepareForExport()
	{
		// the last group may need shortening to m_iLastGroupCutoff to make the overall limit hard
		VacuumTail ( &m_dFinalizedHeads.Last(), m_iLastGroupCutoff, Stage_e::FINAL );
		auto dAggrs = GetAggregatesWithoutAvgs ();
		for ( auto& iHead : m_dFinalizedHeads )
		{
			for ( auto * pAggr : dAggrs )
				pAggr->Finalize ( m_dData[iHead] );	// produce final aggregate values in the head
			PropagateAggregates ( iHead );			// and copy them to the rest of the group
			iHead = this->m_dIData[iHead]; // shift
		}
	}
  3410. void PropagateAggregates ( int iHead )
  3411. {
  3412. for ( auto i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
  3413. this->m_tPregroup.CopyAggrs ( m_dData[i], m_dData[iHead] );
  3414. }
	// at collect stage we don't need to strictly sort matches inside groups,
	// but we need to track pushed/deleted matches.
	// at finalize stage, in opposite, no tracking need, but matches must be sorted.
	enum class Stage_e { COLLECT, FINAL };

	// compares group heads by the next-to-head element of their rings
	// (after a final sort the head is the worst match, the one after it - the best)
	struct FinalGroupSorter_t
	{
		const GroupSorter_fn<COMPGROUP> &	m_tGroupSorter;	// the actual comparator
		const CSphTightVector<int> &		m_dIData;		// the rings (index -> next index)

		FinalGroupSorter_t ( const GroupSorter_fn<COMPGROUP> & tSorter, const CSphTightVector<int> & dIData )
			: m_tGroupSorter ( tSorter )
			, m_dIData ( dIData )
		{}

		// a and b are head indexes; compare the elements right after them in the rings
		bool IsLess ( int a, int b ) const
		{
			return m_tGroupSorter.IsLess ( m_dIData[a], m_dIData[b] );
		}
	};
	// full clean - sort the groups, then iterate on them until iLimit elems counted. Cut out the rest.
	// if last group is not fit into rest of iLimit, it still kept whole, no fraction performed over it.
	// m_iLastGroupCutoff is set to the desired length of the last chain to make the limit hard ( 1..m_iGLimit )
	void SortThenVacuumWorstHeads ( int iSoftLimit, Stage_e eStage = Stage_e::COLLECT )
	{
		m_dFinalizedHeads = GetAllHeads();
		CalcAvg ( Avg_e::FINALIZE ); // avg aggregates must hold final values while we sort by them
		// in this final sort we need to keep the heads but to sort by next-to-head element (which is the best in group)
		FinalGroupSorter_t tFinalSorter ( m_tGroupSorter, this->m_dIData );
		m_dFinalizedHeads.Sort ( tFinalSorter );

		int iRetainMatches = 0;
		CSphVector<SphGroupKey_t> dRemovedHeads; // to remove distinct

		// delete worst heads
		ARRAY_FOREACH ( i, m_dFinalizedHeads )
			if ( iSoftLimit > iRetainMatches )
				iRetainMatches += ChainLen ( m_dFinalizedHeads[i] );
			else
			{
				// all quota exceeded, the rest just to be cut totally
				auto iRemoved = DeleteChain ( m_dFinalizedHeads[i], eStage==Stage_e::COLLECT );
				if constexpr ( DISTINCT )
					dRemovedHeads.Add( iRemoved );
				m_dFinalizedHeads.RemoveFast ( i-- );
			}

		// discard removed distinct
		if constexpr ( DISTINCT )
			RemoveDistinct ( dRemovedHeads );

		// at collect stage accumulation continues, so avgs must go back to the sum+count form
		if ( eStage==Stage_e::COLLECT )
			CalcAvg ( Avg_e::UNGROUP );
		m_iLastGroupCutoff = m_iGLimit+iSoftLimit-iRetainMatches;
	}
	// for given chain throw out worst elems to fit in iLimit quota.
	// Returns length of the chain
	int VacuumTail ( int* pHead, int iLimit, Stage_e eStage = Stage_e::COLLECT )
	{
		assert ( iLimit>0 );

		// unroll the ring into a flat vector of storage indexes, head first
		CSphVector<int> dChain;
		dChain.Add ( *pHead );
		for ( auto i = this->m_dIData[*pHead]; i!=*pHead; i = this->m_dIData[i] )
			dChain.Add ( i );

		if ( dChain.GetLength()==1 )
			return 1; // fast over

		auto dWorstTail = dChain.Slice ( iLimit );

		// if no sort necessary and limit not exceeded - nothing to do
		if ( eStage==Stage_e::COLLECT && dWorstTail.IsEmpty() )
			return dChain.GetLength();

		// chain need to be shortened
		if ( !dWorstTail.IsEmpty() )
		{
			// move the iLimit best matches to the front, the rest goes to the tail slice
			// NOTE(review): relies on Resize() keeping the underlying buffer so that
			// dWorstTail (a view past iLimit) stays valid - confirm CSphVector semantics
			BinaryPartitionTail ( dChain, iLimit );
			dChain.Resize ( iLimit );
		}

		// sort if necessary and ensure last elem of chain is the worst one
		if ( eStage==Stage_e::FINAL )
		{
			dChain.Sort( m_tSubSorter ); // sorted in reverse order, so the worst match here is the last one.
			iLimit = dChain.GetLength();
		} else
		{
			assert ( dChain.GetLength ()==iLimit );
			// not sorted, need to find worst match for new head
			int iWorst = 0;
			for (int i=1; i<iLimit; ++i)
			{
				if ( m_tSubSorter.IsLess ( dChain[iWorst], dChain[i] ) )
					iWorst = i;
			}
			::Swap ( dChain[iWorst], dChain[iLimit-1] );
		}
		auto iNewHead = dChain.Last ();

		// move calculated aggregates to the new head
		if ( iNewHead!=*pHead )
		{
			SphGroupKey_t uGroupKey = m_dData[*pHead].GetAttr ( m_tLocGroupby );
			int * pHeadInHash = m_hGroup2Index.Find(uGroupKey);
			assert(pHeadInHash);

			this->m_tPregroup.MoveAggrs ( m_dData[iNewHead], m_dData[*pHead] );
			*pHead = iNewHead;
			*pHeadInHash = iNewHead; // the external hash must track the head replacement too
		}

		// now we can safely free worst matches
		for ( auto iWorst : dWorstTail )
			FreeMatch ( iWorst, eStage==Stage_e::COLLECT );

		// recreate the chain. It is actually ring, and external hash points to the minimal elem
		this->m_dIData[iNewHead] = dChain[0]; // head points to begin of chain
		for ( int i = 0; i<iLimit-1; ++i ) // each elem points to the next, last again to head
			this->m_dIData[dChain[i]] = dChain[i+1];
		return iLimit;
	}
  3522. // delete whole chain (and remove from hash also).
  3523. SphGroupKey_t DeleteChain ( int iPos, bool bNotify )
  3524. {
  3525. SphGroupKey_t uGroupKey = m_dData[iPos].GetAttr ( m_tLocGroupby );
  3526. m_hGroup2Index.Delete ( uGroupKey );
  3527. int iNext = this->m_dIData[iPos];
  3528. FreeMatch ( iPos, bNotify );
  3529. for ( auto i = iNext; i!=iPos; i = iNext )
  3530. {
  3531. iNext = this->m_dIData[i];
  3532. FreeMatch ( i, bNotify );
  3533. }
  3534. return uGroupKey;
  3535. }
	/// count distinct values if necessary
	void CountDistinct ()
	{
		if constexpr ( DISTINCT )
			// the lambda resolves a group key to its head match,
			// or nullptr if that group was vacuumed away
			Distinct ( [this] ( SphGroupKey_t uGroup )->CSphMatch *
			{
				auto pIdx = m_hGroup2Index.Find ( uGroup );
				return pIdx? &m_dData[*pIdx] : nullptr;
			});
	}
  3546. void ProcessData ( MatchProcessor_i & tProcessor, const IntVec_t & dHeads )
  3547. {
  3548. for ( auto iHead : dHeads )
  3549. {
  3550. tProcessor.Process ( &m_dData[iHead] ); // process top group match
  3551. for ( int i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
  3552. tProcessor.Process ( &m_dData[i] ); // process tail matches
  3553. }
  3554. }
  3555. };
  3556. /////////////////////////////////////////////////////////////////////
  3557. /// generic match sorter that understands groupers that return multiple keys per match
  3558. template < typename T >
  3559. class MultiValueGroupSorterTraits_T : public T
  3560. {
  3561. using BASE = T;
  3562. public:
  3563. MultiValueGroupSorterTraits_T ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
  3564. : T ( pComp, pQuery, tSettings )
  3565. {}
  3566. bool Push ( const CSphMatch & tMatch ) override
  3567. {
  3568. this->m_pGrouper->MultipleKeysFromMatch ( tMatch, m_dKeys );
  3569. bool bRes = false;
  3570. ARRAY_FOREACH ( i, m_dKeys )
  3571. {
  3572. SphGroupKey_t tKey = m_dKeys[i];
  3573. // need to clear notifications once per match - not for every pushed value
  3574. bRes |= BASE::template PushEx<false> ( tMatch, tKey, false, false, ( i==0 ), nullptr );
  3575. }
  3576. return bRes;
  3577. }
  3578. bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override
  3579. {
  3580. return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet, false, true, nullptr );
  3581. }
  3582. private:
  3583. CSphVector<SphGroupKey_t> m_dKeys;
  3584. };
// multi-key grouping over the plain k-buffer group sorter
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class MultiValueGroupSorter_T : public MultiValueGroupSorterTraits_T <CSphKBufferGroupSorter <COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>
{
	using BASE = MultiValueGroupSorterTraits_T <CSphKBufferGroupSorter < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>;
	using MYTYPE = MultiValueGroupSorter_T < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES >;

public:
	using BASE::BASE; // inherit the (pComp, pQuery, tSettings) ctor

	// reproduce this sorter via the shared CloneSorterT helper
	ISphMatchSorter * Clone () const final { return this->template CloneSorterT<MYTYPE>(); }
};
// multi-key grouping over the N-group (up to N rows per group) k-buffer sorter
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class MultiValueNGroupSorter_T : public MultiValueGroupSorterTraits_T < CSphKBufferNGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>
{
	using BASE = MultiValueGroupSorterTraits_T <CSphKBufferNGroupSorter < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>;
	using MYTYPE = MultiValueNGroupSorter_T <COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;

public:
	using BASE::BASE; // inherit the (pComp, pQuery, tSettings) ctor

	ISphMatchSorter * Clone () const final
	{
		auto * pClone = this->template CloneSorterT<MYTYPE>();
		pClone->SetGLimit (this->m_iGLimit); // propagate the per-group row limit to the clone explicitly
		return pClone;
	}
};
/////////////////////////////////////////////////////////////////////

/// match sorter with k-buffering and group-by for JSON arrays
/// (one raw match may expand into several group keys via PushJsonField)
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class CSphKBufferJsonGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES >
{
public:
	using BASE = CSphKBufferGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
	using MYTYPE = CSphKBufferJsonGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;

	// since we inherit from template, we need to write boring 'using' block
	using KBufferGroupSorter = KBufferGroupSorter_T<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS>;
	using KBufferGroupSorter::m_eGroupBy;
	using KBufferGroupSorter::m_iLimit;
	using KBufferGroupSorter::m_tSubSorter;

	/// ctor
	FWD_BASECTOR( CSphKBufferJsonGroupSorter )

	bool Push ( const CSphMatch & tEntry ) final { return PushMatch(tEntry); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }

	/// add pre-grouped entry to the queue
	bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override
	{
		// re-group it based on the group key
		return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet, false, true, nullptr );
	}

	ISphMatchSorter * Clone () const final
	{
		return this->template CloneSorterT<MYTYPE>();
	}

private:
	// expand the match's JSON group-by value into per-value keys and push once per key
	FORCE_INLINE bool PushMatch ( const CSphMatch & tMatch )
	{
		SphGroupKey_t uGroupKey = this->m_pGrouper->KeyFromMatch ( tMatch );
		const BYTE * pBlobPool = this->m_pGrouper->GetBlobPool();

		bool bClearNotify = true;
		return PushJsonField ( uGroupKey, pBlobPool, [this, &tMatch, &bClearNotify]( SphAttr_t * pAttr, SphGroupKey_t uMatchGroupKey )
			{
				bool bPushed = BASE::template PushEx<false> ( tMatch, uMatchGroupKey, false, false, bClearNotify, pAttr );
				bClearNotify = false; // need to clear notifications once per match - not for every pushed value
				return bPushed;
			}
		);
	}
};
/// implicit group-by sorter
/// invoked when no 'group-by', but count(*) or count(distinct attr) are in game
/// keeps exactly one accumulator match (m_tData) into which all pushed matches are folded
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES>
class CSphImplicitGroupSorter final : public MatchSorter_c, ISphNoncopyable, protected BaseGroupSorter_c
{
	using MYTYPE = CSphImplicitGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
	using BASE = MatchSorter_c;

public:
	CSphImplicitGroupSorter ( const ISphMatchComparator * DEBUGARG(pComp), const CSphQuery *, const CSphGroupSorterSettings & tSettings )
		: BaseGroupSorter_c ( tSettings )
	{
		assert ( !DISTINCT || tSettings.m_pDistinctFetcher ); // distinct counting needs a key fetcher
		assert ( !pComp ); // no comparator needed - there is only one (implicit) group

		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve(1);

		m_iMatchCapacity = 1; // a single accumulator row
		m_pDistinctFetcher = tSettings.m_pDistinctFetcher;
	}

	/// schema setup
	void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final
	{
		if ( m_pSchema )
		{
			// switching schemas: remap locators and drop aggregates bound to the old one
			FixupLocators ( m_pSchema, pSchema );
			m_tPregroup.ResetAttrs ();
			m_dAggregates.Apply ( [] ( AggrFunc_i * pAggr ) {SafeDelete ( pAggr ); } );
			m_dAggregates.Resize ( 0 );
		}

		BASE::SetSchema ( pSchema, bRemapCmp );
		SetupBaseGrouper<DISTINCT> ( pSchema );
	}

	bool IsGroupby () const final { return true; }

	void SetBlobPool ( const BYTE * pBlobPool ) final
	{
		BlobPool_c::SetBlobPool ( pBlobPool );
		if ( m_pDistinctFetcher )
			m_pDistinctFetcher->SetBlobPool(pBlobPool);
	}

	void SetColumnar ( columnar::Columnar_i * pColumnar ) final
	{
		BASE::SetColumnar(pColumnar);
		BaseGroupSorter_c::SetColumnar(pColumnar);
		if ( m_pDistinctFetcher )
			m_pDistinctFetcher->SetColumnar(pColumnar);
	}

	// cutoff cannot be applied: every match must be folded into the accumulator
	bool IsCutoffDisabled() const final { return true; }

	bool Push ( const CSphMatch & tEntry ) final { return PushEx<false>(tEntry); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
	bool PushGrouped ( const CSphMatch & tEntry, bool ) final { return PushEx<true>(tEntry); }

	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) final
	{
		assert ( m_bDataInitialized );

		CountDistinct ();

		if constexpr ( HAS_AGGREGATES )
		{
			// produce final aggregate values in the accumulator
			for ( auto * pAggregate : m_dAggregates )
				pAggregate->Finalize ( m_tData );
		}

		int iCopied = 0;
		if ( EvalHAVING ( m_tData ) )
		{
			iCopied = 1;
			Swap ( *pTo, m_tData ); // hand the row over without copying
		} else
		{
			// HAVING rejected the row: release its data in place
			m_pSchema->FreeDataPtrs ( m_tData );
			m_tData.ResetDynamic ();
		}

		// reset the sorter for possible reuse
		m_iTotal = 0;
		m_bDataInitialized = false;

		if constexpr ( DISTINCT )
			m_tUniq.Reset();

		return iCopied;
	}

	/// finalize, perform final sort/cut as needed
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final
	{
		if ( !GetLength() )
			return;

		tProcessor.Process ( &m_tData );

		// intermediate finalization keeps m_tUniq for later merging; compact it otherwise
		if ( !bFinalizeMatches )
			m_tUniq.Compact();
	}

	int GetLength() final { return m_bDataInitialized ? 1 : 0; }

	// cloning is unsupported with DISTINCT: m_tUniq state cannot be split between clones
	bool CanBeCloned() const final { return !DISTINCT && BASE::CanBeCloned(); }

	// TODO! test.
	ISphMatchSorter * Clone () const final
	{
		auto pClone = new MYTYPE ( nullptr, nullptr, *this );
		CloneTo ( pClone );
		pClone->SetupBaseGrouperWrp ( pClone->m_pSchema );
		if ( m_pDistinctFetcher )
			pClone->m_pDistinctFetcher = m_pDistinctFetcher->Clone();
		return pClone;
	}

	// fold this sorter's accumulator into pRhs (same MYTYPE assumed by the cast)
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		if (!m_bDataInitialized)
			return;

		auto& dRhs = *(MYTYPE *) pRhs;
		if ( !dRhs.m_bDataInitialized )
		{
			// rhs is empty - just hand everything over
			// ISphMatchSorter
			::Swap ( m_iTotal, dRhs.m_iTotal );
			::Swap ( m_tData, dRhs.m_tData );
			::Swap ( m_bDataInitialized, dRhs.m_bDataInitialized );
			if ( bCopyMeta )
				dRhs.m_tUniq = std::move ( m_tUniq );
			return;
		}

		if ( bCopyMeta )
			m_tUniq.CopyTo ( dRhs.m_tUniq );

		// other step is a bit tricky:
		// we just can't add current count uniq to final; need to append m_tUniq instead,
		// so that final flattening will calculate real uniq count.
		dRhs.AddCount ( m_tData );

		if constexpr ( HAS_AGGREGATES )
			dRhs.UpdateAggregates ( m_tData, false, true );

		if ( !bCopyMeta && DISTINCT )
			dRhs.UpdateDistinct ( m_tData );
	}

	void SetMerge ( bool bMerge ) override { m_bMerge = bMerge; }

protected:
	CSphMatch	m_tData;					// the single accumulator row
	bool		m_bDataInitialized = false;	// set once the first match was cloned into m_tData
	bool		m_bMerge = false;			// passed through to AggrUpdate/AggrSetup
	UNIQ		m_tUniq;					// distinct-value collector

private:
	CSphVector<SphAttr_t>					m_dDistinctKeys;	// reused buffer for multi-key distinct fetch
	CSphRefcountedPtr<DistinctFetcher_i>	m_pDistinctFetcher;

	inline void SetupBaseGrouperWrp ( ISphSchema * pSchema ) { SetupBaseGrouper<DISTINCT> ( pSchema ); }
	void AddCount ( const CSphMatch & tEntry ) { m_tData.AddCounterAttr ( m_tLocCount, tEntry ); }
	void UpdateAggregates ( const CSphMatch & tEntry, bool bGrouped = true, bool bMerge = false ) { AggrUpdate ( m_tData, tEntry, bGrouped, bMerge ); }
	void SetupAggregates ( const CSphMatch & tEntry ) { AggrSetup ( m_tData, tEntry, m_bMerge ); }

	// submit actual distinct value in all cases
	template <bool GROUPED = true>
	void UpdateDistinct ( const CSphMatch & tEntry )
	{
		// pre-grouped entries carry their own distinct counter; raw ones count as 1
		int iCount = 1;
		if constexpr ( GROUPED )
			iCount = (int) tEntry.GetAttr ( m_tLocDistinct );

		if constexpr ( DISTINCT==1 )
			m_tUniq.Add ( { 0, m_pDistinctFetcher->GetKey(tEntry), iCount } );
		else
		{
			// multi-key distinct: fetch all keys from the match
			m_pDistinctFetcher->GetKeys ( tEntry, m_dDistinctKeys );
			for ( auto i : m_dDistinctKeys )
				this->m_tUniq.Add ( { 0, i, iCount } );
		}
	}

	/// add entry to the queue
	/// returns true only for the very first pushed match; all others are folded in as dupes
	template <bool GROUPED>
	FORCE_INLINE bool PushEx ( const CSphMatch & tEntry )
	{
		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}

		if ( m_bDataInitialized )
		{
			assert ( m_tData.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );

			if constexpr ( GROUPED )
			{
				// it's already grouped match
				// sum grouped matches count
				AddCount ( tEntry );
			} else
			{
				// it's a simple match
				// increase grouped matches count
				m_tData.AddCounterScalar ( m_tLocCount, 1 );
			}

			// update aggregates
			if constexpr ( HAS_AGGREGATES )
				UpdateAggregates ( tEntry, GROUPED, m_bMerge );
		}

		if constexpr ( DISTINCT )
			UpdateDistinct<GROUPED> ( tEntry );

		// it's a dupe anyway, so we shouldn't update total matches count
		if ( m_bDataInitialized )
			return false;

		// add first
		m_pSchema->CloneMatch ( m_tData, tEntry );

		// first-time aggregate setup
		if constexpr ( HAS_AGGREGATES )
			SetupAggregates(tEntry);

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( m_tData );

		if constexpr ( !GROUPED )
		{
			m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
			m_tData.SetAttr ( m_tLocCount, 1 );
			if constexpr ( DISTINCT )
				m_tData.SetAttr ( m_tLocDistinct, 0 );
		}
		else
		{
			if constexpr ( HAS_AGGREGATES )
				AggrUngroup ( m_tData );
		}

		m_bDataInitialized = true;
		++m_iTotal;
		return true;
	}

	/// count distinct values if necessary
	void CountDistinct ()
	{
		if constexpr ( !DISTINCT )
			return;

		assert ( m_bDataInitialized );
		m_tData.SetAttr ( m_tLocDistinct, m_tUniq.CountDistinct() );
	}
};
// common base for 'fast' sorters which carry a single precalculated match;
// they cannot be cloned (Clone() returns nullptr) or merged (MoveTo asserts)
class FastBaseSorter_c : public MatchSorter_c, ISphNoncopyable, protected BaseGroupSorter_c
{
public:
	FastBaseSorter_c ( const CSphGroupSorterSettings & tSettings ) : BaseGroupSorter_c ( tSettings ) {}

	bool IsGroupby () const final { return true; }
	bool CanBeCloned() const final { return false; }
	void SetMerge ( bool bMerge ) final {} // nothing to merge
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final { if ( GetLength() ) tProcessor.Process ( &m_tData ); }
	int GetLength() final { return m_bDataInitialized ? 1 : 0; }
	ISphMatchSorter * Clone() const final { return nullptr; }
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final { assert ( 0 && "Not supported"); }
	bool IsPrecalc() const final { return true; }

	// hand the single row over to the caller and reset the sorter
	int Flatten ( CSphMatch * pTo ) final
	{
		assert ( m_bDataInitialized );
		Swap ( *pTo, m_tData );
		m_iTotal = 0;
		m_bDataInitialized = false;
		return 1;
	}

protected:
	CSphMatch	m_tData;					// the single precalculated row
	bool		m_bDataInitialized = false;	// set once the row has been filled
};
// fast count distinct sorter
// works by using precalculated count distinct taken from secondary indexes
class FastCountDistinctSorter_c final : public FastBaseSorter_c
{
public:
	FastCountDistinctSorter_c ( int iCountDistinct, const CSphGroupSorterSettings & tSettings )
		: FastBaseSorter_c ( tSettings )
		, m_iCountDistinct ( iCountDistinct )
	{}

	bool Push ( const CSphMatch & tEntry ) final { return PushEx(tEntry); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
	bool PushGrouped ( const CSphMatch & tEntry, bool ) final { return PushEx(tEntry); }

private:
	int m_iCountDistinct = 0; // the precalculated count(distinct) value

	// clone the first pushed match and attach the precalculated distinct count;
	// all subsequent pushes are no-ops
	FORCE_INLINE bool PushEx ( const CSphMatch & tEntry )
	{
		if ( m_bDataInitialized )
			return true; // always return true, otherwise in RT indexes we won't be able to hit cutoff in disk chunks after the first one

		m_pSchema->CloneMatch ( m_tData, tEntry );
		m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
		m_tData.SetAttr ( m_tLocCount, 1 );
		m_tData.SetAttr ( m_tLocDistinct, m_iCountDistinct );
		m_bDataInitialized = true;
		m_iTotal++;
		return true;
	}
};
  3916. // fast count sorter
  3917. // works by using precalculated count taken from secondary indexes
  3918. class FastCountFilterSorter_c final : public FastBaseSorter_c
  3919. {
  3920. public:
  3921. FastCountFilterSorter_c ( int iCount, const CSphGroupSorterSettings & tSettings )
  3922. : FastBaseSorter_c ( tSettings )
  3923. , m_iCount ( iCount )
  3924. {}
  3925. bool Push ( const CSphMatch & tEntry ) final { return PushEx(tEntry); }
  3926. void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
  3927. bool PushGrouped ( const CSphMatch & tEntry, bool ) final { return PushEx(tEntry); }
  3928. private:
  3929. int m_iCount = 0;
  3930. FORCE_INLINE bool PushEx ( const CSphMatch & tEntry )
  3931. {
  3932. if ( m_bDataInitialized )
  3933. return true; // always return true, otherwise in RT indexes we won't be able to hit cutoff in disk chunks after the first one
  3934. m_pSchema->CloneMatch ( m_tData, tEntry );
  3935. m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
  3936. m_tData.SetAttr ( m_tLocCount, 1 );
  3937. m_tData.SetAttr ( m_tLocCount, m_iCount );
  3938. m_bDataInitialized = true;
  3939. m_iTotal++;
  3940. return true;
  3941. }
  3942. };
  3943. //////////////////////////////////////////////////////////////////////////
  3944. // SORT CLAUSE PARSER
  3945. //////////////////////////////////////////////////////////////////////////
/// splits a sort clause into NUL-separated tokens over an owned, preprocessed buffer;
/// ToLower() rewrites every separator char to NUL, so GetToken() simply scans non-NUL runs
class SortClauseTokenizer_t
{
protected:
	const char * m_pCur;	// current scan position inside m_pBuf
	const char * m_pMax;	// end of the buffer (m_pBuf + input length)
	char * m_pBuf;			// owned preprocessed copy of the input clause

protected:
	/// token-char filter: identifier chars (0..9, a..z, _, @, ., [, ], quotes, parens, *) pass through,
	/// A..Z are lowercased, anything else maps to NUL and thereby acts as a token separator
	char ToLower ( char c )
	{
		// 0..9, A..Z->a..z, _, a..z, @, .
		if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
			return c;
		if ( c>='A' && c<='Z' )
			return c-'A'+'a';
		return 0;
	}

public:
	/// copies sBuffer (including its terminating NUL) into the owned buffer,
	/// lowercasing it while preserving the case of JSON.field / JSON[...] subpaths
	explicit SortClauseTokenizer_t ( const char * sBuffer )
	{
		auto iLen = (int) strlen(sBuffer);
		m_pBuf = new char [ iLen+1 ];
		m_pMax = m_pBuf+iLen;
		m_pCur = m_pBuf;

		// make string lowercase but keep case of JSON.field
		bool bJson = false;
		for ( int i=0; i<=iLen; i++ )
		{
			char cSrc = sBuffer[i];
			char cDst = ToLower ( cSrc );
			bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
			m_pBuf[i] = bJson ? cSrc : cDst;
		}
	}

	~SortClauseTokenizer_t ()
	{
		SafeDeleteArray ( m_pBuf );
	}

	/// returns the next token (NUL-terminated pointer into m_pBuf), or nullptr when exhausted
	const char * GetToken ()
	{
		// skip separators (the ctor rewrote them to NUL)
		while ( m_pCur<m_pMax && !*m_pCur )
			m_pCur++;
		if ( m_pCur>=m_pMax )
			return nullptr;

		// memorize token start, and move pointer forward
		const char * sRes = m_pCur;
		while ( *m_pCur )
			m_pCur++;
		return sRes;
	}

	/// checks whether sTok heads a composite "count(*)" spelling, i.e. is followed by the
	/// "(*)" sequence (possibly interleaved with spaces/NUL separators);
	/// on success advances the internal cursor past the whole composite token
	bool IsSparseCount ( const char * sTok )
	{
		const char * sSeq = "(*)";
		for ( ; sTok<m_pMax && *sSeq; sTok++ )
		{
			bool bGotSeq = ( *sSeq==*sTok );
			if ( bGotSeq )
				sSeq++;

			// stop checking on any non-space char outside sequence or sequence end
			if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
				break;
		}

		if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
		{
			// advance token iterator after composite count(*) token
			m_pCur = sTok+1;
			return true;
		} else
		{
			return false;
		}
	}
};
  4019. static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
  4020. {
  4021. switch ( eType )
  4022. {
  4023. case SPH_ATTR_FLOAT:
  4024. return SPH_KEYPART_FLOAT;
  4025. case SPH_ATTR_DOUBLE:
  4026. return SPH_KEYPART_DOUBLE;
  4027. case SPH_ATTR_STRING:
  4028. return SPH_KEYPART_STRING;
  4029. case SPH_ATTR_JSON:
  4030. case SPH_ATTR_JSON_PTR:
  4031. case SPH_ATTR_JSON_FIELD:
  4032. case SPH_ATTR_JSON_FIELD_PTR:
  4033. case SPH_ATTR_STRINGPTR:
  4034. return SPH_KEYPART_STRINGPTR;
  4035. default:
  4036. return SPH_KEYPART_INT;
  4037. }
  4038. }
  4039. //////////////////////////////////////////////////////////////////////////
  4040. // SORTING+GROUPING INSTANTIATION
  4041. //////////////////////////////////////////////////////////////////////////
/// aggregate values precalculated outside the sorter (e.g. from secondary indexes);
/// -1 in any field means "no precalculated value available"
struct Precalculated_t
{
	int64_t m_iCountDistinct = -1;	// precalculated COUNT(DISTINCT attr)
	int64_t m_iCountFilter = -1;	// precalculated COUNT(*) over a filtered set
	int64_t m_iCount = -1;			// precalculated plain COUNT(*)
};
  4048. #define CREATE_SORTER_4TH(SORTER,COMPGROUP,UNIQ,COMP,QUERY,SETTINGS,HAS_PACKEDFACTORS,HAS_AGGREGATES) \
  4049. { \
  4050. int iMultiDistict = 0; \
  4051. if ( tSettings.m_bDistinct ) \
  4052. { \
  4053. assert(tSettings.m_pDistinctFetcher); \
  4054. iMultiDistict = tSettings.m_pDistinctFetcher->IsMultiValue() ? 2 : 1; \
  4055. } \
  4056. BYTE uSelector = 4*iMultiDistict + 2*(bHasPackedFactors?1:0) + (HAS_AGGREGATES?1:0); \
  4057. switch ( uSelector ) \
  4058. { \
  4059. case 0: return new SORTER<COMPGROUP,UNIQ,0, false,false> ( pComp, pQuery, tSettings ); \
  4060. case 1: return new SORTER<COMPGROUP,UNIQ,0, false,true> ( pComp, pQuery, tSettings ); \
  4061. case 2: return new SORTER<COMPGROUP,UNIQ,0, true, false> ( pComp, pQuery, tSettings ); \
  4062. case 3: return new SORTER<COMPGROUP,UNIQ,0, true, true> ( pComp, pQuery, tSettings ); \
  4063. case 4: return new SORTER<COMPGROUP,UNIQ,1, false,false> ( pComp, pQuery, tSettings ); \
  4064. case 5: return new SORTER<COMPGROUP,UNIQ,1, false,true> ( pComp, pQuery, tSettings ); \
  4065. case 6: return new SORTER<COMPGROUP,UNIQ,1, true, false> ( pComp, pQuery, tSettings ); \
  4066. case 7: return new SORTER<COMPGROUP,UNIQ,1, true, true> ( pComp, pQuery, tSettings ); \
  4067. case 8: return new SORTER<COMPGROUP,UNIQ,2, false,false> ( pComp, pQuery, tSettings ); \
  4068. case 9: return new SORTER<COMPGROUP,UNIQ,2, false,true> ( pComp, pQuery, tSettings ); \
  4069. case 10:return new SORTER<COMPGROUP,UNIQ,2, true, false> ( pComp, pQuery, tSettings ); \
  4070. case 11:return new SORTER<COMPGROUP,UNIQ,2, true, true> ( pComp, pQuery, tSettings ); \
  4071. default: assert(0); return nullptr; \
  4072. } \
  4073. }
/// 3rd creation stage: choose the grouping sorter template by grouping mode and distinct-counting flavor.
/// precalculated aggregate values (if present) short-circuit into trivial single-row "fast" sorters.
template < typename COMPGROUP >
static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const Precalculated_t & tPrecalc )
{
	if ( tPrecalc.m_iCountDistinct!=-1 )
		return new FastCountDistinctSorter_c ( tPrecalc.m_iCountDistinct, tSettings );

	if ( tPrecalc.m_iCountFilter!=-1 )
		return new FastCountFilterSorter_c ( tPrecalc.m_iCountFilter, tSettings );

	// NOTE(review): plain precalculated count reuses FastCountFilterSorter_c here — confirm this is intended
	// (and not a dedicated plain-count sorter)
	if ( tPrecalc.m_iCount!=-1 )
		return new FastCountFilterSorter_c ( tPrecalc.m_iCount, tSettings );

	bool bUseHLL = tSettings.m_iDistinctAccuracy > 0;	// approximate (HyperLogLog) distinct counting requested

	using Uniq_c = UniqGrouped_T<ValueWithGroup_t>;
	using UniqSingle_c = UniqSingle_T<SphAttr_t>;
	using UniqCount_c = UniqGrouped_T<ValueWithGroupCount_t>;
	using UniqCountSingle_c = UniqSingle_T<ValueWithCount_t>;

	// selector bits: 32=HLL distinct, 16=input already grouped, 8=json group-by,
	// 4=group-by limit>1 (N-group), 2=implicit grouping, 1=multi-value grouper
	BYTE uSelector3rd = 32*( bUseHLL ? 1 : 0 ) + 16*( tSettings.m_bGrouped ? 1:0 ) + 8*( tSettings.m_bJson ? 1:0 ) + 4*( pQuery->m_iGroupbyLimit>1 ? 1:0 ) + 2*( tSettings.m_bImplicit ? 1:0 ) + ( ( tSettings.m_pGrouper && tSettings.m_pGrouper->IsMultiValue() ) ? 1:0 );
	switch ( uSelector3rd )
	{
	case 0: CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 1: CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 2: CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 4: CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 5: CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 8: CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 16:CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 17:CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 18:CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqCountSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 20:CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 21:CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 24:CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 32:CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 33:CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 34:CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqHLLSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 36:CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 37:CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 40:CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	// 48..56: HLL requested but input is already grouped — exact counting (UniqCount) takes precedence
	case 48:CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 49:CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 50:CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqCountSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 52:CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 53:CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 56:CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	default: assert(0); return nullptr;
	}
}
  4118. static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const Precalculated_t & tPrecalc )
  4119. {
  4120. switch ( eGroupFunc )
  4121. {
  4122. case FUNC_GENERIC1: return sphCreateSorter3rd<MatchGeneric1_fn> ( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4123. case FUNC_GENERIC2: return sphCreateSorter3rd<MatchGeneric2_fn> ( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4124. case FUNC_GENERIC3: return sphCreateSorter3rd<MatchGeneric3_fn> ( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4125. case FUNC_GENERIC4: return sphCreateSorter3rd<MatchGeneric4_fn> ( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4126. case FUNC_GENERIC5: return sphCreateSorter3rd<MatchGeneric5_fn> ( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4127. case FUNC_EXPR: return sphCreateSorter3rd<MatchExpr_fn> ( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4128. default: return nullptr;
  4129. }
  4130. }
  4131. static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const Precalculated_t & tPrecalc )
  4132. {
  4133. CSphRefcountedPtr<ISphMatchComparator> pComp;
  4134. if ( !tSettings.m_bImplicit )
  4135. switch ( eMatchFunc )
  4136. {
  4137. case FUNC_REL_DESC: pComp = new MatchRelevanceLt_fn(); break;
  4138. case FUNC_ATTR_DESC: pComp = new MatchAttrLt_fn(); break;
  4139. case FUNC_ATTR_ASC: pComp = new MatchAttrGt_fn(); break;
  4140. case FUNC_TIMESEGS: pComp = new MatchTimeSegments_fn(); break;
  4141. case FUNC_GENERIC1: pComp = new MatchGeneric1_fn(); break;
  4142. case FUNC_GENERIC2: pComp = new MatchGeneric2_fn(); break;
  4143. case FUNC_GENERIC3: pComp = new MatchGeneric3_fn(); break;
  4144. case FUNC_GENERIC4: pComp = new MatchGeneric4_fn(); break;
  4145. case FUNC_GENERIC5: pComp = new MatchGeneric5_fn(); break;
  4146. case FUNC_EXPR: pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
  4147. }
  4148. return sphCreateSorter2nd ( eGroupFunc, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  4149. }
  4150. //////////////////////////////////////////////////////////////////////////
  4151. // GEODIST
  4152. //////////////////////////////////////////////////////////////////////////
/// legacy geodist expression: great-circle distance from a per-match (lat,lon)
/// attribute pair to a fixed anchor point taken from the query
struct ExprGeodist_t : public ISphExpr
{
public:
	ExprGeodist_t () = default;
	/// binds locators and anchor coordinates; returns false (with sError set) on failure
	bool Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError );
	float Eval ( const CSphMatch & tMatch ) const final;
	void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) final;
	void Command ( ESphExprCommand eCmd, void * pArg ) final;
	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final;
	ISphExpr * Clone() const final;

protected:
	CSphAttrLocator m_tGeoLatLoc;	// locator of the latitude attribute
	CSphAttrLocator m_tGeoLongLoc;	// locator of the longitude attribute
	float m_fGeoAnchorLat;			// anchor latitude (from the query)
	float m_fGeoAnchorLong;			// anchor longitude (from the query)
	int m_iLat;						// schema index of the latitude attribute
	int m_iLon;						// schema index of the longitude attribute
};
  4171. bool ExprGeodist_t::Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError )
  4172. {
  4173. if ( !pQuery->m_bGeoAnchor )
  4174. {
  4175. sError.SetSprintf ( "INTERNAL ERROR: no geoanchor, can not create geodist evaluator" );
  4176. return false;
  4177. }
  4178. int iLat = tSchema.GetAttrIndex ( pQuery->m_sGeoLatAttr.cstr() );
  4179. if ( iLat<0 )
  4180. {
  4181. sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLatAttr.cstr() );
  4182. return false;
  4183. }
  4184. int iLong = tSchema.GetAttrIndex ( pQuery->m_sGeoLongAttr.cstr() );
  4185. if ( iLong<0 )
  4186. {
  4187. sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLongAttr.cstr() );
  4188. return false;
  4189. }
  4190. m_tGeoLatLoc = tSchema.GetAttr(iLat).m_tLocator;
  4191. m_tGeoLongLoc = tSchema.GetAttr(iLong).m_tLocator;
  4192. m_fGeoAnchorLat = pQuery->m_fGeoLatitude;
  4193. m_fGeoAnchorLong = pQuery->m_fGeoLongitude;
  4194. m_iLat = iLat;
  4195. m_iLon = iLong;
  4196. return true;
  4197. }
  4198. static inline double sphSqr ( double v )
  4199. {
  4200. return v*v;
  4201. }
/// haversine great-circle distance (meters) from the match's coordinates to the anchor
/// NOTE(review): coordinates are fed to sin/cos directly, so they are presumably radians — verify against callers
float ExprGeodist_t::Eval ( const CSphMatch & tMatch ) const
{
	const double R = 6384000;	// Earth radius approximation, meters
	float plat = tMatch.GetAttrFloat ( m_tGeoLatLoc );
	float plon = tMatch.GetAttrFloat ( m_tGeoLongLoc );
	double dlat = plat - m_fGeoAnchorLat;
	double dlon = plon - m_fGeoAnchorLong;
	double a = sphSqr ( sin ( dlat/2 ) ) + cos(plat)*cos(m_fGeoAnchorLat)*sphSqr(sin(dlon/2));
	// Min() clamps sqrt(a) to 1.0 to guard asin() against rounding error
	double c = 2*asin ( Min ( 1.0, sqrt(a) ) );
	return (float)(R*c);
}
  4213. void ExprGeodist_t::FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
  4214. {
  4215. sphFixupLocator ( m_tGeoLatLoc, pOldSchema, pNewSchema );
  4216. sphFixupLocator ( m_tGeoLongLoc, pOldSchema, pNewSchema );
  4217. }
  4218. void ExprGeodist_t::Command ( ESphExprCommand eCmd, void * pArg )
  4219. {
  4220. if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
  4221. {
  4222. static_cast < CSphVector<int>* >(pArg)->Add ( m_iLat );
  4223. static_cast < CSphVector<int>* >(pArg)->Add ( m_iLon );
  4224. }
  4225. if ( eCmd==SPH_EXPR_UPDATE_DEPENDENT_COLS )
  4226. {
  4227. int iRef = *static_cast<int*>(pArg);
  4228. if ( m_iLat>=iRef ) m_iLat--;
  4229. if ( m_iLon>=iRef ) m_iLon--;
  4230. }
  4231. }
  4232. uint64_t ExprGeodist_t::GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
  4233. {
  4234. uint64_t uHash = sphCalcExprDepHash ( this, tSorterSchema, uPrevHash, bDisable );
  4235. static const char * EXPR_TAG = "ExprGeodist_t";
  4236. uHash = sphFNV64 ( EXPR_TAG, (int) strlen(EXPR_TAG), uHash );
  4237. uHash = sphFNV64 ( &m_fGeoAnchorLat, sizeof(m_fGeoAnchorLat), uHash );
  4238. uHash = sphFNV64 ( &m_fGeoAnchorLong, sizeof(m_fGeoAnchorLong), uHash );
  4239. return uHash;
  4240. }
  4241. ISphExpr * ExprGeodist_t::Clone() const
  4242. {
  4243. auto * pClone = new ExprGeodist_t;
  4244. pClone->m_tGeoLatLoc = m_tGeoLatLoc;
  4245. pClone->m_tGeoLongLoc = m_tGeoLongLoc;
  4246. pClone->m_fGeoAnchorLat = m_fGeoAnchorLat;
  4247. pClone->m_fGeoAnchorLong = m_fGeoAnchorLong;
  4248. pClone->m_iLat = m_iLat;
  4249. pClone->m_iLon = m_iLon;
  4250. return pClone;
  4251. }
  4252. //////////////////////////////////////////////////////////////////////////
  4253. // PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
  4254. //////////////////////////////////////////////////////////////////////////
  4255. static CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation );
  4256. static CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphColumnInfo> & dAttrs, VecRefPtrs_t<ISphExpr *> dJsonKeys, ESphCollation eCollation );
  4257. static CSphGrouper * CreateGrouperStringExpr ( ISphExpr * pExpr, ESphCollation eCollation );
  4258. bool HasImplicitGrouping ( const CSphQuery & tQuery )
  4259. {
  4260. auto fnIsImplicit = [] ( const CSphQueryItem & t )
  4261. {
  4262. return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
  4263. };
  4264. return tQuery.m_sGroupBy.IsEmpty() ? tQuery.m_dItems.any_of(fnIsImplicit) : false;
  4265. }
/// builds a match sorter ("queue") for a query: parses the select list and sort/group
/// clauses, extends the sorter schema, and finally spawns the matching sorter instance
class QueueCreator_c
{
public:
	bool m_bMulti = false;				// part of a multi-queue setup
	bool m_bCreate = true;				// whether CreateQueue() should actually spawn the sorter
	bool m_bZonespanlist = false;		// query needs zonespanlist ranker output
	DWORD m_uPackedFactorFlags = SPH_FACTOR_DISABLE;	// PACKEDFACTORS() flags collected from the select list

	QueueCreator_c ( const SphQueueSettings_t & tSettings, const CSphQuery & tQuery, CSphString & sError, StrVec_t * pExtra, QueryProfile_c * pProfile );

	bool SetupComputeQueue();
	bool SetupGroupQueue();
	bool SetupQueue();

	CSphRsetSchema & SorterSchema() const { return *m_pSorterSchema; }
	bool HasJson() const { return m_tGroupSorterSettings.m_bJson; }
	bool SetSchemaGroupQueue ( const CSphRsetSchema & tNewSchema );

	/// creates proper queue for given query
	/// may return NULL on error; in this case, error message is placed in sError
	/// if the pUpdate is given, creates the updater's queue and perform the index update
	/// instead of searching
	ISphMatchSorter * CreateQueue();

private:
	// external context (owned by the caller)
	const SphQueueSettings_t & m_tSettings;
	const CSphQuery & m_tQuery;
	CSphString & m_sError;
	StrVec_t * m_pExtra = nullptr;
	QueryProfile_c * m_pProfile = nullptr;

	bool m_bHasCount = false;			// select list contains count(*)
	bool m_bHasGroupByExpr = false;		// select list references groupby()
	sph::StringSet m_hQueryAttrs;		// names already present in the sorter schema
	std::unique_ptr<CSphRsetSchema> m_pSorterSchema;	// owned working copy of the index schema

	bool m_bGotGroupby;					// query has (explicit or implicit) grouping
	bool m_bRandomize;					// ORDER BY RAND()
	ESphSortFunc m_eMatchFunc = FUNC_REL_DESC;	// match-level sort function
	ESphSortFunc m_eGroupFunc = FUNC_REL_DESC;	// group-level sort function
	CSphMatchComparatorState m_tStateMatch;
	CSphVector<ExtraSortExpr_t> m_dMatchJsonExprs;	// json expressions used in ORDER BY
	CSphMatchComparatorState m_tStateGroup;
	CSphVector<ExtraSortExpr_t> m_dGroupJsonExprs;	// json expressions used in WITHIN GROUP ORDER BY
	CSphGroupSorterSettings m_tGroupSorterSettings;
	CSphVector<std::pair<int,bool>> m_dGroupColumns;	// (attr index, grouper-reads-attr) pairs
	bool m_bHeadWOGroup;
	bool m_bGotDistinct;				// query has COUNT(DISTINCT ...)
	bool m_bExprsNeedDocids = false;	// some expression requires document ids

	// for sorter to create pooled attributes
	bool m_bHaveStar = false;

	// fixme! transform to StringSet on end of merge!
	sph::StringSet m_hQueryColumns; // FIXME!!! unify with Extra schema after merge master into branch
	sph::StringSet m_hQueryDups;
	sph::StringSet m_hExtra;

	// select-list / schema setup helpers
	bool ParseQueryItem ( const CSphQueryItem & tItem );
	bool MaybeAddGeodistColumn();
	bool MaybeAddExprColumn();
	bool MaybeAddExpressionsFromSelectList();
	bool AddExpressionsForUpdates();
	bool MaybeAddGroupbyMagic ( bool bGotDistinct );
	bool AddKNNDistColumn();
	bool CheckHavingConstraints() const;
	bool SetupGroupbySettings ( bool bHasImplicitGrouping );
	void AssignOrderByToPresortStage ( const int * pAttrs, int iAttrCount );

	// sort-state fixup helpers
	void ReplaceGroupbyStrWithExprs ( CSphMatchComparatorState & tState, int iNumOldAttrs );
	void ReplaceStaticStringsWithExprs ( CSphMatchComparatorState & tState );
	void ReplaceJsonWithExprs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs );
	void AddColumnarExprsAsAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs );
	void RemapAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs );
	static void SetupRemapColJson ( CSphColumnInfo & tRemapCol, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, int iStateAttr ) ;
	const CSphColumnInfo * GetGroupbyStr ( int iAttr, int iNumOldAttrs ) const;

	// sorting / grouping setup
	bool SetupMatchesSortingFunc();
	bool SetupGroupSortingFunc ( bool bGotDistinct );
	bool AddGroupbyStuff();
	void AddKnnDistSort ( CSphString & sSortBy );
	bool SetGroupSorting();
	void ExtraAddSortkeys ( const int * dAttrs );
	bool AddStoredFieldExpressions();
	bool AddColumnarAttributeExpressions();
	void CreateGrouperByAttr ( ESphAttr eType, const CSphColumnInfo & tGroupByAttr, bool & bGrouperUsesAttrs );
	void SelectStageForColumnarExpr ( CSphColumnInfo & tExprCol );
	void FetchDependencyChains ( IntVec_t & dDependentCols );
	void PropagateEvalStage ( CSphColumnInfo & tExprCol, IntVec_t & dDependentCols );
	bool SetupDistinctAttr();
	bool PredictAggregates() const;
	bool ReplaceWithColumnarItem ( const CSphString & sAttr, ESphEvalStage eStage );
	int ReduceMaxMatches() const;
	int AdjustMaxMatches ( int iMaxMatches ) const;
	bool ConvertColumnarToDocstore();
	const CSphColumnInfo * GetAliasedColumnarAttr ( const CSphColumnInfo & tAttr );
	bool SetupAggregateExpr ( CSphColumnInfo & tExprCol, const CSphString & sExpr, DWORD uQueryPackedFactorFlags );
	bool SetupColumnarAggregates ( CSphColumnInfo & tExprCol );
	void UpdateAggregateDependencies ( CSphColumnInfo & tExprCol );
	int GetGroupbyAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupBy, m_tQuery, *m_pSorterSchema ); }
	int GetGroupDistinctAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupDistinct, m_tQuery, *m_pSorterSchema ); }

	// precalculated ("fast path") aggregate support
	bool CanCalcFastCountDistinct() const;
	bool CanCalcFastCountFilter() const;
	bool CanCalcFastCount() const;
	Precalculated_t FetchPrecalculatedValues() const;

	ISphMatchSorter * SpawnQueue();
	std::unique_ptr<ISphFilter> CreateAggrFilter() const;
	void SetupCollation();
	bool Err ( const char * sFmt, ... ) const;
};
/// captures the queue-building context and copies the index schema into the owned sorter schema
QueueCreator_c::QueueCreator_c ( const SphQueueSettings_t & tSettings, const CSphQuery & tQuery, CSphString & sError, StrVec_t * pExtra, QueryProfile_c * pProfile )
	: m_tSettings ( tSettings )
	, m_tQuery ( tQuery )
	, m_sError ( sError )
	, m_pExtra ( pExtra )
	, m_pProfile ( pProfile )
	, m_pSorterSchema { std::make_unique<CSphRsetSchema>() }
{
	// short-cuts
	m_sError = "";
	*m_pSorterSchema = m_tSettings.m_tSchema;

	// json sort-expression slots are pre-sized to the comparator's maximum number of sort keys
	m_dMatchJsonExprs.Resize ( CSphMatchComparatorState::MAX_ATTRS );
	m_dGroupJsonExprs.Resize ( CSphMatchComparatorState::MAX_ATTRS );
}
  4378. const CSphColumnInfo * QueueCreator_c::GetAliasedColumnarAttr ( const CSphColumnInfo & tAttr )
  4379. {
  4380. if ( !tAttr.IsColumnarExpr() )
  4381. return &tAttr;
  4382. CSphString sAliasedCol;
  4383. tAttr.m_pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &sAliasedCol );
  4384. const CSphColumnInfo * pAttr = m_pSorterSchema->GetAttr ( sAliasedCol.cstr() );
  4385. assert(pAttr);
  4386. return pAttr;
  4387. }
/// picks a grouper implementation for SPH_GROUPBY_ATTR based on the attribute's type and storage;
/// specialized (columnar / expression) groupers clear bGrouperUsesAttrs since they do not read row attrs;
/// any case that sets no grouper falls back to the generic CSphGrouperAttr at the end
void QueueCreator_c::CreateGrouperByAttr ( ESphAttr eType, const CSphColumnInfo & tGroupByAttr, bool & bGrouperUsesAttrs )
{
	assert ( m_pSorterSchema );
	auto & tSchema = *m_pSorterSchema;
	const CSphAttrLocator & tLoc = tGroupByAttr.m_tLocator;

	switch ( eType )
	{
	case SPH_ATTR_JSON:
	case SPH_ATTR_JSON_FIELD:
		{
			// group by a json field: parse the group-by string as an expression over the json blob
			ExprParseArgs_t tExprArgs;
			tExprArgs.m_eCollation = m_tQuery.m_eCollation;

			ISphExprRefPtr_c pExpr { sphExprParse ( m_tQuery.m_sGroupBy.cstr(), tSchema, m_sError, tExprArgs ) };
			m_tGroupSorterSettings.m_pGrouper = new CSphGrouperJsonField ( tLoc, pExpr );
			m_tGroupSorterSettings.m_bJson = true;
		}
		break;

	case SPH_ATTR_STRING:
	case SPH_ATTR_STRINGPTR:
		// percolate select list push matches with string_ptr

		// check if it is a columnar attr or an expression spawned instead of a columnar attr
		// even if it is an expression, spawn a new one, because a specialized grouper works a lot faster because it doesn't allocate and store string in the match
		if ( tGroupByAttr.IsColumnar() || tGroupByAttr.IsColumnarExpr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarString ( *GetAliasedColumnarAttr(tGroupByAttr), m_tQuery.m_eCollation );
			bGrouperUsesAttrs = false;
		}
		else if ( tGroupByAttr.m_pExpr && !tGroupByAttr.m_pExpr->IsDataPtrAttr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperStringExpr ( tGroupByAttr.m_pExpr, m_tQuery.m_eCollation );
			bGrouperUsesAttrs = false;
		}
		else
			m_tGroupSorterSettings.m_pGrouper = sphCreateGrouperString ( tLoc, m_tQuery.m_eCollation );
		break;

	case SPH_ATTR_UINT32SET:
	case SPH_ATTR_INT64SET:
		if ( tGroupByAttr.IsColumnar() || tGroupByAttr.IsColumnarExpr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarMVA ( *GetAliasedColumnarAttr(tGroupByAttr) );
			bGrouperUsesAttrs = false;
			break;
		}

		// row-based MVA grouper, specialized on the value width
		if ( eType==SPH_ATTR_UINT32SET )
			m_tGroupSorterSettings.m_pGrouper = new GrouperMVA_T<DWORD>(tLoc);
		else
			m_tGroupSorterSettings.m_pGrouper = new GrouperMVA_T<int64_t>(tLoc);
		break;

	case SPH_ATTR_UINT32SET_PTR:
	case SPH_ATTR_INT64SET_PTR:
		// ptr-MVA only has a columnar specialization; otherwise fall through to the generic grouper below
		if ( tGroupByAttr.IsColumnar() || tGroupByAttr.IsColumnarExpr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarMVA ( *GetAliasedColumnarAttr(tGroupByAttr) );
			bGrouperUsesAttrs = false;
		}
		break;

	case SPH_ATTR_BOOL:
	case SPH_ATTR_INTEGER:
	case SPH_ATTR_BIGINT:
	case SPH_ATTR_FLOAT:
		if ( tGroupByAttr.IsColumnar() || ( tGroupByAttr.IsColumnarExpr() && tGroupByAttr.m_eStage>SPH_EVAL_PREFILTER ) )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarInt ( *GetAliasedColumnarAttr(tGroupByAttr) );
			bGrouperUsesAttrs = false;
		}
		break;

	default:
		break;
	}

	// default: generic row-attribute grouper
	if ( !m_tGroupSorterSettings.m_pGrouper )
		m_tGroupSorterSettings.m_pGrouper = new CSphGrouperAttr(tLoc);
}
  4460. bool QueueCreator_c::SetupDistinctAttr()
  4461. {
  4462. if ( m_tQuery.m_sGroupDistinct.IsEmpty() )
  4463. return true;
  4464. assert ( m_pSorterSchema );
  4465. auto & tSchema = *m_pSorterSchema;
  4466. int iDistinct = tSchema.GetAttrIndex ( m_tQuery.m_sGroupDistinct.cstr () );
  4467. if ( iDistinct<0 )
  4468. return Err ( "group-count-distinct attribute '%s' not found", m_tQuery.m_sGroupDistinct.cstr() );
  4469. const auto & tDistinctAttr = tSchema.GetAttr(iDistinct);
  4470. if ( IsNotRealAttribute(tDistinctAttr) )
  4471. return Err ( "group-count-distinct attribute '%s' not found", m_tQuery.m_sGroupDistinct.cstr() );
  4472. if ( tDistinctAttr.IsColumnar() )
  4473. m_tGroupSorterSettings.m_pDistinctFetcher = CreateColumnarDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_eAttrType, m_tQuery.m_eCollation );
  4474. else
  4475. m_tGroupSorterSettings.m_pDistinctFetcher = CreateDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_tLocator, tDistinctAttr.m_eAttrType );
  4476. return true;
  4477. }
/// configures groupers and distinct fetchers from the query's GROUP BY clause;
/// handles four shapes: multi-attribute group-by, json-field group-by, implicit grouping,
/// and plain single-attribute group-by (with legacy day/week/month/year modes)
bool QueueCreator_c::SetupGroupbySettings ( bool bHasImplicitGrouping )
{
	if ( m_tQuery.m_sGroupBy.IsEmpty() && !bHasImplicitGrouping )
		return true;

	if ( m_tQuery.m_eGroupFunc==SPH_GROUPBY_ATTRPAIR )
		return Err ( "SPH_GROUPBY_ATTRPAIR is not supported any more (just group on 'bigint' attribute)" );

	assert ( m_pSorterSchema );
	auto & tSchema = *m_pSorterSchema;

	m_tGroupSorterSettings.m_iMaxMatches = m_tSettings.m_iMaxMatches;

	if ( !SetupDistinctAttr() )
		return false;

	CSphString sJsonColumn;

	// shape 1: GROUP BY over a comma-separated list of attributes / json fields
	if ( m_tQuery.m_eGroupFunc==SPH_GROUPBY_MULTIPLE )
	{
		CSphVector<CSphColumnInfo> dAttrs;
		VecRefPtrs_t<ISphExpr *> dJsonKeys;

		// split the clause on commas and trim each name
		StrVec_t dGroupBy;
		sph::Split ( m_tQuery.m_sGroupBy.cstr (), -1, ",", [&] ( const char * sToken, int iLen )
		{
			CSphString sGroupBy ( sToken, iLen );
			sGroupBy.Trim ();
			dGroupBy.Add ( std::move ( sGroupBy ));
		} );
		dGroupBy.Uniq();

		for ( auto & sGroupBy : dGroupBy )
		{
			CSphString sJsonExpr;
			// "attr.field" syntax: group by the json column, keyed by the subfield expression
			if ( sphJsonNameSplit ( sGroupBy.cstr(), &sJsonColumn ) )
			{
				sJsonExpr = sGroupBy;
				sGroupBy = sJsonColumn;
			}

			const int iAttr = tSchema.GetAttrIndex ( sGroupBy.cstr() );
			if ( iAttr<0 )
				return Err( "group-by attribute '%s' not found", sGroupBy.cstr() );

			auto tAttr = tSchema.GetAttr ( iAttr );
			ESphAttr eType = tAttr.m_eAttrType;
			if ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET )
				return Err ( "MVA values can't be used in multiple group-by" );

			if ( eType==SPH_ATTR_JSON && sJsonExpr.IsEmpty() )
				return Err ( "JSON blob can't be used in multiple group-by" );

			dAttrs.Add ( tAttr );
			m_dGroupColumns.Add ( { iAttr, true } );

			if ( !sJsonExpr.IsEmpty() )
			{
				ExprParseArgs_t tExprArgs;
				dJsonKeys.Add ( sphExprParse ( sJsonExpr.cstr(), tSchema, m_sError, tExprArgs ) );
			}
			else
				dJsonKeys.Add ( nullptr );	// keep dJsonKeys parallel to dAttrs
		}

		m_tGroupSorterSettings.m_pGrouper = sphCreateGrouperMulti ( dAttrs, std::move(dJsonKeys), m_tQuery.m_eCollation );
		return true;
	}

	// shape 2: GROUP BY a single json subfield ("attr.field")
	if ( sphJsonNameSplit ( m_tQuery.m_sGroupBy.cstr(), &sJsonColumn ) )
	{
		const int iAttr = tSchema.GetAttrIndex ( sJsonColumn.cstr() );
		if ( iAttr<0 )
			return Err ( "groupby: no such attribute '%s'", sJsonColumn.cstr ());

		if ( tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON
			&& tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON_PTR )
			return Err ( "groupby: attribute '%s' does not have subfields (must be sql_attr_json)",
				sJsonColumn.cstr() );

		if ( m_tQuery.m_eGroupFunc!=SPH_GROUPBY_ATTR )
			return Err ( "groupby: legacy groupby modes are not supported on JSON attributes" );

		m_dGroupColumns.Add ( { iAttr, true } );

		ExprParseArgs_t tExprArgs;
		tExprArgs.m_eCollation = m_tQuery.m_eCollation;

		ISphExprRefPtr_c pExpr { sphExprParse ( m_tQuery.m_sGroupBy.cstr(), tSchema, m_sError, tExprArgs ) };
		m_tGroupSorterSettings.m_pGrouper = new CSphGrouperJsonField ( tSchema.GetAttr(iAttr).m_tLocator, pExpr );
		m_tGroupSorterSettings.m_bJson = true;
		return true;
	}

	// shape 3: implicit grouping (aggregates without GROUP BY) — no grouper needed
	if ( bHasImplicitGrouping )
	{
		m_tGroupSorterSettings.m_bImplicit = true;
		return true;
	}

	// shape 4: setup groupby attr
	int iGroupBy = GetGroupbyAttrIndex();
	if ( iGroupBy<0 )
		return Err ( "group-by attribute '%s' not found", m_tQuery.m_sGroupBy.cstr() );

	const CSphColumnInfo & tGroupByAttr = tSchema.GetAttr(iGroupBy);
	ESphAttr eType = tGroupByAttr.m_eAttrType;
	CSphAttrLocator tLoc = tGroupByAttr.m_tLocator;
	bool bGrouperUsesAttrs = true;
	switch (m_tQuery.m_eGroupFunc )
	{
		case SPH_GROUPBY_DAY:
			m_tGroupSorterSettings.m_pGrouper = getDayGrouper ( tLoc ); break;

		case SPH_GROUPBY_WEEK:
			m_tGroupSorterSettings.m_pGrouper = getWeekGrouper ( tLoc ); break;

		case SPH_GROUPBY_MONTH:
			m_tGroupSorterSettings.m_pGrouper = getMonthGrouper ( tLoc ); break;

		case SPH_GROUPBY_YEAR:
			m_tGroupSorterSettings.m_pGrouper = getYearGrouper ( tLoc ); break;

		case SPH_GROUPBY_ATTR:
			CreateGrouperByAttr ( eType, tGroupByAttr, bGrouperUsesAttrs );
			break;

		default:
			return Err ( "invalid group-by mode (mode=%d)", m_tQuery.m_eGroupFunc );
	}

	m_dGroupColumns.Add ( { iGroupBy, bGrouperUsesAttrs } );
	return true;
}
// move expressions used in ORDER BY or WITHIN GROUP ORDER BY to presort phase
void QueueCreator_c::AssignOrderByToPresortStage ( const int * pAttrs, int iAttrCount )
{
	if ( !iAttrCount )
		return;

	assert ( pAttrs );
	assert ( m_pSorterSchema );

	CSphVector<int> dCur;

	// add valid attributes to processing list
	for ( int i=0; i<iAttrCount; ++i )
		if ( pAttrs[i]>=0 )
			dCur.Add ( pAttrs[i] );

	// collect columns which affect current expressions
	// NOTE: dCur may grow while we iterate (GetLength() is re-read each pass),
	// so transitively dependent columns get visited as well
	for ( int i=0; i<dCur.GetLength(); ++i )
	{
		const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr ( dCur[i] );
		if ( tCol.m_eStage>SPH_EVAL_PRESORT && tCol.m_pExpr )
			tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
	}

	// get rid of dupes
	dCur.Uniq();

	// fix up of attributes stages: anything scheduled for the final stage
	// but needed by the sort keys must be computed at presort instead
	for ( int iAttr : dCur )
	{
		if ( iAttr<0 )
			continue;

		auto & tCol = const_cast < CSphColumnInfo & > ( m_pSorterSchema->GetAttr ( iAttr ) );
		if ( tCol.m_eStage==SPH_EVAL_FINAL )
			tCol.m_eStage = SPH_EVAL_PRESORT;
	}
}
// expression that transform string pool base + offset -> ptr
// (wraps a blob-pool string attribute so the sorter can treat it as a packed ptr-attr)
class ExprSortStringAttrFixup_c : public BlobPool_c, public ISphExpr
{
public:
	explicit ExprSortStringAttrFixup_c ( const CSphAttrLocator & tLocator )
		: m_tLocator ( tLocator )
	{}

	// numeric evaluation is never used for this expression
	float Eval ( const CSphMatch & ) const override { assert ( 0 ); return 0.0f; }

	// fetch the blob value referenced by the locator and repack it as a ptr-attr
	const BYTE * StringEvalPacked ( const CSphMatch & tMatch ) const override
	{
		// our blob strings are not null-terminated!
		// we can either store nulls in .SPB or add them here
		return sphPackPtrAttr ( sphGetBlobAttr ( tMatch, m_tLocator, GetBlobPool() ) );
	}

	// remap the locator when the match moves between schemas
	void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override
	{
		sphFixupLocator ( m_tLocator, pOldSchema, pNewSchema );
	}

	// the only command this expression reacts to is the blob pool assignment
	void Command ( ESphExprCommand eCmd, void * pArg ) override
	{
		if ( eCmd==SPH_EXPR_SET_BLOB_POOL )
			SetBlobPool( (const BYTE*)pArg);
	}

	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) override
	{
		EXPR_CLASS_NAME_NOCHECK("ExprSortStringAttrFixup_c");
		uHash = sphFNV64 ( &m_tLocator, sizeof(m_tLocator), uHash );
		return CALC_DEP_HASHES();
	}

	ISphExpr * Clone() const final
	{
		return new ExprSortStringAttrFixup_c ( *this );
	}

public:
	CSphAttrLocator m_tLocator; ///< string attribute to fix

private:
	// private copy ctor for Clone(); copies only the locator
	// (blob pool is assigned to the clone separately via Command())
	ExprSortStringAttrFixup_c ( const ExprSortStringAttrFixup_c& rhs ) : m_tLocator ( rhs.m_tLocator ) {}
};
// expression that transform string pool base + offset -> ptr
// (renders a JSON field value into a freshly allocated plain string, so it can be sorted as a string)
class ExprSortJson2StringPtr_c : public BlobPool_c, public ISphExpr
{
public:
	ExprSortJson2StringPtr_c ( const CSphAttrLocator & tLocator, ISphExpr * pExpr )
		: m_tJsonCol ( tLocator )
		, m_pExpr ( pExpr )
	{
		if ( pExpr ) // adopt the expression
			pExpr->AddRef();
	}

	// StringEval() returns freshly allocated buffers which the caller must own
	bool IsDataPtrAttr () const final { return true; }

	// numeric evaluation is never used for this expression
	float Eval ( const CSphMatch & ) const override { assert ( 0 ); return 0.0f; }

	// evaluate the wrapped JSON expression and render its value as an allocated string;
	// stores the buffer into ppStr (ownership passes to the caller), returns its length
	int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const override
	{
		if ( !GetBlobPool() || !m_pExpr )
		{
			*ppStr = nullptr;
			return 0;
		}

		// the expression yields a packed (offset, type) value pointing into the blob pool
		uint64_t uPacked = m_pExpr->Int64Eval ( tMatch );
		const BYTE * pVal = GetBlobPool() + sphJsonUnpackOffset ( uPacked );
		ESphJsonType eJson = sphJsonUnpackType ( uPacked );

		CSphString sVal;

		// FIXME!!! make string length configurable for STRING and STRING_VECTOR to compare and allocate only Min(String.Length, CMP_LENGTH)
		switch ( eJson )
		{
		case JSON_INT32:
			sVal.SetSprintf ( "%d", sphJsonLoadInt ( &pVal ) );
			break;
		case JSON_INT64:
			sVal.SetSprintf ( INT64_FMT, sphJsonLoadBigint ( &pVal ) );
			break;
		case JSON_DOUBLE:
			sVal.SetSprintf ( "%f", sphQW2D ( sphJsonLoadBigint ( &pVal ) ) );
			break;
		case JSON_STRING:
		{
			int iLen = sphJsonUnpackInt ( &pVal );
			sVal.SetBinary ( (const char *)pVal, iLen );
			break;
		}
		case JSON_STRING_VECTOR:
		{
			// join all vector elements into one space-separated buffer
			int iTotalLen = sphJsonUnpackInt ( &pVal );
			int iCount = sphJsonUnpackInt ( &pVal );

			CSphFixedVector<BYTE> dBuf ( iTotalLen + 4 + iCount ); // data and tail GAP and space count
			BYTE * pDst = dBuf.Begin();

			// head element
			if ( iCount )
			{
				int iElemLen = sphJsonUnpackInt ( &pVal );
				memcpy ( pDst, pVal, iElemLen );
				pDst += iElemLen;
				pVal += iElemLen;
			}

			// tail elements separated by space
			for ( int i=1; i<iCount; i++ )
			{
				*pDst++ = ' ';
				int iElemLen = sphJsonUnpackInt ( &pVal );
				memcpy ( pDst, pVal, iElemLen );
				pDst += iElemLen;
				pVal += iElemLen;
			}

			int iStrLen = int ( pDst-dBuf.Begin() );
			// filling junk space
			while ( pDst<dBuf.Begin()+dBuf.GetLength() )
				*pDst++ = '\0';

			// hand the whole buffer over to the caller (this is a data-ptr attr)
			*ppStr = dBuf.LeakData();
			return iStrLen;
		}
		default:
			// any other JSON type: sVal stays untouched and an empty string is returned below
			break;
		}

		int iStriLen = sVal.Length();
		*ppStr = (const BYTE *)sVal.Leak();
		return iStriLen;
	}

	// remap both our locator and the wrapped expression on schema change
	void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override
	{
		sphFixupLocator ( m_tJsonCol, pOldSchema, pNewSchema );
		if ( m_pExpr )
			m_pExpr->FixupLocator ( pOldSchema, pNewSchema );
	}

	// propagate the blob pool both to ourselves and to the wrapped expression
	void Command ( ESphExprCommand eCmd, void * pArg ) override
	{
		if ( eCmd==SPH_EXPR_SET_BLOB_POOL )
		{
			SetBlobPool((const BYTE*)pArg);
			if ( m_pExpr )
				m_pExpr->Command ( eCmd, pArg );
		}
	}

	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) override
	{
		EXPR_CLASS_NAME_NOCHECK("ExprSortJson2StringPtr_c");
		CALC_CHILD_HASH(m_pExpr);
		// uHash = sphFNV64 ( &m_tJsonCol, sizeof ( m_tJsonCol ), uHash ); //< that is wrong! Locator may have padding uninitialized data, valgrind will warn!
		uHash = sphCalcLocatorHash ( m_tJsonCol, uHash ); //< that is right, only meaningful fields processed without padding.
		return CALC_DEP_HASHES();
	}

	ISphExpr * Clone() const final
	{
		return new ExprSortJson2StringPtr_c ( *this );
	}

private:
	CSphAttrLocator m_tJsonCol; ///< JSON attribute to fix
	ISphExprRefPtr_c m_pExpr;   ///< expression extracting the JSON field value

private:
	// private copy ctor for Clone(); deep-clones the wrapped expression,
	// blob pool is assigned to the clone separately via Command()
	ExprSortJson2StringPtr_c ( const ExprSortJson2StringPtr_c & rhs )
		: m_tJsonCol ( rhs.m_tJsonCol )
		, m_pExpr ( SafeClone (rhs.m_pExpr) )
	{}
};
// accessor for the magic prefix used to name internal (sorter-injected) attributes
const char * GetInternalAttrPrefix()
{
	return g_sIntAttrPrefix;
}
// check whether a column name denotes an internal sort-string attribute,
// i.e. starts with the internal attribute prefix
bool IsSortStringInternal ( const CSphString & sColumnName )
{
	assert ( sColumnName.cstr ());
	// sizeof()-1 is the compile-time length of the prefix array (sans terminating NUL)
	return ( strncmp ( sColumnName.cstr (), g_sIntAttrPrefix, sizeof ( g_sIntAttrPrefix )-1 )==0 );
}
// check whether a column name denotes an internal JSON group-by attribute,
// i.e. starts with the internal JSON prefix
bool IsSortJsonInternal ( const CSphString& sColumnName )
{
	assert ( sColumnName.cstr ());
	// sizeof()-1 is the compile-time length of the prefix array (sans terminating NUL)
	return ( strncmp ( sColumnName.cstr (), g_sIntJsonPrefix, sizeof ( g_sIntJsonPrefix )-1 )==0 );
}
// build the internal JSON attribute name: g_sIntJsonPrefix + "_" + column name;
// an empty input yields an empty result
CSphString SortJsonInternalSet ( const CSphString& sColumnName )
{
	CSphString sName;
	if ( !sColumnName.IsEmpty() )
		( StringBuilder_c () << g_sIntJsonPrefix << "_" << sColumnName ).MoveTo ( sName );
	return sName;
}
  4788. CSphGrouper * sphCreateGrouperString ( const CSphAttrLocator & tLoc, ESphCollation eCollation )
  4789. {
  4790. switch ( eCollation )
  4791. {
  4792. case SPH_COLLATION_UTF8_GENERAL_CI: return new CSphGrouperString<Utf8CIHash_fn>(tLoc);
  4793. case SPH_COLLATION_LIBC_CI: return new CSphGrouperString<LibcCIHash_fn>(tLoc);
  4794. case SPH_COLLATION_LIBC_CS: return new CSphGrouperString<LibcCSHash_fn>(tLoc);
  4795. default: return new CSphGrouperString<BinaryHash_fn>(tLoc);
  4796. }
  4797. }
  4798. static CSphGrouper * CreateGrouperStringExpr ( ISphExpr * pExpr, ESphCollation eCollation )
  4799. {
  4800. switch ( eCollation )
  4801. {
  4802. case SPH_COLLATION_UTF8_GENERAL_CI: return new GrouperStringExpr_T<Utf8CIHash_fn>(pExpr);
  4803. case SPH_COLLATION_LIBC_CI: return new GrouperStringExpr_T<LibcCIHash_fn>(pExpr);
  4804. case SPH_COLLATION_LIBC_CS: return new GrouperStringExpr_T<LibcCSHash_fn>(pExpr);
  4805. default: return new GrouperStringExpr_T<BinaryHash_fn>(pExpr);
  4806. }
  4807. }
// create a grouper over several group-by keys at once;
// the collation picks the string hash functor, and the columnar flag picks the template flavor
static CSphGrouper * sphCreateGrouperMulti ( const CSphVector<CSphColumnInfo> & dAttrs, VecRefPtrs_t<ISphExpr *> dJsonKeys, ESphCollation eCollation )
{
	// mixed (row-wise + columnar) key sets need the columnar-aware flavor;
	// an all-columnar key set gets a dedicated grouper instead
	bool bHaveColumnar = dAttrs.any_of ( []( auto & tAttr ){ return tAttr.IsColumnar() || tAttr.IsColumnarExpr(); } );
	bool bAllColumnar = dAttrs.all_of ( []( auto & tAttr ){ return tAttr.IsColumnar() || tAttr.IsColumnarExpr(); } );

	if ( bAllColumnar )
		return CreateGrouperColumnarMulti ( dAttrs, eCollation );

	// NOTE: std::move(dJsonKeys) appears in every branch, but only one branch ever runs
	switch ( eCollation )
	{
	case SPH_COLLATION_UTF8_GENERAL_CI:
		if ( bHaveColumnar )
			return new CSphGrouperMulti<Utf8CIHash_fn,true> ( dAttrs, std::move(dJsonKeys), eCollation );
		else
			return new CSphGrouperMulti<Utf8CIHash_fn,false> ( dAttrs, std::move(dJsonKeys), eCollation );

	case SPH_COLLATION_LIBC_CI:
		if ( bHaveColumnar )
			return new CSphGrouperMulti<LibcCIHash_fn,true> ( dAttrs, std::move(dJsonKeys), eCollation );
		else
			return new CSphGrouperMulti<LibcCIHash_fn,false> ( dAttrs, std::move(dJsonKeys), eCollation );

	case SPH_COLLATION_LIBC_CS:
		if ( bHaveColumnar )
			return new CSphGrouperMulti<LibcCSHash_fn,true> ( dAttrs, std::move(dJsonKeys), eCollation );
		else
			return new CSphGrouperMulti<LibcCSHash_fn,false> ( dAttrs, std::move(dJsonKeys), eCollation );

	default:
		if ( bHaveColumnar )
			return new CSphGrouperMulti<BinaryHash_fn,true> ( dAttrs, std::move(dJsonKeys), eCollation );
		else
			return new CSphGrouperMulti<BinaryHash_fn,false> ( dAttrs, std::move(dJsonKeys), eCollation );
	}
}
  4838. /////////////////////////
  4839. // SORTING QUEUE FACTORY
  4840. /////////////////////////
  4841. template < typename COMP >
  4842. static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bFactors )
  4843. {
  4844. if ( bKbuffer )
  4845. {
  4846. if ( bFactors )
  4847. return new CSphKbufferMatchQueue<COMP, true> ( iMaxMatches );
  4848. return new CSphKbufferMatchQueue<COMP, false> ( iMaxMatches );
  4849. }
  4850. if ( bFactors )
  4851. return new CSphMatchQueue<COMP, true> ( iMaxMatches );
  4852. return new CSphMatchQueue<COMP, false> ( iMaxMatches );
  4853. }
// dispatch from the runtime sort-function id to the matching comparator template;
// returns nullptr for unknown sort functions
static ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bFactors )
{
	switch ( eMatchFunc )
	{
	case FUNC_REL_DESC: return CreatePlainSorter<MatchRelevanceLt_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_ATTR_DESC: return CreatePlainSorter<MatchAttrLt_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_ATTR_ASC: return CreatePlainSorter<MatchAttrGt_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_TIMESEGS: return CreatePlainSorter<MatchTimeSegments_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_GENERIC1: return CreatePlainSorter<MatchGeneric1_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_GENERIC2: return CreatePlainSorter<MatchGeneric2_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_GENERIC3: return CreatePlainSorter<MatchGeneric3_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_GENERIC4: return CreatePlainSorter<MatchGeneric4_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_GENERIC5: return CreatePlainSorter<MatchGeneric5_fn> ( bKbuffer, iMaxMatches, bFactors );
	case FUNC_EXPR: return CreatePlainSorter<MatchExpr_fn> ( bKbuffer, iMaxMatches, bFactors );
	default: return nullptr;
	}
}
  4871. void QueueCreator_c::ExtraAddSortkeys ( const int * dAttrs )
  4872. {
  4873. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; ++i )
  4874. if ( dAttrs[i]>=0 )
  4875. m_hExtra.Add ( m_pSorterSchema->GetAttr ( dAttrs[i] ).m_sName );
  4876. }
// printf-style error reporter: formats the message into m_sError and always
// returns false, so call sites can simply write 'return Err(...)'
bool QueueCreator_c::Err ( const char * sFmt, ... ) const
{
	va_list ap;
	va_start ( ap, sFmt );
	m_sError.SetSprintfVa ( sFmt, ap );
	va_end ( ap );
	return false;
}
  4885. void QueueCreator_c::SelectStageForColumnarExpr ( CSphColumnInfo & tExprCol )
  4886. {
  4887. if ( !tExprCol.IsColumnarExpr() )
  4888. {
  4889. tExprCol.m_eStage = SPH_EVAL_PREFILTER;
  4890. return;
  4891. }
  4892. // columnar expressions are a special case
  4893. // it is sometimes faster to evaluate them in the filter than to evaluate the expression, store it in the match and then use it in the filter
  4894. // FIXME: add sorters?
  4895. int iRank = 0;
  4896. iRank += tExprCol.m_sName==m_tQuery.m_sGroupBy ? 1 : 0;
  4897. iRank += m_tQuery.m_dFilters.any_of ( [&tExprCol]( const CSphFilterSettings & tFilter ) { return tFilter.m_sAttrName==tExprCol.m_sName; } ) ? 1 : 0;
  4898. if ( iRank>1 )
  4899. tExprCol.m_eStage = SPH_EVAL_PREFILTER;
  4900. }
// expand a list of column ids with everything those columns transitively depend on;
// the vector may grow while we iterate, which is what resolves dependency chains
void QueueCreator_c::FetchDependencyChains ( IntVec_t & dDependentCols )
{
	ARRAY_FOREACH ( i, dDependentCols )
	{
		const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr ( dDependentCols[i] );

		// handle chains of dependencies (e.g. SELECT 1+attr f1, f1-1 f2 ... WHERE f2>5)
		if ( tCol.m_pExpr )
			tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
	}

	// the expansion above may have appended duplicates
	dDependentCols.Uniq();
}
  4912. void QueueCreator_c::PropagateEvalStage ( CSphColumnInfo & tExprCol, IntVec_t & dDependentCols )
  4913. {
  4914. bool bWeight = false;
  4915. for ( auto i : dDependentCols )
  4916. {
  4917. const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr(i);
  4918. bWeight |= tCol.m_bWeight;
  4919. }
  4920. if ( bWeight )
  4921. {
  4922. tExprCol.m_eStage = SPH_EVAL_PRESORT;
  4923. tExprCol.m_bWeight = true;
  4924. }
  4925. for ( auto i : dDependentCols )
  4926. {
  4927. auto & tDep = const_cast < CSphColumnInfo & > ( m_pSorterSchema->GetAttr(i) );
  4928. if ( tDep.m_eStage > tExprCol.m_eStage )
  4929. tDep.m_eStage = tExprCol.m_eStage;
  4930. }
  4931. }
// adjust the column type/width for its aggregate function and validate the combination;
// returns false (with the error in m_sError) on an unsupported setup
bool QueueCreator_c::SetupAggregateExpr ( CSphColumnInfo & tExprCol, const CSphString & sExpr, DWORD uQueryPackedFactorFlags )
{
	switch ( tExprCol.m_eAggrFunc )
	{
	case SPH_AGGR_AVG:
		// force AVG() to be computed in doubles
		tExprCol.m_eAttrType = SPH_ATTR_DOUBLE;
		tExprCol.m_tLocator.m_iBitCount = 64;
		break;

	case SPH_AGGR_CAT:
		// force GROUP_CONCAT() to be computed as strings
		tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
		tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
		break;

	case SPH_AGGR_SUM:
		// widen SUM() accumulators: bool -> int, int -> bigint
		if ( tExprCol.m_eAttrType==SPH_ATTR_BOOL )
		{
			tExprCol.m_eAttrType = SPH_ATTR_INTEGER;
			tExprCol.m_tLocator.m_iBitCount = 32;
		} else if ( tExprCol.m_eAttrType==SPH_ATTR_INTEGER )
		{
			tExprCol.m_eAttrType = SPH_ATTR_BIGINT;
			tExprCol.m_tLocator.m_iBitCount = 64;
		}
		break;

	default:
		break;
	}

	// force explicit type conversion for JSON attributes
	if ( tExprCol.m_eAggrFunc!=SPH_AGGR_NONE && tExprCol.m_eAttrType==SPH_ATTR_JSON_FIELD )
		return Err ( "ambiguous attribute type '%s', use INTEGER(), BIGINT() or DOUBLE() conversion functions", sExpr.cstr() );

	// ranker factors requested in JSON form override the result type
	if ( uQueryPackedFactorFlags & SPH_FACTOR_JSON_OUT )
		tExprCol.m_eAttrType = SPH_ATTR_FACTORS_JSON;

	return true;
}
// check whether this aggregate can run directly over a columnar attribute;
// returns true when the aggregate gets the dedicated columnar code path
bool QueueCreator_c::SetupColumnarAggregates ( CSphColumnInfo & tExprCol )
{
	CSphVector<int> dDependentCols;
	tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
	FetchDependencyChains ( dDependentCols );

	// no dependencies: columnar-capable only if the aggregated expression itself is columnar
	if ( !dDependentCols.GetLength() )
		return tExprCol.IsColumnarExpr();

	// a single columnar-expression dependency can be aggregated directly
	if ( dDependentCols.GetLength()==1 )
	{
		const CSphColumnInfo & tColumnarAttr = m_pSorterSchema->GetAttr ( dDependentCols[0] );
		if ( tColumnarAttr.IsColumnarExpr() )
		{
			CSphString sColumnarCol;
			tColumnarAttr.m_pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &sColumnarCol );

			// let aggregate expression know that it is working with that columnar attribute
			tExprCol.m_pExpr->Command ( SPH_EXPR_SET_COLUMNAR_COL, &sColumnarCol );
			return true;
		}
	}

	// multiple dependencies, or a non-columnar one: no columnar fast path
	return false;
}
  4988. void QueueCreator_c::UpdateAggregateDependencies ( CSphColumnInfo & tExprCol )
  4989. {
  4990. /// update aggregate dependencies (e.g. SELECT 1+attr f1, min(f1), ...)
  4991. CSphVector<int> dDependentCols;
  4992. tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
  4993. FetchDependencyChains ( dDependentCols );
  4994. ARRAY_FOREACH ( j, dDependentCols )
  4995. {
  4996. auto & tDep = const_cast < CSphColumnInfo & > ( m_pSorterSchema->GetAttr ( dDependentCols[j] ) );
  4997. if ( tDep.m_eStage>tExprCol.m_eStage )
  4998. tDep.m_eStage = tExprCol.m_eStage;
  4999. }
  5000. }
// process one select-list item: plain attributes pass through, everything else
// is parsed as an expression and added to the sorter schema at the proper stage;
// returns false (with m_sError set) on failure
bool QueueCreator_c::ParseQueryItem ( const CSphQueryItem & tItem )
{
	assert ( m_pSorterSchema );
	const CSphString & sExpr = tItem.m_sExpr;
	bool bIsCount = IsCount(sExpr);
	m_bHasCount |= bIsCount;

	// 'SELECT *' marks every index attribute as known/queried
	if ( sExpr=="*" )
	{
		m_bHaveStar = true;
		for ( int i=0; i<m_tSettings.m_tSchema.GetAttrsCount(); ++i )
		{
			m_hQueryDups.Add ( m_tSettings.m_tSchema.GetAttr(i).m_sName );
			m_hQueryColumns.Add ( m_tSettings.m_tSchema.GetAttr(i).m_sName );
		}
	}

	// for now, just always pass "plain" attrs from index to sorter; they will be filtered on searchd level
	int iAttrIdx = m_tSettings.m_tSchema.GetAttrIndex ( sExpr.cstr() );
	bool bColumnar = iAttrIdx>=0 && m_tSettings.m_tSchema.GetAttr(iAttrIdx).IsColumnar();

	// a "plain" attr: exists in the index schema, is not aggregated, not columnar,
	// and is not renamed by an alias
	bool bPlainAttr = ( ( sExpr=="*" || ( iAttrIdx>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && !bColumnar ) ) &&
		( tItem.m_sAlias.IsEmpty() || tItem.m_sAlias==tItem.m_sExpr ) );

	if ( iAttrIdx>=0 )
	{
		ESphAttr eAttr = m_tSettings.m_tSchema.GetAttr ( iAttrIdx ).m_eAttrType;
		if ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_STRINGPTR
			|| eAttr==SPH_ATTR_UINT32SET || eAttr==SPH_ATTR_INT64SET )
		{
			// strings and MVAs cannot be aggregated
			if ( tItem.m_eAggrFunc!=SPH_AGGR_NONE )
				return Err ( "can not aggregate non-scalar attribute '%s'", tItem.m_sExpr.cstr() );

			// a string attr still counts as plain unless some other item aliases over its name
			if ( !bPlainAttr && !bColumnar && ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_STRINGPTR ) )
			{
				bPlainAttr = true;
				for ( const auto & i : m_tQuery.m_dItems )
					if ( sExpr==i.m_sAlias )
						bPlainAttr = false;
			}
		}
	}

	// plain attrs, the group-by key and COUNT(*) need no expression machinery
	if ( bPlainAttr || IsGroupby ( sExpr ) || bIsCount )
	{
		if ( sExpr!="*" && !tItem.m_sAlias.IsEmpty() )
		{
			m_hQueryDups.Add ( tItem.m_sAlias );
			if ( bPlainAttr )
				m_hQueryColumns.Add ( tItem.m_sExpr );
		}
		m_bHasGroupByExpr = IsGroupby ( sExpr );
		return true;
	}

	// not an attribute? must be an expression, and must be aliased by query parser
	assert ( !tItem.m_sAlias.IsEmpty() );

	// tricky part
	// we might be fed with precomputed matches, but it's all or nothing
	// the incoming match either does not have anything computed, or it has everything
	int iSorterAttr = m_pSorterSchema->GetAttrIndex ( tItem.m_sAlias.cstr() );
	if ( iSorterAttr>=0 )
	{
		if ( m_hQueryDups[tItem.m_sAlias] )
		{
			if ( bColumnar ) // we might have several similar aliases for columnar attributes (and they are not plain attrs but expressions)
				return true;
			else
				return Err ( "alias '%s' must be unique (conflicts with another alias)", tItem.m_sAlias.cstr() );
		}
	}

	// a new and shiny expression, lets parse
	CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
	DWORD uQueryPackedFactorFlags = SPH_FACTOR_DISABLE;
	bool bHasZonespanlist = false;
	bool bExprsNeedDocids = false;

	// out-params below are filled by the expression parser
	ExprParseArgs_t tExprParseArgs;
	tExprParseArgs.m_pAttrType = &tExprCol.m_eAttrType;
	tExprParseArgs.m_pUsesWeight = &tExprCol.m_bWeight;
	tExprParseArgs.m_pProfiler = m_tSettings.m_pProfiler;
	tExprParseArgs.m_eCollation = m_tQuery.m_eCollation;
	tExprParseArgs.m_pHook = m_tSettings.m_pHook;
	tExprParseArgs.m_pZonespanlist = &bHasZonespanlist;
	tExprParseArgs.m_pPackedFactorsFlags = &uQueryPackedFactorFlags;
	tExprParseArgs.m_pEvalStage = &tExprCol.m_eStage;
	tExprParseArgs.m_pStoredField = &tExprCol.m_uFieldFlags;
	tExprParseArgs.m_pNeedDocIds = &bExprsNeedDocids;

	// tricky bit
	// GROUP_CONCAT() adds an implicit TO_STRING() conversion on top of its argument
	// and then the aggregate operation simply concatenates strings as matches arrive
	// ideally, we would instead pass ownership of the expression to G_C() implementation
	// and also the original expression type, and let the string conversion happen in G_C() itself
	// but that ideal route seems somewhat more complicated in the current architecture
	if ( tItem.m_eAggrFunc==SPH_AGGR_CAT )
	{
		CSphString sExpr2;
		sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
		tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), *m_pSorterSchema, m_sError, tExprParseArgs );
	} else
	{
		tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), *m_pSorterSchema, m_sError, tExprParseArgs );
	}

	// accumulate parser feedback into the creator state
	m_uPackedFactorFlags |= uQueryPackedFactorFlags;
	m_bZonespanlist |= bHasZonespanlist;
	m_bExprsNeedDocids |= bExprsNeedDocids;
	tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
	tExprCol.m_iIndex = iSorterAttr>= 0 ? m_pSorterSchema->GetAttrIndexOriginal ( tItem.m_sAlias.cstr() ) : -1;

	if ( !tExprCol.m_pExpr )
		return Err ( "parse error: %s", m_sError.cstr() );

	// remove original column
	if ( iSorterAttr>=0 )
		m_pSorterSchema->RemoveStaticAttr(iSorterAttr);

	if ( !SetupAggregateExpr ( tExprCol, tItem.m_sExpr, uQueryPackedFactorFlags ) )
		return false;

	// postpone aggregates, add non-aggregates
	if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
	{
		// is this expression used in filter?
		// OPTIMIZE? hash filters and do hash lookups?
		if ( tExprCol.m_eAttrType!=SPH_ATTR_JSON_FIELD )
			ARRAY_FOREACH ( i, m_tQuery.m_dFilters )
				if ( m_tQuery.m_dFilters[i].m_sAttrName==tExprCol.m_sName )
				{
					// is this a hack?
					// m_bWeight is computed after EarlyReject() get called
					// that means we can't evaluate expressions with WEIGHT() in prefilter phase
					if ( tExprCol.m_bWeight )
					{
						tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
						break;
					}

					// so we are about to add a filter condition,
					// but it might depend on some preceding columns (e.g. SELECT 1+attr f1 ... WHERE f1>5)
					// lets detect those and move them to prefilter \ presort phase too
					CSphVector<int> dDependentCols;
					tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
					SelectStageForColumnarExpr(tExprCol);
					FetchDependencyChains ( dDependentCols );
					PropagateEvalStage ( tExprCol, dDependentCols );
					break;
				}

		// add it!
		// NOTE, "final" stage might need to be fixed up later
		// we'll do that when parsing sorting clause
		m_pSorterSchema->AddAttr ( tExprCol, true );
	} else // some aggregate
	{
		bool bColumnarAggregate = SetupColumnarAggregates(tExprCol);

		// columnar aggregates have their own code path; no need to calculate them in presort
		tExprCol.m_eStage = bColumnarAggregate ? SPH_EVAL_SORTER : SPH_EVAL_PRESORT;
		m_pSorterSchema->AddAttr ( tExprCol, true );
		m_hExtra.Add ( tExprCol.m_sName );

		if ( !bColumnarAggregate )
			UpdateAggregateDependencies ( tExprCol );
	}

	m_hQueryDups.Add ( tExprCol.m_sName );
	m_hQueryColumns.Add ( tExprCol.m_sName );

	// need to add all dependent columns for post limit expressions
	if ( tExprCol.m_eStage==SPH_EVAL_POSTLIMIT && tExprCol.m_pExpr )
	{
		CSphVector<int> dCur;
		tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );

		// expand transitive dependencies (dCur may grow while we iterate)
		ARRAY_FOREACH ( j, dCur )
		{
			const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr ( dCur[j] );
			if ( tCol.m_pExpr )
				tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
		}
		dCur.Uniq ();

		ARRAY_FOREACH ( j, dCur )
		{
			const CSphColumnInfo & tDep = m_pSorterSchema->GetAttr ( dCur[j] );
			m_hQueryColumns.Add ( tDep.m_sName );
		}
	}

	return true;
}
  5171. bool QueueCreator_c::ReplaceWithColumnarItem ( const CSphString & sAttr, ESphEvalStage eStage )
  5172. {
  5173. const CSphColumnInfo * pAttr = m_pSorterSchema->GetAttr ( sAttr.cstr() );
  5174. if ( !pAttr->IsColumnar() )
  5175. return true;
  5176. m_hQueryDups.Delete(sAttr);
  5177. CSphQueryItem tItem;
  5178. tItem.m_sExpr = tItem.m_sAlias = sAttr;
  5179. if ( !ParseQueryItem ( tItem ) )
  5180. return false;
  5181. // force stage
  5182. const CSphColumnInfo * pNewAttr = m_pSorterSchema->GetAttr ( sAttr.cstr() );
  5183. const_cast<CSphColumnInfo *>(pNewAttr)->m_eStage = Min ( pNewAttr->m_eStage, eStage );
  5184. return true;
  5185. }
// Test for @geodist and setup, if any
bool QueueCreator_c::MaybeAddGeodistColumn ()
{
	// nothing to do without a geo anchor, or if @geodist is already in the schema
	if ( !m_tQuery.m_bGeoAnchor || m_pSorterSchema->GetAttrIndex ( "@geodist" )>=0 )
		return true;

	// replace columnar lat/lon with expressions before adding geodist
	if ( !ReplaceWithColumnarItem ( m_tQuery.m_sGeoLatAttr, SPH_EVAL_PREFILTER ) ) return false;
	if ( !ReplaceWithColumnarItem ( m_tQuery.m_sGeoLongAttr, SPH_EVAL_PREFILTER ) ) return false;

	auto pExpr = new ExprGeodist_t();
	if ( !pExpr->Setup ( &m_tQuery, *m_pSorterSchema, m_sError ))
	{
		// setup failed; drop our reference before bailing out
		pExpr->Release ();
		return false;
	}

	CSphColumnInfo tCol ( "@geodist", SPH_ATTR_FLOAT );
	tCol.m_pExpr = pExpr; // takes ownership, no need to for explicit pExpr release
	tCol.m_eStage = SPH_EVAL_PREFILTER; // OPTIMIZE? actual stage depends on usage
	m_pSorterSchema->AddAttr ( tCol, true );
	m_hExtra.Add ( tCol.m_sName );
	m_hQueryAttrs.Add ( tCol.m_sName );

	return true;
}
  5208. // Test for @expr and setup, if any
  5209. bool QueueCreator_c::MaybeAddExprColumn ()
  5210. {
  5211. if ( m_tQuery.m_eSort!=SPH_SORT_EXPR || m_pSorterSchema->GetAttrIndex ( "@expr" )>=0 )
  5212. return true;
  5213. CSphColumnInfo tCol ( "@expr", SPH_ATTR_FLOAT ); // enforce float type for backwards compatibility
  5214. // (i.e. too lazy to fix those tests right now)
  5215. bool bHasZonespanlist;
  5216. ExprParseArgs_t tExprArgs;
  5217. tExprArgs.m_pProfiler = m_tSettings.m_pProfiler;
  5218. tExprArgs.m_eCollation = m_tQuery.m_eCollation;
  5219. tExprArgs.m_pZonespanlist = &bHasZonespanlist;
  5220. tCol.m_pExpr = sphExprParse ( m_tQuery.m_sSortBy.cstr (), *m_pSorterSchema, m_sError, tExprArgs );
  5221. if ( !tCol.m_pExpr )
  5222. return false;
  5223. m_bZonespanlist |= bHasZonespanlist;
  5224. tCol.m_eStage = SPH_EVAL_PRESORT;
  5225. m_pSorterSchema->AddAttr ( tCol, true );
  5226. m_hQueryAttrs.Add ( tCol.m_sName );
  5227. return true;
  5228. }
  5229. bool QueueCreator_c::AddStoredFieldExpressions()
  5230. {
  5231. for ( int i = 0; i<m_tSettings.m_tSchema.GetFieldsCount(); i++ )
  5232. {
  5233. const CSphColumnInfo & tField = m_tSettings.m_tSchema.GetField(i);
  5234. if ( !(tField.m_uFieldFlags & CSphColumnInfo::FIELD_STORED) )
  5235. continue;
  5236. CSphQueryItem tItem;
  5237. tItem.m_sExpr = tItem.m_sAlias = tField.m_sName;
  5238. if ( !ParseQueryItem ( tItem ) )
  5239. return false;
  5240. }
  5241. return true;
  5242. }
  5243. bool QueueCreator_c::AddColumnarAttributeExpressions()
  5244. {
  5245. for ( int i = 0; i<m_tSettings.m_tSchema.GetAttrsCount(); i++ )
  5246. {
  5247. const CSphColumnInfo & tAttr = m_tSettings.m_tSchema.GetAttr(i);
  5248. const CSphColumnInfo * pSorterAttr = m_pSorterSchema->GetAttr ( tAttr.m_sName.cstr() );
  5249. if ( !tAttr.IsColumnar() || ( pSorterAttr && !pSorterAttr->IsColumnar() ) )
  5250. continue;
  5251. m_hQueryDups.Delete ( tAttr.m_sName );
  5252. CSphQueryItem tItem;
  5253. tItem.m_sExpr = tItem.m_sAlias = tAttr.m_sName;
  5254. if ( !ParseQueryItem ( tItem ) )
  5255. return false;
  5256. }
  5257. return true;
  5258. }
  5259. // Add computed items
  5260. bool QueueCreator_c::MaybeAddExpressionsFromSelectList ()
  5261. {
  5262. // expressions from select items
  5263. if ( !m_tSettings.m_bComputeItems )
  5264. return true;
  5265. if ( !m_tQuery.m_dItems.all_of ( [&] ( const CSphQueryItem & v ) { return ParseQueryItem ( v ); } ))
  5266. return false;
  5267. if ( m_bHaveStar )
  5268. {
  5269. if ( !AddColumnarAttributeExpressions() )
  5270. return false;
  5271. if ( !AddStoredFieldExpressions() )
  5272. return false;
  5273. }
  5274. return true;
  5275. }
  5276. bool QueueCreator_c::AddExpressionsForUpdates()
  5277. {
  5278. if ( !m_tSettings.m_pCollection )
  5279. return true;
  5280. const CSphColumnInfo * pOldDocId = m_pSorterSchema->GetAttr ( sphGetDocidName() );
  5281. if ( !pOldDocId->IsColumnar() && !pOldDocId->IsColumnarExpr() )
  5282. return true;
  5283. if ( pOldDocId->IsColumnar() )
  5284. {
  5285. // add columnar id expressions to update queue. otherwise we won't be able to fetch docids which are needed to run updates/deletes
  5286. CSphQueryItem tItem;
  5287. tItem.m_sExpr = tItem.m_sAlias = sphGetDocidName();
  5288. if ( !ParseQueryItem ( tItem ) )
  5289. return false;
  5290. }
  5291. auto * pDocId = const_cast<CSphColumnInfo *> ( m_pSorterSchema->GetAttr ( sphGetDocidName() ) );
  5292. assert(pDocId);
  5293. pDocId->m_eStage = SPH_EVAL_PRESORT; // update/delete queues don't have real Finalize(), so just evaluate it at presort stage
  5294. return true;
  5295. }
// Add the magic group-by columns (@groupby, @count, optional @distinct and the
// json @groupbystr alias) to the sorter schema, then cache their locators in
// m_tGroupSorterSettings. Returns false with m_sError set on schema inconsistency.
bool QueueCreator_c::MaybeAddGroupbyMagic ( bool bGotDistinct )
{
	CSphString sJsonGroupBy;

	// now let's add @groupby etc. if needed
	if ( m_bGotGroupby && m_pSorterSchema->GetAttrIndex ( "@groupby" )<0 )
	{
		ESphAttr eGroupByResult = ( !m_tGroupSorterSettings.m_bImplicit )
			? m_tGroupSorterSettings.m_pGrouper->GetResultType ()
			: SPH_ATTR_INTEGER; // implicit do not have grouper

		// all FACET group by should be the widest possible type
		if ( m_tQuery.m_bFacet || m_tQuery.m_bFacetHead || m_bMulti )
			eGroupByResult = SPH_ATTR_BIGINT;

		CSphColumnInfo tGroupby ( "@groupby", eGroupByResult );
		CSphColumnInfo tCount ( "@count", SPH_ATTR_BIGINT );

		// magic columns are produced by the sorter itself
		tGroupby.m_eStage = SPH_EVAL_SORTER;
		tCount.m_eStage = SPH_EVAL_SORTER;

		// add to the sorter schema and register as a query column
		auto AddColumn = [this] ( const CSphColumnInfo & tCol )
		{
			m_pSorterSchema->AddAttr ( tCol, true );
			m_hQueryColumns.Add ( tCol.m_sName );
		};

		AddColumn ( tGroupby );
		AddColumn ( tCount );

		if ( bGotDistinct )
		{
			CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
			tDistinct.m_eStage = SPH_EVAL_SORTER;
			AddColumn ( tDistinct );
		}

		// add @groupbystr last in case we need to skip it on sending (like @int_attr_*)
		if ( m_tGroupSorterSettings.m_bJson )
		{
			sJsonGroupBy = SortJsonInternalSet ( m_tQuery.m_sGroupBy );
			if ( !m_pSorterSchema->GetAttr ( sJsonGroupBy.cstr() ) )
			{
				CSphColumnInfo tGroupbyStr ( sJsonGroupBy.cstr(), SPH_ATTR_JSON_FIELD );
				tGroupbyStr.m_eStage = SPH_EVAL_SORTER;
				AddColumn ( tGroupbyStr );
			}
		}
	}

// validate the magic columns and cache their locators
#define LOC_CHECK( _cond, _msg ) if (!(_cond)) { m_sError = "invalid schema: " _msg; return false; }

	int iGroupby = m_pSorterSchema->GetAttrIndex ( "@groupby" );
	if ( iGroupby>=0 )
	{
		m_tGroupSorterSettings.m_bDistinct = bGotDistinct;
		m_tGroupSorterSettings.m_tLocGroupby = m_pSorterSchema->GetAttr ( iGroupby ).m_tLocator;
		LOC_CHECK ( m_tGroupSorterSettings.m_tLocGroupby.m_bDynamic, "@groupby must be dynamic" );

		int iCount = m_pSorterSchema->GetAttrIndex ( "@count" );
		LOC_CHECK ( iCount>=0, "missing @count" );

		m_tGroupSorterSettings.m_tLocCount = m_pSorterSchema->GetAttr ( iCount ).m_tLocator;
		LOC_CHECK ( m_tGroupSorterSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );

		int iDistinct = m_pSorterSchema->GetAttrIndex ( "@distinct" );
		if ( bGotDistinct )
		{
			LOC_CHECK ( iDistinct>=0, "missing @distinct" );
			m_tGroupSorterSettings.m_tLocDistinct = m_pSorterSchema->GetAttr ( iDistinct ).m_tLocator;
			LOC_CHECK ( m_tGroupSorterSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
		}
		else
			LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );

		// json group-by may carry a string alias column; cache its locator too
		int iGroupbyStr = m_pSorterSchema->GetAttrIndex ( sJsonGroupBy.cstr() );
		if ( iGroupbyStr>=0 )
			m_tGroupSorterSettings.m_tLocGroupbyStr = m_pSorterSchema->GetAttr ( iGroupbyStr ).m_tLocator;
	}

	if ( m_bHasCount )
		LOC_CHECK ( m_pSorterSchema->GetAttrIndex ( "@count" )>=0, "Count(*) or @count is queried, but not available in the schema" );

#undef LOC_CHECK
	return true;
}
  5366. bool QueueCreator_c::AddKNNDistColumn()
  5367. {
  5368. if ( m_tQuery.m_sKNNAttr.IsEmpty() || m_pSorterSchema->GetAttrIndex ( GetKnnDistAttrName() )>=0 )
  5369. return true;
  5370. auto pAttr = m_pSorterSchema->GetAttr ( m_tQuery.m_sKNNAttr.cstr() );
  5371. if ( !pAttr )
  5372. {
  5373. m_sError.SetSprintf ( "requested KNN search attribute '%s' not found", m_tQuery.m_sKNNAttr.cstr() );
  5374. return false;
  5375. }
  5376. if ( !pAttr->IsIndexedKNN() )
  5377. {
  5378. m_sError.SetSprintf ( "KNN index not enabled for attribute '%s'", m_tQuery.m_sKNNAttr.cstr() );
  5379. return false;
  5380. }
  5381. if ( pAttr->m_tKNN.m_iDims!=m_tQuery.m_dKNNVec.GetLength() )
  5382. {
  5383. m_sError.SetSprintf ( "KNN index '%s' requires a vector of %d entries; %d entries specified", m_tQuery.m_sKNNAttr.cstr(), pAttr->m_tKNN.m_iDims, m_tQuery.m_dKNNVec.GetLength() );
  5384. return false;
  5385. }
  5386. CSphColumnInfo tKNNDist ( GetKnnDistAttrName(), SPH_ATTR_FLOAT );
  5387. tKNNDist.m_eStage = SPH_EVAL_PRESORT;
  5388. tKNNDist.m_pExpr = CreateExpr_KNNDist ( m_tQuery.m_dKNNVec, *pAttr );
  5389. m_pSorterSchema->AddAttr ( tKNNDist, true );
  5390. m_hQueryColumns.Add ( tKNNDist.m_sName );
  5391. return true;
  5392. }
  5393. bool QueueCreator_c::CheckHavingConstraints () const
  5394. {
  5395. if ( m_tSettings.m_pAggrFilter && !m_tSettings.m_pAggrFilter->m_sAttrName.IsEmpty () )
  5396. {
  5397. if ( !m_bGotGroupby )
  5398. return Err ( "can not use HAVING without GROUP BY" );
  5399. // should be column named at group by, or it's alias or aggregate
  5400. const CSphString & sHaving = m_tSettings.m_pAggrFilter->m_sAttrName;
  5401. if ( !IsGroupbyMagic ( sHaving ) )
  5402. {
  5403. bool bValidHaving = false;
  5404. for ( const CSphQueryItem & tItem : m_tQuery.m_dItems )
  5405. {
  5406. if ( tItem.m_sAlias!=sHaving )
  5407. continue;
  5408. bValidHaving = ( IsGroupbyMagic ( tItem.m_sExpr ) || tItem.m_eAggrFunc!=SPH_AGGR_NONE );
  5409. break;
  5410. }
  5411. if ( !bValidHaving )
  5412. return Err ( "can not use HAVING with attribute not related to GROUP BY" );
  5413. }
  5414. }
  5415. return true;
  5416. }
  5417. void QueueCreator_c::SetupRemapColJson ( CSphColumnInfo & tRemapCol, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, int iStateAttr )
  5418. {
  5419. bool bFunc = dExtraExprs[iStateAttr].m_tKey.m_uMask==0;
  5420. tRemapCol.m_eStage = SPH_EVAL_PRESORT;
  5421. if ( bFunc )
  5422. {
  5423. tRemapCol.m_pExpr = dExtraExprs[iStateAttr].m_pExpr;
  5424. tRemapCol.m_eAttrType = dExtraExprs[iStateAttr].m_eType;
  5425. tState.m_eKeypart[iStateAttr] = Attr2Keypart ( tRemapCol.m_eAttrType );
  5426. }
  5427. else
  5428. tRemapCol.m_pExpr = new ExprSortJson2StringPtr_c ( tState.m_tLocator[iStateAttr], dExtraExprs[iStateAttr].m_pExpr );
  5429. }
  5430. const CSphColumnInfo * QueueCreator_c::GetGroupbyStr ( int iAttr, int iNumOldAttrs ) const
  5431. {
  5432. assert ( m_pSorterSchema );
  5433. auto & tSorterSchema = *m_pSorterSchema;
  5434. if ( m_tSettings.m_bComputeItems && iAttr>=0 && iAttr<iNumOldAttrs && tSorterSchema.GetAttr(iAttr).m_sName=="@groupby" && m_dGroupColumns.GetLength() )
  5435. {
  5436. // FIXME!!! add support of multi group by
  5437. const CSphColumnInfo & tGroupCol = tSorterSchema.GetAttr ( m_dGroupColumns[0].first );
  5438. if ( tGroupCol.m_eAttrType==SPH_ATTR_STRING || tGroupCol.m_eAttrType==SPH_ATTR_STRINGPTR )
  5439. return &tGroupCol;
  5440. }
  5441. return nullptr;
  5442. }
// When sorting uses the same string attribute that is used for grouping,
// retarget the sort keypart at a string-pointer column evaluated at presort.
void QueueCreator_c::ReplaceGroupbyStrWithExprs ( CSphMatchComparatorState & tState, int iNumOldAttrs )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		// only act on sort keys that refer to @groupby over a string column
		const CSphColumnInfo * pGroupStrBase = GetGroupbyStr ( tState.m_dAttrs[i], iNumOldAttrs );
		if ( !pGroupStrBase )
			continue;

		assert ( tState.m_dAttrs[i]>=0 && tState.m_dAttrs[i]<iNumOldAttrs );

		int iRemap = -1;
		if ( pGroupStrBase->m_eAttrType==SPH_ATTR_STRINGPTR )
		{
			// grouping by (columnar) string; and the same string is used in sorting
			// correct the locator and change the evaluation stage to PRESORT
			iRemap = tSorterSchema.GetAttrIndex ( pGroupStrBase->m_sName.cstr() );
			assert ( iRemap>=0 );

			const CSphColumnInfo & tAttr = tSorterSchema.GetAttr(iRemap);
			const_cast<CSphColumnInfo &>(tAttr).m_eStage = SPH_EVAL_PRESORT;
		}
		else if ( !pGroupStrBase->IsColumnar() )
		{
			// row-wise string: reuse or add a hidden @int_attr_* fixup column
			CSphString sRemapCol;
			sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, pGroupStrBase->m_sName.cstr() );
			iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );

			if ( iRemap==-1 )
			{
				CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_STRINGPTR );
				tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( pGroupStrBase->m_tLocator );
				tRemapCol.m_eStage = SPH_EVAL_PRESORT;
				iRemap = tSorterSchema.GetAttrsCount();
				tSorterSchema.AddAttr ( tRemapCol, true );
			}
		}

		// point the comparator at the remapped column and mark this key done
		if ( iRemap!=-1 )
		{
			tState.m_eKeypart[i] = SPH_KEYPART_STRINGPTR;
			tState.m_tLocator[i] = tSorterSchema.GetAttr(iRemap).m_tLocator;
			tState.m_dAttrs[i] = iRemap;
			tState.m_dRemapped.BitSet ( i );
		}
	}
}
// Replace static string sort keys with string-pointer expression columns:
// columnar strings get a fetch expression; row-wise ones an @int_attr_* fixup.
void QueueCreator_c::ReplaceStaticStringsWithExprs ( CSphMatchComparatorState & tState )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		// skip keys already handled by an earlier remap pass
		if ( tState.m_dRemapped.BitGet ( i ) )
			continue;

		if ( tState.m_eKeypart[i]!=SPH_KEYPART_STRING )
			continue;

		int iRemap = -1;
		int iAttrId = tState.m_dAttrs[i];
		const CSphColumnInfo & tAttr = tSorterSchema.GetAttr(iAttrId);
		if ( tAttr.IsColumnar() )
		{
			// swap the columnar attribute for a same-named stringptr expression column
			CSphString sAttrName = tAttr.m_sName;
			tSorterSchema.RemoveStaticAttr(iAttrId);
			CSphColumnInfo tRemapCol ( sAttrName.cstr(), SPH_ATTR_STRINGPTR );
			tRemapCol.m_eStage = SPH_EVAL_PRESORT;
			tRemapCol.m_pExpr = CreateExpr_GetColumnarString ( sAttrName, tAttr.m_uAttrFlags & CSphColumnInfo::ATTR_STORED );
			tSorterSchema.AddAttr ( tRemapCol, true );
			iRemap = tSorterSchema.GetAttrIndex ( sAttrName.cstr() );
		}
		else
		{
			// row-wise string: reuse or add a hidden @int_attr_* fixup column
			CSphString sRemapCol;
			sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, tSorterSchema.GetAttr(iAttrId).m_sName.cstr() );
			iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );

			if ( iRemap==-1 )
			{
				CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_STRINGPTR );
				tRemapCol.m_eStage = SPH_EVAL_PRESORT;
				tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( tState.m_tLocator[i] );
				iRemap = tSorterSchema.GetAttrsCount();
				tSorterSchema.AddAttr ( tRemapCol, true );
			}
		}

		// retarget the comparator at the new stringptr column
		tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
		tState.m_dAttrs[i] = iRemap;
		tState.m_eKeypart[i] = SPH_KEYPART_STRINGPTR;
		tState.m_dRemapped.BitSet ( i );
	}
}
// Replace json-keyed sort keys with hidden @int_attr_* expression columns.
void QueueCreator_c::ReplaceJsonWithExprs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		// skip keys already handled by an earlier remap pass
		if ( tState.m_dRemapped.BitGet ( i ) )
			continue;

		// only sort keys carrying a json key (e.g. j.field) need remapping here
		if ( dExtraExprs[i].m_tKey.m_sKey.IsEmpty() )
			continue;

		CSphString sRemapCol;
		sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, dExtraExprs[i].m_tKey.m_sKey.cstr() );

		int iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
		if ( iRemap==-1 )
		{
			// the remap column may have been added under a lowercased name
			CSphString sRemapLowercase = sRemapCol;
			sRemapLowercase.ToLower();
			iRemap = tSorterSchema.GetAttrIndex ( sRemapLowercase.cstr() );
		}

		if ( iRemap==-1 )
		{
			CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_STRINGPTR );
			SetupRemapColJson ( tRemapCol, tState, dExtraExprs, i );
			iRemap = tSorterSchema.GetAttrsCount();
			tSorterSchema.AddAttr ( tRemapCol, true );
		}

		// retarget the comparator at the remap column and mark this key done
		tState.m_tLocator[i] = tSorterSchema.GetAttr(iRemap).m_tLocator;
		tState.m_dAttrs[i] = iRemap;
		tState.m_dRemapped.BitSet ( i );
	}
}
// Materialize columnar sort expressions as schema attributes evaluated at presort.
void QueueCreator_c::AddColumnarExprsAsAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		// skip keys already handled by an earlier remap pass
		if ( tState.m_dRemapped.BitGet ( i ) )
			continue;

		ISphExpr * pExpr = dExtraExprs[i].m_pExpr;
		if ( !pExpr || !pExpr->IsColumnar() )
			continue;

		const CSphString & sAttrName = tSorterSchema.GetAttr ( tState.m_dAttrs[i] ).m_sName;
		CSphColumnInfo tRemapCol ( sAttrName.cstr(), dExtraExprs[i].m_eType );
		tRemapCol.m_eStage = SPH_EVAL_PRESORT;
		tRemapCol.m_pExpr = pExpr;
		tRemapCol.m_pExpr->AddRef(); // the schema column shares ownership of the expression

		int iRemap = tSorterSchema.GetAttrsCount();
		tSorterSchema.AddAttr ( tRemapCol, true );

		// remove initial attribute from m_hExtra
		// that way it won't be evaluated twice when it is not in select list
		m_hExtra.Delete(sAttrName);

		// retarget the comparator at the new attribute
		tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
		tState.m_dAttrs[i] = iRemap;
		tState.m_eKeypart[i] = Attr2Keypart ( dExtraExprs[i].m_eType );
		tState.m_dRemapped.BitSet ( i );
	}
}
  5587. void QueueCreator_c::RemapAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs )
  5588. {
  5589. // we have extra attrs (expressions) that we created while parsing the sort clause
  5590. // we couldn't add them to the schema at that stage,
  5591. // but now we can. we create attributes, assign internal names and set their expressions
  5592. assert ( m_pSorterSchema );
  5593. auto & tSorterSchema = *m_pSorterSchema;
  5594. int iNumOldAttrs = tSorterSchema.GetAttrsCount();
  5595. ReplaceGroupbyStrWithExprs ( tState, iNumOldAttrs );
  5596. ReplaceStaticStringsWithExprs ( tState );
  5597. ReplaceJsonWithExprs ( tState, dExtraExprs );
  5598. AddColumnarExprsAsAttrs ( tState, dExtraExprs );
  5599. // need another sort keys add after setup remap
  5600. if ( iNumOldAttrs!=tSorterSchema.GetAttrsCount() )
  5601. ExtraAddSortkeys ( tState.m_dAttrs );
  5602. }
  5603. void QueueCreator_c::AddKnnDistSort ( CSphString & sSortBy )
  5604. {
  5605. if ( m_pSorterSchema->GetAttr ( GetKnnDistAttrName() ) && !strstr ( sSortBy.cstr(), "knn_dist" ) )
  5606. sSortBy.SetSprintf ( "knn_dist() asc, %s", sSortBy.cstr() );
  5607. }
// matches sorting function
// Choose the match comparator (m_eMatchFunc) and fill m_tStateMatch according
// to the query sorting mode. Returns false with m_sError set on failure.
bool QueueCreator_c::SetupMatchesSortingFunc()
{
	m_bRandomize = false;

	if ( m_tQuery.m_eSort==SPH_SORT_EXTENDED )
	{
		// full "ORDER BY ..." clause; KNN queries may get knn_dist() prepended
		CSphString sSortBy = m_tQuery.m_sSortBy;
		AddKnnDistSort ( sSortBy );

		ESortClauseParseResult eRes = sphParseSortClause ( m_tQuery, sSortBy.cstr(), *m_pSorterSchema, m_eMatchFunc, m_tStateMatch, m_dMatchJsonExprs, m_tSettings.m_bComputeItems, m_sError );
		if ( eRes==SORT_CLAUSE_ERROR )
			return false;

		if ( eRes==SORT_CLAUSE_RANDOM )
			m_bRandomize = true;

		ExtraAddSortkeys ( m_tStateMatch.m_dAttrs );
		AssignOrderByToPresortStage ( m_tStateMatch.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
		RemapAttrs ( m_tStateMatch, m_dMatchJsonExprs );
		return true;
	}

	if ( m_tQuery.m_eSort==SPH_SORT_EXPR )
	{
		// sort by the precomputed @expr column; row id used as the second key
		m_tStateMatch.m_eKeypart[0] = SPH_KEYPART_INT;
		m_tStateMatch.m_tLocator[0] = m_pSorterSchema->GetAttr ( m_pSorterSchema->GetAttrIndex ( "@expr" ) ).m_tLocator;
		m_tStateMatch.m_eKeypart[1] = SPH_KEYPART_ROWID;
		m_tStateMatch.m_uAttrDesc = 1;
		m_eMatchFunc = FUNC_EXPR;
		return true;
	}

	// check sort-by attribute
	if ( m_tQuery.m_eSort!=SPH_SORT_RELEVANCE )
	{
		int iSortAttr = m_pSorterSchema->GetAttrIndex ( m_tQuery.m_sSortBy.cstr() );
		if ( iSortAttr<0 )
		{
			Err ( "sort-by attribute '%s' not found", m_tQuery.m_sSortBy.cstr() );
			return false;
		}

		const CSphColumnInfo & tAttr = m_pSorterSchema->GetAttr ( iSortAttr );
		m_tStateMatch.m_eKeypart[0] = Attr2Keypart ( tAttr.m_eAttrType );
		m_tStateMatch.m_tLocator[0] = tAttr.m_tLocator;
		m_tStateMatch.m_dAttrs[0] = iSortAttr;
		RemapAttrs ( m_tStateMatch, m_dMatchJsonExprs );
	}

	ExtraAddSortkeys ( m_tStateMatch.m_dAttrs );

	// find out what function to use and whether it needs attributes
	switch (m_tQuery.m_eSort )
	{
		case SPH_SORT_ATTR_DESC:		m_eMatchFunc = FUNC_ATTR_DESC; break;
		case SPH_SORT_ATTR_ASC:			m_eMatchFunc = FUNC_ATTR_ASC; break;
		case SPH_SORT_TIME_SEGMENTS:	m_eMatchFunc = FUNC_TIMESEGS; break;
		case SPH_SORT_RELEVANCE:		m_eMatchFunc = FUNC_REL_DESC; break;
		default:
			Err ( "unknown sorting mode %d", m_tQuery.m_eSort );
			return false;
	}

	return true;
}
// Parse the group ORDER BY clause and set up the group comparator state.
bool QueueCreator_c::SetupGroupSortingFunc ( bool bGotDistinct )
{
	assert ( m_bGotGroupby );

	// with the default group order, a KNN column (if present) gets prepended
	CSphString sGroupOrderBy = m_tQuery.m_sGroupSortBy;
	if ( sGroupOrderBy=="@weight desc" )
		AddKnnDistSort ( sGroupOrderBy );

	ESortClauseParseResult eRes = sphParseSortClause ( m_tQuery, sGroupOrderBy.cstr(), *m_pSorterSchema, m_eGroupFunc, m_tStateGroup, m_dGroupJsonExprs, m_tSettings.m_bComputeItems, m_sError );
	if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
	{
		if ( eRes==SORT_CLAUSE_RANDOM )
			m_sError = "groups can not be sorted by @random";
		return false;
	}

	ExtraAddSortkeys ( m_tStateGroup.m_dAttrs );

	// explicit group-by columns are tracked as extra query columns
	if ( !m_tGroupSorterSettings.m_bImplicit )
	{
		for ( const auto & tGroupColumn : m_dGroupColumns )
			m_hExtra.Add ( m_pSorterSchema->GetAttr ( tGroupColumn.first ).m_sName );
	}

	// the COUNT(DISTINCT ...) attribute is tracked too
	if ( bGotDistinct )
	{
		m_dGroupColumns.Add ( { m_pSorterSchema->GetAttrIndex ( m_tQuery.m_sGroupDistinct.cstr() ), true } );
		assert ( m_dGroupColumns.Last().first>=0 );
		m_hExtra.Add ( m_pSorterSchema->GetAttr ( m_dGroupColumns.Last().first ).m_sName );
	}

	// implicit case
	CSphVector<int> dGroupByCols;
	for ( const auto & i : m_dGroupColumns )
		if ( i.second )
			dGroupByCols.Add ( i.first );

	// group-by and group-order attributes must be computed before sorting
	AssignOrderByToPresortStage ( dGroupByCols.Begin(), dGroupByCols.GetLength() );
	AssignOrderByToPresortStage ( m_tStateGroup.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );

	// GroupSortBy str attributes setup
	RemapAttrs ( m_tStateGroup, m_dGroupJsonExprs );
	return true;
}
  5700. // set up aggregate filter for grouper
  5701. std::unique_ptr<ISphFilter> QueueCreator_c::CreateAggrFilter () const
  5702. {
  5703. assert ( m_bGotGroupby );
  5704. if ( m_pSorterSchema->GetAttr ( m_tSettings.m_pAggrFilter->m_sAttrName.cstr() ) )
  5705. return sphCreateAggrFilter ( m_tSettings.m_pAggrFilter, m_tSettings.m_pAggrFilter->m_sAttrName,
  5706. *m_pSorterSchema, m_sError );
  5707. // having might reference aliased attributes but @* attributes got stored without alias in sorter schema
  5708. CSphString sHaving;
  5709. for ( const auto & tItem : m_tQuery.m_dItems )
  5710. if ( tItem.m_sAlias==m_tSettings.m_pAggrFilter->m_sAttrName )
  5711. {
  5712. sHaving = tItem.m_sExpr;
  5713. break;
  5714. }
  5715. if ( sHaving=="groupby()" )
  5716. sHaving = "@groupby";
  5717. else if ( sHaving=="count(*)" )
  5718. sHaving = "@count";
  5719. return sphCreateAggrFilter ( m_tSettings.m_pAggrFilter, sHaving, *m_pSorterSchema, m_sError );
  5720. }
  5721. void QueueCreator_c::SetupCollation()
  5722. {
  5723. SphStringCmp_fn fnCmp = GetStringCmpFunc ( m_tQuery.m_eCollation );
  5724. m_tStateMatch.m_fnStrCmp = fnCmp;
  5725. m_tStateGroup.m_fnStrCmp = fnCmp;
  5726. }
// Set up everything group-by related: detect implicit grouping (bare
// COUNT(*)/aggregates), parse group-by settings, validate GROUPBY()/HAVING
// usage and add the magic group-by columns.
bool QueueCreator_c::AddGroupbyStuff ()
{
	// need schema with group related columns however not need grouper
	m_bHeadWOGroup = ( m_tQuery.m_sGroupBy.IsEmpty () && m_tQuery.m_bFacetHead );

	// an item that forces implicit grouping: aggregate func, count(*) or @distinct
	auto fnIsImplicit = [] ( const CSphQueryItem & t )
	{
		return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
	};

	bool bHasImplicitGrouping = HasImplicitGrouping(m_tQuery);

	// count(*) and distinct wo group by at main query should keep implicit flag
	if ( bHasImplicitGrouping && m_bHeadWOGroup )
		m_bHeadWOGroup = !m_tQuery.m_dRefItems.any_of ( fnIsImplicit );

	if ( !SetupGroupbySettings(bHasImplicitGrouping) )
		return false;

	// or else, check in SetupGroupbySettings() would already fail
	m_bGotGroupby = !m_tQuery.m_sGroupBy.IsEmpty () || m_tGroupSorterSettings.m_bImplicit;
	m_bGotDistinct = !!m_tGroupSorterSettings.m_pDistinctFetcher;

	if ( m_bHasGroupByExpr && !m_bGotGroupby )
		return Err ( "GROUPBY() is allowed only in GROUP BY queries" );

	// check for HAVING constrains
	if ( !CheckHavingConstraints() )
		return false;

	// now let's add @groupby stuff, if necessary
	return MaybeAddGroupbyMagic(m_bGotDistinct);
}
  5752. bool QueueCreator_c::SetGroupSorting()
  5753. {
  5754. if ( m_bGotGroupby )
  5755. {
  5756. if ( !SetupGroupSortingFunc ( m_bGotDistinct ) )
  5757. return false;
  5758. if ( m_tSettings.m_pAggrFilter && !m_tSettings.m_pAggrFilter->m_sAttrName.IsEmpty() )
  5759. {
  5760. auto pFilter = CreateAggrFilter ();
  5761. if ( !pFilter )
  5762. return false;
  5763. m_tGroupSorterSettings.m_pAggrFilterTrait = pFilter.release();
  5764. }
  5765. int iDistinctAccuracyThresh = m_tQuery.m_bExplicitDistinctThresh ? m_tQuery.m_iDistinctThresh : GetDistinctThreshDefault();
  5766. m_tGroupSorterSettings.SetupDistinctAccuracy ( iDistinctAccuracyThresh );
  5767. }
  5768. for ( auto & tIdx: m_hExtra )
  5769. {
  5770. m_hQueryColumns.Add ( tIdx.first );
  5771. if ( m_pExtra )
  5772. m_pExtra->Add ( tIdx.first );
  5773. }
  5774. return true;
  5775. }
  5776. bool QueueCreator_c::PredictAggregates() const
  5777. {
  5778. for ( int i = 0; i < m_pSorterSchema->GetAttrsCount(); i++ )
  5779. {
  5780. const CSphColumnInfo & tAttr = m_pSorterSchema->GetAttr(i);
  5781. if ( !(tAttr.m_eAggrFunc==SPH_AGGR_NONE || IsGroupbyMagic ( tAttr.m_sName ) || IsSortStringInternal ( tAttr.m_sName.cstr () )) )
  5782. return true;
  5783. }
  5784. return false;
  5785. }
  5786. int QueueCreator_c::ReduceMaxMatches() const
  5787. {
  5788. assert ( !m_bGotGroupby );
  5789. if ( m_tQuery.m_bExplicitMaxMatches || m_tQuery.m_bHasOuter || !m_tSettings.m_bComputeItems )
  5790. return Max ( m_tSettings.m_iMaxMatches, 1 );
  5791. return Max ( Min ( m_tSettings.m_iMaxMatches, m_tQuery.m_iLimit+m_tQuery.m_iOffset ), 1 );
  5792. }
  5793. int QueueCreator_c::AdjustMaxMatches ( int iMaxMatches ) const
  5794. {
  5795. assert ( m_bGotGroupby );
  5796. if ( m_tQuery.m_bExplicitMaxMatches || m_tSettings.m_bForceSingleThread )
  5797. return iMaxMatches;
  5798. int iGroupbyAttr = GetGroupbyAttrIndex();
  5799. if ( iGroupbyAttr<0 )
  5800. return iMaxMatches;
  5801. int iCountDistinct = m_tSettings.m_fnGetCountDistinct ? m_tSettings.m_fnGetCountDistinct ( m_pSorterSchema->GetAttr(iGroupbyAttr).m_sName ) : -1;
  5802. if ( iCountDistinct > m_tQuery.m_iMaxMatchThresh )
  5803. return iMaxMatches;
  5804. return Max ( iCountDistinct, iMaxMatches );
  5805. }
  5806. bool QueueCreator_c::CanCalcFastCountDistinct() const
  5807. {
  5808. bool bHasAggregates = PredictAggregates();
  5809. return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
  5810. }
  5811. bool QueueCreator_c::CanCalcFastCountFilter() const
  5812. {
  5813. bool bHasAggregates = PredictAggregates();
  5814. return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.GetLength()==1 && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
  5815. }
  5816. bool QueueCreator_c::CanCalcFastCount() const
  5817. {
  5818. bool bHasAggregates = PredictAggregates();
  5819. return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
  5820. }
  5821. Precalculated_t QueueCreator_c::FetchPrecalculatedValues() const
  5822. {
  5823. Precalculated_t tPrecalc;
  5824. if ( CanCalcFastCountDistinct() )
  5825. {
  5826. int iCountDistinctAttr = GetGroupDistinctAttrIndex();
  5827. if ( iCountDistinctAttr>0 && m_tSettings.m_bEnableFastDistinct )
  5828. tPrecalc.m_iCountDistinct = m_tSettings.m_fnGetCountDistinct ? m_tSettings.m_fnGetCountDistinct ( m_pSorterSchema->GetAttr(iCountDistinctAttr).m_sName ) : -1;
  5829. }
  5830. if ( CanCalcFastCountFilter() )
  5831. tPrecalc.m_iCountFilter = m_tSettings.m_fnGetCountFilter ? m_tSettings.m_fnGetCountFilter ( m_tQuery.m_dFilters[0] ) : -1;
  5832. if ( CanCalcFastCount() )
  5833. tPrecalc.m_iCount = m_tSettings.m_fnGetCount ? m_tSettings.m_fnGetCount() : -1;
  5834. return tPrecalc;
  5835. }
// Instantiate the actual sorter: grouping sorter, direct SQL queue,
// collection queue, or a plain (optionally columnar-proxied) match sorter.
ISphMatchSorter * QueueCreator_c::SpawnQueue()
{
	bool bNeedFactors = !!(m_uPackedFactorFlags & SPH_FACTOR_ENABLE);

	if ( m_bGotGroupby )
	{
		m_tGroupSorterSettings.m_bGrouped = m_tSettings.m_bGrouped;
		m_tGroupSorterSettings.m_iMaxMatches = AdjustMaxMatches ( m_tGroupSorterSettings.m_iMaxMatches );
		if ( m_pProfile )
			m_pProfile->m_iMaxMatches = m_tGroupSorterSettings.m_iMaxMatches;

		// precalculated counts (if available) are passed down to the grouping sorter
		Precalculated_t tPrecalc = FetchPrecalculatedValues();
		return sphCreateSorter1st ( m_eMatchFunc, m_eGroupFunc, &m_tQuery, m_tGroupSorterSettings, bNeedFactors, PredictAggregates(), tPrecalc );
	}

	// unlimited select with an SQL row buffer streams rows out directly
	if ( m_tQuery.m_iLimit == -1 && m_tSettings.m_pSqlRowBuffer )
		return new DirectSqlQueue_c ( m_tSettings.m_pSqlRowBuffer, m_tSettings.m_ppOpaque1, m_tSettings.m_ppOpaque2, std::move (m_tSettings.m_dCreateSchema) );

	// collection mode (used by update/delete queues) gathers matches as-is
	if ( m_tSettings.m_pCollection )
		return new CollectQueue_c ( m_tSettings.m_iMaxMatches, *m_tSettings.m_pCollection );

	int iMaxMatches = ReduceMaxMatches();
	if ( m_pProfile )
		m_pProfile->m_iMaxMatches = iMaxMatches;

	ISphMatchSorter * pResult = CreatePlainSorter ( m_eMatchFunc, m_tQuery.m_bSortKbuffer, iMaxMatches, bNeedFactors );
	if ( !pResult )
		return nullptr;

	return CreateColumnarProxySorter ( pResult, iMaxMatches, *m_pSorterSchema, m_tStateMatch, m_eMatchFunc, bNeedFactors, m_tSettings.m_bComputeItems, m_bMulti );
}
  5860. bool QueueCreator_c::SetupComputeQueue ()
  5861. {
  5862. return MaybeAddGeodistColumn ()
  5863. && AddKNNDistColumn()
  5864. && MaybeAddExprColumn ()
  5865. && MaybeAddExpressionsFromSelectList ()
  5866. && AddExpressionsForUpdates();
  5867. }
  5868. bool QueueCreator_c::SetupGroupQueue ()
  5869. {
  5870. return AddGroupbyStuff ()
  5871. && SetupMatchesSortingFunc ()
  5872. && SetGroupSorting ();
  5873. }
// When 2+ stored columnar attributes are still evaluated at the FINAL stage,
// switch their expressions from columnar storage reads to docstore fetches.
bool QueueCreator_c::ConvertColumnarToDocstore()
{
	// don't use docstore (need to try to keep schemas similar for multiquery to work)
	if ( m_tQuery.m_bFacet || m_tQuery.m_bFacetHead )
		return true;

	// check for columnar attributes that have FINAL eval stage
	// if we have more than 1 of such attributes (and they are also stored), we replace their columnar expressions with docstore fetches
	CSphVector<int> dStoredColumnar;
	auto & tSchema = *m_pSorterSchema;
	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
	{
		auto & tAttr = tSchema.GetAttr(i);
		bool bStored = false;
		bool bColumnar = tAttr.m_pExpr && tAttr.m_pExpr->IsColumnar(&bStored);
		if ( bColumnar && bStored && tAttr.m_eStage==SPH_EVAL_FINAL )
			dStoredColumnar.Add(i);
	}

	// a single such attribute is left on the columnar read path
	if ( dStoredColumnar.GetLength()<=1 )
		return true;

	for ( auto i : dStoredColumnar )
	{
		auto & tAttr = const_cast<CSphColumnInfo&>( tSchema.GetAttr(i) );
		// ask the old expression which columnar column it reads, then swap it
		// for a docstore fetch of the same column
		CSphString sColumnarAttrName;
		tAttr.m_pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &sColumnarAttrName );
		tAttr.m_pExpr = CreateExpr_GetStoredAttr ( sColumnarAttrName, tAttr.m_eAttrType );
	}

	return true;
}
  5902. bool QueueCreator_c::SetupQueue ()
  5903. {
  5904. return SetupComputeQueue ()
  5905. && SetupGroupQueue ()
  5906. && ConvertColumnarToDocstore();
  5907. }
// Final assembly: spawn the configured sorter, hand over schema and comparator
// state, and seed the RNG for random sorting. Returns nullptr on failure.
ISphMatchSorter * QueueCreator_c::CreateQueue ()
{
	SetupCollation();

	// facet head without an explicit group-by drops the implicit grouping
	if ( m_bHeadWOGroup && m_tGroupSorterSettings.m_bImplicit )
	{
		m_tGroupSorterSettings.m_bImplicit = false;
		m_bGotGroupby = false;
	}

	///////////////////
	// spawn the queue
	///////////////////

	ISphMatchSorter * pTop = SpawnQueue();
	if ( !pTop )
	{
		Err ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)", m_eMatchFunc, m_bGotGroupby, m_eGroupFunc );
		return nullptr;
	}

	assert ( pTop );

	// the sorter takes ownership of the schema
	pTop->SetSchema ( m_pSorterSchema.release(), false );
	pTop->SetState ( m_tStateMatch );
	pTop->SetGroupState ( m_tStateGroup );
	pTop->SetRandom ( m_bRandomize );

	if ( !m_bHaveStar && m_hQueryColumns.GetLength() )
		pTop->SetFilteredAttrs ( m_hQueryColumns, m_tSettings.m_bNeedDocids || m_bExprsNeedDocids );

	// random sorting: use the explicit seed if given, auto-seed otherwise
	if ( m_bRandomize )
	{
		if ( m_tQuery.m_iRandSeed>=0 )
			sphSrand ( (DWORD)m_tQuery.m_iRandSeed );
		else
			sphAutoSrand();
	}

	return pTop;
}
  5941. static void ResetRemaps ( CSphMatchComparatorState & tState )
  5942. {
  5943. for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  5944. {
  5945. if ( tState.m_dRemapped.BitGet ( i ) && tState.m_eKeypart[i]==SPH_KEYPART_STRINGPTR )
  5946. tState.m_dRemapped.BitClear ( i );
  5947. }
  5948. }
  5949. bool QueueCreator_c::SetSchemaGroupQueue ( const CSphRsetSchema & tNewSchema )
  5950. {
  5951. // need to reissue remap but with existed attributes
  5952. ResetRemaps ( m_tStateMatch );
  5953. ResetRemaps ( m_tStateGroup );
  5954. *m_pSorterSchema = tNewSchema;
  5955. return SetupGroupQueue();
  5956. }
  5957. static ISphMatchSorter * CreateQueue ( QueueCreator_c & tCreator, SphQueueRes_t & tRes )
  5958. {
  5959. ISphMatchSorter * pSorter = tCreator.CreateQueue ();
  5960. tRes.m_bZonespanlist = tCreator.m_bZonespanlist;
  5961. tRes.m_uPackedFactorFlags = tCreator.m_uPackedFactorFlags;
  5962. return pSorter;
  5963. }
  5964. bool sphHasExpressions ( const CSphQuery & tQuery, const CSphSchema & tSchema )
  5965. {
  5966. return !tQuery.m_dItems.all_of ( [&tSchema] ( const CSphQueryItem& tItem )
  5967. {
  5968. const CSphString & sExpr = tItem.m_sExpr;
  5969. // all expressions that come from parser are automatically aliased
  5970. assert ( !tItem.m_sAlias.IsEmpty() );
  5971. return sExpr=="*"
  5972. || ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && tItem.m_sAlias==sExpr )
  5973. || IsGroupbyMagic ( sExpr );
  5974. });
  5975. }
  5976. int GetAliasedAttrIndex ( const CSphString & sAttr, const CSphQuery & tQuery, const ISphSchema & tSchema )
  5977. {
  5978. int iAttr = tSchema.GetAttrIndex ( sAttr.cstr() );
  5979. if ( iAttr>=0 )
  5980. return iAttr;
  5981. // try aliased groupby attr (facets)
  5982. ARRAY_FOREACH ( i, tQuery.m_dItems )
  5983. {
  5984. if ( sAttr==tQuery.m_dItems[i].m_sExpr )
  5985. return tSchema.GetAttrIndex ( tQuery.m_dItems[i].m_sAlias.cstr() );
  5986. else if ( sAttr==tQuery.m_dItems[i].m_sAlias )
  5987. return tSchema.GetAttrIndex ( tQuery.m_dItems[i].m_sExpr.cstr() );
  5988. }
  5989. return iAttr;
  5990. }
  5991. static void CreateSorters ( const VecTraits_T<CSphQuery> & dQueries, const VecTraits_T<ISphMatchSorter*> & dSorters, const VecTraits_T<QueueCreator_c> & dCreators, const VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes )
  5992. {
  5993. ARRAY_FOREACH ( i, dCreators )
  5994. {
  5995. if ( !dCreators[i].m_bCreate )
  5996. continue;
  5997. dSorters[i] = CreateQueue ( dCreators[i], tRes );
  5998. assert ( dSorters[i]!=nullptr );
  5999. }
  6000. if ( tRes.m_bAlowMulti )
  6001. {
  6002. ISphMatchSorter * pSorter0 = nullptr;
  6003. for ( int iCheck=0; iCheck<dSorters.GetLength(); ++iCheck )
  6004. {
  6005. if ( !dCreators[iCheck].m_bCreate )
  6006. continue;
  6007. assert ( dSorters[iCheck] );
  6008. if ( !pSorter0 )
  6009. {
  6010. pSorter0 = dSorters[iCheck];
  6011. continue;
  6012. }
  6013. assert ( dSorters[iCheck]->GetSchema()->GetAttrsCount()==pSorter0->GetSchema()->GetAttrsCount() );
  6014. }
  6015. }
  6016. }
  6017. int ApplyImplicitCutoff ( const CSphQuery & tQuery, const VecTraits_T<ISphMatchSorter*> & dSorters, bool bFT )
  6018. {
  6019. bool bAllPrecalc = dSorters.GetLength() && dSorters.all_of ( []( auto pSorter ){ return pSorter->IsPrecalc(); } );
  6020. if ( bAllPrecalc )
  6021. return 1; // only need one match for precalc sorters
  6022. if ( tQuery.m_iCutoff>0 )
  6023. return tQuery.m_iCutoff;
  6024. if ( !tQuery.m_iCutoff )
  6025. return -1;
  6026. // this is the same as checking the sorters for disabled cutoff
  6027. // but this works when sorters are not yet available (e.g. GetPseudoShardingMetric())
  6028. if ( HasImplicitGrouping ( tQuery ) )
  6029. return -1;
  6030. bool bDisableCutoff = dSorters.any_of ( []( auto * pSorter ){ return pSorter->IsCutoffDisabled(); } );
  6031. if ( bDisableCutoff )
  6032. return -1;
  6033. // implicit cutoff when there's no sorting and no grouping
  6034. if ( !bFT && ( tQuery.m_sSortBy=="@weight desc" || tQuery.m_sSortBy.IsEmpty() ) && tQuery.m_sGroupBy.IsEmpty() && !tQuery.m_bFacet && !tQuery.m_bFacetHead )
  6035. return tQuery.m_iLimit+tQuery.m_iOffset;
  6036. return -1;
  6037. }
  6038. ISphMatchSorter * sphCreateQueue ( const SphQueueSettings_t & tQueue, const CSphQuery & tQuery, CSphString & sError, SphQueueRes_t & tRes, StrVec_t * pExtra, QueryProfile_c * pProfile )
  6039. {
  6040. QueueCreator_c tCreator ( tQueue, tQuery, sError, pExtra, pProfile );
  6041. if ( !tCreator.SetupQueue () )
  6042. return nullptr;
  6043. return CreateQueue ( tCreator, tRes );
  6044. }
// Set up one queue creator per query of a multi-query batch and try to build
// a single shared ("multi") schema so all sorters can be fed from one scan.
// On any mismatch that cannot be reconciled, clears tRes.m_bAlowMulti, which
// tells the caller to fall back to regular per-query sorters.
static void CreateMultiQueue ( RawVector_T<QueueCreator_c> & dCreators, const SphQueueSettings_t & tQueue, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter*> & dSorters, VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes, StrVec_t * pExtra, QueryProfile_c * pProfile )
{
	assert ( dSorters.GetLength()>1 );
	assert ( dSorters.GetLength()==dQueries.GetLength() );
	assert ( dSorters.GetLength()==dErrors.GetLength() );

	dCreators.Reserve_static ( dSorters.GetLength () );
	dCreators.Emplace_back( tQueue, dQueries[0], dErrors[0], pExtra, pProfile );
	dCreators[0].m_bMulti = true;

	// same as SetupQueue, but done in two steps so we can snapshot the schema in between
	dCreators[0].SetupComputeQueue ();
	// copy schema WO group by and internals; this snapshot seeds the common multi-schema below
	CSphRsetSchema tRefSchema = dCreators[0].SorterSchema();
	bool bHasJson = dCreators[0].HasJson();
	bool bJsonMixed = false;

	dCreators[0].SetupGroupQueue ();

	// create rest of schemas
	for ( int i=1; i<dSorters.GetLength(); ++i )
	{
		// fill extra only for initial pass
		dCreators.Emplace_back ( tQueue, dQueries[i], dErrors[i], pExtra, pProfile );
		dCreators[i].m_bMulti = true;
		if ( !dCreators[i].SetupQueue () )
		{
			// setup failed (error already in dErrors[i]); skip this query on later passes
			dCreators[i].m_bCreate = false;
			continue;
		}

		// track whether JSON usage differs between the queries of the batch
		bJsonMixed |= ( bHasJson!=dCreators[i].HasJson () );
		bHasJson |= dCreators[i].HasJson();
	}

	// FIXME!!! check attributes and expressions matches
	// cheap compatibility check: same dynamic size and attr count per schema
	bool bSame = !bJsonMixed;
	const auto& tSchema0 = dCreators[0].SorterSchema();
	for ( int i=1; i<dCreators.GetLength() && bSame; ++i )
	{
		const auto & tCur = dCreators[i].SorterSchema();
		bSame &= ( tSchema0.GetDynamicSize()==tCur.GetDynamicSize() && tSchema0.GetAttrsCount()==tCur.GetAttrsCount() );
	}

	// same schemes - nothing to merge, keep the per-query schemas as-is
	if ( bSame )
		return;

	// schemas differ: merge all per-query attributes into one common schema
	CSphRsetSchema tMultiSchema = tRefSchema;

	// min/max count of magic group-by columns across the queries;
	// a mismatch later means the queries group differently
	int iMinGroups = INT_MAX;
	int iMaxGroups = 0;
	bool bHasMulti = false;
	ARRAY_FOREACH ( iSchema, dCreators )
	{
		if ( !dCreators[iSchema].m_bCreate )
			continue;

		int iGroups = 0;
		const CSphRsetSchema & tSchema = dCreators[iSchema].SorterSchema();
		for ( int iCol=0; iCol<tSchema.GetAttrsCount(); ++iCol )
		{
			const CSphColumnInfo & tCol = tSchema.GetAttr ( iCol );
			// only dynamic or columnar attrs are candidates for the merged schema
			if ( !tCol.m_tLocator.m_bDynamic && !tCol.IsColumnar() )
				continue;

			if ( IsGroupbyMagic ( tCol.m_sName ) )
			{
				++iGroups;
				// internal JSON sort columns still need to be merged; other magic attrs are skipped
				if ( !IsSortJsonInternal ( tCol.m_sName ))
					continue;
			}

			const CSphColumnInfo * pMultiCol = tMultiSchema.GetAttr ( tCol.m_sName.cstr() );
			if ( pMultiCol )
			{
				bool bDisable1 = false;
				bool bDisable2 = false;
				// no need to add attributes that already exists
				// (same type and either both plain attrs or both expressions with equal hashes)
				if ( pMultiCol->m_eAttrType==tCol.m_eAttrType &&
					( ( !pMultiCol->m_pExpr && !tCol.m_pExpr ) ||
					( pMultiCol->m_pExpr && tCol.m_pExpr
						&& pMultiCol->m_pExpr->GetHash ( tMultiSchema, SPH_FNV64_SEED, bDisable1 )==tCol.m_pExpr->GetHash ( tSchema, SPH_FNV64_SEED, bDisable2 ) )
					) )
					continue;

				// no need to add a new column, but we need the same schema for the sorters
				if ( tCol.IsColumnar() && pMultiCol->IsColumnarExpr() )
				{
					bHasMulti = true;
					continue;
				}

				if ( !tCol.IsColumnarExpr() || !pMultiCol->IsColumnar() ) // need a new column
				{
					tRes.m_bAlowMulti = false; // if attr or expr differs need to create regular sorters and issue search WO multi-query
					return;
				}
			}

			// new attribute: add it to the merged schema and rebind its expression
			// locators from the per-query schema to the merged one
			bHasMulti = true;
			tMultiSchema.AddAttr ( tCol, true );
			if ( tCol.m_pExpr )
				tCol.m_pExpr->FixupLocator ( &tSchema, &tMultiSchema );
		}

		iMinGroups = Min ( iMinGroups, iGroups );
		iMaxGroups = Max ( iMaxGroups, iGroups );
	}

	// usual multi query should all have similar group by (facet batches are exempt)
	if ( iMinGroups!=iMaxGroups && !dQueries[0].m_bFacetHead && !dQueries[0].m_bFacet )
	{
		tRes.m_bAlowMulti = false;
		return;
	}

	// only group attributes differs - create regular sorters
	if ( !bHasMulti && !bJsonMixed )
		return;

	// setup common schemas: re-run group queue setup against the merged schema
	for ( QueueCreator_c & tCreator : dCreators )
	{
		if ( !tCreator.m_bCreate )
			continue;

		if ( !tCreator.SetSchemaGroupQueue ( tMultiSchema ) )
			tCreator.m_bCreate = false;
	}
}
  6156. void sphCreateMultiQueue ( const SphQueueSettings_t & tQueue, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes, StrVec_t * pExtra, QueryProfile_c * pProfile )
  6157. {
  6158. RawVector_T<QueueCreator_c> dCreators;
  6159. CreateMultiQueue ( dCreators, tQueue, dQueries, dSorters, dErrors, tRes, pExtra, pProfile );
  6160. CreateSorters ( dQueries, dSorters, dCreators, dErrors, tRes );
  6161. }