sphinxsort.cpp 201 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
7627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499650065016502650365046505650665076508650965106511651265136514651565166517651865196520652165226523652465256526652765286529653065316532653365346535653665376538653965406541654265436544654565466547654865496550655165526553655465556556655765586559656065616562656365646565656665676568656965706571657265736574657565766577657865796580658165826583658465856586658765886589659065916592659365946595659665976598659966006601660266036604660566066607660866096610661166126613661466156616661766186619662066216622
  1. //
  2. // Copyright (c) 2017-2024, Manticore Software LTD (https://manticoresearch.com)
  3. // Copyright (c) 2001-2016, Andrew Aksyonoff
  4. // Copyright (c) 2008-2016, Sphinx Technologies Inc
  5. // All rights reserved
  6. //
  7. // This program is free software; you can redistribute it and/or modify
  8. // it under the terms of the GNU General Public License. You should have
  9. // received a copy of the GPL license along with this program; if you
  10. // did not, you can find it at http://www.gnu.org/
  11. //
  12. #include "sphinxsort.h"
  13. #include "sphinxint.h"
  14. #include "sphinxjson.h"
  15. #include "attribute.h"
  16. #include "collation.h"
  17. #include "memio.h"
  18. #include "columnargrouper.h"
  19. #include "columnarexpr.h"
  20. #include "exprtraits.h"
  21. #include "columnarsort.h"
  22. #include "sortcomp.h"
  23. #include "conversion.h"
  24. #include "docstore.h"
  25. #include "schema/rset.h"
  26. #include "aggregate.h"
  27. #include "distinct.h"
  28. #include "netreceive_ql.h"
  29. #include "grouper.h"
  30. #include "knnmisc.h"
  31. #include <ctime>
  32. #include <cmath>
  33. #if !_WIN32
  34. #include <unistd.h>
  35. #include <sys/time.h>
  36. #endif
  37. static bool g_bAccurateAggregation = false;
  38. static int g_iDistinctThresh = 3500;
  39. void SetAccurateAggregationDefault ( bool bEnabled )
  40. {
  41. g_bAccurateAggregation = bEnabled;
  42. }
  43. bool GetAccurateAggregationDefault()
  44. {
  45. return g_bAccurateAggregation;
  46. }
  47. void SetDistinctThreshDefault ( int iThresh )
  48. {
  49. g_iDistinctThresh = iThresh;
  50. }
  51. int GetDistinctThreshDefault()
  52. {
  53. return g_iDistinctThresh;
  54. }
  55. void sphFixupLocator ( CSphAttrLocator & tLocator, const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
  56. {
  57. // first time schema setup?
  58. if ( !pOldSchema )
  59. return;
  60. if ( tLocator.m_iBlobAttrId==-1 && tLocator.m_iBitCount==-1 )
  61. return;
  62. assert ( pNewSchema );
  63. for ( int i = 0; i < pOldSchema->GetAttrsCount(); i++ )
  64. {
  65. const CSphColumnInfo & tAttr = pOldSchema->GetAttr(i);
  66. if ( tLocator==tAttr.m_tLocator )
  67. {
  68. const CSphColumnInfo * pAttrInNewSchema = pNewSchema->GetAttr ( tAttr.m_sName.cstr() );
  69. if ( pAttrInNewSchema )
  70. {
  71. tLocator = pAttrInNewSchema->m_tLocator;
  72. return;
  73. }
  74. }
  75. }
  76. }
  77. namespace {
  78. const char g_sIntAttrPrefix[] = "@int_attr_";
  79. const char g_sIntJsonPrefix[] = "@groupbystr";
  80. template <typename FN>
  81. void FnSortGetStringRemap ( const ISphSchema & tDstSchema, const ISphSchema & tSrcSchema, FN fnProcess )
  82. {
  83. for ( int i = 0; i<tDstSchema.GetAttrsCount (); ++i )
  84. {
  85. const CSphColumnInfo & tDst = tDstSchema.GetAttr ( i );
  86. // remap only static strings
  87. if ( tDst.m_eAttrType==SPH_ATTR_STRINGPTR || !IsSortStringInternal ( tDst.m_sName ) )
  88. continue;
  89. auto iSrcCol = tSrcSchema.GetAttrIndex ( tDst.m_sName.cstr ()+sizeof ( g_sIntAttrPrefix )-1 );
  90. if ( iSrcCol!=-1 ) // skip internal attributes received from agents
  91. fnProcess ( iSrcCol, i );
  92. }
  93. }
  94. } // unnamed (static) namespace
  95. int GetStringRemapCount ( const ISphSchema & tDstSchema, const ISphSchema & tSrcSchema )
  96. {
  97. int iMaps = 0;
  98. FnSortGetStringRemap ( tDstSchema, tSrcSchema, [&iMaps] ( int, int ) { ++iMaps; } );
  99. return iMaps;
  100. }
  101. //////////////////////////////////////////////////////////////////////////
  102. class TransformedSchemaBuilder_c
  103. {
  104. public:
  105. TransformedSchemaBuilder_c ( const ISphSchema & tOldSchema, CSphSchema & tNewSchema );
  106. void AddAttr ( const CSphString & sName );
  107. private:
  108. const ISphSchema & m_tOldSchema;
  109. CSphSchema & m_tNewSchema;
  110. void ReplaceColumnarAttrWithExpression ( CSphColumnInfo & tAttr, int iLocator );
  111. };
  112. TransformedSchemaBuilder_c::TransformedSchemaBuilder_c ( const ISphSchema & tOldSchema, CSphSchema & tNewSchema )
  113. : m_tOldSchema ( tOldSchema )
  114. , m_tNewSchema ( tNewSchema )
  115. {}
  116. void TransformedSchemaBuilder_c::AddAttr ( const CSphString & sName )
  117. {
  118. const CSphColumnInfo * pAttr = m_tOldSchema.GetAttr ( sName.cstr() );
  119. if ( !pAttr )
  120. return;
  121. CSphColumnInfo tAttr = *pAttr;
  122. tAttr.m_tLocator.Reset();
  123. if ( tAttr.m_iIndex==-1 )
  124. tAttr.m_iIndex = m_tOldSchema.GetAttrIndexOriginal ( tAttr.m_sName.cstr() );
  125. // check if new columnar attributes were added (that were not in the select list originally)
  126. if ( tAttr.IsColumnar() )
  127. ReplaceColumnarAttrWithExpression ( tAttr, m_tNewSchema.GetAttrsCount() );
  128. tAttr.m_eAttrType = sphPlainAttrToPtrAttr ( tAttr.m_eAttrType );
  129. m_tNewSchema.AddAttr ( tAttr, true );
  130. }
  131. void TransformedSchemaBuilder_c::ReplaceColumnarAttrWithExpression ( CSphColumnInfo & tAttr, int iLocator )
  132. {
  133. assert ( tAttr.IsColumnar() );
  134. assert ( !tAttr.m_pExpr );
  135. // temporarily add attr to new schema
  136. // when result set is finalized, corresponding columnar expression (will be spawned later)
  137. // will be evaluated and put into the match
  138. // and this expression will be used to fetch that value
  139. tAttr.m_uAttrFlags &= ~CSphColumnInfo::ATTR_COLUMNAR;
  140. tAttr.m_eAttrType = sphPlainAttrToPtrAttr ( tAttr.m_eAttrType );
  141. m_tNewSchema.AddAttr ( tAttr, true );
  142. // parse expression as if it is not columnar
  143. CSphString sError;
  144. ExprParseArgs_t tExprArgs;
  145. tAttr.m_pExpr = sphExprParse ( tAttr.m_sName.cstr(), m_tNewSchema, sError, tExprArgs );
  146. assert ( tAttr.m_pExpr );
  147. // now remove it from schema (it will be added later with the supplied expression)
  148. m_tNewSchema.RemoveAttr( tAttr.m_sName.cstr(), true );
  149. }
  150. //////////////////////////////////////////////////////////////////////////
/// Match processor that converts matches from an old schema layout to a new one:
/// zero-fills, copies, repacks pooled blobs/json fields, or evaluates columnar
/// expressions per attribute, and remaps @int_attr_ comparator attributes.
class MatchesToNewSchema_c : public MatchProcessor_i
{
public:
	MatchesToNewSchema_c ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema, GetBlobPoolFromMatch_fn fnGetBlobPool, GetColumnarFromMatch_fn fnGetColumnar );

	// performs actual processing according created plan
	void Process ( CSphMatch * pMatch ) final { ProcessMatch(pMatch); }
	void Process ( VecTraits_T<CSphMatch *> & dMatches ) final { dMatches.for_each ( [this]( CSphMatch * pMatch ){ ProcessMatch(pMatch); } ); }

	// row-id ordered processing is only worthwhile when at least one action evaluates an expression
	bool ProcessInRowIdOrder() const final { return m_dActions.any_of ( []( const MapAction_t & i ){ return i.IsExprEval(); } ); }

private:
	/// a single per-attribute transform step of the plan
	struct MapAction_t
	{
		// what is to do with current position
		enum Action_e
		{
			SETZERO,		// set default (0)
			COPY,			// copy as is (plain attribute)
			COPYBLOB,		// deep copy (unpack/pack) the blob
			COPYJSONFIELD,	// json field (packed blob with type)
			EVALEXPR_INT,	// evaluate the expression for the recently added int attribute
			EVALEXPR_BIGINT,// evaluate the expression for the recently added bigint attribute
			EVALEXPR_STR,	// evaluate the expression for the recently added string attribute
			EVALEXPR_MVA	// evaluate the expression for the recently added mva attribute
		};

		const CSphAttrLocator *	m_pFrom;	// source locator (nullptr for SETZERO)
		const CSphAttrLocator *	m_pTo;		// target locator in the new schema
		ISphExprRefPtr_c		m_pExpr;	// expression used by the EVALEXPR_* actions
		Action_e				m_eAction;

		// last columnar storage the expression was bound to; cached so we only issue
		// SPH_EXPR_SET_COLUMNAR on change (rebinding recreates iterators)
		mutable columnar::Columnar_i * m_pPrevColumnar = nullptr;

		bool IsExprEval() const
		{
			return m_eAction==EVALEXPR_INT || m_eAction==EVALEXPR_BIGINT || m_eAction==EVALEXPR_STR || m_eAction==EVALEXPR_MVA;
		}
	};

	int						m_iDynamicSize;		// target dynamic size, from schema
	CSphVector<MapAction_t>	m_dActions;			// the recipe
	CSphVector<std::pair<CSphAttrLocator, CSphAttrLocator>> m_dRemapCmp;	// remap @int_attr_ATTR -> ATTR
	CSphVector<int>			m_dDataPtrAttrs;	// orphaned attrs we have to free before swap to new attr
	GetBlobPoolFromMatch_fn	m_fnGetBlobPool;	// provides base for pool copying
	GetColumnarFromMatch_fn	m_fnGetColumnar;	// columnar storage getter

	static void SetupAction ( const CSphColumnInfo & tOld, const CSphColumnInfo & tNew, const ISphSchema * pOldSchema, MapAction_t & tAction );
	inline void ProcessMatch ( CSphMatch * pMatch );
	inline static void PerformAction ( const MapAction_t & tAction, CSphMatch * pMatch, CSphMatch & tResult, const BYTE * pBlobPool, columnar::Columnar_i * pColumnar );
};
/// Build the transform plan: for every attribute of the new schema decide how its value
/// is produced from an old-schema match (zero-fill by default, then copy/repack/eval
/// for attributes present in both schemas).
MatchesToNewSchema_c::MatchesToNewSchema_c ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema, GetBlobPoolFromMatch_fn fnGetBlobPool, GetColumnarFromMatch_fn fnGetColumnar )
	: m_iDynamicSize ( pNewSchema->GetDynamicSize () )
	, m_fnGetBlobPool ( std::move ( fnGetBlobPool ) )
	, m_fnGetColumnar ( std::move ( fnGetColumnar ) )
{
	assert ( pOldSchema && pNewSchema );

	// prepare transforming recipe

	// initial state: set all new columns to be reset by default
	for ( int i = 0; i<pNewSchema->GetAttrsCount(); ++i )
		m_dActions.Add ( { nullptr, &pNewSchema->GetAttr(i).m_tLocator, nullptr, MapAction_t::SETZERO } );

	// add mapping from old to new according to column type
	for ( int i = 0; i<pOldSchema->GetAttrsCount(); ++i )
	{
		const CSphColumnInfo & tOld = pOldSchema->GetAttr(i);
		auto iNewIdx = pNewSchema->GetAttrIndex ( tOld.m_sName.cstr () );
		if ( iNewIdx == -1 )
		{
			// dataptr present in old, but not in the new - mark it for releasing
			if ( sphIsDataPtrAttr ( tOld.m_eAttrType ) && tOld.m_tLocator.m_bDynamic )
				m_dDataPtrAttrs.Add( tOld.m_tLocator.m_iBitOffset >> ROWITEM_SHIFT );
			continue;
		}

		const CSphColumnInfo & tNew = pNewSchema->GetAttr(iNewIdx);
		auto & tAction = m_dActions[iNewIdx];
		SetupAction ( tOld, tNew, pOldSchema, tAction );
	}

	// need to update @int_attr_ locator to use new schema
	// no need to pass pOldSchema as we remap only new schema pointers
	// also need to update group sorter keypart to be str_ptr in caller code SetSchema
	FnSortGetStringRemap ( *pNewSchema, *pNewSchema, [this, pNewSchema] ( int iSrc, int iDst )
	{
		m_dRemapCmp.Add ( { pNewSchema->GetAttr(iSrc).m_tLocator, pNewSchema->GetAttr(iDst).m_tLocator } );
	} );
}
/// Pick the transform action for one attribute present in both schemas.
/// Priority: columnar-with-expression, then same-type copy, then blob/json repack.
void MatchesToNewSchema_c::SetupAction ( const CSphColumnInfo & tOld, const CSphColumnInfo & tNew, const ISphSchema * pOldSchema, MapAction_t & tAction )
{
	tAction.m_pFrom = &tOld.m_tLocator;

	// columnar attr replaced by an expression
	// we now need to create an expression that fetches data from columnar storage
	if ( tOld.IsColumnar() && tNew.m_pExpr )
	{
		CSphString sError;
		ExprParseArgs_t tExprArgs;
		tAction.m_pExpr = sphExprParse ( tOld.m_sName.cstr(), *pOldSchema, sError, tExprArgs );
		assert ( tAction.m_pExpr );

		// choose the evaluation flavor by the target attribute type
		switch ( tNew.m_eAttrType )
		{
		case SPH_ATTR_STRINGPTR:		tAction.m_eAction = MapAction_t::EVALEXPR_STR;		break;
		case SPH_ATTR_BIGINT:			tAction.m_eAction = MapAction_t::EVALEXPR_BIGINT;	break;
		case SPH_ATTR_UINT32SET_PTR:
		case SPH_ATTR_INT64SET_PTR:
		case SPH_ATTR_FLOAT_VECTOR_PTR:	tAction.m_eAction = MapAction_t::EVALEXPR_MVA;		break;
		default:						tAction.m_eAction = MapAction_t::EVALEXPR_INT;		break;
		}

		return;
	}

	// same type - just copy attr as is
	if ( tOld.m_eAttrType==tNew.m_eAttrType )
	{
		tAction.m_eAction = MapAction_t::COPY;
		return;
	}

	// remaining case: a pooled attribute in the old schema, a ptr attribute in the new one
	assert ( !sphIsDataPtrAttr ( tOld.m_eAttrType ) && sphIsDataPtrAttr ( tNew.m_eAttrType ) );

	if ( tOld.m_eAttrType==SPH_ATTR_JSON_FIELD )
		tAction.m_eAction = MapAction_t::COPYJSONFIELD;
	else
		tAction.m_eAction = MapAction_t::COPYBLOB;
}
/// Apply the prepared plan to a single match: build a fresh dynamic row,
/// fill it action by action, then swap it into the match.
void MatchesToNewSchema_c::ProcessMatch ( CSphMatch * pMatch )
{
	CSphMatch tResult;
	tResult.Reset ( m_iDynamicSize );

	const BYTE * pBlobPool = m_fnGetBlobPool(pMatch);
	columnar::Columnar_i * pColumnar = m_fnGetColumnar(pMatch);

	for ( const auto & tAction : m_dActions )
		PerformAction ( tAction, pMatch, tResult, pBlobPool, pColumnar );

	// remap comparator attributes
	for ( const auto & tRemap : m_dRemapCmp )
		tResult.SetAttr ( tRemap.second, tResult.GetAttr ( tRemap.first ) );

	// free original orphaned pointers
	CSphSchemaHelper::FreeDataSpecial ( *pMatch, m_dDataPtrAttrs );

	// adopt the new row; the match no longer points at static storage
	Swap ( pMatch->m_pDynamic, tResult.m_pDynamic );
	pMatch->m_pStatic = nullptr;
}
/// Produce one target attribute value according to the action and store it into tResult.
inline void MatchesToNewSchema_c::PerformAction ( const MapAction_t & tAction, CSphMatch * pMatch, CSphMatch & tResult, const BYTE * pBlobPool, columnar::Columnar_i * pColumnar )
{
	// try to minimize columnar switches inside the expression as this leads to recreating iterators
	if ( tAction.IsExprEval() && pColumnar!=tAction.m_pPrevColumnar )
	{
		tAction.m_pExpr->Command ( SPH_EXPR_SET_COLUMNAR, (void*)pColumnar );
		tAction.m_pPrevColumnar = pColumnar;
	}

	SphAttr_t uValue = 0;
	switch ( tAction.m_eAction )
	{
	case MapAction_t::SETZERO:
		break;

	// plain attribute: fetch via the source locator
	case MapAction_t::COPY:
		uValue = pMatch->GetAttr ( *tAction.m_pFrom );
		break;

	// pooled blob: repack it into a self-contained ptr attribute
	case MapAction_t::COPYBLOB:
		{
			auto dBlob = sphGetBlobAttr ( *pMatch, *tAction.m_pFrom, pBlobPool );
			uValue = (SphAttr_t) sphPackPtrAttr ( dBlob );
		}
		break;

	// json field: pack node type byte + raw node data into a ptr attribute
	case MapAction_t::COPYJSONFIELD:
		{
			SphAttr_t uPacked = pMatch->GetAttr ( *tAction.m_pFrom );
			const BYTE * pStr = uPacked ? pBlobPool+sphJsonUnpackOffset ( uPacked ) : nullptr;
			ESphJsonType eJson = sphJsonUnpackType ( uPacked );

			if ( pStr && eJson!=JSON_NULL )
			{
				int iLengthBytes = sphJsonNodeSize ( eJson, pStr );
				BYTE * pData = nullptr;
				uValue = (SphAttr_t) sphPackPtrAttr ( iLengthBytes+1, &pData );

				// store field type before the field
				*pData = (BYTE) eJson;
				memcpy ( pData+1, pStr, iLengthBytes );
			}
		}
		break;

	// columnar-backed attributes: evaluate the prepared expression
	case MapAction_t::EVALEXPR_INT:
		uValue = (SphAttr_t)tAction.m_pExpr->IntEval(*pMatch);
		break;

	case MapAction_t::EVALEXPR_BIGINT:
		uValue = (SphAttr_t)tAction.m_pExpr->Int64Eval(*pMatch);
		break;

	case MapAction_t::EVALEXPR_STR:
		uValue = (SphAttr_t)tAction.m_pExpr->StringEvalPacked(*pMatch);
		break;

	case MapAction_t::EVALEXPR_MVA:
		uValue = (SphAttr_t)tAction.m_pExpr->Int64Eval(*pMatch);
		break;

	default:
		assert(false && "Unknown state");
	}

	tResult.SetAttr ( *tAction.m_pTo, uValue );
}
  333. //////////////////////////////////////////////////////////////////////////
/// common base for match sorters: owns the sorter schema, comparator state,
/// randomization flag, total counter and just-pushed/just-popped bookkeeping
class MatchSorter_c : public ISphMatchSorter
{
public:
	void		SetState ( const CSphMatchComparatorState & tState ) override;
	const CSphMatchComparatorState & GetState() const override { return m_tState; }
	void		SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) override;
	const ISphSchema *	GetSchema() const override { return ( ISphSchema *) m_pSchema; }
	void		SetColumnar ( columnar::Columnar_i * pColumnar ) override { m_pColumnar = pColumnar; }
	int64_t		GetTotalCount() const override { return m_iTotal; }
	void		CloneTo ( ISphMatchSorter * pTrg ) const override;
	bool		CanBeCloned() const override;
	void		SetFilteredAttrs ( const sph::StringSet & hAttrs, bool bAddDocid ) override;
	void		TransformPooled2StandalonePtrs ( GetBlobPoolFromMatch_fn fnBlobPoolFromMatch, GetColumnarFromMatch_fn fnGetColumnarFromMatch, bool bFinalizeSorters ) override;
	void		SetRandom ( bool bRandom ) override { m_bRandomize = bRandom; }
	bool		IsRandom() const override { return m_bRandomize; }
	int			GetMatchCapacity() const override { return m_iMatchCapacity; }
	RowTagged_t				GetJustPushed() const override { return m_tJustPushed; }
	VecTraits_T<RowTagged_t>	GetJustPopped() const override { return m_dJustPopped; }

protected:
	SharedPtr_t<ISphSchema>		m_pSchema;		///< sorter schema (adds dynamic attributes on top of index schema)
	CSphMatchComparatorState	m_tState;		///< protected to set m_iNow automatically on SetState() calls
	StrVec_t					m_dTransformed;	///< attr names collected by SetFilteredAttrs(), consumed by TransformPooled2StandalonePtrs()
	columnar::Columnar_i *		m_pColumnar = nullptr;
	bool						m_bRandomize = false;
	int64_t						m_iTotal = 0;		///< total matches ever pushed (not just kept)
	int							m_iMatchCapacity = 0;
	RowTagged_t					m_tJustPushed;		///< row pushed by the most recent Push() call
	CSphTightVector<RowTagged_t> m_dJustPopped;		///< rows evicted by the most recent Push() call
};
/// Adopt a new sorter schema. Comparator locators are rebound while the old schema
/// (still held in m_pSchema) is available, then the schema pointer is replaced.
void MatchSorter_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	assert ( pSchema );
	m_tState.FixupLocators ( m_pSchema, pSchema, bRemapCmp );
	m_pSchema = pSchema;
}
/// Adopt a comparator state; m_iNow is refreshed with the current time on every call.
void MatchSorter_c::SetState ( const CSphMatchComparatorState & tState )
{
	m_tState = tState;
	m_tState.m_iNow = (DWORD) time ( nullptr );
}
/// Copy this sorter's settings into pTrg: randomization flag, comparator state,
/// and a clone of the schema (no comparator remapping on the target).
void MatchSorter_c::CloneTo ( ISphMatchSorter * pTrg ) const
{
	assert ( pTrg );
	pTrg->SetRandom(m_bRandomize);
	pTrg->SetState(m_tState);
	pTrg->SetSchema ( m_pSchema->CloneMe(), false );
}
  381. bool MatchSorter_c::CanBeCloned() const
  382. {
  383. if ( !m_pSchema )
  384. return true;
  385. bool bGotStatefulUDF = false;
  386. for ( int i = 0; i < m_pSchema->GetAttrsCount() && !bGotStatefulUDF; i++ )
  387. {
  388. auto & pExpr = m_pSchema->GetAttr(i).m_pExpr;
  389. if ( pExpr )
  390. pExpr->Command ( SPH_EXPR_GET_STATEFUL_UDF, &bGotStatefulUDF );
  391. }
  392. return !bGotStatefulUDF;
  393. }
  394. void MatchSorter_c::SetFilteredAttrs ( const sph::StringSet & hAttrs, bool bAddDocid )
  395. {
  396. assert ( m_pSchema );
  397. m_dTransformed.Reserve ( hAttrs.GetLength() );
  398. if ( bAddDocid && !hAttrs[sphGetDocidName()] )
  399. m_dTransformed.Add ( sphGetDocidName() );
  400. for ( auto & tName : hAttrs )
  401. {
  402. const CSphColumnInfo * pCol = m_pSchema->GetAttr ( tName.first.cstr() );
  403. if ( pCol )
  404. m_dTransformed.Add ( pCol->m_sName );
  405. }
  406. }
/// Rebuild the sorter around a standalone schema (pooled/columnar attributes become
/// self-contained ptr attributes), convert every collected match to that schema,
/// and finally adopt the new schema.
void MatchSorter_c::TransformPooled2StandalonePtrs ( GetBlobPoolFromMatch_fn fnBlobPoolFromMatch, GetColumnarFromMatch_fn fnGetColumnarFromMatch, bool bFinalizeSorters )
{
	auto * pOldSchema = GetSchema();
	assert ( pOldSchema );

	// create new standalone schema (from old, or from filtered)
	auto * pNewSchema = new CSphSchema ( "standalone" );
	for ( int i = 0; i<pOldSchema->GetFieldsCount (); ++i )
		pNewSchema->AddField ( pOldSchema->GetField(i) );

	TransformedSchemaBuilder_c tBuilder ( *pOldSchema, *pNewSchema );

	if ( m_dTransformed.IsEmpty() )
	{
		// keep id as the first attribute
		const CSphColumnInfo* pId = pOldSchema->GetAttr ( sphGetDocidName() );
		if ( pId )
			tBuilder.AddAttr ( sphGetDocidName() );

		// add the rest
		for ( int i = 0; i<pOldSchema->GetAttrsCount (); i++ )
		{
			const CSphColumnInfo & tAttr = pOldSchema->GetAttr(i);
			if ( tAttr.m_sName!=sphGetDocidName() )
				tBuilder.AddAttr ( tAttr.m_sName );
		}
	}
	else
	{
		// keep id as the first attribute, then the rest.
		m_dTransformed.any_of ( [&tBuilder] ( const auto& sName ) { auto bID = ( sName==sphGetDocidName() ); if ( bID ) tBuilder.AddAttr(sName); return bID; } );
		m_dTransformed.for_each ( [&tBuilder] ( const auto& sName ) { if ( sName!=sphGetDocidName() ) tBuilder.AddAttr(sName); } );
	}

	// rebind expression locators from the old schema to the new one
	for ( int i = 0; i <pNewSchema->GetAttrsCount(); ++i )
	{
		auto & pExpr = pNewSchema->GetAttr(i).m_pExpr;
		if ( pExpr )
			pExpr->FixupLocator ( pOldSchema, pNewSchema );
	}

	// convert already collected matches, then adopt the new schema
	MatchesToNewSchema_c fnFinal ( pOldSchema, pNewSchema, std::move ( fnBlobPoolFromMatch ), std::move ( fnGetColumnarFromMatch ) );
	Finalize ( fnFinal, false, bFinalizeSorters );
	SetSchema ( pNewSchema, true );
}
  446. //////////////////////////////////////////////////////////////////////////
/// match-sorting priority queue traits
/// owns the match storage (m_dData) plus an index vector (m_dIData), so queue
/// reordering swaps ints instead of whole CSphMatch objects
class CSphMatchQueueTraits : public MatchSorter_c, ISphNoncopyable
{
protected:
	int							m_iSize;	// size of internal struct we can operate
	CSphFixedVector<CSphMatch>	m_dData;
	CSphTightVector<int>		m_dIData;	// indexes into m_pData, to avoid extra moving of matches themselves

public:
	/// ctor
	explicit CSphMatchQueueTraits ( int iSize )
		: m_iSize ( iSize )
		, m_dData { iSize }
	{
		assert ( iSize>0 );
		m_iMatchCapacity = iSize;
		m_dIData.Resize ( iSize );
		m_tState.m_iNow = (DWORD) time ( nullptr );

		// pre-assign each slot its own index once; the following Resize(0) empties
		// the vector but keeps the values, so a later Add() re-exposes them
		ARRAY_FOREACH ( i, m_dIData )
			m_dIData[i] = i;
		m_dIData.Resize ( 0 );
	}

	/// dtor make FreeDataPtrs here, then ResetDynamic also get called on m_dData d-tr.
	~CSphMatchQueueTraits () override
	{
		if ( m_pSchema )
			m_dData.Apply ( [this] ( CSphMatch& tMatch ) { m_pSchema->FreeDataPtrs ( tMatch ); } );
	}

public:
	int GetLength () override { return Used(); }

	// helper: swap queue guts with another instance of the same size
	void SwapMatchQueueTraits ( CSphMatchQueueTraits& rhs )
	{
		// ISphMatchSorter
		::Swap ( m_iTotal, rhs.m_iTotal );

		// CSphMatchQueueTraits
		m_dData.SwapData ( rhs.m_dData );
		m_dIData.SwapData ( rhs.m_dIData );
		assert ( m_iSize==rhs.m_iSize );
	}

	const VecTraits_T<CSphMatch>& GetMatches() const { return m_dData; }

protected:
	// match behind the last used index
	CSphMatch * Last () const
	{
		return &m_dData[m_dIData.Last ()];
	}

	// match behind the iElem-th used index
	CSphMatch & Get ( int iElem ) const
	{
		return m_dData[m_dIData[iElem]];
	}

	CSphMatch & Add ()
	{
		// proper ids at m_dIData already set at constructor
		// they will be same during life-span - that is why Add used like anti-Pop
		int iLast = m_dIData.Add();
		return m_dData[iLast];
	}

	// number of currently used slots
	int Used() const
	{
		return m_dIData.GetLength();
	}

	bool IsEmpty() const
	{
		return m_dIData.IsEmpty();
	}

	void ResetAfterFlatten()
	{
		m_dIData.Resize(0);
	}

	// reset dynamic parts of the first iMaxUsed slots; returns -1 (fresh high-water mark)
	int ResetDynamic ( int iMaxUsed )
	{
		for ( int i=0; i<iMaxUsed; i++ )
			m_dData[i].ResetDynamic();
		return -1;
	}

	// free data ptrs AND reset dynamic parts of the first iMaxUsed slots; returns -1
	int ResetDynamicFreeData ( int iMaxUsed )
	{
		for ( int i=0; i<iMaxUsed; i++ )
		{
			m_pSchema->FreeDataPtrs ( m_dData[i] );
			m_dData[i].ResetDynamic();
		}
		return -1;
	}
};
  531. //////////////////////////////////////////////////////////////////////////
  532. // SORTING QUEUES
  533. //////////////////////////////////////////////////////////////////////////
/// compares two matches by their indexes into the owner queue's storage,
/// with the template comparator's order inverted
template < typename COMP >
struct InvCompareIndex_fn
{
	const VecTraits_T<CSphMatch>& m_dBase;		// the queue's match storage
	const CSphMatchComparatorState & m_tState;	// the queue's comparator state

	explicit InvCompareIndex_fn ( const CSphMatchQueueTraits & tBase )
		: m_dBase ( tBase.GetMatches() )
		, m_tState ( tBase.GetState() )
	{}

	bool IsLess ( int a, int b ) const // inverts COMP::IsLess
	{
		return COMP::IsLess ( m_dBase[b], m_dBase[a], m_tState );
	}
};
  548. #define LOG_COMPONENT_KMQ __LINE__ << " *(" << this << ") "
  549. #define LOG_LEVEL_DIAG false
  550. #define KMQ LOC(DIAG,KMQ)
/// heap sorter
/// plain binary heap based PQ: keeps up to m_iSize best matches; the comparison
/// is inverted (InvCompareIndex_fn) so the worst kept match sits at the root
/// and can be evicted cheaply
template < typename COMP, bool NOTIFICATIONS >
class CSphMatchQueue final : public CSphMatchQueueTraits
{
	using MYTYPE = CSphMatchQueue<COMP, NOTIFICATIONS>;
	LOC_ADD;

public:
	/// ctor
	explicit CSphMatchQueue ( int iSize )
		: CSphMatchQueueTraits ( iSize )
		, m_fnComp ( *this )
	{
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve(1);
	}

	bool IsGroupby () const final { return false; }

	// the heap root is the worst match currently kept
	const CSphMatch * GetWorst() const final { return m_dIData.IsEmpty() ? nullptr : Root(); }

	/// add entry to the queue (cloned via the schema)
	bool Push ( const CSphMatch & tEntry ) final { return PushT ( tEntry, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); }); }

	/// batch push; rows with INVALID_ROWID are not pushed but still counted in the total
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
	{
		for ( auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushT ( i, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); } );
			else
				m_iTotal++;
	}

	/// this sorter does no grouping
	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }

	/// store all entries into specified location in sorted order, and remove them from queue
	/// (the heap pops worst-first, so the output buffer is filled back to front)
	int Flatten ( CSphMatch * pTo ) final
	{
		KMQ << "flatten";
		assert ( !IsEmpty() );
		int iReadyMatches = Used();
		pTo += iReadyMatches;
		while ( !IsEmpty() )
		{
			--pTo;
			// m_pSchema->FreeDataPtrs ( *pTo );
			PopAndProcess_T ( [pTo] ( CSphMatch & tRoot ) { Swap ( *pTo, tRoot ); return true; } );
		}
		m_iTotal = 0;
		return iReadyMatches;
	}

	/// finalize, perform final sort/cut as needed
	void Finalize ( MatchProcessor_i & tProcessor, bool bCallProcessInResultSetOrder, bool bFinalizeMatches ) final
	{
		KMQ << "finalize";
		if ( !GetLength() )
			return;

		if ( bCallProcessInResultSetOrder )
			m_dIData.Sort ( m_fnComp );

		if ( tProcessor.ProcessInRowIdOrder() )
		{
			// process via an aside (tag,rowid)-sorted copy of the index array
			CSphFixedVector<int> dSorted ( m_dIData.GetLength() );
			memcpy ( dSorted.Begin(), m_dIData.Begin(), m_dIData.GetLength()*sizeof(m_dIData[0]) );

			// sort by tag, rowid. minimize columnar switches inside expressions and minimize seeks inside columnar iterators
			dSorted.Sort ( Lesser ( [this] ( int l, int r )
				{
					int iTagL = m_dData[l].m_iTag;
					int iTagR = m_dData[r].m_iTag;
					if ( iTagL!=iTagR )
						return iTagL < iTagR;

					return m_dData[l].m_tRowID < m_dData[r].m_tRowID;
				}
			) );

			CSphFixedVector<CSphMatch *> dMatchPtrs ( dSorted.GetLength() );
			ARRAY_FOREACH ( i, dSorted )
				dMatchPtrs[i] = &m_dData[dSorted[i]];

			tProcessor.Process(dMatchPtrs);
		}
		else
		{
			for ( auto iMatch : m_dIData )
				tProcessor.Process ( &m_dData[iMatch] );
		}
	}

	// fixme! test
	ISphMatchSorter * Clone () const final
	{
		auto pClone = new MYTYPE ( m_iSize );
		CloneTo ( pClone );
		return pClone;
	}

	// FIXME! test CSphMatchQueue
	/// move matches into pRhs (swapped, not cloned); totals are summed explicitly
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		KMQ << "moveto";
		// m_dLogger.Print ();

		auto& dRhs = *(MYTYPE *) pRhs;
		if ( IsEmpty() )
			return; // no matches, nothing to do.

		// dRhs.m_dLogger.Print ();
		// install into virgin sorter - no need to do something; just swap
		if ( dRhs.IsEmpty() )
		{
			SwapMatchQueueTraits ( dRhs );
			return;
		}

		// work as in non-ordered finalize call, but we not need to
		// clone the matches, may just move them instead.

		// total need special care: just add two values and don't rely
		// on result of moving, since it will be wrong
		auto iTotal = dRhs.m_iTotal;
		for ( auto i : m_dIData )
			dRhs.PushT ( m_dData[i], [] ( CSphMatch & tTrg, CSphMatch & tMatch ) { Swap ( tTrg, tMatch ); } );

		dRhs.m_iTotal = m_iTotal + iTotal;
	}

	void SetMerge ( bool bMerge ) final {}

private:
	InvCompareIndex_fn<COMP> m_fnComp;

	// worst kept match (heap top, since the comparison is inverted)
	CSphMatch * Root() const
	{
		return &m_dData [ m_dIData.First() ];
	}

	/// generic add entry to the queue
	template <typename MATCH, typename PUSHER>
	bool PushT ( MATCH && tEntry, PUSHER && PUSH )
	{
		++m_iTotal;

		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}

		// queue full: either reject the entry or evict the current worst
		if ( Used()==m_iSize )
		{
			// if it's worse that current min, reject it, else pop off current min
			if ( COMP::IsLess ( tEntry, *Root(), m_tState ) )
				return true;
			else
				PopAndProcess_T ( [] ( const CSphMatch & ) { return false; } );
		}

		// do add
		PUSH ( Add(), std::forward<MATCH> ( tEntry ));

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( *Last() );

		int iEntry = Used()-1;

		// shift up if needed, so that worst (lesser) ones float to the top
		while ( iEntry )
		{
			int iParent = ( iEntry-1 ) / 2;
			if ( !m_fnComp.IsLess ( m_dIData[iParent], m_dIData[iEntry] ) )
				break;

			// entry is less than parent, should float to the top
			Swap ( m_dIData[iEntry], m_dIData[iParent] );
			iEntry = iParent;
		}
		return true;
	}

	/// remove root (ie. top priority) entry
	template<typename POPPER>
	void PopAndProcess_T ( POPPER && fnProcess )
	{
		assert ( !IsEmpty() );

		auto& iJustRemoved = m_dIData.Pop();
		if ( !IsEmpty() ) // for empty just popped is the root
			Swap ( m_dIData.First (), iJustRemoved );

		// if the processor did not take ownership of the match, notify and free it
		if ( !fnProcess ( m_dData[iJustRemoved] ) )
		{
			// make the last entry my new root
			if constexpr ( NOTIFICATIONS )
			{
				if ( m_dJustPopped.IsEmpty () )
					m_dJustPopped.Add ( RowTagged_t ( m_dData[iJustRemoved] ) );
				else
					m_dJustPopped[0] = RowTagged_t ( m_dData[iJustRemoved] );
			}

			m_pSchema->FreeDataPtrs ( m_dData[iJustRemoved] );
		}

		// sift down if needed
		int iEntry = 0;
		auto iUsed = Used();
		while (true)
		{
			// select child
			int iChild = (iEntry*2) + 1;
			if ( iChild>=iUsed )
				break;

			// select smallest child
			if ( iChild+1<iUsed )
				if ( m_fnComp.IsLess ( m_dIData[iChild], m_dIData[iChild+1] ) )
					++iChild;

			// if smallest child is less than entry, do float it to the top
			if ( m_fnComp.IsLess ( m_dIData[iEntry], m_dIData[iChild] ) )
			{
				Swap ( m_dIData[iChild], m_dIData[iEntry] );
				iEntry = iChild;
				continue;
			}
			break;
		}
	}
};
  745. #define LOG_COMPONENT_KBF __LINE__ << " *(" << this << ") "
  746. #define KBF LOC(DIAG,KBF)
  747. //////////////////////////////////////////////////////////////////////////
/// K-buffer (generalized double buffer) sorter
/// faster worst-case but slower average-case than the heap sorter
/// invoked with select ... OPTION sort_method=kbuffer
/// collects up to m_iSize*COEFF matches, then partitions and cuts back to the best m_iSize
template < typename COMP, bool NOTIFICATIONS >
class CSphKbufferMatchQueue : public CSphMatchQueueTraits
{
	using MYTYPE = CSphKbufferMatchQueue<COMP, NOTIFICATIONS>;
	InvCompareIndex_fn<COMP> m_dComp;

	LOC_ADD;

public:
	/// ctor; the underlying storage is COEFF times larger than the requested size
	explicit CSphKbufferMatchQueue ( int iSize )
		: CSphMatchQueueTraits ( iSize*COEFF )
		, m_dComp ( *this )
	{
		m_iSize /= COEFF;
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Reserve ( m_iSize*(COEFF-1) );
	}

	bool IsGroupby () const final { return false; }

	// at most m_iSize results are ever reported
	int GetLength () final { return Min ( Used(), m_iSize ); }

	/// add entry to the queue (cloned via the schema)
	bool Push ( const CSphMatch & tEntry ) override { return PushT ( tEntry, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); }); }

	/// batch push; rows with INVALID_ROWID are not pushed but still counted in the total
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) override
	{
		for ( const auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushT ( i, [this] ( CSphMatch & tTrg, const CSphMatch & tMatch ) { m_pSchema->CloneMatch ( tTrg, tMatch ); } );
			else
				m_iTotal++;
	}

	/// this sorter does no grouping
	bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }

	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) final
	{
		KBF << "Flatten";
		FinalizeMatches ();
		auto iReadyMatches = Used();

		for ( auto iMatch : m_dIData )
		{
			KBF << "fltn " << m_dData[iMatch].m_iTag << ":" << m_dData[iMatch].m_tRowID;
			Swap ( *pTo, m_dData[iMatch] );
			++pTo;
		}

		// release dynamic rows of every slot that was ever used
		m_iMaxUsed = ResetDynamic ( m_iMaxUsed );

		// clean up for the next work session
		m_pWorst = nullptr;
		m_iTotal = 0;
		m_bFinalized = false;
		m_dIData.Resize(0);

		return iReadyMatches;
	}

	/// finalize, perform final sort/cut as needed
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final
	{
		KBF << "Finalize";
		if ( IsEmpty() )
			return;

		if ( bFinalizeMatches )
			FinalizeMatches();

		for ( auto iMatch : m_dIData )
			tProcessor.Process ( &m_dData[iMatch] );
	}

	ISphMatchSorter* Clone() const final
	{
		auto pClone = new MYTYPE ( m_iSize );
		CloneTo ( pClone );
		return pClone;
	}

	// FIXME! test CSphKbufferMatchQueue
	// FIXME! need to deal with justpushed/justpopped any other way!
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		auto& dRhs = *(CSphKbufferMatchQueue<COMP, NOTIFICATIONS>*) pRhs;
		if ( IsEmpty () )
			return;

		// install into a virgin sorter - just swap the guts over
		if ( dRhs.IsEmpty () )
		{
			SwapMatchQueueTraits (dRhs);
			dRhs.m_pWorst = m_pWorst;
			dRhs.m_bFinalized = m_bFinalized;
			return;
		}

		FinalizeMatches();

		// both are non-empty - need to process.
		// work as finalize call, but don't clone the matches; move them instead.
		// total need special care!
		auto iTotal = dRhs.m_iTotal;
		for ( auto iMatch : m_dIData )
		{
			dRhs.PushT ( m_dData[iMatch],
				[] ( CSphMatch & tTrg, CSphMatch & tMatch ) {
					Swap ( tTrg, tMatch );
				});
		}

		dRhs.m_iTotal = m_iTotal + iTotal;
	}

	void SetMerge ( bool bMerge ) final {}

protected:
	CSphMatch *			m_pWorst = nullptr;		// worst kept match once the buffer has been sorted at least once
	bool				m_bFinalized = false;	// true when m_dIData is sorted (and cut to m_iSize)
	int					m_iMaxUsed = -1;		// high-water mark of used slots, for freeing dynamics later
	static const int	COEFF = 4;

private:
	void SortMatches () // sort from best to worst
	{
		m_dIData.Sort ( m_dComp );
	}

	// notify (if enabled) and free a match that is being dropped
	void FreeMatch ( int iMatch )
	{
		if constexpr ( NOTIFICATIONS )
			m_dJustPopped.Add ( RowTagged_t ( m_dData[iMatch] ) );
		m_pSchema->FreeDataPtrs ( m_dData[iMatch] );
	}

	// drop everything beyond the first m_iSize indexes
	void CutTail()
	{
		if ( Used()<=m_iSize)
			return;

		m_iMaxUsed = Max ( m_iMaxUsed, this->m_dIData.GetLength () ); // memorize it for free dynamics later.

		m_dIData.Slice ( m_iSize ).Apply ( [this] ( int iMatch ) { FreeMatch ( iMatch ); } );
		m_dIData.Resize ( m_iSize );
	}

	// conception: we have array of N*COEFF elems.
	// We need only N the best elements from it (rest have to be disposed).
	// direct way: rsort, then take first N elems.
	// this way: rearrange array by performing one pass of quick sort
	// if we have exactly N elems left hand from pivot - we're done.
	// otherwise repeat rearranging only to right or left part until the target achieved.
	void BinaryPartition ()
	{
		int iPivot = m_dIData[m_iSize / COEFF+1];
		int iMaxIndex = m_iSize-1;
		int a=0;
		int b=Used()-1;
		while (true)
		{
			// one quicksort partition pass over [a..b]
			int i=a;
			int j=b;
			while (i<=j)
			{
				while (m_dComp.IsLess (m_dIData[i],iPivot)) ++i;
				while (m_dComp.IsLess (iPivot, m_dIData[j])) --j;
				if ( i<=j ) ::Swap( m_dIData[i++], m_dIData[j--]);
			}
			if ( iMaxIndex == j )
				break;

			if ( iMaxIndex < j)
				b = j; // too many elems acquired; continue with left part
			else
				a = i; // too less elems acquired; continue with right part
			iPivot = m_dIData[( a * ( COEFF-1 )+b ) / COEFF];
		}
	}

	// bring the best m_iSize indexes to the front, then dispose of the rest
	void RepartitionMatches ()
	{
		assert ( Used ()>m_iSize );
		BinaryPartition ();
		CutTail();
	}

	// sort (and cut, if over-full) the collected matches; idempotent via m_bFinalized
	void FinalizeMatches ()
	{
		if ( m_bFinalized )
			return;
		m_bFinalized = true;

		if ( Used ()>m_iSize )
			RepartitionMatches();

		SortMatches();
	}

	// generic push entry (add it some way to the queue clone or swap PUSHER depends on)
	template<typename MATCH, typename PUSHER>
	FORCE_INLINE bool PushT ( MATCH && tEntry, PUSHER && PUSH )
	{
		if constexpr ( NOTIFICATIONS )
		{
			m_tJustPushed = RowTagged_t();
			m_dJustPopped.Resize(0);
		}

		// quick early rejection checks
		++m_iTotal;
		if ( m_pWorst && COMP::IsLess ( tEntry, *m_pWorst, m_tState ) )
			return true;

		// quick check passed
		// fill the data, back to front
		m_bFinalized = false;
		PUSH ( Add(), std::forward<MATCH> ( tEntry ));

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( *Last() );

		// do the initial sort once
		if ( m_iTotal==m_iSize )
		{
			assert ( Used()==m_iSize && !m_pWorst );
			SortMatches();
			m_pWorst = Last();
			m_bFinalized = true;
			return true;
		}

		if ( Used ()<m_iSize*COEFF )
			return true;

		// do the sort/cut when the K-buffer is full
		assert ( Used ()==m_iSize*COEFF );
		RepartitionMatches();
		SortMatches ();
		m_pWorst = Last ();
		m_bFinalized = true;
		return true;
	}
};
  954. //////////////////////////////////////////////////////////////////////////
/// collect list of matched DOCIDs in aside compressed blob
/// (mainly used to collect docs in `DELETE... WHERE` statement)
class CollectQueue_c final : public MatchSorter_c, ISphNoncopyable
{
	using BASE = MatchSorter_c;

public:
	CollectQueue_c ( int iSize, CSphVector<BYTE>& dCollectedValues );

	bool	IsGroupby () const final { return false; }
	int		GetLength () final { return 0; } // that ensures, flatten() will never called;
	bool	Push ( const CSphMatch& tEntry ) final { return PushMatch(tEntry); }
	void	Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
	{
		// batch push; rows with INVALID_ROWID are skipped entirely
		for ( const auto & i : dMatches )
			if ( i.m_tRowID!=INVALID_ROWID )
				PushMatch(i);
	}
	bool	PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }
	int		Flatten ( CSphMatch * ) final { return 0; }
	void	Finalize ( MatchProcessor_i &, bool, bool ) final;
	bool	CanBeCloned() const final { return false; }
	ISphMatchSorter * Clone () const final { return nullptr; }
	void	MoveTo ( ISphMatchSorter *, bool ) final {}
	void	SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final;
	bool	IsCutoffDisabled() const final { return true; }
	void	SetMerge ( bool bMerge ) final {}

private:
	DocID_t					m_iLastID;			// last docid written out (delta-encoding base)
	int						m_iMaxMatches;		// flush threshold for the docid buffer
	CSphVector<DocID_t>		m_dUnsortedDocs;	// docids collected since the last flush
	MemoryWriter_c			m_tWriter;			// writes zipped docid deltas into the caller's blob
	bool					m_bDocIdDynamic = false;	// selects dynamic vs static row as the docid source in PushMatch()

	inline bool	PushMatch ( const CSphMatch & tEntry );
	inline void ProcessPushed();
};
/// ctor: iSize sets the flush threshold; collected (zip-encoded) docids are
/// appended to dCollectedValues, which must outlive this queue
CollectQueue_c::CollectQueue_c ( int iSize, CSphVector<BYTE>& dCollectedValues )
	: m_iLastID ( 0 )
	, m_iMaxMatches ( iSize )
	, m_tWriter ( dCollectedValues )
{}
  994. /// sort/uniq already collected and store them to writer
  995. void CollectQueue_c::ProcessPushed()
  996. {
  997. m_dUnsortedDocs.Uniq();
  998. for ( auto& iCurId : m_dUnsortedDocs )
  999. m_tWriter.ZipOffset ( iCurId - std::exchange ( m_iLastID, iCurId ) );
  1000. m_dUnsortedDocs.Resize ( 0 );
  1001. }
  1002. bool CollectQueue_c::PushMatch ( const CSphMatch & tEntry )
  1003. {
  1004. if ( m_dUnsortedDocs.GetLength() >= m_iMaxMatches && m_dUnsortedDocs.GetLength() == m_dUnsortedDocs.GetLimit() )
  1005. ProcessPushed();
  1006. m_dUnsortedDocs.Add ( sphGetDocID ( m_bDocIdDynamic ? tEntry.m_pDynamic : tEntry.m_pStatic ) );
  1007. return true;
  1008. }
/// final update pass: flush whatever is still staged and reset the delta base
void CollectQueue_c::Finalize ( MatchProcessor_i&, bool, bool )
{
	ProcessPushed();
	m_iLastID = 0; // next collection round starts deltas from zero again
}
/// remember where the docid attribute lives (dynamic vs static row part)
void CollectQueue_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	BASE::SetSchema ( pSchema, bRemapCmp );
	const CSphColumnInfo * pDocId = pSchema->GetAttr ( sphGetDocidName() );
	assert(pDocId); // schema is expected to always carry the docid column
	m_bDocIdDynamic = pDocId->m_tLocator.m_bDynamic;
}
  1022. //////////////////////////////////////////////////////////////////////////
  1023. void SendSqlSchema ( const ISphSchema& tSchema, RowBuffer_i* pRows, const VecTraits_T<int>& dOrder )
  1024. {
  1025. int iCount = 0;
  1026. for ( int i = 0; i < tSchema.GetAttrsCount(); ++i )
  1027. if ( !sphIsInternalAttr ( tSchema.GetAttr ( i ) ) )
  1028. ++iCount;
  1029. assert ( iCount == dOrder.GetLength() );
  1030. pRows->HeadBegin ( iCount );
  1031. for ( int i : dOrder )
  1032. {
  1033. const CSphColumnInfo& tCol = tSchema.GetAttr ( i );
  1034. if ( sphIsInternalAttr ( tCol ) )
  1035. continue;
  1036. pRows->HeadColumn ( tCol.m_sName.cstr(), ESphAttr2MysqlColumn ( tCol.m_eAttrType ) );
  1037. }
  1038. pRows->HeadEnd ( false, 0 );
  1039. }
/// stream one match out as an SQL row: every non-internal attribute in dOrder,
/// formatted per its type. tMatch is non-const because the STRINGPTR/postlimit
/// path temporarily swaps its dynamic row (restored before return).
void SendSqlMatch ( const ISphSchema& tSchema, RowBuffer_i* pRows, CSphMatch& tMatch, const BYTE* pBlobPool, const VecTraits_T<int>& dOrder, bool bDynamicDocid )
{
	auto& dRows = *pRows;
	for ( int i : dOrder )
	{
		const CSphColumnInfo& dAttr = tSchema.GetAttr ( i );
		if ( sphIsInternalAttr ( dAttr ) )
			continue;

		CSphAttrLocator tLoc = dAttr.m_tLocator;
		ESphAttr eAttrType = dAttr.m_eAttrType;
		switch ( eAttrType )
		{
		case SPH_ATTR_STRING:
			// string stored in the blob pool
			dRows.PutArray ( sphGetBlobAttr ( tMatch, tLoc, pBlobPool ) );
			break;

		case SPH_ATTR_STRINGPTR:
		{
			const BYTE* pStr = nullptr;
			if ( dAttr.m_eStage == SPH_EVAL_POSTLIMIT )
			{
				// postlimit expression (e.g. docstore fetch) evaluated right here
				if ( bDynamicDocid )
				{
					dAttr.m_pExpr->StringEval ( tMatch, &pStr );
				} else
				{
					// NOTE(review): hiding m_pDynamic seems intended to force the
					// expression to read docid from the static row - TODO confirm
					auto pDynamic = tMatch.m_pDynamic;
					if ( tMatch.m_pStatic )
						tMatch.m_pDynamic = nullptr;
					dAttr.m_pExpr->StringEval ( tMatch, &pStr );
					tMatch.m_pDynamic = pDynamic;
				}
				dRows.PutString ( (const char*)pStr );
				SafeDeleteArray ( pStr ); // StringEval allocates; we own the result
			} else {
				// already materialized ptr-string inside the match
				pStr = (const BYTE*)tMatch.GetAttr ( tLoc );
				auto dString = sphUnpackPtrAttr ( pStr );
				dRows.PutArray ( dString );
			}
		}
		break;

		case SPH_ATTR_INTEGER:
		case SPH_ATTR_TIMESTAMP:
		case SPH_ATTR_BOOL:
			dRows.PutNumAsString ( (DWORD)tMatch.GetAttr ( tLoc ) );
			break;

		case SPH_ATTR_BIGINT:
			dRows.PutNumAsString ( tMatch.GetAttr ( tLoc ) );
			break;

		case SPH_ATTR_UINT64:
			dRows.PutNumAsString ( (uint64_t)tMatch.GetAttr ( tLoc ) );
			break;

		case SPH_ATTR_FLOAT:
			dRows.PutFloatAsString ( tMatch.GetAttrFloat ( tLoc ) );
			break;

		case SPH_ATTR_DOUBLE:
			dRows.PutDoubleAsString ( tMatch.GetAttrDouble ( tLoc ) );
			break;

		case SPH_ATTR_INT64SET:
		case SPH_ATTR_UINT32SET:
		{
			// MVA stored in the blob pool; render as comma-separated text
			StringBuilder_c dStr;
			auto dMVA = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
			sphMVA2Str ( dMVA, eAttrType == SPH_ATTR_INT64SET, dStr );
			dRows.PutArray ( dStr, false );
			break;
		}

		case SPH_ATTR_INT64SET_PTR:
		case SPH_ATTR_UINT32SET_PTR:
		{
			// MVA packed into a ptr-attr inside the match
			StringBuilder_c dStr;
			sphPackedMVA2Str ( (const BYTE*)tMatch.GetAttr ( tLoc ), eAttrType == SPH_ATTR_INT64SET_PTR, dStr );
			dRows.PutArray ( dStr, false );
			break;
		}

		case SPH_ATTR_FLOAT_VECTOR:
		{
			StringBuilder_c dStr;
			auto dFloatVec = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
			sphFloatVec2Str ( dFloatVec, dStr );
			dRows.PutArray ( dStr, false );
		}
		break;

		case SPH_ATTR_FLOAT_VECTOR_PTR:
		{
			StringBuilder_c dStr;
			sphPackedFloatVec2Str ( (const BYTE*)tMatch.GetAttr(tLoc), dStr );
			dRows.PutArray ( dStr, false );
		}
		break;

		case SPH_ATTR_JSON:
		{
			// whole json document from the blob pool; empty string when absent
			auto pJson = sphGetBlobAttr ( tMatch, tLoc, pBlobPool );
			JsonEscapedBuilder sTmp;
			if ( pJson.second )
				sphJsonFormat ( sTmp, pJson.first );
			dRows.PutArray ( sTmp );
		}
		break;

		case SPH_ATTR_JSON_PTR:
		{
			auto* pString = (const BYTE*)tMatch.GetAttr ( tLoc );
			JsonEscapedBuilder sTmp;
			if ( pString )
			{
				auto dJson = sphUnpackPtrAttr ( pString );
				sphJsonFormat ( sTmp, dJson.first );
			}
			dRows.PutArray ( sTmp );
		}
		break;

		case SPH_ATTR_FACTORS:
		case SPH_ATTR_FACTORS_JSON:
		case SPH_ATTR_JSON_FIELD:
		case SPH_ATTR_JSON_FIELD_PTR:
			assert ( false ); // index schema never contain such column
			break;

		default:
			// unknown attribute type: emit a one-char '-' placeholder
			dRows.Add ( 1 );
			dRows.Add ( '-' );
			break;
		}
	}

	// a failed commit means the client connection is gone; kill the session
	if ( !dRows.Commit() )
		session::SetKilled ( true );
}
  1165. /// stream out matches
  1166. class DirectSqlQueue_c final : public MatchSorter_c, ISphNoncopyable
  1167. {
  1168. using BASE = MatchSorter_c;
  1169. public:
  1170. DirectSqlQueue_c ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, StrVec_t dColumns );
  1171. ~DirectSqlQueue_c() override;
  1172. bool IsGroupby () const final { return false; }
  1173. int GetLength () final { return 0; } // that ensures, flatten() will never called;
  1174. bool Push ( const CSphMatch& tEntry ) final { return PushMatch(const_cast<CSphMatch&>(tEntry)); }
  1175. void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final
  1176. {
  1177. for ( const auto & i : dMatches )
  1178. if ( i.m_tRowID!=INVALID_ROWID )
  1179. PushMatch(const_cast<CSphMatch&>(i));
  1180. }
  1181. bool PushGrouped ( const CSphMatch &, bool ) final { assert(0); return false; }
  1182. int Flatten ( CSphMatch * ) final { return 0; }
  1183. void Finalize ( MatchProcessor_i &, bool, bool ) final;
  1184. bool CanBeCloned() const final { return false; }
  1185. ISphMatchSorter * Clone () const final { return nullptr; }
  1186. void MoveTo ( ISphMatchSorter *, bool ) final {}
  1187. void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final;
  1188. bool IsCutoffDisabled() const final { return true; }
  1189. void SetMerge ( bool bMerge ) final {}
  1190. void SetBlobPool ( const BYTE* pBlobPool ) final
  1191. {
  1192. m_pBlobPool = pBlobPool;
  1193. MakeCtx();
  1194. }
  1195. void SetColumnar ( columnar::Columnar_i* pColumnar ) final
  1196. {
  1197. m_pColumnar = pColumnar;
  1198. MakeCtx();
  1199. }
  1200. private:
  1201. bool m_bSchemaSent = false;
  1202. int64_t m_iDocs = 0;
  1203. RowBuffer_i* m_pOutput;
  1204. const BYTE* m_pBlobPool = nullptr;
  1205. columnar::Columnar_i* m_pColumnar = nullptr;
  1206. CSphVector<ISphExpr*> m_dDocstores;
  1207. CSphVector<ISphExpr*> m_dFinals;
  1208. void ** m_ppOpaque1 = nullptr;
  1209. void ** m_ppOpaque2 = nullptr;
  1210. void * m_pCurDocstore = nullptr;
  1211. void * m_pCurDocstoreReader = nullptr;
  1212. CSphQuery m_dFake;
  1213. CSphQueryContext m_dCtx;
  1214. StrVec_t m_dColumns;
  1215. CSphVector<int> m_dOrder;
  1216. bool m_bDynamicDocid;
  1217. bool m_bNotYetFinalized = true;
  1218. inline bool PushMatch ( CSphMatch & tEntry );
  1219. void SendSchemaOnce();
  1220. void FinalizeOnce();
  1221. void MakeCtx();
  1222. };
/// ctor: pOutput is the client row buffer (not owned); ppOpaque1/2 indirectly
/// reference docstore reader/docstore which may be swapped between pushes;
/// dColumns is the user-requested output column order
DirectSqlQueue_c::DirectSqlQueue_c ( RowBuffer_i * pOutput, void ** ppOpaque1, void ** ppOpaque2, StrVec_t dColumns )
	: m_pOutput ( pOutput )
	, m_ppOpaque1 ( ppOpaque1 )
	, m_ppOpaque2 ( ppOpaque2 )
	, m_dCtx (m_dFake) // ctx bound to the placeholder query (m_dFake declared before m_dCtx)
	, m_dColumns ( std::move ( dColumns ) )
{}
/// dtor guarantees the client gets header + EOF even if Finalize() never ran
DirectSqlQueue_c::~DirectSqlQueue_c()
{
	FinalizeOnce();
}
  1234. void DirectSqlQueue_c::SendSchemaOnce()
  1235. {
  1236. if ( m_bSchemaSent )
  1237. return;
  1238. assert ( !m_iDocs );
  1239. for ( const auto& sColumn : m_dColumns )
  1240. {
  1241. auto iIdx = m_pSchema->GetAttrIndex ( sColumn.cstr() );
  1242. if ( iIdx >= 0 )
  1243. m_dOrder.Add ( iIdx );
  1244. }
  1245. for ( int i = 0; i < m_pSchema->GetAttrsCount(); ++i )
  1246. {
  1247. auto& tCol = const_cast< CSphColumnInfo &>(m_pSchema->GetAttr ( i ));
  1248. if ( tCol.m_sName == sphGetDocidName() )
  1249. m_bDynamicDocid = tCol.m_tLocator.m_bDynamic;
  1250. if ( !tCol.m_pExpr )
  1251. continue;
  1252. switch ( tCol.m_eStage )
  1253. {
  1254. case SPH_EVAL_FINAL : m_dFinals.Add ( tCol.m_pExpr ); break;
  1255. case SPH_EVAL_POSTLIMIT: m_dDocstores.Add ( tCol.m_pExpr ); break;
  1256. default:
  1257. sphWarning ("Unknown stage in SendSchema(): %d", tCol.m_eStage);
  1258. }
  1259. }
  1260. SendSqlSchema ( *m_pSchema, m_pOutput, m_dOrder );
  1261. m_bSchemaSent = true;
  1262. }
/// (re)build the calc context whenever blob pool or columnar storage changes
void DirectSqlQueue_c::MakeCtx()
{
	// NOTE(review): tFakeMeta/tFakeSchemas are locals passed into SetupCalc;
	// assumes the ctx copies what it needs and keeps no references - TODO confirm
	CSphQueryResultMeta tFakeMeta;
	CSphVector<const ISphSchema*> tFakeSchemas;
	m_dCtx.SetupCalc ( tFakeMeta, *m_pSchema, *m_pSchema, m_pBlobPool, m_pColumnar, tFakeSchemas );
}
/// stream one match to the client: lazily send the header, rebind docstore
/// sessions if the underlying docstore changed, run final-stage calcs, send row
bool DirectSqlQueue_c::PushMatch ( CSphMatch & tEntry )
{
	SendSchemaOnce();
	++m_iDocs;

	// rebind docid-keyed docstore session when *m_ppOpaque1 changed since last push
	if ( m_ppOpaque1 )
	{
		auto pDocstoreReader = *m_ppOpaque1;
		if ( pDocstoreReader!=std::exchange (m_pCurDocstore, pDocstoreReader) && pDocstoreReader )
		{
			DocstoreSession_c::InfoDocID_t tSessionInfo;
			tSessionInfo.m_pDocstore = (const DocstoreReader_i *)pDocstoreReader;
			tSessionInfo.m_iSessionId = -1;

			// value is copied; no leak of pointer to local here.
			m_dDocstores.for_each ( [&tSessionInfo] ( ISphExpr* pExpr ) { pExpr->Command ( SPH_EXPR_SET_DOCSTORE_DOCID, &tSessionInfo ); } );
		}
	}

	// rebind rowid-keyed docstore session when *m_ppOpaque2 changed since last push
	if ( m_ppOpaque2 )
	{
		auto pDocstore = *m_ppOpaque2;
		if ( pDocstore != std::exchange ( m_pCurDocstoreReader, pDocstore ) && pDocstore )
		{
			DocstoreSession_c::InfoRowID_t tSessionInfo;
			tSessionInfo.m_pDocstore = (Docstore_i*)pDocstore;
			tSessionInfo.m_iSessionId = -1;

			// value is copied; no leak of pointer to local here.
			m_dFinals.for_each ( [&tSessionInfo] ( ISphExpr* pExpr ) { pExpr->Command ( SPH_EXPR_SET_DOCSTORE_ROWID, &tSessionInfo ); } );
		}
	}

	// evaluate final-stage expressions for this match, then ship it
	m_dCtx.CalcFinal(tEntry);
	SendSqlMatch ( *m_pSchema, m_pOutput, tEntry, m_pBlobPool, m_dOrder, m_bDynamicDocid );
	return true;
}
  1301. /// final update pass
  1302. void DirectSqlQueue_c::Finalize ( MatchProcessor_i&, bool, bool bFinalizeMatches )
  1303. {
  1304. if ( !bFinalizeMatches )
  1305. return;
  1306. FinalizeOnce();
  1307. }
  1308. void DirectSqlQueue_c::FinalizeOnce ()
  1309. {
  1310. if ( !std::exchange ( m_bNotYetFinalized, false ) )
  1311. return;
  1312. SendSchemaOnce();
  1313. m_pOutput->Eof();
  1314. }
/// plain forwarder; column order / stage split happens later in SendSchemaOnce()
void DirectSqlQueue_c::SetSchema ( ISphSchema * pSchema, bool bRemapCmp )
{
	BASE::SetSchema ( pSchema, bRemapCmp );
}
  1319. //////////////////////////////////////////////////////////////////////////
  1320. static bool IsCount ( const CSphString & s )
  1321. {
  1322. return s=="@count" || s=="count(*)";
  1323. }
  1324. static bool IsGroupby ( const CSphString & s )
  1325. {
  1326. return s=="@groupby"
  1327. || s=="@distinct"
  1328. || s=="groupby()"
  1329. || IsSortJsonInternal(s);
  1330. }
  1331. static bool IsKnnDist ( const CSphString & sExpr )
  1332. {
  1333. return sExpr==GetKnnDistAttrName() || sExpr=="knn_dist()";
  1334. }
  1335. bool IsGroupbyMagic ( const CSphString & s )
  1336. {
  1337. return IsGroupby ( s ) || IsCount ( s );
  1338. }
/////////////////////////////////////////////////////////////////////////////
/// group sorting functor
/// compares queue entries by index into the queue's match storage;
/// inherits comparator state (keyparts/locators) and match accessors
template < typename COMPGROUP >
struct GroupSorter_fn : public CSphMatchComparatorState, public MatchSortAccessor_t
{
	const VecTraits_T<CSphMatch> & m_dBase; // the queue's storage we index into

	explicit GroupSorter_fn ( const CSphMatchQueueTraits& dBase )
		: m_dBase ( dBase.GetMatches() )
	{}

	// note swapped operands (b,a): yields inverted order relative to COMPGROUP::IsLess
	FORCE_INLINE bool IsLess ( int a, int b ) const
	{
		return COMPGROUP::IsLess ( m_dBase[b], m_dBase[a], *this );
	}
};
/// additional group-by sorter settings
struct CSphGroupSorterSettings
{
	CSphAttrLocator m_tLocGroupby; ///< locator for @groupby
	CSphAttrLocator m_tLocCount; ///< locator for @count
	CSphAttrLocator m_tLocDistinct; ///< locator for @distinct
	CSphAttrLocator m_tLocGroupbyStr; ///< locator for @groupbystr

	bool m_bDistinct = false;///< whether we need distinct
	CSphRefcountedPtr<CSphGrouper> m_pGrouper;///< group key calculator
	CSphRefcountedPtr<DistinctFetcher_i> m_pDistinctFetcher; ///< fetches distinct-counted value(s) from a match
	bool m_bImplicit = false;///< for queries with aggregate functions but without group by clause
	SharedPtr_t<ISphFilter> m_pAggrFilterTrait; ///< aggregate filter that got owned by grouper
	bool m_bJson = false; ///< whether we're grouping by Json attribute
	int m_iMaxMatches = 0; ///< requested result-set limit (pre-k-buffer)
	bool m_bGrouped = false; ///< are we going to push already grouped matches to it?
	int m_iDistinctAccuracy = 16; ///< HyperLogLog accuracy. 0 means "don't use HLL"

	/// re-point all locators after a schema swap
	void FixupLocators ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
	{
		sphFixupLocator ( m_tLocGroupby, pOldSchema, pNewSchema );
		sphFixupLocator ( m_tLocCount, pOldSchema, pNewSchema );
		sphFixupLocator ( m_tLocDistinct, pOldSchema, pNewSchema );
		sphFixupLocator ( m_tLocGroupbyStr, pOldSchema, pNewSchema );

		if ( m_pDistinctFetcher )
			m_pDistinctFetcher->FixupLocators ( pOldSchema, pNewSchema );
	}

	/// derive HLL accuracy from the distinct threshold; 0 disables HLL
	void SetupDistinctAccuracy ( int iThresh )
	{
		if ( !iThresh )
		{
			m_iDistinctAccuracy = 0;
			return;
		}

		// scale threshold up by the hash load factor, then take log2+4,
		// clamped to the [14..18] accuracy range
		iThresh = int ( float(iThresh) / OpenHashTable_T<int,int>::GetLoadFactor() ) + 1;
		m_iDistinctAccuracy = iThresh ? sphLog2(iThresh) + 4 : 0;
		m_iDistinctAccuracy = Min ( m_iDistinctAccuracy, 18 );
		m_iDistinctAccuracy = Max ( m_iDistinctAccuracy, 14 );
	}
};
/// helper that clones matches while preserving (or transferring) the group-by
/// part: aggregate attrs (@groupby, @count, ...) and group_concat ptr attrs.
/// Splits the schema's ptr attrs into "mine" (group_concat, owned here) and
/// "other" (everything else) so each clone op touches only the right subset.
struct MatchCloner_t
{
private:
	CSphFixedVector<CSphRowitem> m_dRowBuf { 0 }; // scratch copy of a dynamic row, sized by schema
	CSphVector<CSphAttrLocator> m_dAttrsGrp; // locators for grouping attrs (@groupby, @count, @distinct, etc.)
	CSphVector<CSphAttrLocator> m_dAttrsPtr; // locators for group_concat attrs
	CSphVector<int> m_dMyPtrRows; // rowids matching m_dAttrsPtr. i.e. grpconcat ptr result I own
	CSphVector<int> m_dOtherPtrRows; // rest rowids NOT matching m_dAttrsPtr. i.e. other ptr results
	const CSphSchemaHelper * m_pSchema = nullptr;
	bool m_bPtrRowsCommited = false; // readiness of m_dMyPtrRows and m_dOtherPtrRows

public:
	void SetSchema ( const ISphSchema * pSchema )
	{
		m_pSchema = (const CSphSchemaHelper *) pSchema; /// lazy hack
		m_dRowBuf.Reset ( m_pSchema->GetDynamicSize() );
	}

	// clone plain part (incl. pointers) from src to dst
	// keep group part (aggregates, group_concat) of dst intact
	// it assumes that tDst m_pDynamic contains correct data, or wiped away.
	void CloneKeepingAggrs ( CSphMatch & tDst, const CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );

		// memorize old dynamic first
		memcpy ( m_dRowBuf.Begin(), tDst.m_pDynamic, m_dRowBuf.GetLengthBytes() );
		m_pSchema->CloneMatchSpecial ( tDst, tSrc, m_dOtherPtrRows );
		/*
			FreeDataSpecial ( tDst, m_dOtherPtrRows );
			pDst->Combine ( *pSrc, GetDynamicSize () );
			CopyPtrsSpecial ( tDst, tSrc, m_dOtherPtrRows );
		*/

		// restore back group-by attributes
		for ( auto & tAttrGrp : m_dAttrsGrp )
			tDst.SetAttr ( tAttrGrp, sphGetRowAttr ( m_dRowBuf.Begin(), tAttrGrp ) );

		// restore back group_concat attribute(s)
		for ( auto & tAttrPtr : m_dAttrsPtr )
			tDst.SetAttr ( tAttrPtr, sphGetRowAttr ( m_dRowBuf.Begin (), tAttrPtr ) );
	}

	// clone plain part (incl. pointers) from src to dst
	// group part (aggregates, group_concat) is not copied
	void CloneWithoutAggrs ( CSphMatch & tDst, const CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );

		m_pSchema->CloneMatchSpecial ( tDst, tSrc, m_dOtherPtrRows );
		/*
			FreeDataSpecial ( tDst, m_dOtherPtrRows );
			pDst->Combine ( *pSrc, GetDynamicSize () );
			CopyPtrsSpecial ( tDst, tSrc, m_dOtherPtrRows );
		*/
	}

	// just write group part (aggregates, group_concat) without cloning
	// assumes tDst has allocated m_pDynamic. Fixme! look to #881 again...
	void CopyAggrs ( CSphMatch & tDst, const CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );
		assert ( &tDst!=&tSrc );
		assert ( tDst.m_pDynamic );

		for ( auto & dAttrGrp : m_dAttrsGrp )
			tDst.SetAttr ( dAttrGrp, tSrc.GetAttr ( dAttrGrp ));

		// deep-copy group_concat blobs (free dst's old ones first)
		CSphSchemaHelper::FreeDataSpecial ( tDst, m_dMyPtrRows );
		CSphSchemaHelper::CopyPtrsSpecial ( tDst, tSrc, m_dMyPtrRows );
	}

	// copy group part (aggregates)
	// move group_concat part without reallocating
	void MoveAggrs ( CSphMatch & tDst, CSphMatch & tSrc )
	{
		assert ( m_pSchema );
		assert ( m_bPtrRowsCommited );
		assert ( &tDst!=&tSrc );
		assert ( tDst.m_pDynamic );

		for ( auto & dAttrGrp : m_dAttrsGrp )
			tDst.SetAttr ( dAttrGrp, tSrc.GetAttr ( dAttrGrp ));

		CSphSchemaHelper::MovePtrsSpecial( tDst, tSrc, m_dMyPtrRows );
	}

	// register a plain (value) grouping attr
	inline void AddRaw ( const CSphAttrLocator& tLoc )
	{
		m_dAttrsGrp.Add ( tLoc );
	}

	// register a ptr (group_concat) grouping attr
	inline void AddPtr ( const CSphAttrLocator &tLoc )
	{
		m_dAttrsPtr.Add ( tLoc );
	}

	inline void ResetAttrs()
	{
		m_dAttrsGrp.Resize ( 0 );
		m_dAttrsPtr.Resize ( 0 );
	}

	// (re)fill m_dMyPtrRows and m_dOtherPtrRows from m_dAttrsPtr
	inline void CommitPtrs ()
	{
		assert ( m_pSchema );
		static const int SIZE_OF_ROW = 8 * sizeof ( CSphRowitem ); // bits per rowitem

		if ( m_bPtrRowsCommited )
			m_dMyPtrRows.Resize(0);

		// bit offset -> rowitem index
		for ( const CSphAttrLocator &tLoc : m_dAttrsPtr )
			m_dMyPtrRows.Add ( tLoc.m_iBitOffset / SIZE_OF_ROW );

		m_dOtherPtrRows = m_pSchema->SubsetPtrs ( m_dMyPtrRows );

#ifndef NDEBUG
		// sanitize check
		m_dMyPtrRows = m_pSchema->SubsetPtrs ( m_dOtherPtrRows );
		assert ( m_dMyPtrRows.GetLength ()==m_dAttrsPtr.GetLength () );
#endif
		m_bPtrRowsCommited = true;
	}
};
/// common base for group-by sorters: owns the aggregate functions and the
/// match cloner; provides schema/aggregate setup and HAVING evaluation
class BaseGroupSorter_c : public BlobPool_c, protected CSphGroupSorterSettings
{
	using BASE = CSphGroupSorterSettings;

public:
	FWD_BASECTOR( BaseGroupSorter_c )

	~BaseGroupSorter_c() override { ResetAggregates(); }

protected:
	MatchCloner_t m_tPregroup;              // clones matches preserving group parts
	CSphVector<AggrFunc_i *> m_dAggregates; // owned aggregate functions

	void SetColumnar ( columnar::Columnar_i * pColumnar )
	{
		for ( auto i : m_dAggregates )
			i->SetColumnar(pColumnar);
	}

	/// schema, aggregates setup
	/// builds aggregate functions from schema attrs and registers grouping
	/// locators with the cloner; AVG aggregates are also collected into pAvgs
	/// (they must be finalized before sorting groups)
	template <int DISTINCT>
	inline void SetupBaseGrouper ( ISphSchema * pSchema, CSphVector<AggrFunc_i *> * pAvgs = nullptr )
	{
		m_tPregroup.ResetAttrs();
		ResetAggregates();
		m_tPregroup.SetSchema ( pSchema );
		m_tPregroup.AddRaw ( m_tLocGroupby ); // @groupby
		m_tPregroup.AddRaw ( m_tLocCount ); // @count
		if constexpr ( DISTINCT )
			m_tPregroup.AddRaw ( m_tLocDistinct ); // @distinct

		// extract aggregates
		for ( int i = 0; i<pSchema->GetAttrsCount (); ++i )
		{
			const CSphColumnInfo &tAttr = pSchema->GetAttr ( i );

			if ( tAttr.m_eAggrFunc==SPH_AGGR_NONE
				|| IsGroupbyMagic ( tAttr.m_sName ) // @count, @groupby, @groupbystr, @distinct, count(*), groupby()
				|| IsSortStringInternal ( tAttr.m_sName.cstr () ) )
				continue;

			switch ( tAttr.m_eAggrFunc )
			{
			case SPH_AGGR_SUM: m_dAggregates.Add ( CreateAggrSum(tAttr) ); break;
			case SPH_AGGR_AVG:
				m_dAggregates.Add ( CreateAggrAvg ( tAttr, m_tLocCount ) );

				// store avg to calculate these attributes prior to groups sort
				if ( pAvgs )
					pAvgs->Add ( m_dAggregates.Last() );
				break;

			case SPH_AGGR_MIN: m_dAggregates.Add ( CreateAggrMin(tAttr) ); break;
			case SPH_AGGR_MAX: m_dAggregates.Add ( CreateAggrMax(tAttr) ); break;
			case SPH_AGGR_CAT:
				m_dAggregates.Add ( CreateAggrConcat(tAttr) );
				m_tPregroup.AddPtr ( tAttr.m_tLocator ); // group_concat result is a ptr attr
				break;

			default: assert ( 0 && "internal error: unhandled aggregate function" );
				break;
			}

			// aggregated values (except group_concat) are part of the group row
			if ( tAttr.m_eAggrFunc!=SPH_AGGR_CAT )
				m_tPregroup.AddRaw ( tAttr.m_tLocator );
		}
		m_tPregroup.CommitPtrs();
	}

	// HAVING filtering; no filter means "pass everything"
	bool EvalHAVING ( const CSphMatch& tMatch )
	{
		return !m_pAggrFilterTrait || m_pAggrFilterTrait->Eval ( tMatch );
	}

	// fold tSrc into tDst's aggregates
	void AggrUpdate ( CSphMatch & tDst, const CSphMatch & tSrc, bool bGrouped, bool bMerge = false )
	{
		for ( auto * pAggregate : this->m_dAggregates )
			pAggregate->Update ( tDst, tSrc, bGrouped, bMerge );
	}

	// initialize tDst's aggregates from tSrc (first match of a group)
	void AggrSetup ( CSphMatch & tDst, const CSphMatch & tSrc, bool bMerge = false )
	{
		for ( auto * pAggregate : this->m_dAggregates )
			pAggregate->Setup ( tDst, tSrc, bMerge );
	}

	// strip aggregate state from a match that leaves the grouped set
	void AggrUngroup ( CSphMatch & tMatch )
	{
		for ( auto * pAggregate : this->m_dAggregates )
			pAggregate->Ungroup ( tMatch );
	}

private:
	// delete all owned aggregate functions
	void ResetAggregates()
	{
		for ( auto & pAggregate : m_dAggregates )
			SafeDelete ( pAggregate );

		m_dAggregates.Resize(0);
	}
};
/// sorts matches inside a group via a shared (refcounted) virtual comparator;
/// exposes IsLess over queue indexes in INVERSE order (acts as IsGreater)
class SubGroupSorter_fn : public ISphNoncopyable
{
	const VecTraits_T<CSphMatch> & m_dBase;  // the queue's match storage
	const CSphMatchComparatorState& m_tState;
	const ISphMatchComparator * m_pComp;     // refcounted; held for this object's lifetime

public:
	SubGroupSorter_fn ( const CSphMatchQueueTraits & dBase, const ISphMatchComparator * pC )
		: m_dBase ( dBase.GetMatches () )
		, m_tState ( dBase.GetState() )
		, m_pComp ( pC )
	{
		assert ( m_pComp );
		m_pComp->AddRef();
	}

	~SubGroupSorter_fn()
	{
		m_pComp->Release();
	}

	const ISphMatchComparator * GetComparator() const
	{
		return m_pComp;
	}

	// a > b in the comparator's order
	bool MatchIsGreater ( const CSphMatch & a, const CSphMatch & b ) const
	{
		return m_pComp->VirtualIsLess ( b, a, m_tState );
	}

	// inverse order, i.e. work as IsGreater
	bool IsLess ( int a, int b ) const
	{
		return m_pComp->VirtualIsLess ( m_dBase[b], m_dBase[a], m_tState );
	}
};
  1614. /// match sorter with k-buffering and group-by - common part
  1615. template<typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS>
  1616. class KBufferGroupSorter_T : public CSphMatchQueueTraits, protected BaseGroupSorter_c
  1617. {
  1618. using MYTYPE = KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTINCT,NOTIFICATIONS>;
  1619. using BASE = CSphMatchQueueTraits;
  1620. public:
  1621. KBufferGroupSorter_T ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
  1622. : CSphMatchQueueTraits ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )
  1623. , BaseGroupSorter_c ( tSettings )
  1624. , m_eGroupBy ( pQuery->m_eGroupFunc )
  1625. , m_iLimit ( tSettings.m_iMaxMatches )
  1626. , m_tGroupSorter (*this)
  1627. , m_tSubSorter ( *this, pComp )
  1628. {
  1629. assert ( GROUPBY_FACTOR>1 );
  1630. assert ( !DISTINCT || tSettings.m_pDistinctFetcher );
  1631. if constexpr ( NOTIFICATIONS )
  1632. m_dJustPopped.Reserve ( m_iSize );
  1633. m_pGrouper = tSettings.m_pGrouper;
  1634. m_pDistinctFetcher = tSettings.m_pDistinctFetcher;
  1635. m_tUniq.SetAccuracy ( tSettings.m_iDistinctAccuracy );
  1636. }
  1637. /// schema setup
  1638. void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final
  1639. {
  1640. if ( m_pSchema )
  1641. {
  1642. FixupLocators ( m_pSchema, pSchema );
  1643. m_tGroupSorter.FixupLocators ( m_pSchema, pSchema, bRemapCmp );
  1644. m_tPregroup.ResetAttrs ();
  1645. m_dAggregates.Apply ( [] ( AggrFunc_i * pAggr ) { SafeDelete ( pAggr ); } );
  1646. m_dAggregates.Resize ( 0 );
  1647. m_dAvgs.Resize ( 0 );
  1648. }
  1649. BASE::SetSchema ( pSchema, bRemapCmp );
  1650. SetupBaseGrouper<DISTINCT> ( pSchema, &m_dAvgs );
  1651. }
  1652. /// check if this sorter does groupby
  1653. bool IsGroupby () const final
  1654. {
  1655. return true;
  1656. }
  1657. /// set blob pool pointer (for string+groupby sorters)
  1658. void SetBlobPool ( const BYTE * pBlobPool ) final
  1659. {
  1660. BlobPool_c::SetBlobPool ( pBlobPool );
  1661. m_pGrouper->SetBlobPool ( pBlobPool );
  1662. if ( m_pDistinctFetcher )
  1663. m_pDistinctFetcher->SetBlobPool(pBlobPool);
  1664. }
  1665. void SetColumnar ( columnar::Columnar_i * pColumnar ) final
  1666. {
  1667. CSphMatchQueueTraits::SetColumnar(pColumnar);
  1668. BaseGroupSorter_c::SetColumnar(pColumnar);
  1669. m_pGrouper->SetColumnar(pColumnar);
  1670. if ( m_pDistinctFetcher )
  1671. m_pDistinctFetcher->SetColumnar(pColumnar);
  1672. }
  1673. /// get entries count
  1674. int GetLength () override
  1675. {
  1676. return Min ( Used(), m_iLimit );
  1677. }
  1678. /// set group comparator state
  1679. void SetGroupState ( const CSphMatchComparatorState & tState ) final
  1680. {
  1681. m_tGroupSorter.m_fnStrCmp = tState.m_fnStrCmp;
  1682. // FIXME! manual bitwise copying.. yuck
  1683. for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; ++i )
  1684. {
  1685. m_tGroupSorter.m_eKeypart[i] = tState.m_eKeypart[i];
  1686. m_tGroupSorter.m_tLocator[i] = tState.m_tLocator[i];
  1687. }
  1688. m_tGroupSorter.m_uAttrDesc = tState.m_uAttrDesc;
  1689. m_tGroupSorter.m_iNow = tState.m_iNow;
  1690. // check whether we sort by distinct
  1691. if constexpr ( DISTINCT )
  1692. {
  1693. const CSphColumnInfo * pDistinct = m_pSchema->GetAttr("@distinct");
  1694. assert(pDistinct);
  1695. for ( const auto & tLocator : m_tGroupSorter.m_tLocator )
  1696. if ( tLocator==pDistinct->m_tLocator )
  1697. {
  1698. m_bSortByDistinct = true;
  1699. break;
  1700. }
  1701. }
  1702. }
  1703. bool CanBeCloned() const final { return !DISTINCT && BASE::CanBeCloned(); }
  1704. protected:
  1705. ESphGroupBy m_eGroupBy; ///< group-by function
  1706. int m_iLimit; ///< max matches to be retrieved
  1707. UNIQ m_tUniq;
  1708. bool m_bSortByDistinct = false;
  1709. GroupSorter_fn<COMPGROUP> m_tGroupSorter;
  1710. SubGroupSorter_fn m_tSubSorter;
  1711. CSphVector<AggrFunc_i *> m_dAvgs;
  1712. bool m_bAvgFinal = false;
  1713. CSphVector<SphAttr_t> m_dDistinctKeys;
  1714. static const int GROUPBY_FACTOR = 4; ///< allocate this times more storage when doing group-by (k, as in k-buffer)
  1715. /// finalize distinct counters
  1716. template <typename FIND>
  1717. void Distinct ( FIND&& fnFind )
  1718. {
  1719. m_tUniq.Sort ();
  1720. SphGroupKey_t uGroup;
  1721. for ( int iCount = m_tUniq.CountStart ( uGroup ); iCount; iCount = m_tUniq.CountNext ( uGroup ) )
  1722. {
  1723. CSphMatch * pMatch = fnFind ( uGroup );
  1724. if ( pMatch )
  1725. pMatch->SetAttr ( m_tLocDistinct, iCount );
  1726. }
  1727. }
  1728. inline void SetupBaseGrouperWrp ( ISphSchema * pSchema, CSphVector<AggrFunc_i *> * pAvgs )
  1729. {
  1730. SetupBaseGrouper<DISTINCT> ( pSchema, pAvgs );
  1731. }
  1732. void CloneKBufferGroupSorter ( MYTYPE* pClone ) const
  1733. {
  1734. // basic clone
  1735. BASE::CloneTo ( pClone );
  1736. // actions from SetGroupState
  1737. pClone->m_bSortByDistinct = m_bSortByDistinct;
  1738. pClone->m_tGroupSorter.m_fnStrCmp = m_tGroupSorter.m_fnStrCmp;
  1739. for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  1740. {
  1741. pClone->m_tGroupSorter.m_eKeypart[i] = m_tGroupSorter.m_eKeypart[i];
  1742. pClone->m_tGroupSorter.m_tLocator[i] = m_tGroupSorter.m_tLocator[i];
  1743. }
  1744. pClone->m_tGroupSorter.m_uAttrDesc = m_tGroupSorter.m_uAttrDesc;
  1745. pClone->m_tGroupSorter.m_iNow = m_tGroupSorter.m_iNow;
  1746. // complete SetSchema
  1747. pClone->m_dAvgs.Resize ( 0 );
  1748. pClone->SetupBaseGrouperWrp ( pClone->m_pSchema, &pClone->m_dAvgs );
  1749. // m_pGrouper also need to be cloned (otherwise SetBlobPool will cause races)
  1750. if ( m_pGrouper )
  1751. pClone->m_pGrouper = m_pGrouper->Clone ();
  1752. if ( m_pDistinctFetcher )
  1753. pClone->m_pDistinctFetcher = m_pDistinctFetcher->Clone ();
  1754. }
  1755. template<typename SORTER> SORTER * CloneSorterT () const
  1756. {
  1757. CSphQuery dFoo;
  1758. dFoo.m_iMaxMatches = m_iLimit;
  1759. dFoo.m_eGroupFunc = m_eGroupBy;
  1760. auto pClone = new SORTER ( m_tSubSorter.GetComparator (), &dFoo, *this );
  1761. CloneKBufferGroupSorter ( pClone );
  1762. return pClone;
  1763. }
  1764. CSphVector<AggrFunc_i *> GetAggregatesWithoutAvgs() const
  1765. {
  1766. CSphVector<AggrFunc_i *> dAggrs;
  1767. if ( m_dAggregates.GetLength ()!=m_dAvgs.GetLength ())
  1768. {
  1769. dAggrs = m_dAggregates;
  1770. for ( auto * pAvg : this->m_dAvgs )
  1771. dAggrs.RemoveValue ( pAvg );
  1772. }
  1773. return dAggrs;
  1774. }
  1775. FORCE_INLINE void FreeMatchPtrs ( int iMatch, bool bNotify=true )
  1776. {
  1777. if ( NOTIFICATIONS && bNotify )
  1778. m_dJustPopped.Add ( RowTagged_t ( m_dData[iMatch] ) );
  1779. m_pSchema->FreeDataPtrs ( m_dData[iMatch] );
  1780. // on final pass we totally wipe match.
  1781. // That is need, since otherwise such 'garbage' matches with non-null m_pDynamic
  1782. // will be targeted in d-tr with FreeDataPtrs with possible another(!) schema
  1783. if ( !bNotify )
  1784. m_dData[iMatch].ResetDynamic ();
  1785. }
  1786. template <bool GROUPED>
  1787. FORCE_INLINE void UpdateDistinct ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey )
  1788. {
  1789. int iCount = 1;
  1790. if constexpr ( GROUPED )
  1791. iCount = (int)tEntry.GetAttr ( m_tLocDistinct );
  1792. assert(m_pDistinctFetcher);
  1793. if constexpr ( DISTINCT==1 )
  1794. m_tUniq.Add ( {uGroupKey, m_pDistinctFetcher->GetKey(tEntry), iCount} );
  1795. else
  1796. {
  1797. m_pDistinctFetcher->GetKeys ( tEntry, this->m_dDistinctKeys );
  1798. for ( auto i : this->m_dDistinctKeys )
  1799. m_tUniq.Add ( {uGroupKey, i, iCount} );
  1800. }
  1801. }
	// drop uniq (distinct) counters belonging to the given set of group keys
	void RemoveDistinct ( VecTraits_T<SphGroupKey_t>& dRemove )
	{
		// sort and compact
		// (when sorting by distinct the storage is presumed already sorted - only sort otherwise)
		if ( !m_bSortByDistinct )
			m_tUniq.Sort ();
		m_tUniq.Compact ( dRemove );
	}
  1809. };
/// match sorter with k-buffering and group-by
/// invoking by select ... group by ... where only plain attributes (i.e. NO mva, NO jsons)
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class CSphKBufferGroupSorter : public KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTINCT,NOTIFICATIONS>
{
	using MYTYPE = CSphKBufferGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;

	bool m_bMatchesFinalized = false;	// set by FinalizeMatches(); dropped again by any subsequent push
	int m_iMaxUsed = -1;				// high-water mark of touched backend slots, for late dynamic-data cleanup

protected:
	OpenHashTableFastClear_T <SphGroupKey_t, CSphMatch *> m_hGroup2Match;	// group key -> head match of that group

	// since we inherit from template, we need to write boring 'using' block
	using KBufferGroupSorter = KBufferGroupSorter_T<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS>;
	using KBufferGroupSorter::m_eGroupBy;
	using KBufferGroupSorter::m_pGrouper;
	using KBufferGroupSorter::m_iLimit;
	using KBufferGroupSorter::m_tUniq;
	using KBufferGroupSorter::m_bSortByDistinct;
	using KBufferGroupSorter::m_tGroupSorter;
	using KBufferGroupSorter::m_tSubSorter;
	using KBufferGroupSorter::m_dAvgs;
	using KBufferGroupSorter::GROUPBY_FACTOR;
	using KBufferGroupSorter::GetAggregatesWithoutAvgs;
	using KBufferGroupSorter::Distinct;
	using KBufferGroupSorter::UpdateDistinct;
	using KBufferGroupSorter::RemoveDistinct;
	using KBufferGroupSorter::FreeMatchPtrs;
	using KBufferGroupSorter::m_bAvgFinal;
	using CSphGroupSorterSettings::m_tLocGroupby;
	using CSphGroupSorterSettings::m_tLocCount;
	using CSphGroupSorterSettings::m_tLocDistinct;
	using BaseGroupSorter_c::EvalHAVING;
	using BaseGroupSorter_c::AggrSetup;
	using BaseGroupSorter_c::AggrUpdate;
	using BaseGroupSorter_c::AggrUngroup;
	using CSphMatchQueueTraits::m_iSize;
	using CSphMatchQueueTraits::m_dData;
	using CSphMatchQueueTraits::Get;
	using CSphMatchQueueTraits::Add;
	using CSphMatchQueueTraits::Used;
	using CSphMatchQueueTraits::ResetAfterFlatten;
	using CSphMatchQueueTraits::ResetDynamic;
	using CSphMatchQueueTraits::ResetDynamicFreeData;
	using MatchSorter_c::m_iTotal;
	using MatchSorter_c::m_tJustPushed;
	using MatchSorter_c::m_dJustPopped;
	using MatchSorter_c::m_pSchema;

public:
	/// ctor
	CSphKBufferGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
		: KBufferGroupSorter ( pComp, pQuery, tSettings )
		, m_hGroup2Match ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )
	{}

	bool Push ( const CSphMatch & tEntry ) override { return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false, false, true, nullptr ); }
	void Push ( const VecTraits_T<const CSphMatch> & dMatches ) override { assert ( 0 && "Not supported in grouping"); }
	bool PushGrouped ( const CSphMatch & tEntry, bool ) override { return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), false, false, true, nullptr ); }
	ISphMatchSorter * Clone() const override { return this->template CloneSorterT<MYTYPE>(); }

	/// store all entries into specified location in sorted order, and remove them from queue
	int Flatten ( CSphMatch * pTo ) override
	{
		FinalizeMatches();
		auto dAggrs = GetAggregatesWithoutAvgs();
		const CSphMatch * pBegin = pTo;

		for ( auto iMatch : this->m_dIData )
		{
			CSphMatch & tMatch = m_dData[iMatch];
			if constexpr ( HAS_AGGREGATES )
				dAggrs.Apply ( [&tMatch] ( AggrFunc_i * pAggr ) { pAggr->Finalize ( tMatch ); } );

			// groups failing the HAVING filter are dropped (and fully wiped) right here
			if ( !EvalHAVING ( tMatch ))
			{
				FreeMatchPtrs ( iMatch, false );
				continue;
			}

			Swap ( *pTo, tMatch );
			++pTo;
		}

		// reset to a pristine state for a possible next pass
		m_iTotal = 0;
		m_bMatchesFinalized = false;

		if constexpr ( DISTINCT )
			m_tUniq.Reset();

		ResetAfterFlatten ();
		m_iMaxUsed = ResetDynamic ( m_iMaxUsed );
		return int ( pTo-pBegin );
	}

	// move our state (or matches) into another sorter of the same type
	void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
	{
		if ( !Used () )
			return;

		auto& dRhs = *(MYTYPE *) pRhs;
		if ( dRhs.IsEmpty () )
		{
			// rhs is empty: adopt our storage wholesale instead of pushing match-by-match
			CSphMatchQueueTraits::SwapMatchQueueTraits ( dRhs );
			dRhs.m_hGroup2Match = std::move ( m_hGroup2Match );
			dRhs.m_bMatchesFinalized = m_bMatchesFinalized;
			dRhs.m_iMaxUsed = m_iMaxUsed;
			if ( !m_bMatchesFinalized && bCopyMeta )
				dRhs.m_tUniq = std::move(m_tUniq);
			m_iMaxUsed = -1;
			return;
		}

		bool bUniqUpdated = false;
		if ( !m_bMatchesFinalized && bCopyMeta )
		{
			// can not move m_tUniq into dRhs as move invalidates m_tUniq then breaks FinalizeMatches
			m_tUniq.CopyTo ( dRhs.m_tUniq );
			bUniqUpdated = true;
		}

		// if we're copying meta (uniq counters), we don't need distinct calculation right now
		// we can do it later after all sorters are merged
		FinalizeMatches ( !bCopyMeta );

		// matches in dRhs are using a new (standalone) schema
		// however, some supposedly unused matches still have old schema
		// they were not cleared immediately for performance reasons
		// we need to do that now
		for ( int i = dRhs.m_dIData.GetLength(); i < dRhs.m_dData.GetLength(); i++ )
		{
			int iId = *(dRhs.m_dIData.Begin()+i);
			dRhs.m_dData[iId].ResetDynamic();
		}

		// rhs already received our uniq counters above; don't let the pushes below double-count them
		dRhs.m_bUpdateDistinct = !bUniqUpdated;
		dRhs.SetMerge(true);

		// just push in heap order
		// since we have grouped matches, it is not always possible to move them,
		// so use plain push instead
		for ( auto iMatch : this->m_dIData )
			dRhs.PushGrouped ( m_dData[iMatch], false );

		dRhs.m_bUpdateDistinct = true;
		dRhs.SetMerge(false);

		// once we're done copying, cleanup
		m_iMaxUsed = ResetDynamicFreeData ( m_iMaxUsed );
	}

	// run the processor over all stored groups; optionally finalize them first
	void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) override
	{
		if ( !Used() )
			return;

		if ( bFinalizeMatches )
			FinalizeMatches();
		else if constexpr ( DISTINCT )
		{
			// if we are not finalizing matches, we are using global sorters
			// let's try to remove dupes while we are processing data in separate threads
			// so that the main thread will have fewer data to work with
			m_tUniq.Sort();
			VecTraits_T<SphGroupKey_t> dStub;
			m_tUniq.Compact(dStub);
		}

		// just evaluate in heap order
		for ( auto iMatch : this->m_dIData )
			tProcessor.Process ( &m_dData[iMatch] );

		if constexpr ( DISTINCT )
		{
			// need to clean up matches NOT from m_dIData with current schema
			// as after schema change data_ptr attributes will have garbage in ptr part for matches not processed by tProcessor
			// and global sorters have different clean up code path that do not handle this garbage as usual sorters do
			if ( this->m_dIData.GetLength()!=m_iMaxUsed )
			{
				for ( int i=0; i<m_iMaxUsed; i++ )
				{
					CSphMatch & tMatch = m_dData[i];
					if ( !tMatch.m_pStatic ) // clean up match that was in m_dIData set
						continue;

					m_pSchema->FreeDataPtrs ( tMatch );
					tMatch.ResetDynamic ();
				}
			}
		}
	}

	void SetMerge ( bool bMerge ) override { m_bMerge = bMerge; }

protected:
	// update an already-hashed group with a new entry; always returns false (the entry is a dupe, no new group appears)
	template <bool GROUPED>
	bool PushIntoExistingGroup( CSphMatch & tGroup, const CSphMatch & tEntry, SphGroupKey_t uGroupKey, SphAttr_t * pAttr )
	{
		assert ( tGroup.GetAttr ( m_tLocGroupby )==uGroupKey );
		assert ( tGroup.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );

		auto & tLocCount = m_tLocCount;
		// a grouped entry carries an accumulated @count; a raw entry adds one
		if constexpr ( GROUPED )
			tGroup.AddCounterAttr ( tLocCount, tEntry );
		else
			tGroup.AddCounterScalar ( tLocCount, 1 );

		if constexpr ( HAS_AGGREGATES )
			AggrUpdate ( tGroup, tEntry, GROUPED, m_bMerge );

		// if new entry is more relevant, update from it
		if ( m_tSubSorter.MatchIsGreater ( tEntry, tGroup ) )
		{
			if constexpr ( NOTIFICATIONS )
			{
				m_tJustPushed = RowTagged_t ( tEntry );
				this->m_dJustPopped.Add ( RowTagged_t ( tGroup ) );
			}

			// clone the low part of the match
			this->m_tPregroup.CloneKeepingAggrs ( tGroup, tEntry );
			if ( pAttr )
				UpdateGroupbyStr ( tGroup, pAttr );
		}

		// submit actual distinct value
		if ( DISTINCT && m_bUpdateDistinct )
			KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tEntry, uGroupKey );

		return false; // since it is a dupe
	}

	/// add entry to the queue
	template <bool GROUPED>
	FORCE_INLINE bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, [[maybe_unused]] bool bNewSet, [[maybe_unused]] bool bTailFinalized, bool bClearNotify, SphAttr_t * pAttr )
	{
		if constexpr ( NOTIFICATIONS )
		{
			if ( bClearNotify )
			{
				m_tJustPushed = RowTagged_t();
				this->m_dJustPopped.Resize ( 0 );
			}
		}

		auto & tLocCount = m_tLocCount;
		m_bMatchesFinalized = false;

		// if avgs were finalized by an earlier cut/flatten, roll them back to raw accumulators
		if ( HAS_AGGREGATES && m_bAvgFinal )
			CalcAvg ( Avg_e::UNGROUP );

		// if this group is already hashed, we only need to update the corresponding match
		CSphMatch ** ppMatch = m_hGroup2Match.Find ( uGroupKey );
		if ( ppMatch )
		{
			CSphMatch * pMatch = (*ppMatch);
			assert ( pMatch );
			assert ( pMatch->GetAttr ( m_tLocGroupby )==uGroupKey );
			return PushIntoExistingGroup<GROUPED> ( *pMatch, tEntry, uGroupKey, pAttr );
		}

		// submit actual distinct value
		if constexpr ( DISTINCT )
			KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tEntry, uGroupKey );

		// if we're full, let's cut off some worst groups
		if ( Used()==m_iSize )
			CutWorst ( m_iLimit * (int)(GROUPBY_FACTOR/2) );

		// do add
		assert ( Used()<m_iSize );
		CSphMatch & tNew = Add();
		m_pSchema->CloneMatch ( tNew, tEntry );

		if constexpr ( HAS_AGGREGATES )
			AggrSetup ( tNew, tEntry, m_bMerge );

		if constexpr ( NOTIFICATIONS )
			m_tJustPushed = RowTagged_t ( tNew );

		if constexpr ( GROUPED )
		{
			if constexpr ( HAS_AGGREGATES )
				AggrUngroup(tNew);
		}
		else
		{
			// a raw match becomes the head of a brand-new group: seed its group-by attributes
			tNew.SetAttr ( m_tLocGroupby, uGroupKey );
			tNew.SetAttr ( tLocCount, 1 );
			if ( DISTINCT && m_bUpdateDistinct )
				tNew.SetAttr ( m_tLocDistinct, 0 );

			if ( pAttr )
				UpdateGroupbyStr ( tNew, pAttr );
		}

		m_hGroup2Match.Add ( uGroupKey, &tNew );
		++m_iTotal;
		return true;
	}

private:
	enum class Avg_e { FINALIZE, UNGROUP };
	bool m_bUpdateDistinct = true;	// cleared while merging already-copied uniq counters, to avoid double-counting
	bool m_bMerge = false;
	CSphVector<SphGroupKey_t> m_dRemove;	// scratch buffer for CutWorst -> RemoveDistinct

	// switch all avg aggregates between their finalized (value) and raw (accumulator) form
	void CalcAvg ( Avg_e eGroup )
	{
		if ( m_dAvgs.IsEmpty() )
			return;

		m_bAvgFinal = ( eGroup==Avg_e::FINALIZE );

		if ( eGroup==Avg_e::FINALIZE )
			for ( auto i : this->m_dIData )
				m_dAvgs.Apply( [this,i] ( AggrFunc_i * pAvg ) { pAvg->Finalize ( m_dData[i] ); } );
		else
			for ( auto i : this->m_dIData )
				m_dAvgs.Apply ( [this,i] ( AggrFunc_i * pAvg ) { pAvg->Ungroup ( m_dData[i] ); } );
	}

	/// finalize counted distinct values
	void CountDistinct ()
	{
		// the lookup maps a group key back to its head match via m_hGroup2Match
		Distinct ( [this] ( SphGroupKey_t uGroup )->CSphMatch *
		{
			auto ppMatch = m_hGroup2Match.Find ( uGroup );
			return ppMatch ? *ppMatch : nullptr;
		});
	}

	// make final order before finalize/flatten call
	void FinalizeMatches ( bool bCountDistinct=true )
	{
		if ( m_bMatchesFinalized )
			return;

		m_bMatchesFinalized = true;

		if ( Used() > m_iLimit )
			CutWorst ( m_iLimit, true ); // the cut itself sorts and (when needed) counts distinct
		else
		{
			if ( DISTINCT && bCountDistinct )
				CountDistinct();

			CalcAvg ( Avg_e::FINALIZE );
			SortGroups();
		}
	}

	// refill m_hGroup2Match from the current match set (e.g. after CutWorst cleared it)
	void RebuildHash ()
	{
		for ( auto iMatch : this->m_dIData ) {
			auto & tMatch = m_dData[iMatch];
			m_hGroup2Match.Add ( tMatch.GetAttr ( m_tLocGroupby ), &tMatch );
		}
	}

	/// cut worst N groups off the buffer tail, and maybe sort the best part
	void CutWorst ( int iBound, bool bFinalize=false )
	{
		// prepare to partition - finalize distinct, avgs to provide smooth sorting
		if ( DISTINCT && m_bSortByDistinct )
			CountDistinct ();

		CalcAvg ( Avg_e::FINALIZE );

		// relocate best matches to the low part (up to the iBound)
		BinaryPartition (iBound);

		// take worst matches and free them (distinct stuff, data ptrs)
		auto dWorst = this->m_dIData.Slice ( iBound );

		if constexpr ( DISTINCT )
		{
			m_dRemove.Resize(0);
			for ( auto iMatch : dWorst )
				m_dRemove.Add ( m_dData[iMatch].GetAttr ( m_tLocGroupby ));
			RemoveDistinct ( m_dRemove );
		}

		dWorst.Apply ( [this,bFinalize] ( int iMatch ) { FreeMatchPtrs ( iMatch, !bFinalize ); } );

		m_iMaxUsed = Max ( m_iMaxUsed, this->m_dIData.GetLength() ); // memorize it for free dynamics later.
		this->m_dIData.Resize ( iBound );
		m_hGroup2Match.Clear();

		if ( bFinalize )
		{
			SortGroups();
			if ( DISTINCT && !m_bSortByDistinct ) // since they haven't counted at the top
			{
				RebuildHash(); // distinct uses m_hGroup2Match
				CountDistinct();
			}
		} else
		{
			// we've called CalcAvg ( Avg_e::FINALIZE ) before partitioning groups
			// now we can undo this calculation for the rest apart from thrown away
			// on finalize (sorting) cut we don't need to ungroup here
			CalcAvg ( Avg_e::UNGROUP );
			RebuildHash();
		}
	}

	/// sort groups buffer
	void SortGroups ()
	{
		this->m_dIData.Sort ( m_tGroupSorter );
	}

	// update @groupbystr value, if available
	void UpdateGroupbyStr ( CSphMatch& tMatch, const SphAttr_t * pAttr )
	{
		if ( this->m_tLocGroupbyStr.m_bDynamic )
			tMatch.SetAttr ( this->m_tLocGroupbyStr, *pAttr );
	}

	// lazy resort matches so that best are located up to iBound
	// (quickselect-style partitioning on the indirection vector; matches themselves are never moved)
	void BinaryPartition ( int iBound )
	{
		float COEFF = Max ( 1.0f, float(Used()) / iBound );
		int iPivot = this->m_dIData[ int(iBound/COEFF) ];

		--iBound;

		int a=0;
		int b=Used()-1;
		while (true)
		{
			int i=a;
			int j=b;
			// standard two-index partition pass around the pivot
			while (i<=j)
			{
				while (m_tGroupSorter.IsLess (this->m_dIData[i],iPivot)) ++i;
				while (m_tGroupSorter.IsLess (iPivot, this->m_dIData[j])) --j;
				if ( i<=j ) ::Swap( this->m_dIData[i++], this->m_dIData[j--]);
			}
			if ( iBound == j )
				break;

			if ( iBound < j)
				b = j; // too many elems acquired; continue with left part
			else
				a = i; // too few elems acquired; continue with right part

			int iPivotIndex = int ( ( a * ( COEFF-1 )+b ) / COEFF );
			iPivot = this->m_dIData[iPivotIndex];
		}
	}
};
// local diagnostic-logging helpers for the N-group sorter below
#define LOG_COMPONENT_NG __FILE__ << ":" << __LINE__ << " -"
#define LOG_LEVEL_DIAG false	// flip to true to enable the DBG output
#define DBG LOC(DIAG,NG)
  2196. /// match sorter with k-buffering and N-best group-by
  2197. /* Trick explanation
  2198. *
  2199. * Here we keep several grouped matches, but each one is not a single match, but a group.
  2200. * On the backend we have solid vector of real matches. They are allocated once and freed, and never moved around.
  2201. * To work with them, we have vector of indexes, so that each index points to corresponding match in the backend.
  2202. * So when performing moving operations (sort, etc.) we actually change indexes and never move matches themselves.
  2203. *
  2204. * Say, when user pushes matches with weights of 5,2,3,1,4,6, and we then sort them, we will have the following relations:
  2205. *
  2206. * m5 m2 m3 m1 m4 m6 // backend, placed in natural order as they come here
  2207. * 1 2 3 4 5 6 // original indexes, just points directly to backend matches.
  2208. *
  2209. * After, say, sort by asc matches weights, only index vector modified and became this:
  2210. *
  2211. * 4 2 3 5 1 6 // reading match[i[k]] for k in 0..5 will return matches in weight ascending order.
  2212. *
  2213. * When grouping we collect several matches together and sort them.
  2214. * Say, if one group contains matches m1, m2, m5, m6 and second - m4, m3, we have to keep 2 sets of matches in hash:
  2215. *
  2216. * h1: m1 m2 m5 m6
  2217. * h2: m4 m3
  2218. *
  2219. * How to store that sequences?
  2220. *
  2221. * Well, we can do it directly, set by set, keeping heads in hash:
  2222. * m1 m2 m5 m6 m4 m3, heads 1, 5
  2223. *
  2224. * going to indirection indexes we have sequence
  2225. * 4 2 1 6 5 3, hash 1, 4
  2226. *
  2227. * That looks ok, but since sets can dynamically change, it is hard to insert more into existing group.
  2228. * That is like insertion into the middle of vector.
  2229. *
  2230. * Let's try to make a list (chain). Don't care about in-group ordering, just keep things chained.
  2231. * To make things easier, ring the list (connect tail back to head), and store pos of one of the elems in the hash
  2232. * (since it is ring - that is not important which exactly, just to have something to glue).
  2233. *
  2234. * m5 -> 1 heads 1
  2235. * m2 -> 2, 1 heads 2
  2236. * m3 -> 2, 1, 3, heads 2, 3
  2237. * m1 -> 2, 4, 3, 1, heads 4, 3
  2238. * m4 -> 2, 4, 5, 1, 3, heads 4, 5
  2239. * m6 -> 2, 4, 5, 6, 3, 1 heads 6, 5
  2240. *
  2241. * On insert, we store old head into new elem, and new elem into the place of old head.
* One remaining detail is the indirect reference by position. I.e. we assume that the index at position 6 points to the match at position 6.
  2243. * However, we can notice, that since it is ring, left elem of 6-th points to it directly by number 6.
* So we can just shift heads back by one position - and that's all, the indirection assumption is no longer necessary.
  2245. * Final sequence will be this one:
  2246. * m5 m2 m3 m1 m4 m6 - matches in their natural order
  2247. * 2, 4, 5, 6, 3, 1 - indirection vec. 4, 3. - heads of groups.
  2248. *
  2249. * Iteration: take 1-st group with head 4:
  2250. * 6->1->2->4*. Each num is both index of the link, and index of backend match. So, matches here are:
* m6 m5 m2 m1, and we can resort them as necessary (indirectly). Voilà!
  2252. *
  2253. * On deletion item goes to freelist.
  2254. * Allocation of an elem is separate task, it is achieved by linear allocation (first), and by freelist (when filled).
  2255. *
  2256. */
  2257. template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
  2258. class CSphKBufferNGroupSorter : public KBufferGroupSorter_T<COMPGROUP,UNIQ,DISTINCT,NOTIFICATIONS>
  2259. {
  2260. using MYTYPE = CSphKBufferNGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS,HAS_AGGREGATES>;
  2261. protected:
  2262. using KBufferGroupSorter = KBufferGroupSorter_T<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS>;
  2263. using KBufferGroupSorter::m_eGroupBy;
  2264. using KBufferGroupSorter::m_pGrouper;
  2265. using KBufferGroupSorter::m_iLimit;
  2266. using KBufferGroupSorter::m_tUniq;
  2267. using KBufferGroupSorter::m_bSortByDistinct;
  2268. using KBufferGroupSorter::m_tGroupSorter;
  2269. using KBufferGroupSorter::m_tSubSorter;
  2270. using KBufferGroupSorter::m_dAvgs;
  2271. using KBufferGroupSorter::GROUPBY_FACTOR;
  2272. using KBufferGroupSorter::GetAggregatesWithoutAvgs;
  2273. using KBufferGroupSorter::Distinct;
  2274. using KBufferGroupSorter::FreeMatchPtrs;
  2275. using KBufferGroupSorter::UpdateDistinct;
  2276. using KBufferGroupSorter::RemoveDistinct;
  2277. using KBufferGroupSorter::m_bAvgFinal;
  2278. using CSphGroupSorterSettings::m_tLocGroupby;
  2279. using CSphGroupSorterSettings::m_tLocCount;
  2280. using CSphGroupSorterSettings::m_tLocDistinct;
  2281. // using CSphGroupSorterSettings::m_tLocGroupbyStr; // check! unimplemented?
  2282. using BaseGroupSorter_c::EvalHAVING;
  2283. using BaseGroupSorter_c::AggrUpdate;
  2284. using BaseGroupSorter_c::AggrUngroup;
  2285. using CSphMatchQueueTraits::m_iSize;
  2286. using CSphMatchQueueTraits::m_dData;
  2287. using MatchSorter_c::m_iTotal;
  2288. using MatchSorter_c::m_tJustPushed;
  2289. using MatchSorter_c::m_pSchema;
  2290. public:
  2291. /// ctor
  2292. CSphKBufferNGroupSorter ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings ) // FIXME! make k configurable
  2293. : KBufferGroupSorter ( pComp, pQuery, tSettings )
  2294. , m_hGroup2Index ( tSettings.m_iMaxMatches*GROUPBY_FACTOR )
  2295. , m_iGLimit ( Min ( pQuery->m_iGroupbyLimit, m_iLimit ) )
  2296. {
  2297. #ifndef NDEBUG
  2298. DBG << "Created iruns = " << m_iruns << " ipushed = " << m_ipushed;
  2299. #endif
  2300. this->m_dIData.Resize ( m_iSize ); // m_iLimit * GROUPBY_FACTOR
  2301. }
  2302. inline void SetGLimit ( int iGLimit ) { m_iGLimit = Min ( iGLimit, m_iLimit ); }
  2303. int GetLength() override { return Min ( m_iUsed, m_iLimit ); }
  2304. bool Push ( const CSphMatch & tEntry ) override { return PushEx<false> ( tEntry, m_pGrouper->KeyFromMatch(tEntry), false, false, true, nullptr ); }
  2305. void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
  2306. bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override { return PushEx<true> ( tEntry, tEntry.GetAttr ( m_tLocGroupby ), bNewSet, false, true, nullptr ); }
  2307. /// store all entries into specified location in sorted order, and remove them from queue
  2308. int Flatten ( CSphMatch * pTo ) override
  2309. {
  2310. if ( !GetLength() )
  2311. return 0;
  2312. if ( !m_bFinalized )
  2313. {
  2314. FinalizeChains ();
  2315. PrepareForExport ();
  2316. CountDistinct ();
  2317. }
  2318. auto fnSwap = [&pTo] ( CSphMatch & tSrc ) { // the writer
  2319. Swap ( *pTo, tSrc );
  2320. ++pTo;
  2321. };
  2322. const CSphMatch * pBegin = pTo;
  2323. for ( auto iHead : m_dFinalizedHeads )
  2324. {
  2325. CSphMatch & tGroupHead = m_dData[iHead];
  2326. if ( !EvalHAVING ( tGroupHead ))
  2327. {
  2328. DeleteChain ( iHead, false );
  2329. continue;
  2330. }
  2331. fnSwap ( tGroupHead ); // move top group match
  2332. for ( int i=this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
  2333. fnSwap ( m_dData[i] ); // move tail matches
  2334. }
  2335. // final clean up before possible next pass
  2336. m_uLastGroupKey = -1;
  2337. m_iFree = 0;
  2338. m_iUsed = 0;
  2339. m_bFinalized = false;
  2340. m_iStorageSolidFrom = 0;
  2341. m_iTotal = 0;
  2342. m_dFinalizedHeads.Reset ();
  2343. m_hGroup2Index.Clear();
  2344. if constexpr ( DISTINCT )
  2345. m_tUniq.Reset();
  2346. return int ( pTo-pBegin );
  2347. }
  2348. void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) override
  2349. {
  2350. if ( !GetLength() )
  2351. return;
  2352. if ( bFinalizeMatches )
  2353. {
  2354. if ( !m_bFinalized )
  2355. {
  2356. FinalizeChains();
  2357. PrepareForExport();
  2358. CountDistinct();
  2359. }
  2360. ProcessData ( tProcessor, m_dFinalizedHeads );
  2361. }
  2362. else
  2363. {
  2364. ProcessData ( tProcessor, GetAllHeads() );
  2365. if constexpr ( DISTINCT )
  2366. {
  2367. // if we are not finalizing matches, we are using global sorters
  2368. // let's try to remove dupes while we are processing data in separate threads
  2369. // so that the main thread will have fewer data to work with
  2370. m_tUniq.Sort();
  2371. VecTraits_T<SphGroupKey_t> dStub;
  2372. m_tUniq.Compact(dStub);
  2373. }
  2374. }
  2375. }
  2376. // TODO! TEST!
  2377. ISphMatchSorter * Clone () const override
  2378. {
  2379. auto* pClone = this->template CloneSorterT<MYTYPE>();
  2380. pClone->SetGLimit (m_iGLimit);
  2381. return pClone;
  2382. }
  2383. void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
  2384. {
  2385. #ifndef NDEBUG
  2386. DBG << " MoveTo " << pRhs << " iRuns:iPushed - " << m_iruns << " " << m_ipushed;
  2387. #endif
  2388. auto& dRhs = *(MYTYPE *) pRhs;
  2389. if ( !dRhs.m_iTotal )
  2390. {
  2391. DBG << " Rhs is empty, adopt! ";
  2392. CSphMatchQueueTraits::SwapMatchQueueTraits ( dRhs );
  2393. dRhs.m_hGroup2Index = std::move ( m_hGroup2Index );
  2394. ::Swap ( m_uLastGroupKey, dRhs.m_uLastGroupKey );
  2395. ::Swap ( m_iFree, dRhs.m_iFree );
  2396. ::Swap ( m_iUsed, dRhs.m_iUsed );
  2397. ::Swap ( m_bFinalized, dRhs.m_bFinalized );
  2398. m_dFinalizedHeads.SwapData ( dRhs.m_dFinalizedHeads );
  2399. ::Swap ( m_iStorageSolidFrom, dRhs.m_iStorageSolidFrom );
  2400. #ifndef NDEBUG
  2401. ::Swap ( m_iruns, dRhs.m_iruns );
  2402. ::Swap ( m_ipushed, dRhs.m_ipushed );
  2403. LOC_SWAP(dRhs);
  2404. #endif
  2405. if ( !m_bFinalized && bCopyMeta )
  2406. dRhs.m_tUniq = std::move(m_tUniq);
  2407. return;
  2408. }
  2409. bool bUniqUpdated = false;
  2410. if ( !m_bFinalized && bCopyMeta )
  2411. {
  2412. m_tUniq.CopyTo ( dRhs.m_tUniq );
  2413. bUniqUpdated = true;
  2414. }
  2415. if ( !m_bFinalized )
  2416. {
  2417. FinalizeChains();
  2418. // PrepareForExport(); // for moving we not need fine-finaled matches; just cleaned is enough
  2419. CountDistinct();
  2420. }
  2421. dRhs.m_bUpdateDistinct = !bUniqUpdated;
  2422. dRhs.SetMerge(true);
  2423. auto iTotal = dRhs.m_iTotal;
  2424. for ( auto iHead : m_dFinalizedHeads )
  2425. {
  2426. auto uGroupKey = m_dData[iHead].GetAttr ( m_tLocGroupby );
  2427. // have to set bNewSet to true
  2428. // as need to fallthrough at PushAlreadyHashed and update count and aggregates values for head match
  2429. // even uGroupKey match already exists
  2430. dRhs.template PushEx<true> ( m_dData[iHead], uGroupKey, true, true, true, nullptr );
  2431. for ( int i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
  2432. dRhs.template PushEx<false> ( m_dData[i], uGroupKey, false, true, true, nullptr );
  2433. DeleteChain ( iHead, false );
  2434. }
  2435. dRhs.m_bUpdateDistinct = true;
  2436. dRhs.SetMerge(false);
  2437. dRhs.m_iTotal = m_iTotal+iTotal;
  2438. }
  2439. void SetMerge ( bool bMerge ) override { m_bMerge = bMerge; }
  2440. protected:
  2441. int m_iStorageSolidFrom = 0; // edge from witch storage is not yet touched and need no chaining freelist
  2442. OpenHashTable_T<SphGroupKey_t, int> m_hGroup2Index; // used to quickly locate group for incoming match
  2443. int m_iGLimit; ///< limit per one group
  2444. SphGroupKey_t m_uLastGroupKey = -1; ///< helps to determine in pushEx whether the new subgroup started
  2445. int m_iFree = 0; ///< current insertion point
  2446. int m_iUsed = 0;
  2447. // final cached data valid when everything is finalized
  2448. bool m_bFinalized = false; // helper to avoid double work
  2449. CSphVector<int> m_dFinalizedHeads; /// < sorted finalized heads
  2450. int m_iLastGroupCutoff; /// < cutoff edge of last group to fit limit
  2451. #ifndef NDEBUG
  2452. int m_iruns = 0; ///< helpers for conditional breakpoints on debug
  2453. int m_ipushed = 0;
  2454. #endif
  2455. LOC_ADD;
  2456. /*
  2457. * Every match according to uGroupKey came to own subset.
  2458. * Head match of each group stored in the hash to quickly locate on next pushes
  2459. * It hold all calculated stuff from aggregates/group_concat until finalization.
  2460. */
  2461. template <bool GROUPED>
  2462. bool PushEx ( const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bNewSet, bool bTailFinalized, bool bClearNotify, [[maybe_unused]] SphAttr_t * pAttr )
  2463. {
  2464. #ifndef NDEBUG
  2465. ++m_ipushed;
  2466. DBG << "PushEx: tag" << tEntry.m_iTag << ",g" << uGroupKey << ": pushed" << m_ipushed
  2467. << " g" << GROUPED << " n" << bNewSet;
  2468. #endif
  2469. if constexpr ( NOTIFICATIONS )
  2470. {
  2471. if ( bClearNotify )
  2472. {
  2473. m_tJustPushed = RowTagged_t();
  2474. this->m_dJustPopped.Resize ( 0 );
  2475. }
  2476. }
  2477. this->m_bFinalized = false;
  2478. if ( HAS_AGGREGATES && m_bAvgFinal )
  2479. CalcAvg ( Avg_e::UNGROUP );
  2480. // place elem into the set
  2481. auto iNew = AllocateMatch ();
  2482. CSphMatch & tNew = m_dData[iNew];
  2483. // if such group already hashed
  2484. int * pGroupIdx = m_hGroup2Index.Find ( uGroupKey );
  2485. if ( pGroupIdx )
  2486. return PushAlreadyHashed<GROUPED> ( pGroupIdx, iNew, tEntry, uGroupKey, bNewSet, bTailFinalized );
  2487. // match came from MoveTo of another sorter, it is tail, and it has no group here (m.b. it is already
  2488. // deleted during finalization as one of worst). Just discard the whole group in the case.
  2489. if ( bTailFinalized && !GROUPED )
  2490. {
  2491. DeallocateMatch ( iNew );
  2492. return false;
  2493. }
  2494. m_pSchema->CloneMatch ( tNew, tEntry ); // fixme! check if essential data cloned
  2495. // else
  2496. // this->m_tPregroup.CloneWithoutAggrs ( tNew, tEntry );
  2497. // this->m_tPregroup.CopyAggrs ( tNew, tEntry );
  2498. // submit actual distinct value in all cases
  2499. if ( DISTINCT && m_bUpdateDistinct )
  2500. KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tNew, uGroupKey );
  2501. if constexpr ( NOTIFICATIONS )
  2502. m_tJustPushed = RowTagged_t ( tNew );
  2503. this->m_dIData[iNew] = iNew; // new head - points to self (0-ring)
  2504. Verify ( m_hGroup2Index.Add ( uGroupKey, iNew ));
  2505. ++m_iTotal;
  2506. if constexpr ( GROUPED )
  2507. {
  2508. m_uLastGroupKey = uGroupKey;
  2509. if constexpr ( HAS_AGGREGATES )
  2510. AggrUngroup ( m_dData[iNew] );
  2511. } else
  2512. {
  2513. tNew.SetAttr ( m_tLocGroupby, uGroupKey );
  2514. tNew.SetAttr ( m_tLocCount, 1 );
  2515. if constexpr ( DISTINCT )
  2516. tNew.SetAttr ( m_tLocDistinct, 0 );
  2517. }
  2518. return true;
  2519. }
private:
bool m_bUpdateDistinct = true;	// when false, PushEx/PushAlreadyHashed skip feeding values into the distinct counter
bool m_bMerge = false;			// forwarded as the bMerge flag of AggrUpdate; NOTE(review): presumably toggled by a SetMerge override — confirm at caller
  2523. // surely give place for a match (do vacuum-cleaning, if there is no place)
  2524. inline int AllocateMatch ()
  2525. {
  2526. auto iPlace = TryAllocateMatch ();
  2527. if ( iPlace<0 )
  2528. {
  2529. VacuumClean ();
  2530. iPlace = TryAllocateMatch ();
  2531. }
  2532. assert ( iPlace>=0 && iPlace<m_iSize );
  2533. DBG << "allocated: " << iPlace;
  2534. return iPlace;
  2535. }
// return match and free it's dataptrs
FORCE_INLINE void FreeMatch ( int iElem, bool bNotify ) // fixme! intersects with parent by name
{
// release owned data pointers of the match (bNotify is forwarded — presumably drives the 'just popped' notification; confirm in FreeMatchPtrs)
FreeMatchPtrs ( iElem, bNotify );
// then return the slot to the free-list
DeallocateMatch ( iElem );
}
// try to grab a slot for a new match; returns slot index, or -1 when the buffer is full.
// slots below m_iStorageSolidFrom form a free-list chained through m_dIData;
// slots at m_iStorageSolidFrom and above are a solid, never-yet-used tail.
inline int TryAllocateMatch ()
{
if ( m_iUsed==m_iSize )
return -1; // no more place..
++m_iUsed;
auto iElem = m_iFree;
if ( iElem<m_iStorageSolidFrom )
m_iFree = this->m_dIData[iElem]; // pop head of the free-list
else {
// inside the solid area: just advance its border
++m_iFree;
m_iStorageSolidFrom = m_iFree;
}
return iElem;
}
  2556. inline void DeallocateMatch (int iElem)
  2557. {
  2558. --m_iUsed;
  2559. this->m_dIData[iElem] = m_iFree; // put to chain
  2560. m_iFree = iElem;
  2561. assert ( m_iFree >=0 );
  2562. }
  2563. // return length of the matches chain (-1 terminated)
  2564. int ChainLen ( int iPos ) const
  2565. {
  2566. int iChainLen = 1;
  2567. for ( int i = this->m_dIData[iPos]; i!=iPos; i = this->m_dIData[i] )
  2568. ++iChainLen;
  2569. return iChainLen;
  2570. }
  2571. // add new match into the chain. Aggregates are relaxed and not managed till finalize
  2572. /*
  2573. * chain of the matches is actually ring of integers. Each one points to the coherent
  2574. * match in the storage, and simultaneously next member of the ring.
  2575. * We can iterate over the chain starting from the head and looking until same index met again.
  2576. */
  2577. void AddToChain ( int iNew, const CSphMatch & tEntry, int iHead )
  2578. {
  2579. CSphMatch & tNew = m_dData[iNew];
  2580. this->m_tPregroup.CloneWithoutAggrs ( tNew, tEntry );
  2581. if constexpr ( NOTIFICATIONS )
  2582. m_tJustPushed = RowTagged_t ( tNew );
  2583. // put after the head
  2584. auto iPrevChain = this->m_dIData[iHead];
  2585. this->m_dIData[iNew] = iPrevChain;
  2586. this->m_dIData[iHead] = iNew;
  2587. }
// add entry to existing group
/*
* If group is not full, and new match is less than head, it will replace the head.
* calculated stuff will be moved and adopted by this new replacement.
* If group is full, and new match is less than head, it will be early rejected.
* In all other cases new match will be inserted into the group right after head
*/
template <bool GROUPED>
bool PushAlreadyHashed ( int * pHead, int iNew, const CSphMatch & tEntry, const SphGroupKey_t uGroupKey, bool bNewSet, bool bTailFinalized )
{
int & iHead = *pHead;
assert ( m_dData[iHead].GetAttr ( m_tLocGroupby )==uGroupKey );
assert ( m_dData[iHead].m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
DBG << "existing " << m_dData[iHead].m_iTag << "," << uGroupKey
<< " m_pDynamic: " << m_dData[iHead].m_pDynamic;
// check if we need to push the match at all
if ( m_tSubSorter.MatchIsGreater ( tEntry, m_dData[iHead] ) )
AddToChain ( iNew, tEntry, iHead ); // always add; bad will be filtered later in gc
else if ( ChainLen ( iHead )>=m_iGLimit ) // less than worst, drop it
DeallocateMatch ( iNew );
else
{
// better than the current head: insert and make it the new head,
// moving the calculated aggregates over to it
AddToChain ( iNew, tEntry, iHead );
this->m_tPregroup.MoveAggrs ( m_dData[iNew], m_dData[iHead] );
*pHead = iNew;
}
// NB: iHead is a reference into the hash, so it already reflects a possible head replacement above
auto & tHeadMatch = m_dData[iHead];
// submit actual distinct value in all cases
if ( DISTINCT && m_bUpdateDistinct )
KBufferGroupSorter::template UpdateDistinct<GROUPED> ( tEntry, uGroupKey );
// update group-wide counters
auto & tLocCount = m_tLocCount;
if constexpr ( GROUPED )
{
// it's already grouped match
// sum grouped matches count
// (only once per incoming set/group; bNewSet is raised so aggregates below get updated too)
if ( bNewSet || uGroupKey!=m_uLastGroupKey )
{
tHeadMatch.AddCounterAttr ( tLocCount, tEntry );
m_uLastGroupKey = uGroupKey;
bNewSet = true;
}
} else if ( !bTailFinalized )
{
// it's a simple match
// increase grouped matches count
tHeadMatch.AddCounterScalar ( tLocCount, 1 );
bNewSet = true;
}
// update aggregates
if constexpr ( HAS_AGGREGATES )
{
if ( bNewSet )
AggrUpdate ( tHeadMatch, tEntry, GROUPED, m_bMerge );
}
// since it is dupe (i.e. such group is already pushed) - return false;
return false;
}
  2646. enum class Avg_e { FINALIZE, UNGROUP };
  2647. void CalcAvg ( Avg_e eGroup )
  2648. {
  2649. if ( this->m_dAvgs.IsEmpty() )
  2650. return;
  2651. m_bAvgFinal = ( eGroup==Avg_e::FINALIZE );
  2652. int64_t i = 0;
  2653. if ( eGroup==Avg_e::FINALIZE )
  2654. for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
  2655. m_dAvgs.Apply ( [this, &tData] ( AggrFunc_i * pAvg ) {
  2656. pAvg->Finalize ( m_dData[*tData.second] );
  2657. });
  2658. else
  2659. for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
  2660. m_dAvgs.Apply ( [this, &tData] ( AggrFunc_i * pAvg ) {
  2661. pAvg->Ungroup ( m_dData[*tData.second] );
  2662. });
  2663. }
// partition dData so that the iBound best elements (by m_tSubSorter) end up in the
// prefix [0..iBound) and the worst ones in the tail — a quickselect-style repeated
// Hoare partition; no full sort is performed
void BinaryPartitionTail ( VecTraits_T<int>& dData, int iBound )
{
--iBound;
int iPivot = dData[iBound];
int a = 0;
int b = dData.GetLength ()-1;
while (true) {
// classic two-index partition around iPivot within [a..b]
int i = a;
int j = b;
while (i<=j) {
while ( m_tSubSorter.IsLess ( dData[i], iPivot )) ++i;
while ( m_tSubSorter.IsLess ( iPivot, dData[j] )) --j;
if ( i<=j ) ::Swap ( dData[i++], dData[j--] );
}
// stop when the split point lands exactly on the requested boundary
if ( iBound==j )
break;
if ( iBound<j )
b = j; // too many elems acquired; continue with left part
else
a = i; // too few elems acquired; continue with right part
iPivot = dData[( a+b ) / 2];
}
}
  2687. CSphVector<int> GetAllHeads()
  2688. {
  2689. CSphVector<int> dAllHeads;
  2690. dAllHeads.Reserve ( m_hGroup2Index.GetLength ());
  2691. int64_t i = 0;
  2692. for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
  2693. dAllHeads.Add ( *tData.second );
  2694. return dAllHeads;
  2695. }
  2696. // free place for new matches
  2697. void VacuumClean()
  2698. {
  2699. auto iLimit = m_iLimit * GROUPBY_FACTOR / 2;
  2700. // first try to cut out too long tails
  2701. int iSize = 0;
  2702. int64_t i = 0;
  2703. for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
  2704. iSize += VacuumTail ( tData.second, m_iGLimit );
  2705. // if we reached the limit now - bail, no need to free more.
  2706. if ( iSize<=iLimit )
  2707. return;
  2708. // if we're here, just vacuuming tails wasn't effective enough and some deeper cleaning necessary
  2709. SortThenVacuumWorstHeads ( iLimit );
  2710. }
// final pass before iface finalize/flatten - cut worst, sort everything
void FinalizeChains()
{
if ( m_bFinalized )
return;
m_bFinalized = true;
// sort-trim each group chain to m_iGLimit
int64_t i = 0;
for ( auto tData = m_hGroup2Index.Iterate(i); tData.second; tData = m_hGroup2Index.Iterate(i) )
VacuumTail ( tData.second, m_iGLimit, Stage_e::FINAL );
// Continue by cut out whole groups
SortThenVacuumWorstHeads ( m_iLimit, Stage_e::FINAL ); // false since it is already sorted
// also free matches in the chain were cleared with FreeDataPtrs, but *now* we also need to free their dynamics
// otherwise in d-tr FreDataPtr on non-zero dynamics will be called again with probably another schema and crash
// FIXME!!! need to keep and restore all members changed by TryAllocateMatch - it'd be better to rewrite code to pass state into TryAllocateMatch or use common code
// walk the free-list by (ab)using TryAllocateMatch, resetting dynamics of every free slot,
// then restore the allocator state we saved beforehand
auto iFree = m_iFree;
auto iUsed = m_iUsed;
auto iSSFrom = m_iStorageSolidFrom;
for ( auto iElem = TryAllocateMatch (); iElem>=0; iElem = TryAllocateMatch () )
m_dData[iElem].ResetDynamic ();
m_iFree = iFree;
m_iUsed = iUsed;
m_iStorageSolidFrom = iSSFrom;
}
  2734. /*
  2735. * Here we
  2736. * 1) Cut off very last head if it would exceed the limit.
  2737. * 1) Copy all calculated stuff (aggr attributes) from head match to every other match of a group
  2738. * 2) Sort group in decreasing order, and then shift the ring ahead to 1 match.
  2739. * That is necessary since head is worst match, and next after it is the best one (since just sorted)
  2740. * Since it is ring, by moving ahead we will have 1-st match the best, last - the worst.
  2741. */
  2742. void PrepareForExport()
  2743. {
  2744. VacuumTail ( &m_dFinalizedHeads.Last(), m_iLastGroupCutoff, Stage_e::FINAL );
  2745. auto dAggrs = GetAggregatesWithoutAvgs ();
  2746. for ( auto& iHead : m_dFinalizedHeads )
  2747. {
  2748. for ( auto * pAggr : dAggrs )
  2749. pAggr->Finalize ( m_dData[iHead] );
  2750. PropagateAggregates ( iHead );
  2751. iHead = this->m_dIData[iHead]; // shift
  2752. }
  2753. }
  2754. void PropagateAggregates ( int iHead )
  2755. {
  2756. for ( auto i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
  2757. this->m_tPregroup.CopyAggrs ( m_dData[i], m_dData[iHead] );
  2758. }
// at collect stage we don't need to strictly sort matches inside groups,
// but we need to track pushed/deleted matches.
// at finalize stage, in opposite, no tracking need, but matches must be sorted.
enum class Stage_e { COLLECT, FINAL };
// sorts by next-to-worst element in the chain
// (the ring head is the worst match of its group; m_dIData[head] is the group's best,
// so comparing heads via one indirection ranks groups by their best match)
struct FinalGroupSorter_t
{
const GroupSorter_fn<COMPGROUP> & m_tGroupSorter;
const CSphTightVector<int> & m_dIData;
FinalGroupSorter_t ( const GroupSorter_fn<COMPGROUP> & tSorter, const CSphTightVector<int> & dIData )
: m_tGroupSorter ( tSorter )
, m_dIData ( dIData )
{}
bool IsLess ( int a, int b ) const
{
return m_tGroupSorter.IsLess ( m_dIData[a], m_dIData[b] );
}
};
// full clean - sort the groups, then iterate on them until iLimit elems counted. Cut out the rest.
// if last group is not fit into rest of iLimit, it still kept whole, no fraction performed over it.
// nothing is returned: the desired length of the last chain to make the limit hard ( 1..m_iGLimit )
// is stored into m_iLastGroupCutoff for the later PrepareForExport() call
void SortThenVacuumWorstHeads ( int iSoftLimit, Stage_e eStage = Stage_e::COLLECT )
{
m_dFinalizedHeads = GetAllHeads();
CalcAvg ( Avg_e::FINALIZE );
// in this final sort we need to keep the heads but to sort by next-to-head element (which is the best in group)
FinalGroupSorter_t tFinalSorter ( m_tGroupSorter, this->m_dIData );
m_dFinalizedHeads.Sort ( tFinalSorter );
int iRetainMatches = 0;
CSphVector<SphGroupKey_t> dRemovedHeads; // to remove distinct
// delete worst heads
ARRAY_FOREACH ( i, m_dFinalizedHeads )
if ( iSoftLimit > iRetainMatches )
iRetainMatches += ChainLen ( m_dFinalizedHeads[i] );
else
{
// all quota exceeded, the rest just to be cut totally
auto iRemoved = DeleteChain ( m_dFinalizedHeads[i], eStage==Stage_e::COLLECT );
if constexpr ( DISTINCT )
dRemovedHeads.Add( iRemoved );
m_dFinalizedHeads.RemoveFast ( i-- );
}
// discard removed distinct
if constexpr ( DISTINCT )
RemoveDistinct ( dRemovedHeads );
// at collect stage grouping continues, so averages must be converted back to raw sums
if ( eStage==Stage_e::COLLECT )
CalcAvg ( Avg_e::UNGROUP );
m_iLastGroupCutoff = m_iGLimit+iSoftLimit-iRetainMatches;
}
// for given chain throw out worst elems to fit in iLimit quota.
// Returns length of the chain
int VacuumTail ( int* pHead, int iLimit, Stage_e eStage = Stage_e::COLLECT )
{
assert ( iLimit>0 );
// unroll the ring into a flat vector, head first
CSphVector<int> dChain;
dChain.Add ( *pHead );
for ( auto i = this->m_dIData[*pHead]; i!=*pHead; i = this->m_dIData[i] )
dChain.Add ( i );
if ( dChain.GetLength()==1 )
return 1; // fast over
// dWorstTail is a view into dChain's buffer past the quota; it stays valid
// (and gets the worst elems) after the partition below
auto dWorstTail = dChain.Slice ( iLimit );
// if no sort necessary and limit not exceeded - nothing to do
if ( eStage==Stage_e::COLLECT && dWorstTail.IsEmpty() )
return dChain.GetLength();
// chain need to be shortened
if ( !dWorstTail.IsEmpty() )
{
BinaryPartitionTail ( dChain, iLimit );
dChain.Resize ( iLimit );
}
// sort if necessary and ensure last elem of chain is the worst one
if ( eStage==Stage_e::FINAL )
{
dChain.Sort( m_tSubSorter ); // sorted in reverse order, so the worst match here is the last one.
iLimit = dChain.GetLength();
} else
{
assert ( dChain.GetLength ()==iLimit );
// not sorted, need to find worst match for new head
int iWorst = 0;
for (int i=1; i<iLimit; ++i)
{
if ( m_tSubSorter.IsLess ( dChain[iWorst], dChain[i] ) )
iWorst = i;
}
::Swap ( dChain[iWorst], dChain[iLimit-1] );
}
auto iNewHead = dChain.Last ();
// move calculated aggregates to the new head
// (both the caller's pointer and the group hash entry must be retargeted)
if ( iNewHead!=*pHead )
{
SphGroupKey_t uGroupKey = m_dData[*pHead].GetAttr ( m_tLocGroupby );
int * pHeadInHash = m_hGroup2Index.Find(uGroupKey);
assert(pHeadInHash);
this->m_tPregroup.MoveAggrs ( m_dData[iNewHead], m_dData[*pHead] );
*pHead = iNewHead;
*pHeadInHash = iNewHead;
}
// now we can safely free worst matches
for ( auto iWorst : dWorstTail )
FreeMatch ( iWorst, eStage==Stage_e::COLLECT );
// recreate the chain. It is actually ring, and external hash points to the minimal elem
this->m_dIData[iNewHead] = dChain[0]; // head points to begin of chain
for ( int i = 0; i<iLimit-1; ++i ) // each elem points to the next, last again to head
this->m_dIData[dChain[i]] = dChain[i+1];
return iLimit;
}
  2866. // delete whole chain (and remove from hash also).
  2867. SphGroupKey_t DeleteChain ( int iPos, bool bNotify )
  2868. {
  2869. SphGroupKey_t uGroupKey = m_dData[iPos].GetAttr ( m_tLocGroupby );
  2870. m_hGroup2Index.Delete ( uGroupKey );
  2871. int iNext = this->m_dIData[iPos];
  2872. FreeMatch ( iPos, bNotify );
  2873. for ( auto i = iNext; i!=iPos; i = iNext )
  2874. {
  2875. iNext = this->m_dIData[i];
  2876. FreeMatch ( i, bNotify );
  2877. }
  2878. return uGroupKey;
  2879. }
/// count distinct values if necessary
void CountDistinct ()
{
if constexpr ( DISTINCT )
// the callback resolves a group key to its head match (or nullptr if the group was vacuumed away)
Distinct ( [this] ( SphGroupKey_t uGroup )->CSphMatch *
{
auto pIdx = m_hGroup2Index.Find ( uGroup );
return pIdx? &m_dData[*pIdx] : nullptr;
});
}
  2890. void ProcessData ( MatchProcessor_i & tProcessor, const IntVec_t & dHeads )
  2891. {
  2892. for ( auto iHead : dHeads )
  2893. {
  2894. tProcessor.Process ( &m_dData[iHead] ); // process top group match
  2895. for ( int i = this->m_dIData[iHead]; i!=iHead; i = this->m_dIData[i] )
  2896. tProcessor.Process ( &m_dData[i] ); // process tail matches
  2897. }
  2898. }
  2899. };
  2900. /////////////////////////////////////////////////////////////////////
/// generic match sorter that understands groupers that return multiple keys per match
/// (e.g. MVA/JSON-array group-by): one incoming match is pushed once per produced key
template < typename T >
class MultiValueGroupSorterTraits_T : public T
{
using BASE = T;
public:
MultiValueGroupSorterTraits_T ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings )
: T ( pComp, pQuery, tSettings )
{}
/// push ungrouped match: fan it out to every group key the grouper extracts
bool Push ( const CSphMatch & tMatch ) override
{
this->m_pGrouper->MultipleKeysFromMatch ( tMatch, m_dKeys );
bool bRes = false;
ARRAY_FOREACH ( i, m_dKeys )
{
SphGroupKey_t tKey = m_dKeys[i];
// need to clear notifications once per match - not for every pushed value
bRes |= BASE::template PushEx<false> ( tMatch, tKey, false, false, ( i==0 ), nullptr );
}
return bRes;
}
/// push pre-grouped match: its group key is already stored as an attribute
bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override
{
return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet, false, true, nullptr );
}
private:
CSphVector<SphGroupKey_t> m_dKeys; // reused buffer for keys extracted from the current match
};
/// multi-key grouper over the plain k-buffer group sorter (one row per group)
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class MultiValueGroupSorter_T : public MultiValueGroupSorterTraits_T <CSphKBufferGroupSorter <COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>
{
using BASE = MultiValueGroupSorterTraits_T <CSphKBufferGroupSorter < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>;
using MYTYPE = MultiValueGroupSorter_T < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES >;
public:
using BASE::BASE;
ISphMatchSorter * Clone () const final { return this->template CloneSorterT<MYTYPE>(); }
};
/// multi-key grouper over the N-group k-buffer sorter (up to N rows per group);
/// its Clone() additionally carries the per-group limit over to the copy
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class MultiValueNGroupSorter_T : public MultiValueGroupSorterTraits_T < CSphKBufferNGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>
{
using BASE = MultiValueGroupSorterTraits_T <CSphKBufferNGroupSorter < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>>;
using MYTYPE = MultiValueNGroupSorter_T <COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
public:
using BASE::BASE;
ISphMatchSorter * Clone () const final
{
auto * pClone = this->template CloneSorterT<MYTYPE>();
pClone->SetGLimit (this->m_iGLimit); // the clone must keep the same per-group row limit
return pClone;
}
};
  2952. /////////////////////////////////////////////////////////////////////
/// match sorter with k-buffering and group-by for JSON arrays
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES >
class CSphKBufferJsonGroupSorter : public CSphKBufferGroupSorter < COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES >
{
public:
using BASE = CSphKBufferGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
using MYTYPE = CSphKBufferJsonGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
// since we inherit from template, we need to write boring 'using' block
using KBufferGroupSorter = KBufferGroupSorter_T<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS>;
using KBufferGroupSorter::m_eGroupBy;
using KBufferGroupSorter::m_iLimit;
using KBufferGroupSorter::m_tSubSorter;
/// ctor
FWD_BASECTOR( CSphKBufferJsonGroupSorter )
bool Push ( const CSphMatch & tEntry ) final { return PushMatch(tEntry); }
void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
/// add pre-grouped entry to the queue
bool PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) override
{
// re-group it based on the group key
return BASE::template PushEx<true> ( tEntry, tEntry.GetAttr ( BASE::m_tLocGroupby ), bNewSet, false, true, nullptr );
}
ISphMatchSorter * Clone () const final
{
return this->template CloneSorterT<MYTYPE>();
}
private:
/// extract the JSON field of the match and push the match once per value found in it;
/// PushJsonField invokes the lambda for every scalar/array element of the field
FORCE_INLINE bool PushMatch ( const CSphMatch & tMatch )
{
SphGroupKey_t uGroupKey = this->m_pGrouper->KeyFromMatch ( tMatch );
const BYTE * pBlobPool = this->m_pGrouper->GetBlobPool();
bool bClearNotify = true;
return PushJsonField ( uGroupKey, pBlobPool, [this, &tMatch, &bClearNotify]( SphAttr_t * pAttr, SphGroupKey_t uMatchGroupKey )
{
bool bPushed = BASE::template PushEx<false> ( tMatch, uMatchGroupKey, false, false, bClearNotify, pAttr );
bClearNotify = false; // need to clear notifications once per match - not for every pushed value
return bPushed;
}
);
}
};
/// implicit group-by sorter
/// invoked when no 'group-by', but count(*) or count(distinct attr) are in game
/// keeps exactly one result row (m_tData) and folds every pushed match into it
template < typename COMPGROUP, typename UNIQ, int DISTINCT, bool NOTIFICATIONS, bool HAS_AGGREGATES>
class CSphImplicitGroupSorter final : public MatchSorter_c, ISphNoncopyable, protected BaseGroupSorter_c
{
using MYTYPE = CSphImplicitGroupSorter<COMPGROUP, UNIQ, DISTINCT, NOTIFICATIONS, HAS_AGGREGATES>;
using BASE = MatchSorter_c;
public:
// pComp is asserted null: there is nothing to compare with a single implicit group
CSphImplicitGroupSorter ( const ISphMatchComparator * DEBUGARG(pComp), const CSphQuery *, const CSphGroupSorterSettings & tSettings )
: BaseGroupSorter_c ( tSettings )
{
assert ( !DISTINCT || tSettings.m_pDistinctFetcher );
assert ( !pComp );
if constexpr ( NOTIFICATIONS )
m_dJustPopped.Reserve(1);
m_iMatchCapacity = 1; // a single aggregated row is ever stored
m_pDistinctFetcher = tSettings.m_pDistinctFetcher;
}
/// schema setup
void SetSchema ( ISphSchema * pSchema, bool bRemapCmp ) final
{
// on re-setup: remap locators to the new schema and drop previously created aggregates
if ( m_pSchema )
{
FixupLocators ( m_pSchema, pSchema );
m_tPregroup.ResetAttrs ();
m_dAggregates.Apply ( [] ( AggrFunc_i * pAggr ) {SafeDelete ( pAggr ); } );
m_dAggregates.Resize ( 0 );
}
BASE::SetSchema ( pSchema, bRemapCmp );
SetupBaseGrouper<DISTINCT> ( pSchema );
}
bool IsGroupby () const final { return true; }
void SetBlobPool ( const BYTE * pBlobPool ) final
{
BlobPool_c::SetBlobPool ( pBlobPool );
if ( m_pDistinctFetcher )
m_pDistinctFetcher->SetBlobPool(pBlobPool);
}
void SetColumnar ( columnar::Columnar_i * pColumnar ) final
{
BASE::SetColumnar(pColumnar);
BaseGroupSorter_c::SetColumnar(pColumnar);
if ( m_pDistinctFetcher )
m_pDistinctFetcher->SetColumnar(pColumnar);
}
bool IsCutoffDisabled() const final { return true; }
bool Push ( const CSphMatch & tEntry ) final { return PushEx<false>(tEntry); }
void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
bool PushGrouped ( const CSphMatch & tEntry, bool ) final { return PushEx<true>(tEntry); }
/// store all entries into specified location in sorted order, and remove them from queue
int Flatten ( CSphMatch * pTo ) final
{
assert ( m_bDataInitialized );
CountDistinct ();
if constexpr ( HAS_AGGREGATES )
{
for ( auto * pAggregate : m_dAggregates )
pAggregate->Finalize ( m_tData );
}
// emit the single row only if it passes HAVING; either way the sorter is reset
int iCopied = 0;
if ( EvalHAVING ( m_tData ) )
{
iCopied = 1;
Swap ( *pTo, m_tData );
} else
{
m_pSchema->FreeDataPtrs ( m_tData );
m_tData.ResetDynamic ();
}
m_iTotal = 0;
m_bDataInitialized = false;
if constexpr ( DISTINCT )
m_tUniq.Reset();
return iCopied;
}
/// finalize, perform final sort/cut as needed
void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final
{
if ( !GetLength() )
return;
tProcessor.Process ( &m_tData );
if ( !bFinalizeMatches )
m_tUniq.Compact();
}
int GetLength() final { return m_bDataInitialized ? 1 : 0; }
bool CanBeCloned() const final { return !DISTINCT && BASE::CanBeCloned(); }
// TODO! test.
ISphMatchSorter * Clone () const final
{
auto pClone = new MYTYPE ( nullptr, nullptr, *this );
CloneTo ( pClone );
pClone->SetupBaseGrouperWrp ( pClone->m_pSchema );
if ( m_pDistinctFetcher )
pClone->m_pDistinctFetcher = m_pDistinctFetcher->Clone();
return pClone;
}
void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final
{
if (!m_bDataInitialized)
return;
auto& dRhs = *(MYTYPE *) pRhs;
// target empty: just hand our state over wholesale
if ( !dRhs.m_bDataInitialized )
{
// ISphMatchSorter
::Swap ( m_iTotal, dRhs.m_iTotal );
::Swap ( m_tData, dRhs.m_tData );
::Swap ( m_bDataInitialized, dRhs.m_bDataInitialized );
if ( bCopyMeta )
dRhs.m_tUniq = std::move ( m_tUniq );
return;
}
if ( bCopyMeta )
m_tUniq.CopyTo ( dRhs.m_tUniq );
// other step is a bit tricky:
// we just can't add current count uniq to final; need to append m_tUniq instead,
// so that final flattening will calculate real uniq count.
dRhs.AddCount ( m_tData );
if constexpr ( HAS_AGGREGATES )
dRhs.UpdateAggregates ( m_tData, false, true );
if ( !bCopyMeta && DISTINCT )
dRhs.UpdateDistinct ( m_tData );
}
void SetMerge ( bool bMerge ) override { m_bMerge = bMerge; }
protected:
CSphMatch m_tData;					// the single accumulated row
bool m_bDataInitialized = false;	// true once the first match was cloned into m_tData
bool m_bMerge = false;				// merge mode flag, forwarded to aggregate setup/update
UNIQ m_tUniq;						// distinct-value accumulator
private:
CSphVector<SphAttr_t> m_dDistinctKeys;	// reused buffer for multi-key distinct fetchers
CSphRefcountedPtr<DistinctFetcher_i> m_pDistinctFetcher;
inline void SetupBaseGrouperWrp ( ISphSchema * pSchema ) { SetupBaseGrouper<DISTINCT> ( pSchema ); }
void AddCount ( const CSphMatch & tEntry ) { m_tData.AddCounterAttr ( m_tLocCount, tEntry ); }
void UpdateAggregates ( const CSphMatch & tEntry, bool bGrouped = true, bool bMerge = false ) { AggrUpdate ( m_tData, tEntry, bGrouped, bMerge ); }
void SetupAggregates ( const CSphMatch & tEntry ) { AggrSetup ( m_tData, tEntry, m_bMerge ); }
// submit actual distinct value in all cases
// (DISTINCT==1 means a single-key fetcher; other non-zero values use the multi-key path)
template <bool GROUPED = true>
void UpdateDistinct ( const CSphMatch & tEntry )
{
int iCount = 1;
if constexpr ( GROUPED )
iCount = (int) tEntry.GetAttr ( m_tLocDistinct );
if constexpr ( DISTINCT==1 )
m_tUniq.Add ( { 0, m_pDistinctFetcher->GetKey(tEntry), iCount } );
else
{
m_pDistinctFetcher->GetKeys ( tEntry, m_dDistinctKeys );
for ( auto i : m_dDistinctKeys )
this->m_tUniq.Add ( { 0, i, iCount } );
}
}
/// add entry to the queue
/// returns true only for the very first match (the one that initializes m_tData);
/// every later match is folded into counters/aggregates and reported as a dupe
template <bool GROUPED>
FORCE_INLINE bool PushEx ( const CSphMatch & tEntry )
{
if constexpr ( NOTIFICATIONS )
{
m_tJustPushed = RowTagged_t();
m_dJustPopped.Resize(0);
}
if ( m_bDataInitialized )
{
assert ( m_tData.m_pDynamic[-1]==tEntry.m_pDynamic[-1] );
if constexpr ( GROUPED )
{
// it's already grouped match
// sum grouped matches count
AddCount ( tEntry );
} else
{
// it's a simple match
// increase grouped matches count
m_tData.AddCounterScalar ( m_tLocCount, 1 );
}
// update aggregates
if constexpr ( HAS_AGGREGATES )
UpdateAggregates ( tEntry, GROUPED, m_bMerge );
}
if constexpr ( DISTINCT )
UpdateDistinct<GROUPED> ( tEntry );
// it's a dupe anyway, so we shouldn't update total matches count
if ( m_bDataInitialized )
return false;
// add first
m_pSchema->CloneMatch ( m_tData, tEntry );
// first-time aggregate setup
if constexpr ( HAS_AGGREGATES )
SetupAggregates(tEntry);
if constexpr ( NOTIFICATIONS )
m_tJustPushed = RowTagged_t ( m_tData );
if constexpr ( !GROUPED )
{
m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
m_tData.SetAttr ( m_tLocCount, 1 );
if constexpr ( DISTINCT )
m_tData.SetAttr ( m_tLocDistinct, 0 );
}
else
{
if constexpr ( HAS_AGGREGATES )
AggrUngroup ( m_tData );
}
m_bDataInitialized = true;
++m_iTotal;
return true;
}
/// count distinct values if necessary
void CountDistinct ()
{
if constexpr ( !DISTINCT )
return;
assert ( m_bDataInitialized );
m_tData.SetAttr ( m_tLocDistinct, m_tUniq.CountDistinct() );
}
};
// base for 'fast' (precalc) sorters: they hold a single synthetic match filled from
// precalculated values, never clone, and never merge with other sorters
class FastBaseSorter_c : public MatchSorter_c, ISphNoncopyable, protected BaseGroupSorter_c
{
public:
FastBaseSorter_c ( const CSphGroupSorterSettings & tSettings ) : BaseGroupSorter_c ( tSettings ) {}
bool IsGroupby () const final { return true; }
bool CanBeCloned() const final { return false; }
void SetMerge ( bool bMerge ) final {} // nothing to merge for a precalc sorter
void Finalize ( MatchProcessor_i & tProcessor, bool, bool bFinalizeMatches ) final { if ( GetLength() ) tProcessor.Process ( &m_tData ); }
int GetLength() final { return m_bDataInitialized ? 1 : 0; }
ISphMatchSorter * Clone() const final { return nullptr; }
void MoveTo ( ISphMatchSorter * pRhs, bool bCopyMeta ) final { assert ( 0 && "Not supported"); }
bool IsPrecalc() const final { return true; }
// hand out the single row and reset
int Flatten ( CSphMatch * pTo ) final
{
assert ( m_bDataInitialized );
Swap ( *pTo, m_tData );
m_iTotal = 0;
m_bDataInitialized = false;
return 1;
}
protected:
CSphMatch m_tData;					// the single precalculated row
bool m_bDataInitialized = false;	// set once the first pushed match was cloned in
};
// fast count distinct sorter
// works by using precalculated count distinct taken from secondary indexes
class FastCountDistinctSorter_c final : public FastBaseSorter_c
{
public:
FastCountDistinctSorter_c ( int iCountDistinct, const CSphGroupSorterSettings & tSettings )
: FastBaseSorter_c ( tSettings )
, m_iCountDistinct ( iCountDistinct )
{}
bool Push ( const CSphMatch & tEntry ) final { return PushEx(tEntry); }
void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
bool PushGrouped ( const CSphMatch & tEntry, bool ) final { return PushEx(tEntry); }
private:
int m_iCountDistinct = 0; // precalculated COUNT(DISTINCT) value stamped into the row
// clone the first incoming match and stamp the precalculated distinct count on it;
// every later push is a no-op
FORCE_INLINE bool PushEx ( const CSphMatch & tEntry )
{
if ( m_bDataInitialized )
return true; // always return true, otherwise in RT indexes we won't be able to hit cutoff in disk chunks after the first one
m_pSchema->CloneMatch ( m_tData, tEntry );
m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
m_tData.SetAttr ( m_tLocCount, 1 );
m_tData.SetAttr ( m_tLocDistinct, m_iCountDistinct );
m_bDataInitialized = true;
m_iTotal++;
return true;
}
};
  3260. // fast count sorter
  3261. // works by using precalculated count taken from secondary indexes
  3262. class FastCountFilterSorter_c final : public FastBaseSorter_c
  3263. {
  3264. public:
  3265. FastCountFilterSorter_c ( int iCount, const CSphGroupSorterSettings & tSettings )
  3266. : FastBaseSorter_c ( tSettings )
  3267. , m_iCount ( iCount )
  3268. {}
  3269. bool Push ( const CSphMatch & tEntry ) final { return PushEx(tEntry); }
  3270. void Push ( const VecTraits_T<const CSphMatch> & dMatches ) final { assert ( 0 && "Not supported in grouping"); }
  3271. bool PushGrouped ( const CSphMatch & tEntry, bool ) final { return PushEx(tEntry); }
  3272. private:
  3273. int m_iCount = 0;
  3274. FORCE_INLINE bool PushEx ( const CSphMatch & tEntry )
  3275. {
  3276. if ( m_bDataInitialized )
  3277. return true; // always return true, otherwise in RT indexes we won't be able to hit cutoff in disk chunks after the first one
  3278. m_pSchema->CloneMatch ( m_tData, tEntry );
  3279. m_tData.SetAttr ( m_tLocGroupby, 1 ); // fake group number
  3280. m_tData.SetAttr ( m_tLocCount, 1 );
  3281. m_tData.SetAttr ( m_tLocCount, m_iCount );
  3282. m_bDataInitialized = true;
  3283. m_iTotal++;
  3284. return true;
  3285. }
  3286. };
  3287. //////////////////////////////////////////////////////////////////////////
  3288. // SORT CLAUSE PARSER
  3289. //////////////////////////////////////////////////////////////////////////
/// tokenizer for sort/order-by clauses
/// works over an owned, preprocessed copy of the input: separator chars are replaced
/// with NUL bytes, so each token is returned as a plain C string into that buffer
class SortClauseTokenizer_t
{
protected:
	const char * m_pCur;	// current scan position inside m_pBuf
	const char * m_pMax;	// end of buffer (m_pBuf + original length)
	char * m_pBuf;			// owned preprocessed copy of the clause

protected:
	/// normalize a single char: token chars (digits, lowercase letters, '_', '@', '.',
	/// brackets, quotes, parens, '*') pass through, 'A'..'Z' get lowercased,
	/// and anything else becomes 0, i.e. a token separator
	char ToLower ( char c )
	{
		// 0..9, A..Z->a..z, _, a..z, @, . and a few extra token chars (see condition)
		if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
			return c;
		if ( c>='A' && c<='Z' )
			return c-'A'+'a';
		return 0;
	}

public:
	explicit SortClauseTokenizer_t ( const char * sBuffer )
	{
		auto iLen = (int) strlen(sBuffer);
		m_pBuf = new char [ iLen+1 ];
		m_pMax = m_pBuf+iLen;
		m_pCur = m_pBuf;

		// make string lowercase but keep case of JSON.field
		// (the trailing NUL is copied too, since the loop is inclusive of iLen)
		bool bJson = false;
		for ( int i=0; i<=iLen; i++ )
		{
			char cSrc = sBuffer[i];
			char cDst = ToLower ( cSrc );
			bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
			m_pBuf[i] = bJson ? cSrc : cDst;
		}
	}

	~SortClauseTokenizer_t ()
	{
		SafeDeleteArray ( m_pBuf );
	}

	/// return the next token, or nullptr when the buffer is exhausted
	/// the returned pointer aims into the internal buffer and stays valid until dtor
	const char * GetToken ()
	{
		// skip separators (they were zeroed out by ToLower in the ctor)
		while ( m_pCur<m_pMax && !*m_pCur )
			m_pCur++;
		if ( m_pCur>=m_pMax )
			return nullptr;

		// memorize token start, and move pointer forward to the token's terminating NUL
		const char * sRes = m_pCur;
		while ( *m_pCur )
			m_pCur++;
		return sRes;
	}

	/// check whether sTok continues into a composite "count(*)" sequence
	/// (whitespace between "(", "*" and ")" is allowed); on success, also advances
	/// the tokenizer past the whole sequence and returns true
	bool IsSparseCount ( const char * sTok )
	{
		const char * sSeq = "(*)";
		for ( ; sTok<m_pMax && *sSeq; sTok++ )
		{
			bool bGotSeq = ( *sSeq==*sTok );
			if ( bGotSeq )
				sSeq++;

			// stop checking on any non-space char outside sequence or sequence end
			if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
				break;
		}

		if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
		{
			// advance token iterator after composite count(*) token
			m_pCur = sTok+1;
			return true;
		} else
		{
			return false;
		}
	}
};
  3363. static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
  3364. {
  3365. switch ( eType )
  3366. {
  3367. case SPH_ATTR_FLOAT:
  3368. return SPH_KEYPART_FLOAT;
  3369. case SPH_ATTR_DOUBLE:
  3370. return SPH_KEYPART_DOUBLE;
  3371. case SPH_ATTR_STRING:
  3372. return SPH_KEYPART_STRING;
  3373. case SPH_ATTR_JSON:
  3374. case SPH_ATTR_JSON_PTR:
  3375. case SPH_ATTR_JSON_FIELD:
  3376. case SPH_ATTR_JSON_FIELD_PTR:
  3377. case SPH_ATTR_STRINGPTR:
  3378. return SPH_KEYPART_STRINGPTR;
  3379. default:
  3380. return SPH_KEYPART_INT;
  3381. }
  3382. }
  3383. //////////////////////////////////////////////////////////////////////////
  3384. // SORTING+GROUPING INSTANTIATION
  3385. //////////////////////////////////////////////////////////////////////////
/// aggregate values that may be fetched from secondary indexes up front,
/// letting us skip the full match scan; -1 means "not precalculated"
struct Precalculated_t
{
	int64_t m_iCountDistinct = -1;	// precalculated count(distinct attr)
	int64_t m_iCountFilter = -1;	// precalculated count(*) with filters applied
	int64_t m_iCount = -1;			// precalculated plain count(*)
};
// final sorter instantiation step: picks the template specialization by a packed selector
//   uSelector = 4*<distinct mode: 0=none, 1=single-value, 2=multi-value>
//             + 2*<has packed factors> + 1*<has aggregates>
// NOTE(review): the HAS_PACKEDFACTORS/COMP/QUERY/SETTINGS arguments are not expanded in the
// body; the macro relies on identically named locals (pComp, pQuery, tSettings,
// bHasPackedFactors) existing at the expansion site — confirm before reusing elsewhere.
// (comments are kept outside the macro: a // comment inside a backslash-continued
// #define would swallow the continuation)
#define CREATE_SORTER_4TH(SORTER,COMPGROUP,UNIQ,COMP,QUERY,SETTINGS,HAS_PACKEDFACTORS,HAS_AGGREGATES) \
{ \
	int iMultiDistict = 0; \
	if ( tSettings.m_bDistinct ) \
	{ \
		assert(tSettings.m_pDistinctFetcher); \
		iMultiDistict = tSettings.m_pDistinctFetcher->IsMultiValue() ? 2 : 1; \
	} \
	BYTE uSelector = 4*iMultiDistict + 2*(bHasPackedFactors?1:0) + (HAS_AGGREGATES?1:0); \
	switch ( uSelector ) \
	{ \
	case 0: return new SORTER<COMPGROUP,UNIQ,0, false,false> ( pComp, pQuery, tSettings ); \
	case 1: return new SORTER<COMPGROUP,UNIQ,0, false,true> ( pComp, pQuery, tSettings ); \
	case 2: return new SORTER<COMPGROUP,UNIQ,0, true, false> ( pComp, pQuery, tSettings ); \
	case 3: return new SORTER<COMPGROUP,UNIQ,0, true, true> ( pComp, pQuery, tSettings ); \
	case 4: return new SORTER<COMPGROUP,UNIQ,1, false,false> ( pComp, pQuery, tSettings ); \
	case 5: return new SORTER<COMPGROUP,UNIQ,1, false,true> ( pComp, pQuery, tSettings ); \
	case 6: return new SORTER<COMPGROUP,UNIQ,1, true, false> ( pComp, pQuery, tSettings ); \
	case 7: return new SORTER<COMPGROUP,UNIQ,1, true, true> ( pComp, pQuery, tSettings ); \
	case 8: return new SORTER<COMPGROUP,UNIQ,2, false,false> ( pComp, pQuery, tSettings ); \
	case 9: return new SORTER<COMPGROUP,UNIQ,2, false,true> ( pComp, pQuery, tSettings ); \
	case 10:return new SORTER<COMPGROUP,UNIQ,2, true, false> ( pComp, pQuery, tSettings ); \
	case 11:return new SORTER<COMPGROUP,UNIQ,2, true, true> ( pComp, pQuery, tSettings ); \
	default: assert(0); return nullptr; \
	} \
}
/// 3rd sorter-creation step: either short-circuit to a trivial precalculated sorter,
/// or pick the grouping sorter class and distinct-counting container flavor
template < typename COMPGROUP >
static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const Precalculated_t & tPrecalc )
{
	// fast paths: a secondary index already computed the answer, so a one-row sorter suffices
	// NOTE(review): the int64 precalculated values are narrowed to int by the ctors — confirm
	// the counts can't exceed INT_MAX here
	if ( tPrecalc.m_iCountDistinct!=-1 )
		return new FastCountDistinctSorter_c ( tPrecalc.m_iCountDistinct, tSettings );

	if ( tPrecalc.m_iCountFilter!=-1 )
		return new FastCountFilterSorter_c ( tPrecalc.m_iCountFilter, tSettings );

	if ( tPrecalc.m_iCount!=-1 )
		return new FastCountFilterSorter_c ( tPrecalc.m_iCount, tSettings );

	bool bUseHLL = tSettings.m_iDistinctAccuracy > 0;	// >0 means approximate (HLL) distinct counting

	// distinct-value container flavors: exact grouped/single, exact-with-counts, HLL
	using Uniq_c = UniqGrouped_T<ValueWithGroup_t>;
	using UniqSingle_c = UniqSingle_T<SphAttr_t>;
	using UniqCount_c = UniqGrouped_T<ValueWithGroupCount_t>;
	using UniqCountSingle_c = UniqSingle_T<ValueWithCount_t>;

	// selector bits: 32=HLL, 16=input already grouped, 8=json groupby,
	// 4=groupby limit>1 (N-group), 2=implicit grouping, 1=multi-value grouper
	BYTE uSelector3rd = 32*( bUseHLL ? 1 : 0 ) + 16*( tSettings.m_bGrouped ? 1:0 ) + 8*( tSettings.m_bJson ? 1:0 ) + 4*( pQuery->m_iGroupbyLimit>1 ? 1:0 ) + 2*( tSettings.m_bImplicit ? 1:0 ) + ( ( tSettings.m_pGrouper && tSettings.m_pGrouper->IsMultiValue() ) ? 1:0 );
	switch ( uSelector3rd )
	{
	case 0: CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 1: CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 2: CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 4: CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 5: CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 8: CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, Uniq_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 16:CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 17:CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 18:CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqCountSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 20:CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 21:CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 24:CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 32:CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 33:CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 34:CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqHLLSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 36:CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 37:CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 40:CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, UniqHLL_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 48:CREATE_SORTER_4TH ( CSphKBufferGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 49:CREATE_SORTER_4TH ( MultiValueGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 50:CREATE_SORTER_4TH ( CSphImplicitGroupSorter, COMPGROUP, UniqCountSingle_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 52:CREATE_SORTER_4TH ( CSphKBufferNGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 53:CREATE_SORTER_4TH ( MultiValueNGroupSorter_T, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	case 56:CREATE_SORTER_4TH ( CSphKBufferJsonGroupSorter, COMPGROUP, UniqCount_c, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates );
	default: assert(0); return nullptr;	// any other bit combination is not expected to occur
	}
}
/// 2nd sorter-creation step: resolve the group sorting function into a comparator type
/// and forward to the 3rd step; returns nullptr on an unsupported eGroupFunc
static ISphMatchSorter * sphCreateSorter2nd ( ESphSortFunc eGroupFunc, const ISphMatchComparator * pComp, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const Precalculated_t & tPrecalc )
{
	switch ( eGroupFunc )
	{
		case FUNC_GENERIC1:		return sphCreateSorter3rd<MatchGeneric1_fn>	( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
		case FUNC_GENERIC2:		return sphCreateSorter3rd<MatchGeneric2_fn>	( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
		case FUNC_GENERIC3:		return sphCreateSorter3rd<MatchGeneric3_fn>	( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
		case FUNC_GENERIC4:		return sphCreateSorter3rd<MatchGeneric4_fn>	( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
		case FUNC_GENERIC5:		return sphCreateSorter3rd<MatchGeneric5_fn>	( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
		case FUNC_EXPR:			return sphCreateSorter3rd<MatchExpr_fn>		( pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
		default:				return nullptr;
	}
}
  3475. static ISphMatchSorter * sphCreateSorter1st ( ESphSortFunc eMatchFunc, ESphSortFunc eGroupFunc, const CSphQuery * pQuery, const CSphGroupSorterSettings & tSettings, bool bHasPackedFactors, bool bHasAggregates, const Precalculated_t & tPrecalc )
  3476. {
  3477. CSphRefcountedPtr<ISphMatchComparator> pComp;
  3478. if ( !tSettings.m_bImplicit )
  3479. switch ( eMatchFunc )
  3480. {
  3481. case FUNC_REL_DESC: pComp = new MatchRelevanceLt_fn(); break;
  3482. case FUNC_TIMESEGS: pComp = new MatchTimeSegments_fn(); break;
  3483. case FUNC_GENERIC1: pComp = new MatchGeneric1_fn(); break;
  3484. case FUNC_GENERIC2: pComp = new MatchGeneric2_fn(); break;
  3485. case FUNC_GENERIC3: pComp = new MatchGeneric3_fn(); break;
  3486. case FUNC_GENERIC4: pComp = new MatchGeneric4_fn(); break;
  3487. case FUNC_GENERIC5: pComp = new MatchGeneric5_fn(); break;
  3488. case FUNC_EXPR: pComp = new MatchExpr_fn(); break; // only for non-bitfields, obviously
  3489. }
  3490. return sphCreateSorter2nd ( eGroupFunc, pComp, pQuery, tSettings, bHasPackedFactors, bHasAggregates, tPrecalc );
  3491. }
  3492. //////////////////////////////////////////////////////////////////////////
  3493. // GEODIST
  3494. //////////////////////////////////////////////////////////////////////////
/// built-in geodist expression: distance from the query's geo anchor point
/// to the per-match lat/long attribute pair
struct ExprGeodist_t : public ISphExpr
{
public:
				ExprGeodist_t () = default;

	// binds locators and anchor values from the query; false + sError on failure
	bool		Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError );
	float		Eval ( const CSphMatch & tMatch ) const final;
	void		FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) final;
	void		Command ( ESphExprCommand eCmd, void * pArg ) final;
	uint64_t	GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final;
	ISphExpr *	Clone() const final;

protected:
	CSphAttrLocator	m_tGeoLatLoc;		// locator of the latitude attribute
	CSphAttrLocator	m_tGeoLongLoc;		// locator of the longitude attribute
	float			m_fGeoAnchorLat;	// anchor latitude (taken from the query)
	float			m_fGeoAnchorLong;	// anchor longitude (taken from the query)
	int				m_iLat;				// schema index of the latitude attribute
	int				m_iLon;				// schema index of the longitude attribute
};
  3513. bool ExprGeodist_t::Setup ( const CSphQuery * pQuery, const ISphSchema & tSchema, CSphString & sError )
  3514. {
  3515. if ( !pQuery->m_bGeoAnchor )
  3516. {
  3517. sError.SetSprintf ( "INTERNAL ERROR: no geoanchor, can not create geodist evaluator" );
  3518. return false;
  3519. }
  3520. int iLat = tSchema.GetAttrIndex ( pQuery->m_sGeoLatAttr.cstr() );
  3521. if ( iLat<0 )
  3522. {
  3523. sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLatAttr.cstr() );
  3524. return false;
  3525. }
  3526. int iLong = tSchema.GetAttrIndex ( pQuery->m_sGeoLongAttr.cstr() );
  3527. if ( iLong<0 )
  3528. {
  3529. sError.SetSprintf ( "unknown latitude attribute '%s'", pQuery->m_sGeoLongAttr.cstr() );
  3530. return false;
  3531. }
  3532. m_tGeoLatLoc = tSchema.GetAttr(iLat).m_tLocator;
  3533. m_tGeoLongLoc = tSchema.GetAttr(iLong).m_tLocator;
  3534. m_fGeoAnchorLat = pQuery->m_fGeoLatitude;
  3535. m_fGeoAnchorLong = pQuery->m_fGeoLongitude;
  3536. m_iLat = iLat;
  3537. m_iLon = iLong;
  3538. return true;
  3539. }
  3540. static inline double sphSqr ( double v )
  3541. {
  3542. return v*v;
  3543. }
/// great-circle (haversine) distance in meters between the match point and the anchor
/// NOTE(review): coordinates are fed to sin()/cos() directly, so both the anchor and the
/// attributes are presumably stored in radians — confirm against the callers
float ExprGeodist_t::Eval ( const CSphMatch & tMatch ) const
{
	const double R = 6384000;	// earth radius approximation, meters
	float plat = tMatch.GetAttrFloat ( m_tGeoLatLoc );
	float plon = tMatch.GetAttrFloat ( m_tGeoLongLoc );
	double dlat = plat - m_fGeoAnchorLat;
	double dlon = plon - m_fGeoAnchorLong;
	// haversine formula; Min() clamps float rounding so asin() gets an in-range argument
	double a = sphSqr ( sin ( dlat/2 ) ) + cos(plat)*cos(m_fGeoAnchorLat)*sphSqr(sin(dlon/2));
	double c = 2*asin ( Min ( 1.0, sqrt(a) ) );
	return (float)(R*c);
}
/// remap the lat/long locators after a schema change (attribute layout may have moved)
void ExprGeodist_t::FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema )
{
	sphFixupLocator ( m_tGeoLatLoc, pOldSchema, pNewSchema );
	sphFixupLocator ( m_tGeoLongLoc, pOldSchema, pNewSchema );
}
  3560. void ExprGeodist_t::Command ( ESphExprCommand eCmd, void * pArg )
  3561. {
  3562. if ( eCmd==SPH_EXPR_GET_DEPENDENT_COLS )
  3563. {
  3564. static_cast < CSphVector<int>* >(pArg)->Add ( m_iLat );
  3565. static_cast < CSphVector<int>* >(pArg)->Add ( m_iLon );
  3566. }
  3567. if ( eCmd==SPH_EXPR_UPDATE_DEPENDENT_COLS )
  3568. {
  3569. int iRef = *static_cast<int*>(pArg);
  3570. if ( m_iLat>=iRef ) m_iLat--;
  3571. if ( m_iLon>=iRef ) m_iLon--;
  3572. }
  3573. }
/// fold this expression's identity (type tag + anchor point) into the sorter schema hash,
/// on top of the hash of its dependencies; order of the FNV folds matters for stability
uint64_t ExprGeodist_t::GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
{
	uint64_t uHash = sphCalcExprDepHash ( this, tSorterSchema, uPrevHash, bDisable );

	static const char * EXPR_TAG = "ExprGeodist_t";	// distinguishes this expr type from others with same deps
	uHash = sphFNV64 ( EXPR_TAG, (int) strlen(EXPR_TAG), uHash );
	uHash = sphFNV64 ( &m_fGeoAnchorLat, sizeof(m_fGeoAnchorLat), uHash );
	uHash = sphFNV64 ( &m_fGeoAnchorLong, sizeof(m_fGeoAnchorLong), uHash );

	return uHash;
}
  3583. ISphExpr * ExprGeodist_t::Clone() const
  3584. {
  3585. auto * pClone = new ExprGeodist_t;
  3586. pClone->m_tGeoLatLoc = m_tGeoLatLoc;
  3587. pClone->m_tGeoLongLoc = m_tGeoLongLoc;
  3588. pClone->m_fGeoAnchorLat = m_fGeoAnchorLat;
  3589. pClone->m_fGeoAnchorLong = m_fGeoAnchorLong;
  3590. pClone->m_iLat = m_iLat;
  3591. pClone->m_iLon = m_iLon;
  3592. return pClone;
  3593. }
  3594. //////////////////////////////////////////////////////////////////////////
  3595. // PUBLIC FUNCTIONS (FACTORY AND FLATTENING)
  3596. //////////////////////////////////////////////////////////////////////////
  3597. bool HasImplicitGrouping ( const CSphQuery & tQuery )
  3598. {
  3599. auto fnIsImplicit = [] ( const CSphQueryItem & t )
  3600. {
  3601. return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
  3602. };
  3603. return tQuery.m_sGroupBy.IsEmpty() ? tQuery.m_dItems.any_of(fnIsImplicit) : false;
  3604. }
/// builds a match sorter ("queue") for a given query over a given schema:
/// accumulates sort/group settings and an extended result-set schema,
/// then spawns the proper sorter implementation in CreateQueue()
class QueueCreator_c
{
public:
	bool				m_bMulti = false;
	bool				m_bCreate = true;
	bool				m_bZonespanlist = false;
	DWORD				m_uPackedFactorFlags = SPH_FACTOR_DISABLE;

	QueueCreator_c ( const SphQueueSettings_t & tSettings, const CSphQuery & tQuery, CSphString & sError, StrVec_t * pExtra, QueryProfile_c * pProfile );

	bool				SetupComputeQueue();
	bool				SetupGroupQueue();
	bool				SetupQueue();

	CSphRsetSchema &	SorterSchema() const { return *m_pSorterSchema; }
	bool				HasJson() const { return m_tGroupSorterSettings.m_bJson; }
	bool				SetSchemaGroupQueue ( const CSphRsetSchema & tNewSchema );

	/// creates proper queue for given query
	/// may return NULL on error; in this case, error message is placed in sError
	/// if the pUpdate is given, creates the updater's queue and perform the index update
	/// instead of searching
	ISphMatchSorter *	CreateQueue();

private:
	// inputs (owned by the caller)
	const SphQueueSettings_t &	m_tSettings;
	const CSphQuery &			m_tQuery;
	CSphString &				m_sError;
	StrVec_t *					m_pExtra = nullptr;
	QueryProfile_c *			m_pProfile = nullptr;

	// state collected while parsing the select list and sort/group clauses
	bool						m_bHasCount = false;
	bool						m_bHasGroupByExpr = false;
	sph::StringSet				m_hQueryAttrs;
	std::unique_ptr<CSphRsetSchema> m_pSorterSchema;

	bool						m_bGotGroupby;
	bool						m_bRandomize;
	ESphSortFunc				m_eMatchFunc = FUNC_REL_DESC;
	ESphSortFunc				m_eGroupFunc = FUNC_REL_DESC;
	CSphMatchComparatorState	m_tStateMatch;
	CSphVector<ExtraSortExpr_t>	m_dMatchJsonExprs;
	CSphMatchComparatorState	m_tStateGroup;
	CSphVector<ExtraSortExpr_t>	m_dGroupJsonExprs;
	CSphGroupSorterSettings		m_tGroupSorterSettings;
	CSphVector<std::pair<int,bool>> m_dGroupColumns;	// (attr index, grouper-uses-attr) pairs
	bool						m_bHeadWOGroup;
	bool						m_bGotDistinct;
	bool						m_bExprsNeedDocids = false;

	// for sorter to create pooled attributes
	bool						m_bHaveStar = false;

	// fixme! transform to StringSet on end of merge!
	sph::StringSet				m_hQueryColumns; // FIXME!!! unify with Extra schema after merge master into branch
	sph::StringSet				m_hQueryDups;
	sph::StringSet				m_hExtra;

	// select-list / schema construction helpers
	bool	ParseQueryItem ( const CSphQueryItem & tItem );
	bool	MaybeAddGeodistColumn();
	bool	MaybeAddExprColumn();
	bool	MaybeAddExpressionsFromSelectList();
	bool	AddExpressionsForUpdates();
	bool	MaybeAddGroupbyMagic ( bool bGotDistinct );
	bool	AddKNNDistColumn();
	bool	CheckHavingConstraints() const;
	bool	SetupGroupbySettings ( bool bHasImplicitGrouping );
	void	AssignOrderByToPresortStage ( const int * pAttrs, int iAttrCount );

	// sort-state rewriting helpers
	void	ReplaceGroupbyStrWithExprs ( CSphMatchComparatorState & tState, int iNumOldAttrs );
	void	ReplaceStaticStringsWithExprs ( CSphMatchComparatorState & tState );
	void	ReplaceJsonWithExprs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs );
	void	AddColumnarExprsAsAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs );
	void	RemapAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs );
	static void SetupRemapColJson ( CSphColumnInfo & tRemapCol, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, int iStateAttr ) ;
	const CSphColumnInfo * GetGroupbyStr ( int iAttr, int iNumOldAttrs ) const;

	// sorting/grouping function selection
	bool	SetupMatchesSortingFunc();
	bool	SetupGroupSortingFunc ( bool bGotDistinct );
	bool	AddGroupbyStuff();
	void	AddKnnDistSort ( CSphString & sSortBy );
	bool	SetGroupSorting();
	void	ExtraAddSortkeys ( const int * dAttrs );
	bool	AddStoredFieldExpressions();
	bool	AddColumnarAttributeExpressions();
	void	CreateGrouperByAttr ( ESphAttr eType, const CSphColumnInfo & tGroupByAttr, bool & bGrouperUsesAttrs );
	void	SelectStageForColumnarExpr ( CSphColumnInfo & tExprCol );
	void	FetchDependencyChains ( IntVec_t & dDependentCols );
	void	PropagateEvalStage ( CSphColumnInfo & tExprCol, IntVec_t & dDependentCols );
	bool	SetupDistinctAttr();
	bool	PredictAggregates() const;
	bool	ReplaceWithColumnarItem ( const CSphString & sAttr, ESphEvalStage eStage );
	int		ReduceMaxMatches() const;
	int		AdjustMaxMatches ( int iMaxMatches ) const;
	bool	ConvertColumnarToDocstore();
	const CSphColumnInfo * GetAliasedColumnarAttr ( const CSphColumnInfo & tAttr );
	bool	SetupAggregateExpr ( CSphColumnInfo & tExprCol, const CSphString & sExpr, DWORD uQueryPackedFactorFlags );
	bool	SetupColumnarAggregates ( CSphColumnInfo & tExprCol );
	void	UpdateAggregateDependencies ( CSphColumnInfo & tExprCol );
	int		GetGroupbyAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupBy, m_tQuery, *m_pSorterSchema ); }
	int		GetGroupDistinctAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupDistinct, m_tQuery, *m_pSorterSchema ); }

	// precalculated fast paths and final queue spawning
	bool	CanCalcFastCountDistinct() const;
	bool	CanCalcFastCountFilter() const;
	bool	CanCalcFastCount() const;
	Precalculated_t FetchPrecalculatedValues() const;
	ISphMatchSorter * SpawnQueue();
	std::unique_ptr<ISphFilter> CreateAggrFilter() const;
	void	SetupCollation();
	bool	Err ( const char * sFmt, ... ) const;
};
/// capture references to the query, settings and error sink; start the sorter schema
/// as a copy of the index schema (it gets extended as the queue is constructed)
QueueCreator_c::QueueCreator_c ( const SphQueueSettings_t & tSettings, const CSphQuery & tQuery, CSphString & sError, StrVec_t * pExtra, QueryProfile_c * pProfile )
	: m_tSettings ( tSettings )
	, m_tQuery ( tQuery )
	, m_sError ( sError )
	, m_pExtra ( pExtra )
	, m_pProfile ( pProfile )
	, m_pSorterSchema { std::make_unique<CSphRsetSchema>() }
{
	// short-cuts
	m_sError = "";
	*m_pSorterSchema = m_tSettings.m_tSchema;

	// one json-expression slot per possible sort key
	m_dMatchJsonExprs.Resize ( CSphMatchComparatorState::MAX_ATTRS );
	m_dGroupJsonExprs.Resize ( CSphMatchComparatorState::MAX_ATTRS );
}
  3717. const CSphColumnInfo * QueueCreator_c::GetAliasedColumnarAttr ( const CSphColumnInfo & tAttr )
  3718. {
  3719. if ( !tAttr.IsColumnarExpr() )
  3720. return &tAttr;
  3721. CSphString sAliasedCol;
  3722. tAttr.m_pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &sAliasedCol );
  3723. const CSphColumnInfo * pAttr = m_pSorterSchema->GetAttr ( sAliasedCol.cstr() );
  3724. assert(pAttr);
  3725. return pAttr;
  3726. }
/// pick a grouper implementation for plain GROUP BY on one attribute, based on its type
/// and storage (rowwise vs columnar); bGrouperUsesAttrs is cleared when the chosen
/// grouper fetches values by itself and doesn't read the attribute from the match
void QueueCreator_c::CreateGrouperByAttr ( ESphAttr eType, const CSphColumnInfo & tGroupByAttr, bool & bGrouperUsesAttrs )
{
	assert ( m_pSorterSchema );
	auto & tSchema = *m_pSorterSchema;
	const CSphAttrLocator & tLoc = tGroupByAttr.m_tLocator;

	switch ( eType )
	{
	case SPH_ATTR_JSON:
	case SPH_ATTR_JSON_FIELD:
		{
			// group on a json field: parse the groupby clause into an expression
			// and evaluate it per match
			ExprParseArgs_t tExprArgs;
			tExprArgs.m_eCollation = m_tQuery.m_eCollation;

			ISphExprRefPtr_c pExpr { sphExprParse ( m_tQuery.m_sGroupBy.cstr(), tSchema, m_sError, tExprArgs ) };
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperJsonField ( tLoc, pExpr );
			m_tGroupSorterSettings.m_bJson = true;
		}
		break;

	case SPH_ATTR_STRING:
	case SPH_ATTR_STRINGPTR:
		// percolate select list push matches with string_ptr
		// check if it is a columnar attr or an expression spawned instead of a columnar attr
		// even if it is an expression, spawn a new one, because a specialized grouper works a lot faster because it doesn't allocate and store string in the match
		if ( tGroupByAttr.IsColumnar() || tGroupByAttr.IsColumnarExpr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarString ( *GetAliasedColumnarAttr(tGroupByAttr), m_tQuery.m_eCollation );
			bGrouperUsesAttrs = false;
		}
		else if ( tGroupByAttr.m_pExpr && !tGroupByAttr.m_pExpr->IsDataPtrAttr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperStringExpr ( tGroupByAttr.m_pExpr, m_tQuery.m_eCollation );
			bGrouperUsesAttrs = false;
		}
		else
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperString ( tLoc, m_tQuery.m_eCollation );
		break;

	case SPH_ATTR_UINT32SET:
	case SPH_ATTR_INT64SET:
		// columnar MVAs get a specialized grouper; rowwise ones pick by element width
		if ( tGroupByAttr.IsColumnar() || tGroupByAttr.IsColumnarExpr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarMVA ( *GetAliasedColumnarAttr(tGroupByAttr) );
			bGrouperUsesAttrs = false;
			break;
		}

		if ( eType==SPH_ATTR_UINT32SET )
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperMVA32(tLoc);
		else
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperMVA64(tLoc);
		break;

	case SPH_ATTR_UINT32SET_PTR:
	case SPH_ATTR_INT64SET_PTR:
		// ptr MVAs only get a dedicated grouper when columnar-backed;
		// otherwise they fall through to the default rowwise grouper below
		if ( tGroupByAttr.IsColumnar() || tGroupByAttr.IsColumnarExpr() )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarMVA ( *GetAliasedColumnarAttr(tGroupByAttr) );
			bGrouperUsesAttrs = false;
		}
		break;

	case SPH_ATTR_BOOL:
	case SPH_ATTR_INTEGER:
	case SPH_ATTR_BIGINT:
	case SPH_ATTR_FLOAT:
		if ( tGroupByAttr.IsColumnar() || ( tGroupByAttr.IsColumnarExpr() && tGroupByAttr.m_eStage>SPH_EVAL_PREFILTER ) )
		{
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperColumnarInt ( *GetAliasedColumnarAttr(tGroupByAttr) );
			bGrouperUsesAttrs = false;
		}
		break;

	default:
		break;
	}

	// fallback: plain rowwise grouper reading the attribute value from the match
	if ( !m_tGroupSorterSettings.m_pGrouper )
		m_tGroupSorterSettings.m_pGrouper = CreateGrouperAttr(tLoc);
}
  3799. bool QueueCreator_c::SetupDistinctAttr()
  3800. {
  3801. if ( m_tQuery.m_sGroupDistinct.IsEmpty() )
  3802. return true;
  3803. assert ( m_pSorterSchema );
  3804. auto & tSchema = *m_pSorterSchema;
  3805. int iDistinct = tSchema.GetAttrIndex ( m_tQuery.m_sGroupDistinct.cstr () );
  3806. if ( iDistinct<0 )
  3807. return Err ( "group-count-distinct attribute '%s' not found", m_tQuery.m_sGroupDistinct.cstr() );
  3808. const auto & tDistinctAttr = tSchema.GetAttr(iDistinct);
  3809. if ( IsNotRealAttribute(tDistinctAttr) )
  3810. return Err ( "group-count-distinct attribute '%s' not found", m_tQuery.m_sGroupDistinct.cstr() );
  3811. if ( tDistinctAttr.IsColumnar() )
  3812. m_tGroupSorterSettings.m_pDistinctFetcher = CreateColumnarDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_eAttrType, m_tQuery.m_eCollation );
  3813. else
  3814. m_tGroupSorterSettings.m_pDistinctFetcher = CreateDistinctFetcher ( tDistinctAttr.m_sName, tDistinctAttr.m_tLocator, tDistinctAttr.m_eAttrType );
  3815. return true;
  3816. }
/// configure everything grouping-related: the distinct fetcher, the grouper object(s)
/// and the list of groupby columns; handles multi-attribute groupby, json-field
/// groupby, implicit grouping, and plain single-attribute groupby
bool QueueCreator_c::SetupGroupbySettings ( bool bHasImplicitGrouping )
{
	if ( m_tQuery.m_sGroupBy.IsEmpty() && !bHasImplicitGrouping )
		return true;

	if ( m_tQuery.m_eGroupFunc==SPH_GROUPBY_ATTRPAIR )
		return Err ( "SPH_GROUPBY_ATTRPAIR is not supported any more (just group on 'bigint' attribute)" );

	assert ( m_pSorterSchema );
	auto & tSchema = *m_pSorterSchema;

	m_tGroupSorterSettings.m_iMaxMatches = m_tSettings.m_iMaxMatches;

	if ( !SetupDistinctAttr() )
		return false;

	CSphString sJsonColumn;
	if ( m_tQuery.m_eGroupFunc==SPH_GROUPBY_MULTIPLE )
	{
		// comma-separated groupby list; json fields are allowed,
		// MVAs and whole json blobs are not
		CSphVector<CSphColumnInfo> dAttrs;
		VecRefPtrs_t<ISphExpr *> dJsonKeys;

		StrVec_t dGroupBy;
		sph::Split ( m_tQuery.m_sGroupBy.cstr (), -1, ",", [&] ( const char * sToken, int iLen )
		{
			CSphString sGroupBy ( sToken, iLen );
			sGroupBy.Trim ();
			dGroupBy.Add ( std::move ( sGroupBy ));
		} );
		dGroupBy.Uniq();

		for ( auto & sGroupBy : dGroupBy )
		{
			CSphString sJsonExpr;
			// "attr.field" splits into the json column name and the full field expression
			if ( sphJsonNameSplit ( sGroupBy.cstr(), &sJsonColumn ) )
			{
				sJsonExpr = sGroupBy;
				sGroupBy = sJsonColumn;
			}

			const int iAttr = tSchema.GetAttrIndex ( sGroupBy.cstr() );
			if ( iAttr<0 )
				return Err( "group-by attribute '%s' not found", sGroupBy.cstr() );

			auto tAttr = tSchema.GetAttr ( iAttr );
			ESphAttr eType = tAttr.m_eAttrType;
			if ( eType==SPH_ATTR_UINT32SET || eType==SPH_ATTR_INT64SET )
				return Err ( "MVA values can't be used in multiple group-by" );

			if ( eType==SPH_ATTR_JSON && sJsonExpr.IsEmpty() )
				return Err ( "JSON blob can't be used in multiple group-by" );

			dAttrs.Add ( tAttr );
			m_dGroupColumns.Add ( { iAttr, true } );

			// per-attr json key expression (nullptr for non-json attrs, to keep arrays aligned)
			if ( !sJsonExpr.IsEmpty() )
			{
				ExprParseArgs_t tExprArgs;
				dJsonKeys.Add ( sphExprParse ( sJsonExpr.cstr(), tSchema, m_sError, tExprArgs ) );
			}
			else
				dJsonKeys.Add ( nullptr );
		}

		m_tGroupSorterSettings.m_pGrouper = CreateGrouperMulti ( dAttrs, std::move(dJsonKeys), m_tQuery.m_eCollation );
		return true;
	}

	// groupby on a single json field, e.g. "j.key"
	if ( sphJsonNameSplit ( m_tQuery.m_sGroupBy.cstr(), &sJsonColumn ) )
	{
		const int iAttr = tSchema.GetAttrIndex ( sJsonColumn.cstr() );
		if ( iAttr<0 )
			return Err ( "groupby: no such attribute '%s'", sJsonColumn.cstr ());

		if ( tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON
			&& tSchema.GetAttr(iAttr).m_eAttrType!=SPH_ATTR_JSON_PTR )
			return Err ( "groupby: attribute '%s' does not have subfields (must be sql_attr_json)",
				sJsonColumn.cstr() );

		if ( m_tQuery.m_eGroupFunc!=SPH_GROUPBY_ATTR )
			return Err ( "groupby: legacy groupby modes are not supported on JSON attributes" );

		m_dGroupColumns.Add ( { iAttr, true } );

		ExprParseArgs_t tExprArgs;
		tExprArgs.m_eCollation = m_tQuery.m_eCollation;

		ISphExprRefPtr_c pExpr { sphExprParse ( m_tQuery.m_sGroupBy.cstr(), tSchema, m_sError, tExprArgs ) };
		m_tGroupSorterSettings.m_pGrouper = CreateGrouperJsonField ( tSchema.GetAttr(iAttr).m_tLocator, pExpr );
		m_tGroupSorterSettings.m_bJson = true;
		return true;
	}

	// no explicit groupby attr: everything collapses into a single implicit group
	if ( bHasImplicitGrouping )
	{
		m_tGroupSorterSettings.m_bImplicit = true;
		return true;
	}

	// setup groupby attr
	int iGroupBy = GetGroupbyAttrIndex();
	if ( iGroupBy<0 )
		return Err ( "group-by attribute '%s' not found", m_tQuery.m_sGroupBy.cstr() );

	const CSphColumnInfo & tGroupByAttr = tSchema.GetAttr(iGroupBy);
	ESphAttr eType = tGroupByAttr.m_eAttrType;
	CSphAttrLocator tLoc = tGroupByAttr.m_tLocator;
	bool bGrouperUsesAttrs = true;	// CreateGrouperByAttr may clear this for columnar groupers
	switch (m_tQuery.m_eGroupFunc )
	{
		case SPH_GROUPBY_DAY:
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperDay(tLoc); break;

		case SPH_GROUPBY_WEEK:
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperWeek(tLoc); break;

		case SPH_GROUPBY_MONTH:
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperMonth(tLoc); break;

		case SPH_GROUPBY_YEAR:
			m_tGroupSorterSettings.m_pGrouper = CreateGrouperYear(tLoc); break;

		case SPH_GROUPBY_ATTR:
			CreateGrouperByAttr ( eType, tGroupByAttr, bGrouperUsesAttrs );
			break;

		default:
			return Err ( "invalid group-by mode (mode=%d)", m_tQuery.m_eGroupFunc );
	}

	m_dGroupColumns.Add ( { iGroupBy, bGrouperUsesAttrs } );
	return true;
}
  3922. // move expressions used in ORDER BY or WITHIN GROUP ORDER BY to presort phase
  3923. void QueueCreator_c::AssignOrderByToPresortStage ( const int * pAttrs, int iAttrCount )
  3924. {
  3925. if ( !iAttrCount )
  3926. return;
  3927. assert ( pAttrs );
  3928. assert ( m_pSorterSchema );
  3929. CSphVector<int> dCur;
  3930. // add valid attributes to processing list
  3931. for ( int i=0; i<iAttrCount; ++i )
  3932. if ( pAttrs[i]>=0 )
  3933. dCur.Add ( pAttrs[i] );
  3934. // collect columns which affect current expressions
  3935. for ( int i=0; i<dCur.GetLength(); ++i )
  3936. {
  3937. const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr ( dCur[i] );
  3938. if ( tCol.m_eStage>SPH_EVAL_PRESORT && tCol.m_pExpr )
  3939. tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
  3940. }
  3941. // get rid of dupes
  3942. dCur.Uniq();
  3943. // fix up of attributes stages
  3944. for ( int iAttr : dCur )
  3945. {
  3946. if ( iAttr<0 )
  3947. continue;
  3948. auto & tCol = const_cast < CSphColumnInfo & > ( m_pSorterSchema->GetAttr ( iAttr ) );
  3949. if ( tCol.m_eStage==SPH_EVAL_FINAL )
  3950. tCol.m_eStage = SPH_EVAL_PRESORT;
  3951. }
  3952. }
// expression that transforms string pool base + offset -> ptr
// wraps a blob-pool string attribute so sorters can fetch it as a packed pointer attribute
class ExprSortStringAttrFixup_c : public BlobPool_c, public ISphExpr
{
public:
	explicit ExprSortStringAttrFixup_c ( const CSphAttrLocator & tLocator )
		: m_tLocator ( tLocator )
	{}

	// numeric evaluation is not supported; this expression only yields packed strings
	float Eval ( const CSphMatch & ) const override { assert ( 0 ); return 0.0f; }

	const BYTE * StringEvalPacked ( const CSphMatch & tMatch ) const override
	{
		// our blob strings are not null-terminated!
		// we can either store nulls in .SPB or add them here
		return sphPackPtrAttr ( sphGetBlobAttr ( tMatch, m_tLocator, GetBlobPool() ) );
	}

	// remap the locator when the match moves between schemas
	void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override
	{
		sphFixupLocator ( m_tLocator, pOldSchema, pNewSchema );
	}

	// the only command handled here is attaching the blob pool the locator points into
	void Command ( ESphExprCommand eCmd, void * pArg ) override
	{
		if ( eCmd==SPH_EXPR_SET_BLOB_POOL )
			SetBlobPool( (const BYTE*)pArg);
	}

	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) override
	{
		EXPR_CLASS_NAME_NOCHECK("ExprSortStringAttrFixup_c");
		uHash = sphFNV64 ( &m_tLocator, sizeof(m_tLocator), uHash );
		return CALC_DEP_HASHES();
	}

	ISphExpr * Clone() const final
	{
		return new ExprSortStringAttrFixup_c ( *this );
	}

public:
	CSphAttrLocator m_tLocator; ///< string attribute to fix

private:
	// copy ctor for Clone(); copies the locator only — the clone gets its blob pool
	// via a later SPH_EXPR_SET_BLOB_POOL command, not from the source object
	ExprSortStringAttrFixup_c ( const ExprSortStringAttrFixup_c& rhs ) : m_tLocator ( rhs.m_tLocator ) {}
};
// expression that transforms string pool base + offset -> ptr
// renders a JSON field value (scalar, string, or string vector) as a plain string
// so that generic string comparators can sort by it
class ExprSortJson2StringPtr_c : public BlobPool_c, public ISphExpr
{
public:
	ExprSortJson2StringPtr_c ( const CSphAttrLocator & tLocator, ISphExpr * pExpr )
		: m_tJsonCol ( tLocator )
		, m_pExpr ( pExpr )
	{
		if ( pExpr ) // adopt the expression
			pExpr->AddRef();
	}

	// the result is always a freshly-allocated (data-ptr) string, owned by the caller
	bool IsDataPtrAttr () const final { return true; }

	float Eval ( const CSphMatch & ) const override { assert ( 0 ); return 0.0f; }

	int StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const override
	{
		// can't produce anything without a blob pool or an inner JSON-field expression
		if ( !GetBlobPool() || !m_pExpr )
		{
			*ppStr = nullptr;
			return 0;
		}

		// inner expression yields a packed (type, offset) pair pointing into the blob pool
		uint64_t uPacked = m_pExpr->Int64Eval ( tMatch );
		const BYTE * pVal = GetBlobPool() + sphJsonUnpackOffset ( uPacked );
		ESphJsonType eJson = sphJsonUnpackType ( uPacked );

		CSphString sVal;

		// FIXME!!! make string length configurable for STRING and STRING_VECTOR to compare and allocate only Min(String.Length, CMP_LENGTH)
		switch ( eJson )
		{
		case JSON_INT32:
			sVal.SetSprintf ( "%d", sphJsonLoadInt ( &pVal ) );
			break;
		case JSON_INT64:
			sVal.SetSprintf ( INT64_FMT, sphJsonLoadBigint ( &pVal ) );
			break;
		case JSON_DOUBLE:
			sVal.SetSprintf ( "%f", sphQW2D ( sphJsonLoadBigint ( &pVal ) ) );
			break;
		case JSON_STRING:
		{
			int iLen = sphJsonUnpackInt ( &pVal );
			sVal.SetBinary ( (const char *)pVal, iLen );
			break;
		}
		case JSON_STRING_VECTOR:
		{
			// concatenate all vector elements into one space-separated string
			int iTotalLen = sphJsonUnpackInt ( &pVal );
			int iCount = sphJsonUnpackInt ( &pVal );

			CSphFixedVector<BYTE> dBuf ( iTotalLen + 4 + iCount ); // data and tail GAP and space count
			BYTE * pDst = dBuf.Begin();

			// head element
			if ( iCount )
			{
				int iElemLen = sphJsonUnpackInt ( &pVal );
				memcpy ( pDst, pVal, iElemLen );
				pDst += iElemLen;
				pVal += iElemLen;
			}

			// tail elements separated by space
			for ( int i=1; i<iCount; i++ )
			{
				*pDst++ = ' ';
				int iElemLen = sphJsonUnpackInt ( &pVal );
				memcpy ( pDst, pVal, iElemLen );
				pDst += iElemLen;
				pVal += iElemLen;
			}

			int iStrLen = int ( pDst-dBuf.Begin() );
			// filling junk space
			while ( pDst<dBuf.Begin()+dBuf.GetLength() )
				*pDst++ = '\0';

			*ppStr = dBuf.LeakData(); // ownership of the buffer passes to the caller
			return iStrLen;
		}
		default:
			// unsupported JSON types (objects, mixed vectors, null) sort as empty strings
			break;
		}

		int iStriLen = sVal.Length();
		*ppStr = (const BYTE *)sVal.Leak(); // hand the allocated string over to the caller
		return iStriLen;
	}

	void FixupLocator ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema ) override
	{
		sphFixupLocator ( m_tJsonCol, pOldSchema, pNewSchema );
		if ( m_pExpr )
			m_pExpr->FixupLocator ( pOldSchema, pNewSchema );
	}

	// propagate the blob pool to both this wrapper and the inner expression
	void Command ( ESphExprCommand eCmd, void * pArg ) override
	{
		if ( eCmd==SPH_EXPR_SET_BLOB_POOL )
		{
			SetBlobPool((const BYTE*)pArg);
			if ( m_pExpr )
				m_pExpr->Command ( eCmd, pArg );
		}
	}

	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) override
	{
		EXPR_CLASS_NAME_NOCHECK("ExprSortJson2StringPtr_c");
		CALC_CHILD_HASH(m_pExpr);
		// uHash = sphFNV64 ( &m_tJsonCol, sizeof ( m_tJsonCol ), uHash ); //< that is wrong! Locator may have padding uninitialized data, valgrind will warn!
		uHash = sphCalcLocatorHash ( m_tJsonCol, uHash ); //< that is right, only meaningful fields processed without padding.
		return CALC_DEP_HASHES();
	}

	ISphExpr * Clone() const final
	{
		return new ExprSortJson2StringPtr_c ( *this );
	}

private:
	CSphAttrLocator m_tJsonCol; ///< JSON attribute to fix
	ISphExprRefPtr_c m_pExpr;

private:
	// copy ctor for Clone(); the blob pool is intentionally not copied —
	// the clone receives its own pool via SPH_EXPR_SET_BLOB_POOL
	ExprSortJson2StringPtr_c ( const ExprSortJson2StringPtr_c & rhs )
		: m_tJsonCol ( rhs.m_tJsonCol )
		, m_pExpr ( SafeClone (rhs.m_pExpr) )
	{}
};
// expose the internal sort-attribute name prefix to callers outside this file
const char * GetInternalAttrPrefix()
{
	return g_sIntAttrPrefix;
}
// check whether a column name carries the internal sort-attribute prefix
bool IsSortStringInternal ( const CSphString & sColumnName )
{
	assert ( sColumnName.cstr ());
	// sizeof()-1 drops the terminating zero; assumes g_sIntAttrPrefix is a char array literal, not a pointer
	return ( strncmp ( sColumnName.cstr (), g_sIntAttrPrefix, sizeof ( g_sIntAttrPrefix )-1 )==0 );
}
// check whether a column name carries the internal JSON group-by prefix
bool IsSortJsonInternal ( const CSphString& sColumnName )
{
	assert ( sColumnName.cstr ());
	// sizeof()-1 drops the terminating zero; assumes g_sIntJsonPrefix is a char array literal, not a pointer
	return ( strncmp ( sColumnName.cstr (), g_sIntJsonPrefix, sizeof ( g_sIntJsonPrefix )-1 )==0 );
}
  4120. CSphString SortJsonInternalSet ( const CSphString& sColumnName )
  4121. {
  4122. CSphString sName;
  4123. if ( !sColumnName.IsEmpty() )
  4124. ( StringBuilder_c () << g_sIntJsonPrefix << "_" << sColumnName ).MoveTo ( sName );
  4125. return sName;
  4126. }
  4127. /////////////////////////
  4128. // SORTING QUEUE FACTORY
  4129. /////////////////////////
  4130. template < typename COMP >
  4131. static ISphMatchSorter * CreatePlainSorter ( bool bKbuffer, int iMaxMatches, bool bFactors )
  4132. {
  4133. if ( bKbuffer )
  4134. {
  4135. if ( bFactors )
  4136. return new CSphKbufferMatchQueue<COMP, true> ( iMaxMatches );
  4137. return new CSphKbufferMatchQueue<COMP, false> ( iMaxMatches );
  4138. }
  4139. if ( bFactors )
  4140. return new CSphMatchQueue<COMP, true> ( iMaxMatches );
  4141. return new CSphMatchQueue<COMP, false> ( iMaxMatches );
  4142. }
// map a sort-function enum to a concrete comparator instantiation;
// returns nullptr for unknown sort functions
static ISphMatchSorter * CreatePlainSorter ( ESphSortFunc eMatchFunc, bool bKbuffer, int iMaxMatches, bool bFactors )
{
	switch ( eMatchFunc )
	{
		case FUNC_REL_DESC:	return CreatePlainSorter<MatchRelevanceLt_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_TIMESEGS:	return CreatePlainSorter<MatchTimeSegments_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_GENERIC1:	return CreatePlainSorter<MatchGeneric1_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_GENERIC2:	return CreatePlainSorter<MatchGeneric2_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_GENERIC3:	return CreatePlainSorter<MatchGeneric3_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_GENERIC4:	return CreatePlainSorter<MatchGeneric4_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_GENERIC5:	return CreatePlainSorter<MatchGeneric5_fn> ( bKbuffer, iMaxMatches, bFactors );
		case FUNC_EXPR:		return CreatePlainSorter<MatchExpr_fn> ( bKbuffer, iMaxMatches, bFactors );
		default:			return nullptr;
	}
}
// record the names of attributes used as sort keys into m_hExtra
// dAttrs is a fixed-size comparator-state array; negative entries mark unused slots
void QueueCreator_c::ExtraAddSortkeys ( const int * dAttrs )
{
	for ( int i=0; i<CSphMatchComparatorState::MAX_ATTRS; ++i )
		if ( dAttrs[i]>=0 )
			m_hExtra.Add ( m_pSorterSchema->GetAttr ( dAttrs[i] ).m_sName );
}
// printf-style error helper; stores the message into m_sError and
// always returns false, so callers can simply write 'return Err(...)'
bool QueueCreator_c::Err ( const char * sFmt, ... ) const
{
	va_list ap;
	va_start ( ap, sFmt );
	m_sError.SetSprintfVa ( sFmt, ap );
	va_end ( ap );
	return false;
}
  4172. void QueueCreator_c::SelectStageForColumnarExpr ( CSphColumnInfo & tExprCol )
  4173. {
  4174. if ( !tExprCol.IsColumnarExpr() )
  4175. {
  4176. tExprCol.m_eStage = SPH_EVAL_PREFILTER;
  4177. return;
  4178. }
  4179. // columnar expressions are a special case
  4180. // it is sometimes faster to evaluate them in the filter than to evaluate the expression, store it in the match and then use it in the filter
  4181. // FIXME: add sorters?
  4182. int iRank = 0;
  4183. iRank += tExprCol.m_sName==m_tQuery.m_sGroupBy ? 1 : 0;
  4184. iRank += m_tQuery.m_dFilters.any_of ( [&tExprCol]( const CSphFilterSettings & tFilter ) { return tFilter.m_sAttrName==tExprCol.m_sName; } ) ? 1 : 0;
  4185. if ( iRank>1 )
  4186. tExprCol.m_eStage = SPH_EVAL_PREFILTER;
  4187. }
  4188. void QueueCreator_c::FetchDependencyChains ( IntVec_t & dDependentCols )
  4189. {
  4190. ARRAY_FOREACH ( i, dDependentCols )
  4191. {
  4192. const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr ( dDependentCols[i] );
  4193. // handle chains of dependencies (e.g. SELECT 1+attr f1, f1-1 f2 ... WHERE f2>5)
  4194. if ( tCol.m_pExpr )
  4195. tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
  4196. }
  4197. dDependentCols.Uniq();
  4198. }
  4199. void QueueCreator_c::PropagateEvalStage ( CSphColumnInfo & tExprCol, IntVec_t & dDependentCols )
  4200. {
  4201. bool bWeight = false;
  4202. for ( auto i : dDependentCols )
  4203. {
  4204. const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr(i);
  4205. bWeight |= tCol.m_bWeight;
  4206. }
  4207. if ( bWeight )
  4208. {
  4209. tExprCol.m_eStage = SPH_EVAL_PRESORT;
  4210. tExprCol.m_bWeight = true;
  4211. }
  4212. for ( auto i : dDependentCols )
  4213. {
  4214. auto & tDep = const_cast < CSphColumnInfo & > ( m_pSorterSchema->GetAttr(i) );
  4215. if ( tDep.m_eStage > tExprCol.m_eStage )
  4216. tDep.m_eStage = tExprCol.m_eStage;
  4217. }
  4218. }
// adjust the result type/width of an aggregate expression column
// returns false (with error set) for aggregates over untyped JSON fields
bool QueueCreator_c::SetupAggregateExpr ( CSphColumnInfo & tExprCol, const CSphString & sExpr, DWORD uQueryPackedFactorFlags )
{
	switch ( tExprCol.m_eAggrFunc )
	{
	case SPH_AGGR_AVG:
		// force AVG() to be computed in doubles
		tExprCol.m_eAttrType = SPH_ATTR_DOUBLE;
		tExprCol.m_tLocator.m_iBitCount = 64;
		break;

	case SPH_AGGR_CAT:
		// force GROUP_CONCAT() to be computed as strings
		tExprCol.m_eAttrType = SPH_ATTR_STRINGPTR;
		tExprCol.m_tLocator.m_iBitCount = ROWITEMPTR_BITS;
		break;

	case SPH_AGGR_SUM:
		// widen SUM() accumulators so they don't overflow:
		// bool -> 32-bit int, 32-bit int -> 64-bit int
		if ( tExprCol.m_eAttrType==SPH_ATTR_BOOL )
		{
			tExprCol.m_eAttrType = SPH_ATTR_INTEGER;
			tExprCol.m_tLocator.m_iBitCount = 32;
		} else if ( tExprCol.m_eAttrType==SPH_ATTR_INTEGER )
		{
			tExprCol.m_eAttrType = SPH_ATTR_BIGINT;
			tExprCol.m_tLocator.m_iBitCount = 64;
		}
		break;

	default:
		break;
	}

	// force explicit type conversion for JSON attributes
	if ( tExprCol.m_eAggrFunc!=SPH_AGGR_NONE && tExprCol.m_eAttrType==SPH_ATTR_JSON_FIELD )
		return Err ( "ambiguous attribute type '%s', use INTEGER(), BIGINT() or DOUBLE() conversion functions", sExpr.cstr() );

	if ( uQueryPackedFactorFlags & SPH_FACTOR_JSON_OUT )
		tExprCol.m_eAttrType = SPH_ATTR_FACTORS_JSON;

	return true;
}
  4254. bool QueueCreator_c::SetupColumnarAggregates ( CSphColumnInfo & tExprCol )
  4255. {
  4256. CSphVector<int> dDependentCols;
  4257. tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
  4258. FetchDependencyChains ( dDependentCols );
  4259. if ( !dDependentCols.GetLength() )
  4260. return tExprCol.IsColumnarExpr();
  4261. if ( dDependentCols.GetLength()==1 )
  4262. {
  4263. const CSphColumnInfo & tColumnarAttr = m_pSorterSchema->GetAttr ( dDependentCols[0] );
  4264. if ( tColumnarAttr.IsColumnarExpr() )
  4265. {
  4266. CSphString sColumnarCol;
  4267. tColumnarAttr.m_pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &sColumnarCol );
  4268. // let aggregate expression know that it is working with that columnar attribute
  4269. tExprCol.m_pExpr->Command ( SPH_EXPR_SET_COLUMNAR_COL, &sColumnarCol );
  4270. return true;
  4271. }
  4272. }
  4273. return false;
  4274. }
  4275. void QueueCreator_c::UpdateAggregateDependencies ( CSphColumnInfo & tExprCol )
  4276. {
  4277. /// update aggregate dependencies (e.g. SELECT 1+attr f1, min(f1), ...)
  4278. CSphVector<int> dDependentCols;
  4279. tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
  4280. FetchDependencyChains ( dDependentCols );
  4281. ARRAY_FOREACH ( j, dDependentCols )
  4282. {
  4283. auto & tDep = const_cast < CSphColumnInfo & > ( m_pSorterSchema->GetAttr ( dDependentCols[j] ) );
  4284. if ( tDep.m_eStage>tExprCol.m_eStage )
  4285. tDep.m_eStage = tExprCol.m_eStage;
  4286. }
  4287. }
// process one SELECT-list item: pass plain attributes through, or parse the item
// as an expression, pick its evaluation stage, and add it to the sorter schema
// returns false (with m_sError set) on parse/validation errors
bool QueueCreator_c::ParseQueryItem ( const CSphQueryItem & tItem )
{
	assert ( m_pSorterSchema );
	const CSphString & sExpr = tItem.m_sExpr;
	bool bIsCount = IsCount(sExpr);
	m_bHasCount |= bIsCount;

	// 'SELECT *' pulls every index attribute into the dup/column bookkeeping
	if ( sExpr=="*" )
	{
		m_bHaveStar = true;
		for ( int i=0; i<m_tSettings.m_tSchema.GetAttrsCount(); ++i )
		{
			m_hQueryDups.Add ( m_tSettings.m_tSchema.GetAttr(i).m_sName );
			m_hQueryColumns.Add ( m_tSettings.m_tSchema.GetAttr(i).m_sName );
		}
	}

	// for now, just always pass "plain" attrs from index to sorter; they will be filtered on searchd level
	int iAttrIdx = m_tSettings.m_tSchema.GetAttrIndex ( sExpr.cstr() );
	bool bColumnar = iAttrIdx>=0 && m_tSettings.m_tSchema.GetAttr(iAttrIdx).IsColumnar();
	// a "plain" attr is a non-aggregated, non-columnar schema attribute with no distinct alias
	bool bPlainAttr = ( ( sExpr=="*" || ( iAttrIdx>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && !bColumnar ) ) &&
		( tItem.m_sAlias.IsEmpty() || tItem.m_sAlias==tItem.m_sExpr ) );

	if ( iAttrIdx>=0 )
	{
		ESphAttr eAttr = m_tSettings.m_tSchema.GetAttr ( iAttrIdx ).m_eAttrType;
		if ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_STRINGPTR
			|| eAttr==SPH_ATTR_UINT32SET || eAttr==SPH_ATTR_INT64SET )
		{
			// aggregates over strings/MVAs are not supported
			if ( tItem.m_eAggrFunc!=SPH_AGGR_NONE )
				return Err ( "can not aggregate non-scalar attribute '%s'", tItem.m_sExpr.cstr() );

			// string attrs may still be treated as plain unless some other item aliases them
			if ( !bPlainAttr && !bColumnar && ( eAttr==SPH_ATTR_STRING || eAttr==SPH_ATTR_STRINGPTR ) )
			{
				bPlainAttr = true;
				for ( const auto & i : m_tQuery.m_dItems )
					if ( sExpr==i.m_sAlias )
						bPlainAttr = false;
			}
		}
	}

	// plain attrs, the group-by key, and COUNT(*) need no expression parsing
	if ( bPlainAttr || IsGroupby ( sExpr ) || bIsCount )
	{
		if ( sExpr!="*" && !tItem.m_sAlias.IsEmpty() )
		{
			m_hQueryDups.Add ( tItem.m_sAlias );
			if ( bPlainAttr )
				m_hQueryColumns.Add ( tItem.m_sExpr );
		}

		m_bHasGroupByExpr = IsGroupby ( sExpr );
		return true;
	}

	if ( IsKnnDist(sExpr) && m_pSorterSchema->GetAttrIndex ( GetKnnDistAttrName() )<0 )
		return Err ( "KNN_DIST() is only allowed for KNN() queries" );

	// not an attribute? must be an expression, and must be aliased by query parser
	assert ( !tItem.m_sAlias.IsEmpty() );

	// tricky part
	// we might be fed with precomputed matches, but it's all or nothing
	// the incoming match either does not have anything computed, or it has everything
	int iSorterAttr = m_pSorterSchema->GetAttrIndex ( tItem.m_sAlias.cstr() );
	if ( iSorterAttr>=0 )
	{
		if ( m_hQueryDups[tItem.m_sAlias] )
		{
			if ( bColumnar ) // we might have several similar aliases for columnar attributes (and they are not plain attrs but expressions)
				return true;
			else
				return Err ( "alias '%s' must be unique (conflicts with another alias)", tItem.m_sAlias.cstr() );
		}
	}

	// a new and shiny expression, lets parse
	CSphColumnInfo tExprCol ( tItem.m_sAlias.cstr(), SPH_ATTR_NONE );
	DWORD uQueryPackedFactorFlags = SPH_FACTOR_DISABLE;
	bool bHasZonespanlist = false;
	bool bExprsNeedDocids = false;

	ExprParseArgs_t tExprParseArgs;
	tExprParseArgs.m_pAttrType = &tExprCol.m_eAttrType;
	tExprParseArgs.m_pUsesWeight = &tExprCol.m_bWeight;
	tExprParseArgs.m_pProfiler = m_tSettings.m_pProfiler;
	tExprParseArgs.m_eCollation = m_tQuery.m_eCollation;
	tExprParseArgs.m_pHook = m_tSettings.m_pHook;
	tExprParseArgs.m_pZonespanlist = &bHasZonespanlist;
	tExprParseArgs.m_pPackedFactorsFlags = &uQueryPackedFactorFlags;
	tExprParseArgs.m_pEvalStage = &tExprCol.m_eStage;
	tExprParseArgs.m_pStoredField = &tExprCol.m_uFieldFlags;
	tExprParseArgs.m_pNeedDocIds = &bExprsNeedDocids;

	// tricky bit
	// GROUP_CONCAT() adds an implicit TO_STRING() conversion on top of its argument
	// and then the aggregate operation simply concatenates strings as matches arrive
	// ideally, we would instead pass ownership of the expression to G_C() implementation
	// and also the original expression type, and let the string conversion happen in G_C() itself
	// but that ideal route seems somewhat more complicated in the current architecture
	if ( tItem.m_eAggrFunc==SPH_AGGR_CAT )
	{
		CSphString sExpr2;
		sExpr2.SetSprintf ( "TO_STRING(%s)", sExpr.cstr() );
		tExprCol.m_pExpr = sphExprParse ( sExpr2.cstr(), *m_pSorterSchema, m_sError, tExprParseArgs );
	} else
	{
		tExprCol.m_pExpr = sphExprParse ( sExpr.cstr(), *m_pSorterSchema, m_sError, tExprParseArgs );
	}

	m_uPackedFactorFlags |= uQueryPackedFactorFlags;
	m_bZonespanlist |= bHasZonespanlist;
	m_bExprsNeedDocids |= bExprsNeedDocids;
	tExprCol.m_eAggrFunc = tItem.m_eAggrFunc;
	tExprCol.m_iIndex = iSorterAttr>= 0 ? m_pSorterSchema->GetAttrIndexOriginal ( tItem.m_sAlias.cstr() ) : -1;

	if ( !tExprCol.m_pExpr )
		return Err ( "parse error: %s", m_sError.cstr() );

	// remove original column
	if ( iSorterAttr>=0 )
		m_pSorterSchema->RemoveStaticAttr(iSorterAttr);

	if ( !SetupAggregateExpr ( tExprCol, tItem.m_sExpr, uQueryPackedFactorFlags ) )
		return false;

	// postpone aggregates, add non-aggregates
	if ( tExprCol.m_eAggrFunc==SPH_AGGR_NONE )
	{
		// is this expression used in filter?
		// OPTIMIZE? hash filters and do hash lookups?
		if ( tExprCol.m_eAttrType!=SPH_ATTR_JSON_FIELD )
			ARRAY_FOREACH ( i, m_tQuery.m_dFilters )
				if ( m_tQuery.m_dFilters[i].m_sAttrName==tExprCol.m_sName )
				{
					// is this a hack?
					// m_bWeight is computed after EarlyReject() get called
					// that means we can't evaluate expressions with WEIGHT() in prefilter phase
					if ( tExprCol.m_bWeight )
					{
						tExprCol.m_eStage = SPH_EVAL_PRESORT; // special, weight filter ( short cut )
						break;
					}

					// so we are about to add a filter condition,
					// but it might depend on some preceding columns (e.g. SELECT 1+attr f1 ... WHERE f1>5)
					// lets detect those and move them to prefilter \ presort phase too
					CSphVector<int> dDependentCols;
					tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dDependentCols );
					SelectStageForColumnarExpr(tExprCol);
					FetchDependencyChains ( dDependentCols );
					PropagateEvalStage ( tExprCol, dDependentCols );
					break;
				}

		// add it!
		// NOTE, "final" stage might need to be fixed up later
		// we'll do that when parsing sorting clause
		m_pSorterSchema->AddAttr ( tExprCol, true );
	} else // some aggregate
	{
		bool bColumnarAggregate = SetupColumnarAggregates(tExprCol);

		// columnar aggregates have their own code path; no need to calculate them in presort
		tExprCol.m_eStage = bColumnarAggregate ? SPH_EVAL_SORTER : SPH_EVAL_PRESORT;
		m_pSorterSchema->AddAttr ( tExprCol, true );
		m_hExtra.Add ( tExprCol.m_sName );

		if ( !bColumnarAggregate )
			UpdateAggregateDependencies ( tExprCol );
	}

	m_hQueryDups.Add ( tExprCol.m_sName );
	m_hQueryColumns.Add ( tExprCol.m_sName );

	// need to add all dependent columns for post limit expressions
	if ( tExprCol.m_eStage==SPH_EVAL_POSTLIMIT && tExprCol.m_pExpr )
	{
		// transitive walk: dCur may grow while we iterate
		CSphVector<int> dCur;
		tExprCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );

		ARRAY_FOREACH ( j, dCur )
		{
			const CSphColumnInfo & tCol = m_pSorterSchema->GetAttr ( dCur[j] );
			if ( tCol.m_pExpr )
				tCol.m_pExpr->Command ( SPH_EXPR_GET_DEPENDENT_COLS, &dCur );
		}

		dCur.Uniq ();

		ARRAY_FOREACH ( j, dCur )
		{
			const CSphColumnInfo & tDep = m_pSorterSchema->GetAttr ( dCur[j] );
			m_hQueryColumns.Add ( tDep.m_sName );
		}
	}

	return true;
}
  4460. bool QueueCreator_c::ReplaceWithColumnarItem ( const CSphString & sAttr, ESphEvalStage eStage )
  4461. {
  4462. const CSphColumnInfo * pAttr = m_pSorterSchema->GetAttr ( sAttr.cstr() );
  4463. if ( !pAttr->IsColumnar() )
  4464. return true;
  4465. m_hQueryDups.Delete(sAttr);
  4466. CSphQueryItem tItem;
  4467. tItem.m_sExpr = tItem.m_sAlias = sAttr;
  4468. if ( !ParseQueryItem ( tItem ) )
  4469. return false;
  4470. // force stage
  4471. const CSphColumnInfo * pNewAttr = m_pSorterSchema->GetAttr ( sAttr.cstr() );
  4472. const_cast<CSphColumnInfo *>(pNewAttr)->m_eStage = Min ( pNewAttr->m_eStage, eStage );
  4473. return true;
  4474. }
// Test for @geodist and setup, if any
bool QueueCreator_c::MaybeAddGeodistColumn ()
{
	// only applies to geo-anchor queries, and only if @geodist is not already present
	if ( !m_tQuery.m_bGeoAnchor || m_pSorterSchema->GetAttrIndex ( "@geodist" )>=0 )
		return true;

	// replace columnar lat/lon with expressions before adding geodist
	if ( !ReplaceWithColumnarItem ( m_tQuery.m_sGeoLatAttr, SPH_EVAL_PREFILTER ) ) return false;
	if ( !ReplaceWithColumnarItem ( m_tQuery.m_sGeoLongAttr, SPH_EVAL_PREFILTER ) ) return false;

	auto pExpr = new ExprGeodist_t();
	if ( !pExpr->Setup ( &m_tQuery, *m_pSorterSchema, m_sError ))
	{
		pExpr->Release (); // setup failed: we still own the expression, drop it
		return false;
	}

	CSphColumnInfo tCol ( "@geodist", SPH_ATTR_FLOAT );
	tCol.m_pExpr = pExpr; // takes ownership, no need for an explicit pExpr release
	tCol.m_eStage = SPH_EVAL_PREFILTER; // OPTIMIZE? actual stage depends on usage
	m_pSorterSchema->AddAttr ( tCol, true );
	m_hExtra.Add ( tCol.m_sName );
	m_hQueryAttrs.Add ( tCol.m_sName );
	return true;
}
  4497. // Test for @expr and setup, if any
  4498. bool QueueCreator_c::MaybeAddExprColumn ()
  4499. {
  4500. if ( m_tQuery.m_eSort!=SPH_SORT_EXPR || m_pSorterSchema->GetAttrIndex ( "@expr" )>=0 )
  4501. return true;
  4502. CSphColumnInfo tCol ( "@expr", SPH_ATTR_FLOAT ); // enforce float type for backwards compatibility
  4503. // (i.e. too lazy to fix those tests right now)
  4504. bool bHasZonespanlist;
  4505. ExprParseArgs_t tExprArgs;
  4506. tExprArgs.m_pProfiler = m_tSettings.m_pProfiler;
  4507. tExprArgs.m_eCollation = m_tQuery.m_eCollation;
  4508. tExprArgs.m_pZonespanlist = &bHasZonespanlist;
  4509. tCol.m_pExpr = sphExprParse ( m_tQuery.m_sSortBy.cstr (), *m_pSorterSchema, m_sError, tExprArgs );
  4510. if ( !tCol.m_pExpr )
  4511. return false;
  4512. m_bZonespanlist |= bHasZonespanlist;
  4513. tCol.m_eStage = SPH_EVAL_PRESORT;
  4514. m_pSorterSchema->AddAttr ( tCol, true );
  4515. m_hQueryAttrs.Add ( tCol.m_sName );
  4516. return true;
  4517. }
  4518. bool QueueCreator_c::AddStoredFieldExpressions()
  4519. {
  4520. for ( int i = 0; i<m_tSettings.m_tSchema.GetFieldsCount(); i++ )
  4521. {
  4522. const CSphColumnInfo & tField = m_tSettings.m_tSchema.GetField(i);
  4523. if ( !(tField.m_uFieldFlags & CSphColumnInfo::FIELD_STORED) )
  4524. continue;
  4525. CSphQueryItem tItem;
  4526. tItem.m_sExpr = tItem.m_sAlias = tField.m_sName;
  4527. if ( !ParseQueryItem ( tItem ) )
  4528. return false;
  4529. }
  4530. return true;
  4531. }
  4532. bool QueueCreator_c::AddColumnarAttributeExpressions()
  4533. {
  4534. for ( int i = 0; i<m_tSettings.m_tSchema.GetAttrsCount(); i++ )
  4535. {
  4536. const CSphColumnInfo & tAttr = m_tSettings.m_tSchema.GetAttr(i);
  4537. const CSphColumnInfo * pSorterAttr = m_pSorterSchema->GetAttr ( tAttr.m_sName.cstr() );
  4538. if ( !tAttr.IsColumnar() || ( pSorterAttr && !pSorterAttr->IsColumnar() ) )
  4539. continue;
  4540. m_hQueryDups.Delete ( tAttr.m_sName );
  4541. CSphQueryItem tItem;
  4542. tItem.m_sExpr = tItem.m_sAlias = tAttr.m_sName;
  4543. if ( !ParseQueryItem ( tItem ) )
  4544. return false;
  4545. }
  4546. return true;
  4547. }
  4548. // Add computed items
  4549. bool QueueCreator_c::MaybeAddExpressionsFromSelectList ()
  4550. {
  4551. // expressions from select items
  4552. if ( !m_tSettings.m_bComputeItems )
  4553. return true;
  4554. if ( !m_tQuery.m_dItems.all_of ( [&] ( const CSphQueryItem & v ) { return ParseQueryItem ( v ); } ))
  4555. return false;
  4556. if ( m_bHaveStar )
  4557. {
  4558. if ( !AddColumnarAttributeExpressions() )
  4559. return false;
  4560. if ( !AddStoredFieldExpressions() )
  4561. return false;
  4562. }
  4563. return true;
  4564. }
// when the sorter feeds an update/delete collection, make sure document ids
// are materialized early enough to be fetched from the matches
bool QueueCreator_c::AddExpressionsForUpdates()
{
	if ( !m_tSettings.m_pCollection )
		return true;

	// NOTE(review): assumes the docid attribute is always present in the sorter schema;
	// pOldDocId is dereferenced unchecked — confirm against callers
	const CSphColumnInfo * pOldDocId = m_pSorterSchema->GetAttr ( sphGetDocidName() );
	if ( !pOldDocId->IsColumnar() && !pOldDocId->IsColumnarExpr() )
		return true;

	if ( pOldDocId->IsColumnar() )
	{
		// add columnar id expressions to update queue. otherwise we won't be able to fetch docids which are needed to run updates/deletes
		CSphQueryItem tItem;
		tItem.m_sExpr = tItem.m_sAlias = sphGetDocidName();
		if ( !ParseQueryItem ( tItem ) )
			return false;
	}

	// re-fetch: ParseQueryItem may have replaced the docid column
	auto * pDocId = const_cast<CSphColumnInfo *> ( m_pSorterSchema->GetAttr ( sphGetDocidName() ) );
	assert(pDocId);
	pDocId->m_eStage = SPH_EVAL_PRESORT; // update/delete queues don't have real Finalize(), so just evaluate it at presort stage
	return true;
}
  4585. bool QueueCreator_c::MaybeAddGroupbyMagic ( bool bGotDistinct )
  4586. {
  4587. CSphString sJsonGroupBy;
  4588. // now let's add @groupby etc. if needed
  4589. if ( m_bGotGroupby && m_pSorterSchema->GetAttrIndex ( "@groupby" )<0 )
  4590. {
  4591. ESphAttr eGroupByResult = ( !m_tGroupSorterSettings.m_bImplicit )
  4592. ? m_tGroupSorterSettings.m_pGrouper->GetResultType ()
  4593. : SPH_ATTR_INTEGER; // implicit do not have grouper
  4594. // all FACET group by should be the widest possible type
  4595. if ( m_tQuery.m_bFacet || m_tQuery.m_bFacetHead || m_bMulti )
  4596. eGroupByResult = SPH_ATTR_BIGINT;
  4597. CSphColumnInfo tGroupby ( "@groupby", eGroupByResult );
  4598. CSphColumnInfo tCount ( "@count", SPH_ATTR_BIGINT );
  4599. tGroupby.m_eStage = SPH_EVAL_SORTER;
  4600. tCount.m_eStage = SPH_EVAL_SORTER;
  4601. auto AddColumn = [this] ( const CSphColumnInfo & tCol )
  4602. {
  4603. m_pSorterSchema->AddAttr ( tCol, true );
  4604. m_hQueryColumns.Add ( tCol.m_sName );
  4605. };
  4606. AddColumn ( tGroupby );
  4607. AddColumn ( tCount );
  4608. if ( bGotDistinct )
  4609. {
  4610. CSphColumnInfo tDistinct ( "@distinct", SPH_ATTR_INTEGER );
  4611. tDistinct.m_eStage = SPH_EVAL_SORTER;
  4612. AddColumn ( tDistinct );
  4613. }
  4614. // add @groupbystr last in case we need to skip it on sending (like @int_attr_*)
  4615. if ( m_tGroupSorterSettings.m_bJson )
  4616. {
  4617. sJsonGroupBy = SortJsonInternalSet ( m_tQuery.m_sGroupBy );
  4618. if ( !m_pSorterSchema->GetAttr ( sJsonGroupBy.cstr() ) )
  4619. {
  4620. CSphColumnInfo tGroupbyStr ( sJsonGroupBy.cstr(), SPH_ATTR_JSON_FIELD );
  4621. tGroupbyStr.m_eStage = SPH_EVAL_SORTER;
  4622. AddColumn ( tGroupbyStr );
  4623. }
  4624. }
  4625. }
  4626. #define LOC_CHECK( _cond, _msg ) if (!(_cond)) { m_sError = "invalid schema: " _msg; return false; }
  4627. int iGroupby = m_pSorterSchema->GetAttrIndex ( "@groupby" );
  4628. if ( iGroupby>=0 )
  4629. {
  4630. m_tGroupSorterSettings.m_bDistinct = bGotDistinct;
  4631. m_tGroupSorterSettings.m_tLocGroupby = m_pSorterSchema->GetAttr ( iGroupby ).m_tLocator;
  4632. LOC_CHECK ( m_tGroupSorterSettings.m_tLocGroupby.m_bDynamic, "@groupby must be dynamic" );
  4633. int iCount = m_pSorterSchema->GetAttrIndex ( "@count" );
  4634. LOC_CHECK ( iCount>=0, "missing @count" );
  4635. m_tGroupSorterSettings.m_tLocCount = m_pSorterSchema->GetAttr ( iCount ).m_tLocator;
  4636. LOC_CHECK ( m_tGroupSorterSettings.m_tLocCount.m_bDynamic, "@count must be dynamic" );
  4637. int iDistinct = m_pSorterSchema->GetAttrIndex ( "@distinct" );
  4638. if ( bGotDistinct )
  4639. {
  4640. LOC_CHECK ( iDistinct>=0, "missing @distinct" );
  4641. m_tGroupSorterSettings.m_tLocDistinct = m_pSorterSchema->GetAttr ( iDistinct ).m_tLocator;
  4642. LOC_CHECK ( m_tGroupSorterSettings.m_tLocDistinct.m_bDynamic, "@distinct must be dynamic" );
  4643. }
  4644. else
  4645. LOC_CHECK ( iDistinct<=0, "unexpected @distinct" );
  4646. int iGroupbyStr = m_pSorterSchema->GetAttrIndex ( sJsonGroupBy.cstr() );
  4647. if ( iGroupbyStr>=0 )
  4648. m_tGroupSorterSettings.m_tLocGroupbyStr = m_pSorterSchema->GetAttr ( iGroupbyStr ).m_tLocator;
  4649. }
  4650. if ( m_bHasCount )
  4651. LOC_CHECK ( m_pSorterSchema->GetAttrIndex ( "@count" )>=0, "Count(*) or @count is queried, but not available in the schema" );
  4652. #undef LOC_CHECK
  4653. return true;
  4654. }
  4655. bool QueueCreator_c::AddKNNDistColumn()
  4656. {
  4657. if ( m_tQuery.m_sKNNAttr.IsEmpty() || m_pSorterSchema->GetAttrIndex ( GetKnnDistAttrName() )>=0 )
  4658. return true;
  4659. auto pAttr = m_pSorterSchema->GetAttr ( m_tQuery.m_sKNNAttr.cstr() );
  4660. if ( !pAttr )
  4661. {
  4662. m_sError.SetSprintf ( "requested KNN search attribute '%s' not found", m_tQuery.m_sKNNAttr.cstr() );
  4663. return false;
  4664. }
  4665. if ( !pAttr->IsIndexedKNN() )
  4666. {
  4667. m_sError.SetSprintf ( "KNN index not enabled for attribute '%s'", m_tQuery.m_sKNNAttr.cstr() );
  4668. return false;
  4669. }
  4670. if ( pAttr->m_tKNN.m_iDims!=m_tQuery.m_dKNNVec.GetLength() )
  4671. {
  4672. m_sError.SetSprintf ( "KNN index '%s' requires a vector of %d entries; %d entries specified", m_tQuery.m_sKNNAttr.cstr(), pAttr->m_tKNN.m_iDims, m_tQuery.m_dKNNVec.GetLength() );
  4673. return false;
  4674. }
  4675. CSphColumnInfo tKNNDist ( GetKnnDistAttrName(), SPH_ATTR_FLOAT );
  4676. tKNNDist.m_eStage = SPH_EVAL_PRESORT;
  4677. tKNNDist.m_pExpr = CreateExpr_KNNDist ( m_tQuery.m_dKNNVec, *pAttr );
  4678. m_pSorterSchema->AddAttr ( tKNNDist, true );
  4679. m_hQueryColumns.Add ( tKNNDist.m_sName );
  4680. return true;
  4681. }
  4682. bool QueueCreator_c::CheckHavingConstraints () const
  4683. {
  4684. if ( m_tSettings.m_pAggrFilter && !m_tSettings.m_pAggrFilter->m_sAttrName.IsEmpty () )
  4685. {
  4686. if ( !m_bGotGroupby )
  4687. return Err ( "can not use HAVING without GROUP BY" );
  4688. // should be column named at group by, or it's alias or aggregate
  4689. const CSphString & sHaving = m_tSettings.m_pAggrFilter->m_sAttrName;
  4690. if ( !IsGroupbyMagic ( sHaving ) )
  4691. {
  4692. bool bValidHaving = false;
  4693. for ( const CSphQueryItem & tItem : m_tQuery.m_dItems )
  4694. {
  4695. if ( tItem.m_sAlias!=sHaving )
  4696. continue;
  4697. bValidHaving = ( IsGroupbyMagic ( tItem.m_sExpr ) || tItem.m_eAggrFunc!=SPH_AGGR_NONE );
  4698. break;
  4699. }
  4700. if ( !bValidHaving )
  4701. return Err ( "can not use HAVING with attribute not related to GROUP BY" );
  4702. }
  4703. }
  4704. return true;
  4705. }
  4706. void QueueCreator_c::SetupRemapColJson ( CSphColumnInfo & tRemapCol, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, int iStateAttr )
  4707. {
  4708. bool bFunc = dExtraExprs[iStateAttr].m_tKey.m_uMask==0;
  4709. tRemapCol.m_eStage = SPH_EVAL_PRESORT;
  4710. if ( bFunc )
  4711. {
  4712. tRemapCol.m_pExpr = dExtraExprs[iStateAttr].m_pExpr;
  4713. tRemapCol.m_eAttrType = dExtraExprs[iStateAttr].m_eType;
  4714. tState.m_eKeypart[iStateAttr] = Attr2Keypart ( tRemapCol.m_eAttrType );
  4715. }
  4716. else
  4717. tRemapCol.m_pExpr = new ExprSortJson2StringPtr_c ( tState.m_tLocator[iStateAttr], dExtraExprs[iStateAttr].m_pExpr );
  4718. }
  4719. const CSphColumnInfo * QueueCreator_c::GetGroupbyStr ( int iAttr, int iNumOldAttrs ) const
  4720. {
  4721. assert ( m_pSorterSchema );
  4722. auto & tSorterSchema = *m_pSorterSchema;
  4723. if ( m_tSettings.m_bComputeItems && iAttr>=0 && iAttr<iNumOldAttrs && tSorterSchema.GetAttr(iAttr).m_sName=="@groupby" && m_dGroupColumns.GetLength() )
  4724. {
  4725. // FIXME!!! add support of multi group by
  4726. const CSphColumnInfo & tGroupCol = tSorterSchema.GetAttr ( m_dGroupColumns[0].first );
  4727. if ( tGroupCol.m_eAttrType==SPH_ATTR_STRING || tGroupCol.m_eAttrType==SPH_ATTR_STRINGPTR )
  4728. return &tGroupCol;
  4729. }
  4730. return nullptr;
  4731. }
// when sorting uses @groupby and the underlying group-by column is a string,
// retarget the sort key so that it compares the actual string values
void QueueCreator_c::ReplaceGroupbyStrWithExprs ( CSphMatchComparatorState & tState, int iNumOldAttrs )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		// only sort keys that resolve to @groupby over a string group-by column
		const CSphColumnInfo * pGroupStrBase = GetGroupbyStr ( tState.m_dAttrs[i], iNumOldAttrs );
		if ( !pGroupStrBase )
			continue;

		assert ( tState.m_dAttrs[i]>=0 && tState.m_dAttrs[i]<iNumOldAttrs );

		int iRemap = -1;
		if ( pGroupStrBase->m_eAttrType==SPH_ATTR_STRINGPTR )
		{
			// grouping by (columnar) string; and the same string is used in sorting
			// correct the locator and change the evaluation stage to PRESORT
			iRemap = tSorterSchema.GetAttrIndex ( pGroupStrBase->m_sName.cstr() );
			assert ( iRemap>=0 );

			const CSphColumnInfo & tAttr = tSorterSchema.GetAttr(iRemap);
			const_cast<CSphColumnInfo &>(tAttr).m_eStage = SPH_EVAL_PRESORT;
		}
		else if ( !pGroupStrBase->IsColumnar() )
		{
			// plain row-based string: add (or reuse) an @int_attr_* remap column that
			// materializes the string as a stringptr at presort
			CSphString sRemapCol;
			sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, pGroupStrBase->m_sName.cstr() );
			iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
			if ( iRemap==-1 )
			{
				CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_STRINGPTR );
				tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( pGroupStrBase->m_tLocator );
				tRemapCol.m_eStage = SPH_EVAL_PRESORT;
				iRemap = tSorterSchema.GetAttrsCount();
				tSorterSchema.AddAttr ( tRemapCol, true );
			}
		}

		// retarget the comparator at the remap column
		if ( iRemap!=-1 )
		{
			tState.m_eKeypart[i] = SPH_KEYPART_STRINGPTR;
			tState.m_tLocator[i] = tSorterSchema.GetAttr(iRemap).m_tLocator;
			tState.m_dAttrs[i] = iRemap;
			tState.m_dRemapped.BitSet ( i );
		}
	}
}
// replace static string (SPH_KEYPART_STRING) sort keys with stringptr columns evaluated at presort
void QueueCreator_c::ReplaceStaticStringsWithExprs ( CSphMatchComparatorState & tState )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		if ( tState.m_dRemapped.BitGet ( i ) )
			continue;
		if ( tState.m_eKeypart[i]!=SPH_KEYPART_STRING )
			continue;

		int iRemap = -1;
		int iAttrId = tState.m_dAttrs[i];

		const CSphColumnInfo & tAttr = tSorterSchema.GetAttr(iAttrId);
		if ( tAttr.IsColumnar() )
		{
			// columnar string: replace the static column with a stringptr expression
			// that fetches the value from columnar storage
			CSphString sAttrName = tAttr.m_sName; // name is copied before the column is removed
			tSorterSchema.RemoveStaticAttr(iAttrId);
			// NOTE(review): tAttr (a reference into the schema) is still read below
			// (m_uAttrFlags, m_eAttrType) after RemoveStaticAttr(); this relies on the
			// removal not invalidating that reference -- confirm
			CSphColumnInfo tRemapCol ( sAttrName.cstr(), SPH_ATTR_STRINGPTR );
			tRemapCol.m_eStage = SPH_EVAL_PRESORT;
			tRemapCol.m_pExpr = CreateExpr_GetColumnarString ( sAttrName, tAttr.m_uAttrFlags & CSphColumnInfo::ATTR_STORED );
			tSorterSchema.AddAttr ( tRemapCol, true );
			iRemap = tSorterSchema.GetAttrIndex ( sAttrName.cstr() );
		}
		else
		{
			// row-based string: add (or reuse) an @int_attr_* fixup column
			CSphString sRemapCol;
			sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, tSorterSchema.GetAttr(iAttrId).m_sName.cstr() );
			iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
			if ( iRemap==-1 )
			{
				CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_STRINGPTR );
				tRemapCol.m_eStage = SPH_EVAL_PRESORT;
				tRemapCol.m_pExpr = new ExprSortStringAttrFixup_c ( tState.m_tLocator[i] );
				iRemap = tSorterSchema.GetAttrsCount();
				tSorterSchema.AddAttr ( tRemapCol, true );
			}
		}

		// retarget the comparator at the new stringptr column
		tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
		tState.m_dAttrs[i] = iRemap;
		tState.m_eKeypart[i] = SPH_KEYPART_STRINGPTR;
		tState.m_dRemapped.BitSet ( i );
	}
}
// replace json-field sort keys with @int_attr_* remap columns added to the sorter schema
void QueueCreator_c::ReplaceJsonWithExprs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		if ( tState.m_dRemapped.BitGet ( i ) )
			continue;
		// only keys carrying a json path
		if ( dExtraExprs[i].m_tKey.m_sKey.IsEmpty() )
			continue;

		CSphString sRemapCol;
		sRemapCol.SetSprintf ( "%s%s", g_sIntAttrPrefix, dExtraExprs[i].m_tKey.m_sKey.cstr() );

		int iRemap = tSorterSchema.GetAttrIndex ( sRemapCol.cstr() );
		if ( iRemap==-1 )
		{
			// also try the lowercase variant of the remap column name
			CSphString sRemapLowercase = sRemapCol;
			sRemapLowercase.ToLower();
			iRemap = tSorterSchema.GetAttrIndex ( sRemapLowercase.cstr() );
		}

		if ( iRemap==-1 )
		{
			// no existing remap column: create one
			CSphColumnInfo tRemapCol ( sRemapCol.cstr(), SPH_ATTR_STRINGPTR );
			SetupRemapColJson ( tRemapCol, tState, dExtraExprs, i );
			iRemap = tSorterSchema.GetAttrsCount();
			tSorterSchema.AddAttr ( tRemapCol, true );
		}

		tState.m_tLocator[i] = tSorterSchema.GetAttr(iRemap).m_tLocator;
		tState.m_dAttrs[i] = iRemap;
		tState.m_dRemapped.BitSet ( i );
	}
}
// materialize columnar sort expressions as sorter-schema attributes evaluated at presort
void QueueCreator_c::AddColumnarExprsAsAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs )
{
	assert ( m_pSorterSchema );
	auto & tSorterSchema = *m_pSorterSchema;

	for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
	{
		if ( tState.m_dRemapped.BitGet ( i ) )
			continue;

		ISphExpr * pExpr = dExtraExprs[i].m_pExpr;
		if ( !pExpr || !pExpr->IsColumnar() )
			continue;

		const CSphString & sAttrName = tSorterSchema.GetAttr ( tState.m_dAttrs[i] ).m_sName;
		CSphColumnInfo tRemapCol ( sAttrName.cstr(), dExtraExprs[i].m_eType );
		tRemapCol.m_eStage = SPH_EVAL_PRESORT;
		tRemapCol.m_pExpr = pExpr;
		tRemapCol.m_pExpr->AddRef(); // the schema column holds its own reference to the expression

		int iRemap = tSorterSchema.GetAttrsCount();
		tSorterSchema.AddAttr ( tRemapCol, true );

		// remove initial attribute from m_hExtra
		// that way it won't be evaluated twice when it is not in select list
		m_hExtra.Delete(sAttrName);

		// retarget the comparator at the new attribute
		tState.m_tLocator[i] = tSorterSchema.GetAttr ( iRemap ).m_tLocator;
		tState.m_dAttrs[i] = iRemap;
		tState.m_eKeypart[i] = Attr2Keypart ( dExtraExprs[i].m_eType );
		tState.m_dRemapped.BitSet ( i );
	}
}
  4876. void QueueCreator_c::RemapAttrs ( CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs )
  4877. {
  4878. // we have extra attrs (expressions) that we created while parsing the sort clause
  4879. // we couldn't add them to the schema at that stage,
  4880. // but now we can. we create attributes, assign internal names and set their expressions
  4881. assert ( m_pSorterSchema );
  4882. auto & tSorterSchema = *m_pSorterSchema;
  4883. int iNumOldAttrs = tSorterSchema.GetAttrsCount();
  4884. ReplaceGroupbyStrWithExprs ( tState, iNumOldAttrs );
  4885. ReplaceStaticStringsWithExprs ( tState );
  4886. ReplaceJsonWithExprs ( tState, dExtraExprs );
  4887. AddColumnarExprsAsAttrs ( tState, dExtraExprs );
  4888. // need another sort keys add after setup remap
  4889. if ( iNumOldAttrs!=tSorterSchema.GetAttrsCount() )
  4890. ExtraAddSortkeys ( tState.m_dAttrs );
  4891. }
// prepend "knn_dist() asc" to the sort clause when a knn-distance column exists
// and the clause does not already mention it
void QueueCreator_c::AddKnnDistSort ( CSphString & sSortBy )
{
	// NOTE(review): plain substring check; any sort clause merely containing "knn_dist"
	// would also suppress the prepend -- confirm that is acceptable
	if ( m_pSorterSchema->GetAttr ( GetKnnDistAttrName() ) && !strstr ( sSortBy.cstr(), "knn_dist" ) )
		sSortBy.SetSprintf ( "knn_dist() asc, %s", sSortBy.cstr() );
}
// matches sorting function
// picks m_eMatchFunc / m_tStateMatch according to the query sort mode;
// returns false with m_sError set on error
bool QueueCreator_c::SetupMatchesSortingFunc()
{
	m_bRandomize = false;

	// extended sort clause: "attr1 asc, attr2 desc, ..."
	if ( m_tQuery.m_eSort==SPH_SORT_EXTENDED )
	{
		CSphString sSortBy = m_tQuery.m_sSortBy;
		AddKnnDistSort ( sSortBy );

		ESortClauseParseResult eRes = sphParseSortClause ( m_tQuery, sSortBy.cstr(), *m_pSorterSchema, m_eMatchFunc, m_tStateMatch, m_dMatchJsonExprs, m_tSettings.m_bComputeItems, m_sError );
		if ( eRes==SORT_CLAUSE_ERROR )
			return false;

		if ( eRes==SORT_CLAUSE_RANDOM )
			m_bRandomize = true;

		ExtraAddSortkeys ( m_tStateMatch.m_dAttrs );
		AssignOrderByToPresortStage ( m_tStateMatch.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );
		RemapAttrs ( m_tStateMatch, m_dMatchJsonExprs );
		return true;
	}

	// sort by the precomputed @expr column, rowid as the secondary key
	if ( m_tQuery.m_eSort==SPH_SORT_EXPR )
	{
		m_tStateMatch.m_eKeypart[0] = SPH_KEYPART_INT;
		m_tStateMatch.m_tLocator[0] = m_pSorterSchema->GetAttr ( m_pSorterSchema->GetAttrIndex ( "@expr" ) ).m_tLocator;
		m_tStateMatch.m_eKeypart[1] = SPH_KEYPART_ROWID;
		m_tStateMatch.m_uAttrDesc = 1;
		m_eMatchFunc = FUNC_EXPR;
		return true;
	}

	// check sort-by attribute
	if ( m_tQuery.m_eSort!=SPH_SORT_RELEVANCE )
	{
		int iSortAttr = m_pSorterSchema->GetAttrIndex ( m_tQuery.m_sSortBy.cstr() );
		if ( iSortAttr<0 )
		{
			Err ( "sort-by attribute '%s' not found", m_tQuery.m_sSortBy.cstr() );
			return false;
		}

		const CSphColumnInfo & tAttr = m_pSorterSchema->GetAttr ( iSortAttr );
		m_tStateMatch.m_eKeypart[0] = Attr2Keypart ( tAttr.m_eAttrType );
		m_tStateMatch.m_tLocator[0] = tAttr.m_tLocator;
		m_tStateMatch.m_dAttrs[0] = iSortAttr;
		RemapAttrs ( m_tStateMatch, m_dMatchJsonExprs );
	}

	ExtraAddSortkeys ( m_tStateMatch.m_dAttrs );

	// find out what function to use and whether it needs attributes
	switch (m_tQuery.m_eSort )
	{
		case SPH_SORT_TIME_SEGMENTS: m_eMatchFunc = FUNC_TIMESEGS; break;
		case SPH_SORT_RELEVANCE: m_eMatchFunc = FUNC_REL_DESC; break;
		default:
			Err ( "unknown sorting mode %d", m_tQuery.m_eSort );
			return false;
	}

	return true;
}
// set up group sorting: parse the group order clause, register group-by / distinct
// source columns, and remap string/json sort keys
bool QueueCreator_c::SetupGroupSortingFunc ( bool bGotDistinct )
{
	assert ( m_bGotGroupby );
	CSphString sGroupOrderBy = m_tQuery.m_sGroupSortBy;
	// with the default "@weight desc" group order, let knn distance take precedence if present
	if ( sGroupOrderBy=="@weight desc" )
		AddKnnDistSort ( sGroupOrderBy );

	ESortClauseParseResult eRes = sphParseSortClause ( m_tQuery, sGroupOrderBy.cstr(), *m_pSorterSchema, m_eGroupFunc, m_tStateGroup, m_dGroupJsonExprs, m_tSettings.m_bComputeItems, m_sError );
	if ( eRes==SORT_CLAUSE_ERROR || eRes==SORT_CLAUSE_RANDOM )
	{
		if ( eRes==SORT_CLAUSE_RANDOM )
			m_sError = "groups can not be sorted by @random";
		return false;
	}

	ExtraAddSortkeys ( m_tStateGroup.m_dAttrs );

	// make sure the group-by source columns themselves are registered as extras
	if ( !m_tGroupSorterSettings.m_bImplicit )
	{
		for ( const auto & tGroupColumn : m_dGroupColumns )
			m_hExtra.Add ( m_pSorterSchema->GetAttr ( tGroupColumn.first ).m_sName );
	}

	// also register the COUNT(DISTINCT ...) source column
	if ( bGotDistinct )
	{
		m_dGroupColumns.Add ( { m_pSorterSchema->GetAttrIndex ( m_tQuery.m_sGroupDistinct.cstr() ), true } );
		assert ( m_dGroupColumns.Last().first>=0 );
		m_hExtra.Add ( m_pSorterSchema->GetAttr ( m_dGroupColumns.Last().first ).m_sName );
	}

	// implicit case
	CSphVector<int> dGroupByCols;
	for ( const auto & i : m_dGroupColumns )
		if ( i.second )
			dGroupByCols.Add ( i.first );

	AssignOrderByToPresortStage ( dGroupByCols.Begin(), dGroupByCols.GetLength() );
	AssignOrderByToPresortStage ( m_tStateGroup.m_dAttrs, CSphMatchComparatorState::MAX_ATTRS );

	// GroupSortBy str attributes setup
	RemapAttrs ( m_tStateGroup, m_dGroupJsonExprs );
	return true;
}
  4987. // set up aggregate filter for grouper
  4988. std::unique_ptr<ISphFilter> QueueCreator_c::CreateAggrFilter () const
  4989. {
  4990. assert ( m_bGotGroupby );
  4991. if ( m_pSorterSchema->GetAttr ( m_tSettings.m_pAggrFilter->m_sAttrName.cstr() ) )
  4992. return sphCreateAggrFilter ( m_tSettings.m_pAggrFilter, m_tSettings.m_pAggrFilter->m_sAttrName,
  4993. *m_pSorterSchema, m_sError );
  4994. // having might reference aliased attributes but @* attributes got stored without alias in sorter schema
  4995. CSphString sHaving;
  4996. for ( const auto & tItem : m_tQuery.m_dItems )
  4997. if ( tItem.m_sAlias==m_tSettings.m_pAggrFilter->m_sAttrName )
  4998. {
  4999. sHaving = tItem.m_sExpr;
  5000. break;
  5001. }
  5002. if ( sHaving=="groupby()" )
  5003. sHaving = "@groupby";
  5004. else if ( sHaving=="count(*)" )
  5005. sHaving = "@count";
  5006. return sphCreateAggrFilter ( m_tSettings.m_pAggrFilter, sHaving, *m_pSorterSchema, m_sError );
  5007. }
  5008. void QueueCreator_c::SetupCollation()
  5009. {
  5010. SphStringCmp_fn fnCmp = GetStringCmpFunc ( m_tQuery.m_eCollation );
  5011. m_tStateMatch.m_fnStrCmp = fnCmp;
  5012. m_tStateGroup.m_fnStrCmp = fnCmp;
  5013. }
// figure out the grouping flags (explicit/implicit/distinct), validate HAVING,
// and add the @-magic group-by columns to the sorter schema
bool QueueCreator_c::AddGroupbyStuff ()
{
	// need schema with group related columns however not need grouper
	m_bHeadWOGroup = ( m_tQuery.m_sGroupBy.IsEmpty () && m_tQuery.m_bFacetHead );
	auto fnIsImplicit = [] ( const CSphQueryItem & t )
	{
		return ( t.m_eAggrFunc!=SPH_AGGR_NONE ) || t.m_sExpr=="count(*)" || t.m_sExpr=="@distinct";
	};

	bool bHasImplicitGrouping = HasImplicitGrouping(m_tQuery);

	// count(*) and distinct wo group by at main query should keep implicit flag
	if ( bHasImplicitGrouping && m_bHeadWOGroup )
		m_bHeadWOGroup = !m_tQuery.m_dRefItems.any_of ( fnIsImplicit );

	if ( !SetupGroupbySettings(bHasImplicitGrouping) )
		return false;

	// or else, check in SetupGroupbySettings() would already fail
	m_bGotGroupby = !m_tQuery.m_sGroupBy.IsEmpty () || m_tGroupSorterSettings.m_bImplicit;
	m_bGotDistinct = !!m_tGroupSorterSettings.m_pDistinctFetcher;

	if ( m_bHasGroupByExpr && !m_bGotGroupby )
		return Err ( "GROUPBY() is allowed only in GROUP BY queries" );

	// check for HAVING constrains
	if ( !CheckHavingConstraints() )
		return false;

	// now let's add @groupby stuff, if necessary
	return MaybeAddGroupbyMagic(m_bGotDistinct);
}
// finalize group sorting: group order function, HAVING filter, distinct accuracy,
// and registration of the extra output columns
bool QueueCreator_c::SetGroupSorting()
{
	if ( m_bGotGroupby )
	{
		if ( !SetupGroupSortingFunc ( m_bGotDistinct ) )
			return false;

		// attach the HAVING filter, if any; the settings take raw ownership of it
		if ( m_tSettings.m_pAggrFilter && !m_tSettings.m_pAggrFilter->m_sAttrName.IsEmpty() )
		{
			auto pFilter = CreateAggrFilter ();
			if ( !pFilter )
				return false;
			m_tGroupSorterSettings.m_pAggrFilterTrait = pFilter.release();
		}

		int iDistinctAccuracyThresh = m_tQuery.m_bExplicitDistinctThresh ? m_tQuery.m_iDistinctThresh : GetDistinctThreshDefault();
		m_tGroupSorterSettings.SetupDistinctAccuracy ( iDistinctAccuracyThresh );
	}

	// propagate all extra columns into the query/extra output sets
	for ( auto & tIdx: m_hExtra )
	{
		m_hQueryColumns.Add ( tIdx.first );
		if ( m_pExtra )
			m_pExtra->Add ( tIdx.first );
	}

	return true;
}
  5063. bool QueueCreator_c::PredictAggregates() const
  5064. {
  5065. for ( int i = 0; i < m_pSorterSchema->GetAttrsCount(); i++ )
  5066. {
  5067. const CSphColumnInfo & tAttr = m_pSorterSchema->GetAttr(i);
  5068. if ( !(tAttr.m_eAggrFunc==SPH_AGGR_NONE || IsGroupbyMagic ( tAttr.m_sName ) || IsSortStringInternal ( tAttr.m_sName.cstr () )) )
  5069. return true;
  5070. }
  5071. return false;
  5072. }
  5073. int QueueCreator_c::ReduceMaxMatches() const
  5074. {
  5075. assert ( !m_bGotGroupby );
  5076. if ( m_tQuery.m_bExplicitMaxMatches || m_tQuery.m_bHasOuter || !m_tSettings.m_bComputeItems )
  5077. return Max ( m_tSettings.m_iMaxMatches, 1 );
  5078. return Max ( Min ( m_tSettings.m_iMaxMatches, m_tQuery.m_iLimit+m_tQuery.m_iOffset ), 1 );
  5079. }
  5080. int QueueCreator_c::AdjustMaxMatches ( int iMaxMatches ) const
  5081. {
  5082. assert ( m_bGotGroupby );
  5083. if ( m_tQuery.m_bExplicitMaxMatches || m_tSettings.m_bForceSingleThread )
  5084. return iMaxMatches;
  5085. int iGroupbyAttr = GetGroupbyAttrIndex();
  5086. if ( iGroupbyAttr<0 )
  5087. return iMaxMatches;
  5088. int iCountDistinct = m_tSettings.m_fnGetCountDistinct ? m_tSettings.m_fnGetCountDistinct ( m_pSorterSchema->GetAttr(iGroupbyAttr).m_sName ) : -1;
  5089. if ( iCountDistinct > m_tQuery.m_iMaxMatchThresh )
  5090. return iMaxMatches;
  5091. return Max ( iCountDistinct, iMaxMatches );
  5092. }
  5093. bool QueueCreator_c::CanCalcFastCountDistinct() const
  5094. {
  5095. bool bHasAggregates = PredictAggregates();
  5096. return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
  5097. }
  5098. bool QueueCreator_c::CanCalcFastCountFilter() const
  5099. {
  5100. bool bHasAggregates = PredictAggregates();
  5101. return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.GetLength()==1 && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
  5102. }
  5103. bool QueueCreator_c::CanCalcFastCount() const
  5104. {
  5105. bool bHasAggregates = PredictAggregates();
  5106. return !bHasAggregates && m_tGroupSorterSettings.m_bImplicit && !m_tGroupSorterSettings.m_bDistinct && m_tQuery.m_dFilters.IsEmpty() && m_tQuery.m_sQuery.IsEmpty() && m_tQuery.m_sKNNAttr.IsEmpty();
  5107. }
// collect counts that can be answered from index metadata without scanning;
// fields left at their defaults mean "not available"
Precalculated_t QueueCreator_c::FetchPrecalculatedValues() const
{
	Precalculated_t tPrecalc;

	if ( CanCalcFastCountDistinct() )
	{
		int iCountDistinctAttr = GetGroupDistinctAttrIndex();
		// NOTE(review): '>0' skips attribute index 0; if index 0 is reserved that is fine,
		// otherwise this looks like it should be '>=0' -- confirm
		if ( iCountDistinctAttr>0 && m_tSettings.m_bEnableFastDistinct )
			tPrecalc.m_iCountDistinct = m_tSettings.m_fnGetCountDistinct ? m_tSettings.m_fnGetCountDistinct ( m_pSorterSchema->GetAttr(iCountDistinctAttr).m_sName ) : -1;
	}

	if ( CanCalcFastCountFilter() )
		tPrecalc.m_iCountFilter = m_tSettings.m_fnGetCountFilter ? m_tSettings.m_fnGetCountFilter ( m_tQuery.m_dFilters[0] ) : -1;

	if ( CanCalcFastCount() )
		tPrecalc.m_iCount = m_tSettings.m_fnGetCount ? m_tSettings.m_fnGetCount() : -1;

	return tPrecalc;
}
// create the actual sorter object matching the accumulated settings;
// returns nullptr when no suitable plain sorter could be created
ISphMatchSorter * QueueCreator_c::SpawnQueue()
{
	bool bNeedFactors = !!(m_uPackedFactorFlags & SPH_FACTOR_ENABLE);

	// grouping queries get a grouping sorter
	if ( m_bGotGroupby )
	{
		m_tGroupSorterSettings.m_bGrouped = m_tSettings.m_bGrouped;
		m_tGroupSorterSettings.m_iMaxMatches = AdjustMaxMatches ( m_tGroupSorterSettings.m_iMaxMatches );
		if ( m_pProfile )
			m_pProfile->m_iMaxMatches = m_tGroupSorterSettings.m_iMaxMatches;

		Precalculated_t tPrecalc = FetchPrecalculatedValues();
		return sphCreateSorter1st ( m_eMatchFunc, m_eGroupFunc, &m_tQuery, m_tGroupSorterSettings, bNeedFactors, PredictAggregates(), tPrecalc );
	}

	// unlimited select streamed straight into an SQL row buffer
	if ( m_tQuery.m_iLimit == -1 && m_tSettings.m_pSqlRowBuffer )
		return new DirectSqlQueue_c ( m_tSettings.m_pSqlRowBuffer, m_tSettings.m_ppOpaque1, m_tSettings.m_ppOpaque2, std::move (m_tSettings.m_dCreateSchema) );

	// plain collection into a caller-provided container
	if ( m_tSettings.m_pCollection )
		return new CollectQueue_c ( m_tSettings.m_iMaxMatches, *m_tSettings.m_pCollection );

	// regular plain sorter (optionally wrapped into a columnar proxy)
	int iMaxMatches = ReduceMaxMatches();
	if ( m_pProfile )
		m_pProfile->m_iMaxMatches = iMaxMatches;

	ISphMatchSorter * pResult = CreatePlainSorter ( m_eMatchFunc, m_tQuery.m_bSortKbuffer, iMaxMatches, bNeedFactors );
	if ( !pResult )
		return nullptr;

	return CreateColumnarProxySorter ( pResult, iMaxMatches, *m_pSorterSchema, m_tStateMatch, m_eMatchFunc, bNeedFactors, m_tSettings.m_bComputeItems, m_bMulti );
}
  5147. bool QueueCreator_c::SetupComputeQueue ()
  5148. {
  5149. return MaybeAddGeodistColumn ()
  5150. && AddKNNDistColumn()
  5151. && MaybeAddExprColumn ()
  5152. && MaybeAddExpressionsFromSelectList ()
  5153. && AddExpressionsForUpdates();
  5154. }
  5155. bool QueueCreator_c::SetupGroupQueue ()
  5156. {
  5157. return AddGroupbyStuff ()
  5158. && SetupMatchesSortingFunc ()
  5159. && SetGroupSorting ();
  5160. }
// when several stored columnar attributes are evaluated at the FINAL stage,
// fetch them from docstore instead of columnar storage
bool QueueCreator_c::ConvertColumnarToDocstore()
{
	// don't use docstore (need to try to keep schemas similar for multiquery to work)
	if ( m_tQuery.m_bFacet || m_tQuery.m_bFacetHead )
		return true;

	// check for columnar attributes that have FINAL eval stage
	// if we have more than 1 of such attributes (and they are also stored), we replace columnar expressions with docstore expressions
	CSphVector<int> dStoredColumnar;
	auto & tSchema = *m_pSorterSchema;
	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
	{
		auto & tAttr = tSchema.GetAttr(i);
		bool bStored = false;
		bool bColumnar = tAttr.m_pExpr && tAttr.m_pExpr->IsColumnar(&bStored);
		if ( bColumnar && bStored && tAttr.m_eStage==SPH_EVAL_FINAL )
			dStoredColumnar.Add(i);
	}

	// 0 or 1 such attributes: keep fetching from columnar storage
	if ( dStoredColumnar.GetLength()<=1 )
		return true;

	// swap each columnar fetch expression for a docstore fetch of the same attribute
	for ( auto i : dStoredColumnar )
	{
		auto & tAttr = const_cast<CSphColumnInfo&>( tSchema.GetAttr(i) );
		CSphString sColumnarAttrName;
		tAttr.m_pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &sColumnarAttrName );
		tAttr.m_pExpr = CreateExpr_GetStoredAttr ( sColumnarAttrName, tAttr.m_eAttrType );
	}

	return true;
}
  5189. bool QueueCreator_c::SetupQueue ()
  5190. {
  5191. return SetupComputeQueue ()
  5192. && SetupGroupQueue ()
  5193. && ConvertColumnarToDocstore();
  5194. }
// build the sorter: spawn the queue, attach the schema and comparator states,
// and seed the rng for @random sorting; returns nullptr on failure (error set via Err)
ISphMatchSorter * QueueCreator_c::CreateQueue ()
{
	SetupCollation();

	// a facet head without its own grouping drops the implicit-group flags
	if ( m_bHeadWOGroup && m_tGroupSorterSettings.m_bImplicit )
	{
		m_tGroupSorterSettings.m_bImplicit = false;
		m_bGotGroupby = false;
	}

	///////////////////
	// spawn the queue
	///////////////////

	ISphMatchSorter * pTop = SpawnQueue();
	if ( !pTop )
	{
		Err ( "internal error: unhandled sorting mode (match-sort=%d, group=%d, group-sort=%d)", m_eMatchFunc, m_bGotGroupby, m_eGroupFunc );
		return nullptr;
	}

	assert ( pTop );
	pTop->SetSchema ( m_pSorterSchema.release(), false ); // the sorter takes ownership of the schema
	pTop->SetState ( m_tStateMatch );
	pTop->SetGroupState ( m_tStateGroup );
	pTop->SetRandom ( m_bRandomize );
	if ( !m_bHaveStar && m_hQueryColumns.GetLength() )
		pTop->SetFilteredAttrs ( m_hQueryColumns, m_tSettings.m_bNeedDocids || m_bExprsNeedDocids );

	// seed the rng: explicit non-negative seed, or auto
	if ( m_bRandomize )
	{
		if ( m_tQuery.m_iRandSeed>=0 )
			sphSrand ( (DWORD)m_tQuery.m_iRandSeed );
		else
			sphAutoSrand();
	}

	return pTop;
}
  5228. static void ResetRemaps ( CSphMatchComparatorState & tState )
  5229. {
  5230. for ( int i = 0; i<CSphMatchComparatorState::MAX_ATTRS; i++ )
  5231. {
  5232. if ( tState.m_dRemapped.BitGet ( i ) && tState.m_eKeypart[i]==SPH_KEYPART_STRINGPTR )
  5233. tState.m_dRemapped.BitClear ( i );
  5234. }
  5235. }
  5236. bool QueueCreator_c::SetSchemaGroupQueue ( const CSphRsetSchema & tNewSchema )
  5237. {
  5238. // need to reissue remap but with existed attributes
  5239. ResetRemaps ( m_tStateMatch );
  5240. ResetRemaps ( m_tStateGroup );
  5241. *m_pSorterSchema = tNewSchema;
  5242. return SetupGroupQueue();
  5243. }
  5244. static ISphMatchSorter * CreateQueue ( QueueCreator_c & tCreator, SphQueueRes_t & tRes )
  5245. {
  5246. ISphMatchSorter * pSorter = tCreator.CreateQueue ();
  5247. tRes.m_bZonespanlist = tCreator.m_bZonespanlist;
  5248. tRes.m_uPackedFactorFlags = tCreator.m_uPackedFactorFlags;
  5249. return pSorter;
  5250. }
// returns true when the select list contains anything that needs expression evaluation,
// i.e. any item that is not '*', not a plain un-aliased schema attribute, and not a groupby magic name
bool sphHasExpressions ( const CSphQuery & tQuery, const CSphSchema & tSchema )
{
	return !tQuery.m_dItems.all_of ( [&tSchema] ( const CSphQueryItem& tItem )
	{
		const CSphString & sExpr = tItem.m_sExpr;
		// all expressions that come from parser are automatically aliased
		assert ( !tItem.m_sAlias.IsEmpty() );
		return sExpr=="*"
			|| ( tSchema.GetAttrIndex ( sExpr.cstr() )>=0 && tItem.m_eAggrFunc==SPH_AGGR_NONE && tItem.m_sAlias==sExpr )
			|| IsGroupbyMagic ( sExpr );
	});
}
  5263. int GetAliasedAttrIndex ( const CSphString & sAttr, const CSphQuery & tQuery, const ISphSchema & tSchema )
  5264. {
  5265. int iAttr = tSchema.GetAttrIndex ( sAttr.cstr() );
  5266. if ( iAttr>=0 )
  5267. return iAttr;
  5268. // try aliased groupby attr (facets)
  5269. ARRAY_FOREACH ( i, tQuery.m_dItems )
  5270. {
  5271. if ( sAttr==tQuery.m_dItems[i].m_sExpr )
  5272. return tSchema.GetAttrIndex ( tQuery.m_dItems[i].m_sAlias.cstr() );
  5273. else if ( sAttr==tQuery.m_dItems[i].m_sAlias )
  5274. return tSchema.GetAttrIndex ( tQuery.m_dItems[i].m_sExpr.cstr() );
  5275. }
  5276. return iAttr;
  5277. }
// instantiate sorters for all creators flagged m_bCreate; in multi-sorter mode,
// assert (debug builds only) that all created sorters share the same attribute count
static void CreateSorters ( const VecTraits_T<CSphQuery> & dQueries, const VecTraits_T<ISphMatchSorter*> & dSorters, const VecTraits_T<QueueCreator_c> & dCreators, const VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes )
{
	ARRAY_FOREACH ( i, dCreators )
	{
		if ( !dCreators[i].m_bCreate )
			continue;

		dSorters[i] = CreateQueue ( dCreators[i], tRes );
		assert ( dSorters[i]!=nullptr );
	}

	// schema consistency check for multiquery mode (debug-only: asserts compile out in release)
	if ( tRes.m_bAlowMulti )
	{
		ISphMatchSorter * pSorter0 = nullptr;
		for ( int iCheck=0; iCheck<dSorters.GetLength(); ++iCheck )
		{
			if ( !dCreators[iCheck].m_bCreate )
				continue;

			assert ( dSorters[iCheck] );
			if ( !pSorter0 )
			{
				pSorter0 = dSorters[iCheck];
				continue;
			}

			assert ( dSorters[iCheck]->GetSchema()->GetAttrsCount()==pSorter0->GetSchema()->GetAttrsCount() );
		}
	}
}
// decide the effective match cutoff for the query; -1 means "no cutoff"
int ApplyImplicitCutoff ( const CSphQuery & tQuery, const VecTraits_T<ISphMatchSorter*> & dSorters, bool bFT )
{
	// precalc sorters answer from metadata, so a single match suffices
	bool bAllPrecalc = dSorters.GetLength() && dSorters.all_of ( []( auto pSorter ){ return pSorter->IsPrecalc(); } );
	if ( bAllPrecalc )
		return 1; // only need one match for precalc sorters

	// explicit cutoff wins; explicit 0 disables the cutoff entirely
	if ( tQuery.m_iCutoff>0 )
		return tQuery.m_iCutoff;

	if ( !tQuery.m_iCutoff )
		return -1;

	// this is the same as checking the sorters for disabled cutoff
	// but this works when sorters are not yet available (e.g. GetPseudoShardingMetric())
	if ( HasImplicitGrouping ( tQuery ) )
		return -1;

	bool bDisableCutoff = dSorters.any_of ( []( auto * pSorter ){ return pSorter->IsCutoffDisabled(); } );
	if ( bDisableCutoff )
		return -1;

	// implicit cutoff when there's no sorting and no grouping
	if ( !bFT && ( tQuery.m_sSortBy=="@weight desc" || tQuery.m_sSortBy.IsEmpty() ) && tQuery.m_sGroupBy.IsEmpty() && !tQuery.m_bFacet && !tQuery.m_bFacetHead )
		return tQuery.m_iLimit+tQuery.m_iOffset;

	return -1;
}
  5325. ISphMatchSorter * sphCreateQueue ( const SphQueueSettings_t & tQueue, const CSphQuery & tQuery, CSphString & sError, SphQueueRes_t & tRes, StrVec_t * pExtra, QueryProfile_c * pProfile )
  5326. {
  5327. QueueCreator_c tCreator ( tQueue, tQuery, sError, pExtra, pProfile );
  5328. if ( !tCreator.SetupQueue () )
  5329. return nullptr;
  5330. return CreateQueue ( tCreator, tRes );
  5331. }
// Prepare queue creators for a multi-query batch and try to merge their
// result-set schemas into one common schema so all sorters can share a single
// scan (the "multi-query" optimization).
// On the paths where merging is impossible, tRes.m_bAlowMulti is cleared and
// the creators are left with their individual schemas, so regular per-query
// sorters get built instead.
static void CreateMultiQueue ( RawVector_T<QueueCreator_c> & dCreators, const SphQueueSettings_t & tQueue, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter*> & dSorters, VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes, StrVec_t * pExtra, QueryProfile_c * pProfile )
{
	assert ( dSorters.GetLength()>1 );
	assert ( dSorters.GetLength()==dQueries.GetLength() );
	assert ( dSorters.GetLength()==dErrors.GetLength() );

	dCreators.Reserve_static ( dSorters.GetLength () );

	// creator #0 is set up in two separate steps so we can snapshot its schema
	// between them (tRefSchema below, taken before group-by attrs are added)
	dCreators.Emplace_back( tQueue, dQueries[0], dErrors[0], pExtra, pProfile );
	dCreators[0].m_bMulti = true;

	// same as SetupQueue
	dCreators[0].SetupComputeQueue ();
	// copy schema WO group by and internals
	CSphRsetSchema tRefSchema = dCreators[0].SorterSchema();
	bool bHasJson = dCreators[0].HasJson();
	bool bJsonMixed = false;

	dCreators[0].SetupGroupQueue ();

	// create rest of schemas
	for ( int i=1; i<dSorters.GetLength(); ++i )
	{
		// fill extra only for initial pass
		dCreators.Emplace_back ( tQueue, dQueries[i], dErrors[i], pExtra, pProfile );
		dCreators[i].m_bMulti = true;
		if ( !dCreators[i].SetupQueue () )
		{
			// failed setup: mark so CreateSorters() skips this slot (error kept in dErrors[i])
			dCreators[i].m_bCreate = false;
			continue;
		}

		// track whether some queries use JSON attrs and others don't
		bJsonMixed |= ( bHasJson!=dCreators[i].HasJson () );
		bHasJson |= dCreators[i].HasJson();
	}

	// FIXME!!! check attributes and expressions matches
	// cheap equality heuristic: same dynamic size and attr count, and no JSON mix
	bool bSame = !bJsonMixed;
	const auto& tSchema0 = dCreators[0].SorterSchema();
	for ( int i=1; i<dCreators.GetLength() && bSame; ++i )
	{
		const auto & tCur = dCreators[i].SorterSchema();
		bSame &= ( tSchema0.GetDynamicSize()==tCur.GetDynamicSize() && tSchema0.GetAttrsCount()==tCur.GetAttrsCount() );
	}

	// same schemes
	if ( bSame )
		return;

	// schemas differ: build a merged schema, starting from the pre-group-by snapshot
	CSphRsetSchema tMultiSchema = tRefSchema;

	int iMinGroups = INT_MAX;
	int iMaxGroups = 0;
	bool bHasMulti = false;
	ARRAY_FOREACH ( iSchema, dCreators )
	{
		if ( !dCreators[iSchema].m_bCreate )
			continue;

		int iGroups = 0;
		const CSphRsetSchema & tSchema = dCreators[iSchema].SorterSchema();
		for ( int iCol=0; iCol<tSchema.GetAttrsCount(); ++iCol )
		{
			const CSphColumnInfo & tCol = tSchema.GetAttr ( iCol );
			// only dynamic (computed) or columnar attrs participate in the merge
			if ( !tCol.m_tLocator.m_bDynamic && !tCol.IsColumnar() )
				continue;

			// count group-by magic attrs per schema; JSON-internal sort attrs
			// still fall through and get merged into the common schema
			if ( IsGroupbyMagic ( tCol.m_sName ) )
			{
				++iGroups;
				if ( !IsSortJsonInternal ( tCol.m_sName ))
					continue;
			}

			const CSphColumnInfo * pMultiCol = tMultiSchema.GetAttr ( tCol.m_sName.cstr() );
			if ( pMultiCol )
			{
				bool bDisable1 = false;
				bool bDisable2 = false;
				// no need to add attributes that already exists
				// (same type, and either both plain attrs or both exprs with equal hashes)
				if ( pMultiCol->m_eAttrType==tCol.m_eAttrType &&
					( ( !pMultiCol->m_pExpr && !tCol.m_pExpr ) ||
					( pMultiCol->m_pExpr && tCol.m_pExpr
						&& pMultiCol->m_pExpr->GetHash ( tMultiSchema, SPH_FNV64_SEED, bDisable1 )==tCol.m_pExpr->GetHash ( tSchema, SPH_FNV64_SEED, bDisable2 ) )
					) )
					continue;

				// no need to add a new column, but we need the same schema for the sorters
				if ( tCol.IsColumnar() && pMultiCol->IsColumnarExpr() )
				{
					bHasMulti = true;
					continue;
				}

				if ( !tCol.IsColumnarExpr() || !pMultiCol->IsColumnar() ) // need a new column
				{
					tRes.m_bAlowMulti = false; // if attr or expr differs need to create regular sorters and issue search WO multi-query
					return;
				}
			}

			// new attr: append to the merged schema and repoint its expression
			// locators from the per-query schema to the merged one
			bHasMulti = true;
			tMultiSchema.AddAttr ( tCol, true );
			if ( tCol.m_pExpr )
				tCol.m_pExpr->FixupLocator ( &tSchema, &tMultiSchema );
		}

		iMinGroups = Min ( iMinGroups, iGroups );
		iMaxGroups = Max ( iMaxGroups, iGroups );
	}

	// usual multi query should all have similar group by
	// (facet batches are exempt: the head query legitimately differs from facet queries)
	if ( iMinGroups!=iMaxGroups && !dQueries[0].m_bFacetHead && !dQueries[0].m_bFacet )
	{
		tRes.m_bAlowMulti = false;
		return;
	}

	// only group attributes differs - create regular sorters
	if ( !bHasMulti && !bJsonMixed )
		return;

	// setup common schemas
	for ( QueueCreator_c & tCreator : dCreators )
	{
		if ( !tCreator.m_bCreate )
			continue;

		if ( !tCreator.SetSchemaGroupQueue ( tMultiSchema ) )
			tCreator.m_bCreate = false;
	}
}
  5443. void sphCreateMultiQueue ( const SphQueueSettings_t & tQueue, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes, StrVec_t * pExtra, QueryProfile_c * pProfile )
  5444. {
  5445. RawVector_T<QueueCreator_c> dCreators;
  5446. CreateMultiQueue ( dCreators, tQueue, dQueries, dSorters, dErrors, tRes, pExtra, pProfile );
  5447. CreateSorters ( dQueries, dSorters, dCreators, dErrors, tRes );
  5448. }