소스 검색

implemented columnar storage (via columnar library)

Ilya Kuznetsov 5 년 전
부모
커밋
63d5d611db
100개의 변경된 파일, 8221개의 추가작업 그리고 3431개의 삭제작업
  1. 11 0
      CMakeLists.txt
  2. 4 2
      api/libsphinxclient/sphinxclient.c
  3. 113 0
      cmake/GetColumnar.cmake
  4. 6 0
      config/config_cmake.h.in
  5. 157 0
      libfastpfor/CMakeLists.txt
  6. 9 3
      src/CMakeLists.txt
  7. 31 11
      src/attribute.cpp
  8. 11 2
      src/attribute.h
  9. 699 0
      src/columnarfilter.cpp
  10. 35 0
      src/columnarfilter.h
  11. 173 0
      src/columnargrouper.cpp
  12. 24 0
      src/columnargrouper.h
  13. 216 0
      src/columnarlib.cpp
  14. 32 0
      src/columnarlib.h
  15. 65 28
      src/datareader.cpp
  16. 8 1
      src/datareader.h
  17. 2 1
      src/docstore.cpp
  18. 23 12
      src/dynamic_idx.cpp
  19. 910 0
      src/fileio.cpp
  20. 213 0
      src/fileio.h
  21. 14 2
      src/fileutils.cpp
  22. 7 3
      src/fileutils.h
  23. 90 1
      src/gtests/gtests_functions.cpp
  24. 6 6
      src/gtests/gtests_json.cpp
  25. 7 3
      src/gtests/gtests_rtstuff.cpp
  26. 1 1
      src/gtests/gtests_tokenizer.cpp
  27. 901 0
      src/histogram.cpp
  28. 72 0
      src/histogram.h
  29. 5 9
      src/index_converter.cpp
  30. 1 1
      src/indexcheck.cpp
  31. 14 3
      src/indexer.cpp
  32. 33 4
      src/indexsettings.cpp
  33. 12 0
      src/indexsettings.h
  34. 15 4
      src/indextool.cpp
  35. 45 0
      src/libutils.cpp
  36. 45 0
      src/libutils.h
  37. 217 0
      src/memio.cpp
  38. 86 0
      src/memio.h
  39. 74 0
      src/queryprofile.cpp
  40. 118 0
      src/queryprofile.h
  41. 204 161
      src/searchd.cpp
  42. 2 1
      src/searchdaemon.h
  43. 9 9
      src/searchdconfig.cpp
  44. 1 1
      src/searchdconfig.h
  45. 4 1
      src/searchdexpr.cpp
  46. 1 1
      src/searchdexpr.h
  47. 3 2
      src/searchdhttp.cpp
  48. 1 1
      src/searchdreplication.cpp
  49. 7 5
      src/searchdsql.cpp
  50. 260 558
      src/secondaryindex.cpp
  51. 14 56
      src/secondaryindex.h
  52. 490 0
      src/sortsetup.cpp
  53. 72 0
      src/sortsetup.h
  54. 235 1006
      src/sphinx.cpp
  55. 69 232
      src/sphinx.h
  56. 3 1
      src/sphinxexcerpt.cpp
  57. 562 14
      src/sphinxexpr.cpp
  58. 21 3
      src/sphinxexpr.h
  59. 16 0
      src/sphinxexpr.y
  60. 647 194
      src/sphinxfilter.cpp
  61. 222 13
      src/sphinxfilter.h
  62. 39 554
      src/sphinxint.h
  63. 1 0
      src/sphinxjsonquery.h
  64. 1 49
      src/sphinxplugin.cpp
  65. 29 18
      src/sphinxpq.cpp
  66. 130 173
      src/sphinxrt.cpp
  67. 3 4
      src/sphinxrt.h
  68. 2 3
      src/sphinxsearch.cpp
  69. 373 232
      src/sphinxsort.cpp
  70. 194 0
      src/sphinxsort.h
  71. 36 5
      src/sphinxstd.h
  72. 11 0
      src/sphinxutils.cpp
  73. 3 1
      src/sphinxversion.cpp
  74. 3 1
      src/testrt.cpp
  75. 7 1
      test/helpers.inc
  76. 7 0
      test/settings.inc
  77. 1 1
      test/test_020/test.xml
  78. 0 0
      test/test_039/model.bin
  79. 6 6
      test/test_039/test.xml
  80. 0 0
      test/test_041/model.bin
  81. 1 1
      test/test_048/model.bin
  82. 1 1
      test/test_048/test.xml
  83. 0 0
      test/test_057/model.bin
  84. 12 12
      test/test_057/test.xml
  85. 0 0
      test/test_066/model.bin
  86. 1 1
      test/test_066/test.xml
  87. 1 1
      test/test_067/test.xml
  88. 1 1
      test/test_068/test.xml
  89. 0 0
      test/test_087/model.bin
  90. 3 3
      test/test_087/test.xml
  91. 1 1
      test/test_088/model.bin
  92. 1 1
      test/test_088/test.xml
  93. 0 0
      test/test_092/model.bin
  94. 1 1
      test/test_092/test.xml
  95. 0 0
      test/test_104/model.bin
  96. 1 1
      test/test_117/model.bin
  97. 1 1
      test/test_117/test.xml
  98. 1 1
      test/test_118/model.bin
  99. 1 1
      test/test_118/test.xml
  100. 0 0
      test/test_119/model.bin

+ 11 - 0
CMakeLists.txt

@@ -307,6 +307,8 @@ else ( WIN32 )
 			add_definitions ( "-DNDEBUG" )
 		endif ()
 
+		set ( CFLAGS "${CFLAGS} -msse4.1" )
+
 		option ( STATIC_BINARY "Produce statically linked ELF" OFF)
 		if ( STATIC_BINARY )
 			set ( CFLAGS "${CFLAGS} -static")
@@ -596,6 +598,15 @@ if ( WITH_ICU )
 	endif()
 endif ( WITH_ICU )
 
+# Check for Columnar build
+message ( STATUS "Option WITH_COLUMNAR ${WITH_COLUMNAR}" )
+option ( WITH_COLUMNAR "compile with Columnar library support" ON )
+if ( WITH_COLUMNAR )
+	include ( GetColumnar )
+	if (HAVE_COLUMNAR)
+		set ( USE_COLUMNAR 1 )
+	endif ()
+endif ( WITH_COLUMNAR )
 
 find_package(Valgrind)
 

+ 4 - 2
api/libsphinxclient/sphinxclient.c

@@ -950,8 +950,10 @@ sphinx_bool sphinx_set_query_flags ( sphinx_client * client, const char * flag_n
 
 	if ( strcmp ( flag_name, "reverse_scan")==0 )
 	{
-		set_bit ( &client->query_flags, 0, enabled );
-	} else if ( strcmp ( flag_name, "sort_method_kbuffer")==0 )
+		set_error ( client, "reverse_scan is deprecated" );
+		return SPH_FALSE;
+	}
+	else if ( strcmp ( flag_name, "sort_method_kbuffer")==0 )
 	{
 			set_bit ( &client->query_flags, 1, enabled );
 	} else if ( strcmp ( flag_name, "max_predicted_time")==0 )

+ 113 - 0
cmake/GetColumnar.cmake

@@ -0,0 +1,113 @@
+set(HAVE_COLUMNAR 0) # will be overridden later
+
+include(update_bundle)
+
+set (COLUMNARNAME "columnar")
+set (COLUMNAR_BUNDLEZIP "${COLUMNARNAME}.zip")
+set (COLUMNAR_GITHUB "https://github.com/manticoresoftware/columnar/archive/master.zip")
+
+function(check_imported FOUND BINDIR)
+	if (NOT EXISTS "${BINDIR}/columnar-targets.cmake")
+		return()
+	endif()
+
+	include("${BINDIR}/columnar-targets.cmake")
+	string(TOUPPER "${CMAKE_BUILD_TYPE}" UPB)
+	get_target_property(LBB columnar LOCATION_${UPB})
+	if (NOT EXISTS ${LBB})
+		diags("not exists ${LBB}")
+		return()
+	endif ()
+
+	get_filename_component(BUILDPATH ${LBB} PATH)
+	get_filename_component(BUILDNAME ${LBB} NAME)
+
+	diags ("COLUMNAR_LIBRARY -> ${LBB}")
+
+	set (COLUMNAR_LIBRARY "${LBB}" PARENT_SCOPE)
+	set (COLUMNAR_LIBDIR "${BUILDPATH}" PARENT_SCOPE)
+	set (COLUMNAR_SONAME "${BUILDNAME}" PARENT_SCOPE)
+	set (${FOUND} 1 PARENT_SCOPE)
+endfunction()
+
+function(columnar_install)
+	if (NOT HAVE_COLUMNAR)
+		return()
+	endif()
+    if ( APPLE )
+        set ( COLUMNAR_PATH "${BINPREFIX}lib" )
+    else()
+        set ( COLUMNAR_PATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" )
+    endif()
+	if (COLUMNAR_LIBRARY)
+		# can't make 'true install' for imported target; only file installation available
+		diags ("Install columnar as usual file from imported target")
+		install(PROGRAMS ${COLUMNAR_LIBRARY} DESTINATION "${COLUMNAR_PATH}" COMPONENT applications )
+	else()
+		diags("Install columnar as usual file from imported target")
+		install(TARGETS columnar_manticore LIBRARY DESTINATION "${COLUMNAR_PATH}" COMPONENT applications NAMELINK_SKIP)
+	endif()
+endfunction()
+
+if ( NOT DEFINED COLUMNAR_DEST )
+	message ( STATUS "COLUMNAR_DEST not set; not using columnar" )
+	set ( HAVE_COLUMNAR 0 )
+	return()
+endif()
+
+get_buildd(COLUMNAR_BUILD ${COLUMNARNAME})
+
+# first check 'lazy' case - build from previous run
+diags("first check 'lazy' case - build from previous run ${MANTICORE_BINARY_DIR}/columnar/${COLUMNAR_SONAME}")
+check_imported (HAVE_COLUMNAR "${MANTICORE_BINARY_DIR}/columnar")
+if (HAVE_COLUMNAR)
+	diags("Use 'lazy' prebuilt columnar from previous build ${COLUMNAR_LIBDIR}")
+	return() # we're done
+endif ()
+
+# check build in common cache
+diags("check build in common cache ${COLUMNAR_BUILD}/${COLUMNAR_SONAME}")
+check_imported(HAVE_COLUMNAR "${COLUMNAR_BUILD}")
+if (HAVE_COLUMNAR)
+	diags("Use cached prebuilt columnar from bundle ${COLUMNAR_LIBDIR}")
+	return() # we're done
+endif ()
+
+# packed build in the bundle, as bundle/columnar-cmake-3.x-5.7-darwin-x86_64.tar.gz
+get_platformed_named (COLUMNAR_PLATFORM_BUILD "${COLUMNARNAME}")
+diags("packet build in the bundle ${LIBS_BUNDLE}/${COLUMNAR_PLATFORM_BUILD}.tar.gz")
+if (EXISTS "${LIBS_BUNDLE}/${COLUMNAR_PLATFORM_BUILD}.tar.gz")
+	set(COLUMNAR_LIBDIR "${COLUMNAR_BINARY_DIR}/columnar")
+	fetch_and_unpack(columnar_lib "${LIBS_BUNDLE}/${COLUMNAR_PLATFORM_BUILD}.tar.gz" "${COLUMNAR_LIBDIR}")
+	check_imported(HAVE_COLUMNAR "${COLUMNAR_LIBDIR}")
+	if (HAVE_COLUMNAR)
+		diags("Use cached prebuilt columnar from bundled archive ${COLUMNAR_LIBDIR}")
+		return() # we're done
+	endif ()
+endif ()
+
+# finally set up build from sources
+populate(COLUMNAR_PLACE ${COLUMNARNAME} "${LIBS_BUNDLE}/${COLUMNAR_BUNDLEZIP}" ${COLUMNAR_GITHUB})
+get_srcpath(COLUMNAR_SRC ${COLUMNARNAME})
+
+diags("check if src folder is empty")
+if (NOT EXISTS "${COLUMNAR_SRC}/CMakeLists.txt")
+	diags("need to fetch sources from ${COLUMNAR_PLACE} to ${COLUMNAR_SRC}")
+	fetch_and_unpack(columnar ${COLUMNAR_PLACE} ${COLUMNAR_SRC})
+endif ()
+
+if (EXISTS "${COLUMNAR_SRC}/CMakeLists.txt")
+	if ( DEFINED COLUMNAR_DEST )
+		if ( MSVC )
+			set ( COLUMNAR_PATH "${COLUMNAR_DEST}/columnar.dll" )
+		else ()
+			set ( COLUMNAR_PATH "${COLUMNAR_DEST}/libcolumnar.so" )
+		endif ()
+		message ( STATUS "COLUMNAR_PATH is set to ${COLUMNAR_PATH}" )
+	endif ()
+
+	set(COLUMNAR_LIBDIR "${COLUMNAR_BUILD}")
+	add_subdirectory(${COLUMNAR_SRC} ${COLUMNAR_BUILD})
+	include_directories ( ${COLUMNAR_SRC} )
+	set(HAVE_COLUMNAR 1)
+endif()

+ 6 - 0
config/config_cmake.h.in

@@ -176,6 +176,12 @@
 /* ICU library support */
 #cmakedefine USE_ICU ${USE_ICU}
 
+/* columnar library support */
+#cmakedefine USE_COLUMNAR ${USE_COLUMNAR}
+
+/* default path to columnar shared library */
+#cmakedefine COLUMNAR_PATH "${COLUMNAR_PATH}"
+
 /* define to use Alexandresku's loki small obj allocator for movable attrs */
 #cmakedefine USE_SMALLALLOC ${USE_SMALLALLOC}
 

+ 157 - 0
libfastpfor/CMakeLists.txt

@@ -0,0 +1,157 @@
+# This code is released under the
+# Apache License Version 2.0 http://www.apache.org/licenses/.
+#
+# Copyright (c) 2012 Louis Dionne
+#
+cmake_minimum_required(VERSION 3.0)
+project(FastPFOR CXX C)
+set(CMAKE_CXX_STANDARD 14)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
+include(AppendCompilerFlags)
+include(DetectCPUFeatures)
+
+#
+# Runs compiler with "-dumpversion" and parses major/minor
+# version with a regex.
+#
+# Taken&Modified from Boost.cmake
+#
+function(CXX_COMPILER_DUMPVERSION _OUTPUT_VERSION)
+
+  exec_program(${CMAKE_CXX_COMPILER}
+    ARGS ${CMAKE_CXX_COMPILER_ARG1} -dumpversion
+    OUTPUT_VARIABLE COMPILER_VERSION
+  )
+  #string(REGEX REPLACE "([0-9])\\.([0-9])(\\.[0-9])?" "\\1\\2"
+  #   COMPILER_VERSION ${COMPILER_VERSION})
+
+  set(${_OUTPUT_VERSION} ${COMPILER_VERSION} PARENT_SCOPE)
+endfunction()
+
+if(NOT WIN32)
+    CXX_COMPILER_DUMPVERSION(CXX_COMPILER_VERSION)
+endif()
+
+MESSAGE( STATUS "CMAKE_SIZEOF_VOID_P (should be 8): " ${CMAKE_SIZEOF_VOID_P} )
+if ( NOT CMAKE_SIZEOF_VOID_P EQUAL 8 )
+   MESSAGE( STATUS "Please use a 64-bit system. " )
+endif()
+if( SUPPORT_SSE42 )
+    MESSAGE( STATUS "SSE 4.2 support detected" )
+else()
+    MESSAGE( STATUS "SSE 4.2 support not detected" )
+endif()
+
+set ( FastPFOR_SRCS
+    src/bitpacking.cpp
+    src/bitpackingaligned.cpp
+    src/bitpackingunaligned.cpp
+#    src/horizontalbitpacking.cpp
+    src/simdunalignedbitpacking.cpp
+    src/simdbitpacking.cpp
+    src/varintdecode.c
+    src/streamvbyte.c
+)
+
+set ( FastPFOR_HEADERS
+    headers/bitpacking.h
+    headers/bitpackingaligned.h
+    headers/bitpackinghelpers.h
+    headers/bitpackingunaligned.h
+    headers/blockpacking.h
+    headers/codecfactory.h
+    headers/codecs.h
+    headers/common.h
+    headers/compositecodec.h
+    headers/cpubenchmark.h
+    headers/csv.h
+    headers/deltautil.h
+    headers/entropy.h
+    headers/externalvector.h
+    headers/fastpfor.h
+    headers/horizontalbitpacking.h
+    headers/maropuparser.h
+    headers/memutil.h
+    headers/mersenne.h
+    headers/newpfor.h
+    headers/optpfor.h
+    headers/packingvectors.h
+    headers/pfor.h
+    headers/pfor2008.h
+    headers/rolledbitpacking.h
+    headers/simdbinarypacking.h
+    headers/simdbitpacking.h
+    headers/simdfastpfor.h
+    headers/simdgroupsimple.h
+    headers/simdnewpfor.h
+    headers/simdoptpfor.h
+    headers/simdpfor.h
+    headers/simdvariablebyte.h
+    headers/simple8b.h
+    headers/simple8b_rle.h
+    headers/simple9.h
+    headers/simple9_rle.h
+    headers/simple16.h
+    headers/snappydelta.h
+    headers/streamvariablebyte.h
+    headers/stringutil.h
+    headers/synthetic.h
+    headers/usimdbitpacking.h
+    headers/util.h
+    headers/variablebyte.h
+    headers/VarIntG8IU.h
+    headers/varintgb.h
+    headers/vsencoding.h
+    headers/ztimer.h
+)
+
+add_library ( FastPFOR STATIC ${FastPFOR_SRCS} ${FastPFOR_HEADERS} )
+target_include_directories(FastPFOR PUBLIC headers)
+
+if (WIN32)
+	target_compile_options(FastPFOR PRIVATE "-wd4267")
+endif()
+
+set_target_properties(FastPFOR PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+
+if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
+    # require at least gcc 4.7
+    if (CXX_COMPILER_VERSION VERSION_LESS 4.7)
+        message(STATUS "GCC version must be at least 4.7!")
+    endif()
+    # Uncomment the following lines to see how the code compiles without AVX,SSE4.2 and/or SSE2
+    #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -lm  -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=x86-64")
+    #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -lm  -DNDEBUG -std=c++11 -DHAVE_CXX0X -march=core2")
+    #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast -lm  -DNDEBUG -std=c++11 -DHAVE_CXX0X -msse4.2")
+
+    #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wcast-align -Ofast -lm  -DNDEBUG -std=c++11 -DHAVE_CXX0X -msse41")
+    #set (CMAKE_CXX_FLAGS_DEBUG   "-Wall -Wcast-align -ggdb  -lm  -std=c++11 -DHAVE_CXX0X -msse41")
+    #set (CMAKE_C_FLAGS_RELEASE "-Wall -Wcast-align -Ofast -lm  -DNDEBUG -std=c99  -msse41")
+    #set (CMAKE_C_FLAGS_DEBUG   "-Wall -Wcast-align -ggdb  -lm  -std=c99 -msse41")
+
+	target_compile_options ( FastPFOR PRIVATE "-msse4.1")
+
+elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel")
+    if (CXX_COMPILER_VERSION VERSION_LESS 14.0.1)
+        message(STATUS "Intel version must be at least 14.0.1!")
+    endif()
+    set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Ofast  -DNDEBUG -std=c++11 -DHAVE_CXX0X  -march=native")
+    set (CMAKE_CXX_FLAGS_DEBUG   "-Wall -ggdb   -std=c++11 -DHAVE_CXX0X  -march=native")
+    set (CMAKE_C_FLAGS_RELEASE "-Wall -Ofast  -DNDEBUG -std=c99  -march=native")
+    set (CMAKE_C_FLAGS_DEBUG   "-Wall -ggdb   -std=c99  -march=native")
+elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL "AppleClang")
+    if (CXX_COMPILER_VERSION VERSION_LESS 4.2.1)
+        message(STATUS  "Clang version must be at least 4.2.1!" )
+    endif()
+    #set (CMAKE_CXX_FLAGS_RELEASE "-Wall -Wcast-align -O3 -DNDEBUG -std=c++11 -DHAVE_CXX0X -msse4.1 -march=native")
+    #set (CMAKE_CXX_FLAGS_DEBUG   "-Wall -Wcast-align -ggdb  -std=c++11 -DHAVE_CXX0X -msse4.1 -march=native")
+    #set (CMAKE_C_FLAGS_RELEASE "-Wall -Wcast-align -O3 -DNDEBUG -std=c99 -msse4.1 -march=native")
+    #set (CMAKE_C_FLAGS_DEBUG   "-Wall -Wcast-align -ggdb  -std=c99 -msse4.1 -march=native")
+
+	target_compile_options ( FastPFOR PRIVATE "-msse4.1")
+elseif(WIN32)
+    # TODO check MSVC versions?
+else ()
+    message(FATAL_ERROR "Please, use MSVC, GCC, Clang, or the Intel compiler!")
+endif()

+ 9 - 3
src/CMakeLists.txt

@@ -11,7 +11,7 @@ set ( LIBSPHINX_SRCS sphinx.cpp sphinxexcerpt.cpp
 		sphinxquery.cpp sphinxsoundex.cpp sphinxmetaphone.cpp
 		sphinxstemen.cpp sphinxstemru.cpp sphinxstemru.inl sphinxstemcz.cpp
 		sphinxstemar.cpp sphinxutils.cpp sphinxstd.cpp
-		sphinxsort.cpp sphinxexpr.cpp sphinxfilter.cpp
+		sphinxsort.cpp sortsetup.cpp sphinxexpr.cpp sphinxfilter.cpp
 		sphinxsearch.cpp sphinxrt.cpp sphinxjson.cpp
 		sphinxaot.cpp sphinxplugin.cpp sphinxudf.c
 		sphinxqcache.cpp sphinxjsonquery.cpp jsonqueryfilter.cpp
@@ -20,7 +20,11 @@ set ( LIBSPHINX_SRCS sphinx.cpp sphinxexcerpt.cpp
 		searchdexpr.cpp snippetfunctor.cpp snippetindex.cpp snippetstream.cpp
 		snippetpassage.cpp threadutils.cpp sphinxversion.cpp indexcheck.cpp
 		datareader.cpp indexformat.cpp indexsettings.cpp fileutils.cpp coroutine.cpp
-		threads_detached.cpp hazard_pointer.cpp task_info.cpp mini_timer.cpp collation.cpp fnv64.cpp dynamic_idx.cpp )
+		threads_detached.cpp hazard_pointer.cpp task_info.cpp mini_timer.cpp
+		fileio.cpp memio.cpp queryprofile.cpp columnarfilter.cpp columnargrouper.cpp
+		columnarlib.cpp collation.cpp fnv64.cpp histogram.cpp
+		threads_detached.cpp hazard_pointer.cpp task_info.cpp mini_timer.cpp
+		dynamic_idx.cpp libutils.cpp )
 set ( INDEXER_SRCS indexer.cpp )
 set ( INDEXTOOL_SRCS indextool.cpp )
 set ( SEARCHD_SRCS_TESTABLE searchdha.cpp http/http_parser.c searchdhttp.cpp
@@ -50,7 +54,9 @@ file ( GLOB HEADERS "sphinx*.h" )
 list ( APPEND HEADERS lz4/lz4.h lz4/lz4hc.h )
 list ( APPEND HEADERS http/http_parser.h )
 list ( APPEND HEADERS secondaryindex.h searchnode.h killlist.h attribute.h accumulator.h global_idf.h optional.h
-		event.h coroutine.h threadutils.h hazard_pointer.h task_info.h mini_timer.h collation.h fnv64.h dynamic_idx.h)
+		event.h coroutine.h threadutils.h hazard_pointer.h task_info.h mini_timer.h collation.h fnv64.h histogram.h
+		sortsetup.h dynamic_idx.h indexsettings.h columnarlib.h )
+list ( APPEND HEADERS fileio.h memio.h queryprofile.h columnarfilter.h columnargrouper.h fileutils.h libutils.h )
 file ( GLOB SEARCHD_H "searchd*.h" "task*.h" )
 list ( APPEND SEARCHD_H net_action_accept.h netreceive_api.h netreceive_http.h
 		netreceive_ql.h netstate_api.h networking_daemon.h optional.h query_status.h compressed_mysql.h sphinxql_debug.h)

+ 31 - 11
src/attribute.cpp

@@ -177,6 +177,8 @@ BlobRowBuilder_File_c::BlobRowBuilder_File_c ( const ISphSchema & tSchema, SphOf
 	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
 	{
 		const CSphColumnInfo & tCol = tSchema.GetAttr(i);
+		if ( !sphIsBlobAttr(tCol) )
+			continue;
 
 		AttributePacker_i * pPacker = nullptr;
 		switch ( tCol.m_eAttrType )
@@ -277,10 +279,7 @@ SphOffset_t BlobRowBuilder_File_c::Flush ( const BYTE * pOldRow )
 bool BlobRowBuilder_File_c::Done ( CSphString & sError )
 {
 	SphOffset_t tTotalSize = m_tWriter.GetPos();
-	// FIXME!!! made single function from this mess as order matters here
-	m_tWriter.Flush(); // store collected data as SeekTo might got rid of buffer collected so far
-	m_tWriter.SeekTo ( 0 ); 
-	m_tWriter.PutOffset ( tTotalSize );
+	SeekAndPutOffset ( m_tWriter, 0, tTotalSize );
 	m_tWriter.SeekTo ( tTotalSize + m_tSpaceForUpdates, true );
 	m_tWriter.CloseFile();
 
@@ -416,7 +415,7 @@ BlobRowBuilder_MemUpdate_c::BlobRowBuilder_MemUpdate_c ( const ISphSchema & tSch
 	{
 		const CSphColumnInfo & tCol = tSchema.GetAttr(i);
 
-		if ( !dAttrsUpdated.BitGet(i) && sphIsBlobAttr ( tCol.m_eAttrType ) )
+		if ( !dAttrsUpdated.BitGet(i) && sphIsBlobAttr(tCol) )
 		{
 			m_dAttrs.Add ( new AttributePacker_c );
 			continue;
@@ -793,9 +792,9 @@ bool sphCheckBlobRow ( int64_t iOff, DebugCheckReader_i & tBlobs, const CSphSche
 	CSphVector<ESphAttr> dBlobAttrs;
 	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
 	{
-		ESphAttr eAttr = tSchema.GetAttr(i).m_eAttrType;
-		if ( sphIsBlobAttr(eAttr) )
-			dBlobAttrs.Add(eAttr);
+		const CSphColumnInfo & tAttr = tSchema.GetAttr(i);
+		if ( sphIsBlobAttr(tAttr) )
+			dBlobAttrs.Add ( tAttr.m_eAttrType );
 	}
 
 	int64_t iBlobsElemCount = tBlobs.GetLengthBytes();
@@ -908,7 +907,8 @@ const char * sphGetDocidName()
 	return g_sDocidName.cstr();
 }
 
-const CSphString &	sphGetDocidStr()
+
+const CSphString & sphGetDocidStr()
 {
 	return g_sDocidName;
 }
@@ -920,6 +920,21 @@ bool sphIsBlobAttr ( ESphAttr eAttr )
 }
 
 
+bool sphIsBlobAttr ( const CSphColumnInfo & tAttr )
+{
+	if ( tAttr.IsColumnar() )
+		return false;
+
+	return sphIsBlobAttr ( tAttr.m_eAttrType );
+}
+
+
+bool IsMvaAttr ( ESphAttr eAttr )
+{
+	return eAttr==SPH_ATTR_UINT32SET || eAttr==SPH_ATTR_INT64SET || eAttr==SPH_ATTR_UINT32SET_PTR || eAttr==SPH_ATTR_INT64SET_PTR;
+}
+
+
 //////////////////////////////////////////////////////////////////////////
 // data ptr attributes
 
@@ -928,14 +943,14 @@ int sphCalcPackedLength ( int iLengthBytes )
 	return sphCalcZippedLen(iLengthBytes) + iLengthBytes;
 }
 
-BYTE *				sphPackedBlob ( ByteBlob_t dBlob )
+BYTE * sphPackedBlob ( ByteBlob_t dBlob )
 {
 	if ( !dBlob.first ) return nullptr;
 	return const_cast<BYTE*>(dBlob.first-sphCalcZippedLen (dBlob.second));
 }
 
 
-// allocate buf and pack blob dBlob into it, return pointer to buf
+// allocate buf and pack blob tBlob into it, return pointer to buf
 BYTE * sphPackPtrAttr ( ByteBlob_t dBlob )
 {
 	if ( !dBlob.second )
@@ -1057,3 +1072,8 @@ void sphPackedMVA2Str ( const BYTE * pMVA, bool b64bit, StringBuilder_c & dStr )
 	auto dMVA = sphUnpackPtrAttr ( pMVA );
 	sphMVA2Str( dMVA, b64bit, dStr );
 }
+
+bool IsNotRealAttribute ( const CSphColumnInfo & tColumn )
+{
+	return tColumn.m_uFieldFlags & CSphColumnInfo::FIELD_STORED;
+}

+ 11 - 2
src/attribute.h

@@ -52,12 +52,12 @@ void				sphSetBlobRowOffset ( CSphRowitem * pDocinfo, int64_t iOffset );
 // fetches a attribute data and its length from the pool
 const BYTE *		sphGetBlobAttr ( const CSphMatch & tMatch, const CSphAttrLocator & tLocator, const BYTE * pBlobPool, int & iLengthBytes );
 
-ByteBlob_t sphGetBlobAttr ( const CSphMatch & tMatch, const CSphAttrLocator & tLocator, const BYTE * pBlobPool );
+ByteBlob_t			sphGetBlobAttr ( const CSphMatch & tMatch, const CSphAttrLocator & tLocator, const BYTE * pBlobPool );
 
 // same as above, but works with docinfo
 const BYTE *		sphGetBlobAttr ( const CSphRowitem * pDocinfo, const CSphAttrLocator & tLocator, const BYTE * pBlobPool, int & iLengthBytes );
 
-ByteBlob_t sphGetBlobAttr ( const CSphRowitem * pDocinfo, const CSphAttrLocator & tLocator, const BYTE * pBlobPool );
+ByteBlob_t			sphGetBlobAttr ( const CSphRowitem * pDocinfo, const CSphAttrLocator & tLocator, const BYTE * pBlobPool );
 
 // returns blob attribute length
 int					sphGetBlobAttrLen ( const CSphMatch & tMatch, const CSphAttrLocator & tLocator, const BYTE * pBlobPool );
@@ -87,6 +87,11 @@ const CSphString &	sphGetDocidStr();
 // returns true if this is a blob attr type
 bool				sphIsBlobAttr ( ESphAttr eAttr );
 
+// returns true if this is a blob attr type; returns false for columnar attrs
+bool				sphIsBlobAttr ( const CSphColumnInfo & tAttr );
+
+bool				IsMvaAttr ( ESphAttr eAttr );
+
 //////////////////////////////////////////////////////////////////////////
 // data ptr attributes
 
@@ -130,6 +135,10 @@ bool	sphIsInternalAttr ( const CSphColumnInfo & tCol );
 void	sphMVA2Str ( ByteBlob_t dMVA, bool b64bit, StringBuilder_c & dStr );
 void	sphPackedMVA2Str ( const BYTE * pMVA, bool b64bit, StringBuilder_c & dStr );
 
+/// check if tColumn is actually stored field (so, can't be used in filters/expressions)
+bool	IsNotRealAttribute ( const CSphColumnInfo & tColumn );
+
+
 inline DocID_t sphGetDocID ( const CSphRowitem * pData )
 {
 	assert ( pData );

+ 699 - 0
src/columnarfilter.cpp

@@ -0,0 +1,699 @@
+//
+// Copyright (c) 2020-2021, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "columnarfilter.h"
+#include "collation.h"
+#include "attribute.h"
+#include "sphinxint.h"
+
+#if USE_COLUMNAR
+
+ColumnarFilterTraits_c::ColumnarFilterTraits_c ( ISphExpr * pExpr )
+{
+	assert(pExpr);
+	pExpr->Command ( SPH_EXPR_GET_COLUMNAR_COL, &m_iColumnarCol );
+}
+
+
+void ColumnarFilterTraits_c::SetColumnarCol ( int iColumnarCol )
+{
+	m_iColumnarCol = iColumnarCol;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+class ColumnarFilter_c : public ISphFilter, public ColumnarFilterTraits_c
+{
+public:
+			ColumnarFilter_c ( const CSphString & sAttrName );
+
+	void	SetColumnarCol ( int iColumnarCol ) final;
+	void	SetColumnar ( const columnar::Columnar_i * pColumnar ) override;
+
+protected:
+	CSphString							m_sAttrName;
+	const columnar::Columnar_i *		m_pColumnar = nullptr;
+	CSphScopedPtr<columnar::Iterator_i>	m_pIterator {nullptr};
+
+	inline bool	GetValue ( RowID_t tRowID, SphAttr_t & tValue ) const;
+	inline bool	GetValue ( RowID_t tRowID, ByteBlob_t & tData ) const;
+};
+
+
+ColumnarFilter_c::ColumnarFilter_c ( const CSphString & sAttrName )
+	: m_sAttrName ( sAttrName )
+{}
+
+
+void ColumnarFilter_c::SetColumnar ( const columnar::Columnar_i * pColumnar )
+{
+	assert(pColumnar);
+	m_pColumnar = pColumnar;
+	std::string sError; // fixme! report errors
+	m_pIterator = pColumnar->CreateIterator ( m_sAttrName.cstr(), columnar::IteratorHints_t(), sError );
+}
+
+
+void ColumnarFilter_c::SetColumnarCol ( int iColumnarCol )
+{
+	ColumnarFilterTraits_c::SetColumnarCol(iColumnarCol);
+}
+
+
+bool ColumnarFilter_c::GetValue ( RowID_t tRowID, SphAttr_t & tValue ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo(tRowID) == tRowID )
+	{
+		tValue = m_pIterator->Get();
+		return true;
+	}
+
+	return false;
+}
+
+
+bool ColumnarFilter_c::GetValue ( RowID_t tRowID, ByteBlob_t & tData ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo(tRowID) == tRowID )
+	{
+		tData.second = m_pIterator->Get ( tData.first, false );
+		return true;
+	}
+
+	return false;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// direct access to columnar storage to avoid expression overhead
+class Filter_SingleValueColumnar_c : public ColumnarFilter_c
+{
+	using ColumnarFilter_c::ColumnarFilter_c;
+
+public:
+	void	SetValues ( const SphAttr_t * pStorage, int iCount ) final;
+	bool	Eval ( const CSphMatch & tMatch ) const override;
+	bool	Test ( const columnar::MinMaxVec_t & dMinMax ) const final;
+
+protected:
+	SphAttr_t		m_tRefValue;
+};
+
+
+void Filter_SingleValueColumnar_c::SetValues ( const SphAttr_t * pStorage, int DEBUGARG(iCount) )
+{
+	assert ( pStorage );
+	assert ( iCount==1 );
+	m_tRefValue = (*pStorage);
+}
+
+
+bool Filter_SingleValueColumnar_c::Eval ( const CSphMatch & tMatch ) const
+{
+	SphAttr_t tValue;
+	if ( !GetValue ( tMatch.m_tRowID, tValue ) )
+		return false;
+
+	return tValue==m_tRefValue;
+}
+
+
+bool Filter_SingleValueColumnar_c::Test ( const columnar::MinMaxVec_t & dMinMax ) const
+{
+	if ( m_iColumnarCol<0 )
+		return true;
+
+	return ( dMinMax[m_iColumnarCol].first<=m_tRefValue && m_tRefValue<=dMinMax[m_iColumnarCol].second );
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+class Filter_ValuesColumnar_c : public ColumnarFilter_c
+{
+	using ColumnarFilter_c::ColumnarFilter_c;
+
+public:
+	bool		Eval ( const CSphMatch & tMatch ) const final;
+	bool		Test ( const columnar::MinMaxVec_t & dMinMax ) const final;
+	void		SetValues ( const SphAttr_t * pValues, int iNumValues ) final;
+
+private:
+	VecTraits_T<const SphAttr_t>	m_dValues;
+	bool							m_bDegenerate = false;
+
+	bool (Filter_ValuesColumnar_c::*m_fnEval)( SphAttr_t tValue ) const = nullptr;
+	bool (Filter_ValuesColumnar_c::*m_fnEvalBlock)( SphAttr_t tMin, SphAttr_t tMax ) const = nullptr;
+
+	bool		EvalLinear ( SphAttr_t tValue ) const;
+	bool		EvalBinary ( SphAttr_t tValue ) const;
+	bool		EvalBlockLinear ( SphAttr_t uMin, SphAttr_t uMax ) const;
+	bool		EvalBlockBinary ( SphAttr_t uMin, SphAttr_t uMax ) const;
+	bool		IsDegenerate() const;
+};
+
+
+bool Filter_ValuesColumnar_c::Eval ( const CSphMatch & tMatch ) const
+{
+	if ( m_bDegenerate )
+		return true;
+	
+	SphAttr_t tValue;
+	if ( !GetValue ( tMatch.m_tRowID, tValue ) )
+		return false;
+
+	return (*this.*m_fnEval)(tValue);
+}
+
+
+bool Filter_ValuesColumnar_c::Test ( const columnar::MinMaxVec_t & dMinMax ) const
+{
+	if ( m_iColumnarCol<0 || m_bDegenerate )
+		return true;
+
+	return (*this.*m_fnEvalBlock)( dMinMax[m_iColumnarCol].first, dMinMax[m_iColumnarCol].second );
+}
+
+
+void Filter_ValuesColumnar_c::SetValues ( const SphAttr_t * pValues, int iNumValues )
+{
+	assert ( pValues );
+	assert ( iNumValues > 0 );
+
+#ifndef NDEBUG
+	for ( int i = 1; i < iNumValues; i++ )
+		assert ( pValues[i-1]<=pValues[i] );
+#endif
+
+	m_dValues = { pValues, iNumValues };
+
+	const int SEARCH_THRESH=128;
+	if ( iNumValues<SEARCH_THRESH )
+	{
+		m_fnEval = &Filter_ValuesColumnar_c::EvalLinear;
+		m_fnEvalBlock = &Filter_ValuesColumnar_c::EvalBlockLinear;
+	}
+	else
+	{
+		m_fnEval = &Filter_ValuesColumnar_c::EvalBinary;
+		m_fnEvalBlock = &Filter_ValuesColumnar_c::EvalBlockBinary;
+	}
+
+	m_bDegenerate = IsDegenerate();
+}
+
+
+bool Filter_ValuesColumnar_c::EvalLinear ( SphAttr_t tValue ) const
+{
+	for ( auto i : m_dValues )
+		if ( i==tValue )
+			return true;
+
+	return false;
+}
+
+
+bool Filter_ValuesColumnar_c::EvalBinary ( SphAttr_t tValue ) const
+{
+	return !!m_dValues.BinarySearch(tValue);
+}
+
+
+bool Filter_ValuesColumnar_c::EvalBlockLinear ( SphAttr_t uMin, SphAttr_t uMax ) const
+{
+	for ( auto i : m_dValues )
+		if ( uMin<=i && i<=uMax )
+			return true;
+
+	return false;
+}
+
+
+bool Filter_ValuesColumnar_c::EvalBlockBinary ( SphAttr_t uMin, SphAttr_t uMax ) const
+{
+	// find first value greater or equal than uMin
+	const SphAttr_t * pFound = sphBinarySearchFirst ( m_dValues.Begin(), m_dValues.End()-1, SphIdentityFunctor_T<SphAttr_t>(), uMin );
+	if ( *pFound<=uMax )
+		return true;
+
+	return false;
+}
+
+
+bool Filter_ValuesColumnar_c::IsDegenerate() const
+{
+	assert(m_pColumnar);
+
+	columnar::Filter_t tFilter;
+	tFilter.m_sName = m_sAttrName.cstr();
+	tFilter.m_eType = columnar::FilterType_e::VALUES;
+
+	int iNumValues = m_dValues.GetLength();
+	tFilter.m_dValues.resize(iNumValues);
+	if ( iNumValues )
+		memcpy ( &tFilter.m_dValues[0], m_dValues.Begin(), iNumValues*sizeof ( m_dValues[0] ) );
+
+	return m_pColumnar->IsFilterDegenerate(tFilter);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Columnar string filter: equality / inequality of a column value vs. a single
+// reference string. When the columnar iterator provides precomputed string
+// hashes (default collation only), comparison is done by hash; otherwise a
+// collation-aware byte comparison is used.
+class Filter_StringColumnar_c : public ColumnarFilter_c
+{
+public:
+			Filter_StringColumnar_c ( const CSphString & sAttrName, ESphCollation eCollation, bool bEquals );
+
+	void	SetRefString ( const CSphString * pRef, int iCount ) final;
+	bool	Eval ( const CSphMatch & tMatch ) const final;
+	void	SetColumnar ( const columnar::Columnar_i * pColumnar ) final;
+
+protected:
+	CSphString				m_sValue;				// reference string
+	int						m_iLength = 0;			// cached reference string length
+	uint64_t				m_uHash = 0;			// reference string hash (hash path only)
+	bool					m_bHash = false;		// compare by hash?
+	ESphCollation			m_eCollation;
+	SphStringCmp_fn			m_fnStrCmp = nullptr;	// collation-aware comparator
+	bool					m_bEquals = true;		// true for '=', false for '!='
+};
+
+
+Filter_StringColumnar_c::Filter_StringColumnar_c ( const CSphString & sAttrName, ESphCollation eCollation, bool bEquals )
+	: ColumnarFilter_c ( sAttrName )
+	, m_eCollation ( eCollation )
+	, m_fnStrCmp ( GetStringCmpFunc ( eCollation ) )
+	, m_bEquals ( bEquals )
+{}
+
+
+// Store the (single) reference string; precompute its length and hash.
+void Filter_StringColumnar_c::SetRefString ( const CSphString * pRef, int iCount )
+{
+	// string filters support at most one reference value
+	assert ( iCount<=1 );
+
+	// reset any state left over from a previous call
+	m_iLength = 0;
+	m_uHash = 0;
+
+	if ( pRef )
+	{
+		m_sValue = *pRef;
+		m_iLength = m_sValue.Length();
+		if ( m_iLength )
+			m_uHash = LibcCIHash_fn::Hash ( (const BYTE*)m_sValue.cstr(), m_iLength );
+	}
+}
+
+
+// Match when (column string == reference string) equals the filter polarity.
+bool Filter_StringColumnar_c::Eval ( const CSphMatch & tMatch ) const
+{
+	if ( !m_pIterator.Ptr() || m_pIterator->AdvanceTo ( tMatch.m_tRowID ) != tMatch.m_tRowID )
+		return false;
+
+	bool bEqual;
+	if ( m_bHash )
+		bEqual = m_uHash==m_pIterator->GetStringHash();
+	else
+	{
+		// cheap length check first; note that a length mismatch means "not equal",
+		// it must still honor the '!=' polarity below (the old code returned false here,
+		// which broke inequality filters)
+		int iLength = m_pIterator->GetLength();
+		if ( iLength!=m_iLength )
+			bEqual = false;
+		else
+		{
+			const BYTE * pStr = nullptr;
+			m_pIterator->Get ( pStr, false );
+
+			bEqual = !m_fnStrCmp ( {pStr, iLength}, {(const BYTE*)m_sValue.cstr(), m_iLength}, false );
+		}
+	}
+
+	return bEqual==m_bEquals;
+}
+
+
+// Attach the columnar storage: create a per-filter iterator and decide
+// whether hash-based comparison is available.
+void Filter_StringColumnar_c::SetColumnar ( const columnar::Columnar_i * pColumnar )
+{
+	assert(pColumnar);
+
+	columnar::IteratorHints_t tHints;
+	tHints.m_bNeedStringHashes = m_eCollation==SPH_COLLATION_DEFAULT;
+
+	std::string sError; // fixme! report errors
+	m_pIterator = pColumnar->CreateIterator ( m_sAttrName.cstr(), tHints, sError );
+	m_bHash = m_pIterator.Ptr() && m_pIterator->HaveStringHashes();
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Columnar range filter over a single numeric column.
+// The four template flags encode range inclusivity/openness at compile time,
+// so each of the 16 combinations gets a branch-free specialization.
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+class Filter_RangeColumnar_T : public ColumnarFilter_c
+{
+	using ColumnarFilter_c::ColumnarFilter_c;
+
+public:
+	bool	Eval ( const CSphMatch & tMatch ) const final;
+	bool	Test ( const columnar::MinMaxVec_t & dMinMax ) const final;
+	void	SetRange ( SphAttr_t tMin, SphAttr_t tMax ) final;
+	void	SetRangeFloat ( float fMin, float fMax ) final;
+
+private:
+	T m_tMinValue;
+	T m_tMaxValue;
+};
+
+// fetch the column value for this row and test it against [min..max]
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+bool Filter_RangeColumnar_T<T, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::Eval ( const CSphMatch & tMatch ) const
+{
+	SphAttr_t tValue;
+	if ( !GetValue ( tMatch.m_tRowID, tValue ) )
+		return false;
+
+	return EvalRange<HAS_EQUAL_MIN,HAS_EQUAL_MAX,OPEN_LEFT,OPEN_RIGHT> ( ConvertType<T>(tValue), m_tMinValue, m_tMaxValue );
+}
+
+// block-level early reject: can ANY value in [blockMin..blockMax] match the range?
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+bool Filter_RangeColumnar_T<T, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::Test ( const columnar::MinMaxVec_t & dMinMax ) const
+{
+	// no columnar column attached -> can't prune, let the block through
+	if ( m_iColumnarCol<0 )
+		return true;
+
+	T tBlockMin = ConvertType<T> ( dMinMax[m_iColumnarCol].first );
+	T tBlockMax = ConvertType<T> ( dMinMax[m_iColumnarCol].second );
+
+	return EvalBlockRangeAny<HAS_EQUAL_MIN,HAS_EQUAL_MAX> ( tBlockMin, tBlockMax, m_tMinValue, m_tMaxValue );
+}
+
+// set integer range bounds (converted to the column's value type T)
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+void Filter_RangeColumnar_T<T, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::SetRange ( SphAttr_t tMin, SphAttr_t tMax )
+{
+	m_tMinValue = ConvertType<T>(tMin);
+	m_tMaxValue = ConvertType<T>(tMax);
+}
+
+// set float range bounds (plain cast to T)
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+void Filter_RangeColumnar_T<T, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::SetRangeFloat ( float fMin, float fMax )
+{
+	m_tMinValue = (T)fMin;
+	m_tMaxValue = (T)fMax;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Instantiate the range-filter specialization matching the four boolean
+// settings, packed into a 4-bit index: [equalMin|equalMax|openLeft|openRight].
+template <typename T>
+static ISphFilter * CreateColumnarRangeFilter ( const CSphString & sName, const CSphFilterSettings & tSettings )
+{
+	int iIndex = tSettings.m_bHasEqualMin*8 + tSettings.m_bHasEqualMax*4 + tSettings.m_bOpenLeft*2 + tSettings.m_bOpenRight;
+	switch ( iIndex )
+	{
+	case 0:		return new Filter_RangeColumnar_T<T, false, false, false, false> (sName);
+	case 1:		return new Filter_RangeColumnar_T<T, false, false, false, true>  (sName);
+	case 2:		return new Filter_RangeColumnar_T<T, false, false, true,  false> (sName);
+	case 3:		return new Filter_RangeColumnar_T<T, false, false, true,  true>  (sName);
+	case 4:		return new Filter_RangeColumnar_T<T, false, true,  false, false> (sName);
+	case 5:		return new Filter_RangeColumnar_T<T, false, true,  false, true>  (sName);
+	case 6:		return new Filter_RangeColumnar_T<T, false, true,  true,  false> (sName);
+	case 7:		return new Filter_RangeColumnar_T<T, false, true,  true,  true>  (sName);
+	case 8:		return new Filter_RangeColumnar_T<T, true,  false, false, false> (sName);
+	case 9:		return new Filter_RangeColumnar_T<T, true,  false, false, true>  (sName);
+	case 10:	return new Filter_RangeColumnar_T<T, true,  false, true,  false> (sName);
+	case 11:	return new Filter_RangeColumnar_T<T, true,  false, true,  true>  (sName);
+	case 12:	return new Filter_RangeColumnar_T<T, true,  true,  false, false> (sName);
+	case 13:	return new Filter_RangeColumnar_T<T, true,  true,  false, true>  (sName);
+	case 14:	return new Filter_RangeColumnar_T<T, true,  true,  true,  false> (sName);
+	case 15:	return new Filter_RangeColumnar_T<T, true,  true,  true,  true>  (sName);
+	default:	return nullptr;	// unreachable: iIndex is always 0..15
+	}
+}
+
+// Columnar MVA filter vs. a single reference value; FUNC (any/all) decides
+// whether one or every element of the row's MVA list must match.
+template < typename T, typename FUNC >
+class Filter_SingleValueColumnar_MVA_T : public Filter_SingleValueColumnar_c
+{
+	using Filter_SingleValueColumnar_c::Filter_SingleValueColumnar_c;
+
+public:
+	bool Eval ( const CSphMatch & tMatch ) const final
+	{
+		ByteBlob_t tData;
+		if ( !GetValue ( tMatch.m_tRowID, tData ) )
+			return false;
+
+		// reinterpret the raw blob as an array of T values
+		VecTraits_T<const T> tCheck ( (const T*)tData.first, tData.second/sizeof(T) );
+		return FUNC::Eval ( tCheck, m_tRefValue );
+	}
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+// Columnar MVA filter vs. a set of reference values; FUNC (any/all) decides
+// how the row's MVA list must intersect the set.
+template < typename T, typename FUNC >
+class Filter_ValuesColumnar_MVA_T : public ColumnarFilter_c
+{
+	using ColumnarFilter_c::ColumnarFilter_c;
+
+public:
+	bool		Eval ( const CSphMatch & tMatch ) const final;
+	bool		Test ( const columnar::MinMaxVec_t & dMinMax ) const final;
+	void		SetValues ( const SphAttr_t * pValues, int iNumValues ) final;
+
+private:
+	VecTraits_T<const SphAttr_t>	m_dValues;	// non-owning view over caller-owned sorted values
+};
+
+template < typename T, typename FUNC >
+bool Filter_ValuesColumnar_MVA_T<T,FUNC>::Eval ( const CSphMatch & tMatch ) const
+{
+	ByteBlob_t tData;
+	if ( !GetValue ( tMatch.m_tRowID, tData ) )
+		return false;
+
+	// reinterpret the raw blob as an array of T values
+	VecTraits_T<const T> tCheck ( (const T*)tData.first, tData.second/sizeof(T) );
+	return FUNC::Eval ( tCheck, m_dValues );
+}
+
+// block-level early reject using the column's per-block min/max
+template < typename T, typename FUNC >
+bool Filter_ValuesColumnar_MVA_T<T,FUNC>::Test ( const columnar::MinMaxVec_t & dMinMax ) const
+{
+	if ( m_iColumnarCol<0 )
+		return true;
+
+	return FUNC::EvalBlock ( m_dValues, dMinMax[m_iColumnarCol].first, dMinMax[m_iColumnarCol].second );
+}
+
+// store a view over the reference values; caller keeps ownership and must
+// supply them sorted ascending (verified in debug builds)
+template < typename T, typename FUNC >
+void Filter_ValuesColumnar_MVA_T<T,FUNC>::SetValues ( const SphAttr_t * pValues, int iNumValues )
+{
+	assert ( pValues );
+	assert ( iNumValues > 0 );
+
+#ifndef NDEBUG
+	for ( int i = 1; i < iNumValues; i++ )
+		assert ( pValues[i-1]<=pValues[i] );
+#endif
+
+	m_dValues = { pValues, iNumValues };
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Columnar MVA range filter; FUNC (any/all) decides whether some or every
+// element of the row's MVA list must fall into [min..max]. The four template
+// flags encode range inclusivity/openness at compile time.
+template <typename T, typename FUNC, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+class Filter_RangeColumnar_MVA_T : public ColumnarFilter_c
+{
+	using ColumnarFilter_c::ColumnarFilter_c;
+
+public:
+	bool	Eval ( const CSphMatch & tMatch ) const final;
+	bool	Test ( const columnar::MinMaxVec_t & dMinMax ) const final;
+	void	SetRange ( SphAttr_t tMin, SphAttr_t tMax ) final;
+
+private:
+	SphAttr_t m_tMinValue;
+	SphAttr_t m_tMaxValue;
+};
+
+template <typename T, typename FUNC, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+bool Filter_RangeColumnar_MVA_T<T, FUNC, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::Eval ( const CSphMatch & tMatch ) const
+{
+	ByteBlob_t tData;
+	if ( !GetValue ( tMatch.m_tRowID, tData ) )
+		return false;
+
+	// reinterpret the raw blob as an array of T values
+	VecTraits_T<const T> tCheck ( (const T*)tData.first, tData.second/sizeof(T) );
+	return FUNC::template EvalRange<T,HAS_EQUAL_MIN,HAS_EQUAL_MAX> ( tCheck, m_tMinValue, m_tMaxValue );
+}
+
+// block-level early reject using the column's per-block min/max
+template <typename T, typename FUNC, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+bool Filter_RangeColumnar_MVA_T<T, FUNC, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::Test ( const columnar::MinMaxVec_t & dMinMax ) const
+{
+	if ( m_iColumnarCol<0 )
+		return true;
+
+	return EvalBlockRangeAny<HAS_EQUAL_MIN,HAS_EQUAL_MAX> ( dMinMax[m_iColumnarCol].first, dMinMax[m_iColumnarCol].second, m_tMinValue, m_tMaxValue );
+}
+
+template <typename T, typename FUNC, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT, bool OPEN_RIGHT>
+void Filter_RangeColumnar_MVA_T<T, FUNC, HAS_EQUAL_MIN, HAS_EQUAL_MAX, OPEN_LEFT, OPEN_RIGHT>::SetRange ( SphAttr_t tMin, SphAttr_t tMax )
+{
+	m_tMinValue = tMin;
+	m_tMaxValue = tMax;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// pick the single-value vs. multi-value MVA filter class depending on the
+// number of reference values in the settings
+template < typename T, typename FUNC>
+static ISphFilter * CreateColumnarMvaFilterValues ( const CSphString & sName, const CSphFilterSettings & tSettings )
+{
+	if ( tSettings.GetNumValues()==1 )
+		return new Filter_SingleValueColumnar_MVA_T<T,FUNC>(sName);
+
+	return new Filter_ValuesColumnar_MVA_T<T,FUNC>(sName);
+}
+
+
+// Instantiate the MVA range-filter specialization matching the four boolean
+// settings, packed into a 4-bit index: [equalMin|equalMax|openLeft|openRight].
+template < typename T, typename FUNC>
+static ISphFilter * CreateColumnarMvaRangeFilter ( const CSphString & sName, const CSphFilterSettings & tSettings )
+{
+	int iIndex = tSettings.m_bHasEqualMin*8 + tSettings.m_bHasEqualMax*4 + tSettings.m_bOpenLeft*2 + tSettings.m_bOpenRight;
+	switch ( iIndex )
+	{
+	case 0:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, false, false, false> (sName);
+	case 1:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, false, false, true>  (sName);
+	case 2:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, false, true,  false> (sName);
+	case 3:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, false, true,  true>  (sName);
+	case 4:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, true,  false, false> (sName);
+	case 5:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, true,  false, true>  (sName);
+	case 6:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, true,  true,  false> (sName);
+	case 7:		return new Filter_RangeColumnar_MVA_T<T, FUNC, false, true,  true,  true>  (sName);
+	case 8:		return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  false, false, false> (sName);
+	case 9:		return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  false, false, true>  (sName);
+	case 10:	return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  false, true,  false> (sName);
+	case 11:	return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  false, true,  true>  (sName);
+	case 12:	return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  true,  false, false> (sName);
+	case 13:	return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  true,  false, true>  (sName);
+	case 14:	return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  true,  true,  false> (sName);
+	case 15:	return new Filter_RangeColumnar_MVA_T<T, FUNC, true,  true,  true,  true>  (sName);
+	default:	return nullptr;	// unreachable: iIndex is always 0..15
+	}
+}
+
+
+// Create a columnar MVA filter. Dispatch is packed into a 3-bit index:
+// [wide (64-bit values) | range (vs. values) | all (vs. any)].
+static ISphFilter * CreateColumnarFilterMVA ( const CSphFilterSettings & tSettings, const CommonFilterSettings_t & tFixedSettings, const CSphColumnInfo & tAttr, int iAttr )
+{
+	bool bWide = tAttr.m_eAttrType==SPH_ATTR_INT64SET || tAttr.m_eAttrType==SPH_ATTR_INT64SET_PTR;
+	bool bRange = tFixedSettings.m_eType==SPH_FILTER_RANGE;
+	bool bAll = tSettings.m_eMvaFunc==SPH_MVAFUNC_ALL;
+	int iIndex = bWide*4 + bRange*2 + bAll;
+
+	ISphFilter * pFilter = nullptr;
+
+	switch ( iIndex )
+	{
+	case 0:	pFilter = CreateColumnarMvaFilterValues<uint32_t,MvaEvalAny_c> ( tAttr.m_sName, tSettings ); break;
+	case 1: pFilter = CreateColumnarMvaFilterValues<uint32_t,MvaEvalAll_c> ( tAttr.m_sName, tSettings ); break;
+
+	case 2:	pFilter = CreateColumnarMvaRangeFilter<uint32_t,MvaEvalAny_c> ( tAttr.m_sName, tSettings ); break;
+	case 3:	pFilter = CreateColumnarMvaRangeFilter<uint32_t,MvaEvalAll_c> ( tAttr.m_sName, tSettings ); break;
+
+	case 4:	pFilter = CreateColumnarMvaFilterValues<int64_t,MvaEvalAny_c> ( tAttr.m_sName, tSettings ); break;
+	case 5:	pFilter = CreateColumnarMvaFilterValues<int64_t,MvaEvalAll_c> ( tAttr.m_sName, tSettings ); break;
+
+	case 6:	pFilter = CreateColumnarMvaRangeFilter<int64_t,MvaEvalAny_c> ( tAttr.m_sName, tSettings ); break;
+	case 7:	pFilter = CreateColumnarMvaRangeFilter<int64_t,MvaEvalAll_c> ( tAttr.m_sName, tSettings ); break;
+
+	default:
+		assert ( 0 && "Unsupported MVA filter type" );
+	}
+
+	// attach the column index so block-level Test() can prune
+	if ( pFilter )
+		pFilter->SetColumnarCol(iAttr);
+
+	return pFilter;
+}
+
+
+// Create a plain (non-MVA) columnar filter matching the requested filter type.
+static ISphFilter * CreateColumnarFilterPlain ( const CSphFilterSettings & tSettings, const CommonFilterSettings_t & tFixedSettings, const CSphColumnInfo & tAttr, int iAttr, ESphCollation eCollation )
+{
+	switch ( tFixedSettings.m_eType )
+	{
+	case SPH_FILTER_VALUES:
+		// single-value filters get a cheaper specialized class
+		if ( tSettings.GetNumValues()==1 )
+		{
+			ISphFilter * pFilter = new Filter_SingleValueColumnar_c ( tAttr.m_sName );
+			pFilter->SetColumnarCol(iAttr);
+			return pFilter;
+		}
+		else
+		{
+			ISphFilter * pFilter = new Filter_ValuesColumnar_c ( tAttr.m_sName );
+			pFilter->SetColumnarCol(iAttr);
+			return pFilter;
+		}
+
+	case SPH_FILTER_RANGE:
+	{
+		ISphFilter * pFilter = CreateColumnarRangeFilter<SphAttr_t> ( tAttr.m_sName, tSettings );
+		pFilter->SetColumnarCol(iAttr);
+		return pFilter;
+	}
+
+	case SPH_FILTER_FLOATRANGE:
+	{
+		ISphFilter * pFilter = CreateColumnarRangeFilter<float> ( tAttr.m_sName, tSettings );
+		pFilter->SetColumnarCol(iAttr);
+		return pFilter;
+	}
+
+	case SPH_FILTER_STRING:
+		// NOTE(review): unlike the other cases, no SetColumnarCol() here, so
+		// block-level Test() never prunes for string filters -- confirm intended
+		return new Filter_StringColumnar_c ( tAttr.m_sName, eCollation, tSettings.m_bHasEqualMin || tSettings.m_bHasEqualMax );
+
+	default:
+		assert ( 0 && "Unhandled columnar filter type" );
+		return nullptr;
+	}
+}
+
+
+// Entry point: build a specialized filter for a columnar attribute (or a
+// columnar expression). Returns nullptr when the attribute is not columnar
+// or (with sError set) when the filter type is unsupported for it.
+ISphFilter * TryToCreateColumnarFilter ( int iAttr, const ISphSchema & tSchema, const CSphFilterSettings & tSettings, const CommonFilterSettings_t & tFixedSettings, ESphCollation eCollation,
+	CSphString & sError, CSphString & sWarning )
+{
+	if ( iAttr<0 )
+		return nullptr;
+
+	const CSphColumnInfo & tAttr = tSchema.GetAttr(iAttr);
+	if ( !tAttr.IsColumnar() && !tAttr.IsColumnarExpr() )
+		return nullptr;
+
+	// when we created a columnar expression, we removed it from PREFILTER stage
+	// that means that we have to create a specialized filter here because a generic expression filter will no longer work
+
+	// whitelist of attribute types that have a columnar filter implementation
+	bool bFound = false;
+	static const ESphAttr dSupportedTypes[] = { SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_STRING, SPH_ATTR_STRINGPTR,
+		SPH_ATTR_UINT32SET, SPH_ATTR_UINT32SET_PTR, SPH_ATTR_INT64SET, SPH_ATTR_INT64SET_PTR };
+
+	for ( auto i : dSupportedTypes )
+		bFound |= tAttr.m_eAttrType==i;
+
+	if ( !bFound )
+	{
+		assert ( 0 && "Unhandled columnar filter type" );
+		return nullptr;
+	}
+
+	if ( IsMvaAttr(tAttr.m_eAttrType) )
+	{
+		// MVAs only support values/range filtering
+		if ( tFixedSettings.m_eType!=SPH_FILTER_VALUES && tFixedSettings.m_eType!=SPH_FILTER_RANGE )
+		{
+			sError.SetSprintf ( "unsupported filter type '%s' on MVA column", FilterType2Str ( tFixedSettings.m_eType ).cstr() );
+			return nullptr;
+		}
+
+		if ( tSettings.m_eMvaFunc==SPH_MVAFUNC_NONE )
+			sWarning.SetSprintf ( "use an explicit ANY()/ALL() around a filter on MVA column" );
+
+		return CreateColumnarFilterMVA ( tSettings, tFixedSettings, tAttr, iAttr );
+	}
+
+	return CreateColumnarFilterPlain ( tSettings, tFixedSettings, tAttr, iAttr, eCollation );
+}
+
+#endif // USE_COLUMNAR

+ 35 - 0
src/columnarfilter.h

@@ -0,0 +1,35 @@
+//
+// Copyright (c) 2020-2021, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _columnarfilter_
+#define _columnarfilter_
+
+#include "sphinxfilter.h"
+
+#if USE_COLUMNAR
+
+// Mixin carrying the columnar column index used by block-level (min/max)
+// filter tests.
+class ColumnarFilterTraits_c
+{
+public:
+			ColumnarFilterTraits_c() = default;
+			ColumnarFilterTraits_c ( ISphExpr * pExpr );
+
+	void	SetColumnarCol ( int iColumnarCol );
+
+protected:
+	int		m_iColumnarCol = -1;	// -1 means no columnar column attached
+};
+
+ISphFilter * TryToCreateColumnarFilter ( int iAttr, const ISphSchema & tSchema, const CSphFilterSettings & tSettings, const CommonFilterSettings_t & tFixedSettings,
+	ESphCollation eCollation, CSphString & sError, CSphString & sWarning );
+
+#endif // USE_COLUMNAR
+
+#endif // _columnarfilter_

+ 173 - 0
src/columnargrouper.cpp

@@ -0,0 +1,173 @@
+//
+// Copyright (c) 2020-2021, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "columnargrouper.h"
+#include "sphinxsort.h"
+
+#if USE_COLUMNAR
+
+// Grouper over a columnar integer attribute: the group key is the raw column
+// value of the row (0 when the iterator has no value for that row).
+class GrouperColumnarInt_c : public CSphGrouper
+{
+public:
+					GrouperColumnarInt_c ( const CSphColumnInfo & tAttr );
+					GrouperColumnarInt_c ( const GrouperColumnarInt_c & rhs );
+
+	void			GetLocator ( CSphAttrLocator & tOut ) const final {}
+	ESphAttr		GetResultType () const final { return m_eAttrType; }
+	SphGroupKey_t	KeyFromMatch ( const CSphMatch & tMatch ) const final;
+	SphGroupKey_t	KeyFromValue ( SphAttr_t ) const final;
+	void			SetColumnar ( const columnar::Columnar_i * pColumnar ) final;
+	CSphGrouper *	Clone() const final;
+
+private:
+	ESphAttr							m_eAttrType = SPH_ATTR_INTEGER;
+	CSphString							m_sAttrName;
+	CSphScopedPtr<columnar::Iterator_i>	m_pIterator {nullptr};	// per-grouper iterator, created in SetColumnar()
+};
+
+
+GrouperColumnarInt_c::GrouperColumnarInt_c ( const CSphColumnInfo & tAttr )
+	: m_eAttrType ( tAttr.m_eAttrType )
+	, m_sAttrName ( tAttr.m_sName )
+{}
+
+
+// the iterator is deliberately not copied; a clone acquires its own via SetColumnar()
+GrouperColumnarInt_c::GrouperColumnarInt_c ( const GrouperColumnarInt_c & rhs )
+	: m_eAttrType ( rhs.m_eAttrType )
+	, m_sAttrName ( rhs.m_sAttrName )
+{}
+
+
+SphGroupKey_t GrouperColumnarInt_c::KeyFromMatch ( const CSphMatch & tMatch ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+		return m_pIterator->Get();
+
+	// no iterator or no value for this row -> the zero group
+	return SphGroupKey_t(0);
+}
+
+
+// never used for columnar groupers: keys come from matches, not raw values
+SphGroupKey_t GrouperColumnarInt_c::KeyFromValue ( SphAttr_t ) const
+{
+	assert(0);
+	return SphGroupKey_t();
+}
+
+
+void GrouperColumnarInt_c::SetColumnar ( const columnar::Columnar_i * pColumnar )
+{
+	assert(pColumnar);
+	std::string sError; // fixme! report errors
+	m_pIterator = pColumnar->CreateIterator ( m_sAttrName.cstr(), columnar::IteratorHints_t(), sError );
+}
+
+
+CSphGrouper * GrouperColumnarInt_c::Clone() const
+{
+	return new GrouperColumnarInt_c(*this);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Grouper over a columnar string attribute: the group key is the string hash,
+// either precomputed by the columnar iterator (when available) or computed
+// with the collation-specific HASH policy.
+template <typename HASH>
+class GrouperColumnarString_T : public CSphGrouper, public HASH
+{
+public:
+					GrouperColumnarString_T ( const CSphColumnInfo & tAttr, ESphCollation eCollation );
+					GrouperColumnarString_T ( const GrouperColumnarString_T & rhs );
+
+	void			GetLocator ( CSphAttrLocator & tOut ) const final {}
+	ESphAttr		GetResultType () const final { return SPH_ATTR_BIGINT; }
+	SphGroupKey_t	KeyFromMatch ( const CSphMatch & tMatch ) const final;
+	SphGroupKey_t	KeyFromValue ( SphAttr_t ) const final;
+	void			SetColumnar ( const columnar::Columnar_i * pColumnar ) final;
+	CSphGrouper *	Clone() const final;
+
+private:
+	CSphString							m_sAttrName;
+	ESphCollation						m_eCollation = SPH_COLLATION_DEFAULT;
+	CSphScopedPtr<columnar::Iterator_i>	m_pIterator {nullptr};	// per-grouper iterator, created in SetColumnar()
+	bool								m_bHasHashes = false;	// iterator supplies precomputed hashes?
+};
+
+template <typename HASH>
+GrouperColumnarString_T<HASH>::GrouperColumnarString_T ( const CSphColumnInfo & tAttr, ESphCollation eCollation )
+	: m_sAttrName ( tAttr.m_sName )
+	, m_eCollation ( eCollation )
+{}
+
+// the iterator is deliberately not copied; a clone acquires its own via SetColumnar()
+template <typename HASH>
+GrouperColumnarString_T<HASH>::GrouperColumnarString_T ( const GrouperColumnarString_T & rhs )
+	: m_sAttrName ( rhs.m_sAttrName )
+	, m_eCollation ( rhs.m_eCollation )
+{}
+
+template <typename HASH>
+SphGroupKey_t GrouperColumnarString_T<HASH>::KeyFromMatch ( const CSphMatch & tMatch ) const
+{
+	// no iterator or no value for this row -> the zero group
+	if ( !m_pIterator.Ptr() || m_pIterator->AdvanceTo ( tMatch.m_tRowID ) != tMatch.m_tRowID )
+		return 0;
+
+	if ( m_bHasHashes )
+		return m_pIterator->GetStringHash();
+
+	const BYTE * pStr = nullptr;
+	int iLen = m_pIterator->Get ( pStr, false );
+	if ( !iLen )
+		return 0;
+
+	return HASH::Hash ( pStr, iLen );
+}
+
+// never used for columnar groupers: keys come from matches, not raw values
+template <typename HASH>
+SphGroupKey_t GrouperColumnarString_T<HASH>::KeyFromValue ( SphAttr_t ) const
+{
+	assert(0);
+	return SphGroupKey_t();
+}
+
+template <typename HASH>
+void GrouperColumnarString_T<HASH>::SetColumnar ( const columnar::Columnar_i * pColumnar )
+{
+	assert(pColumnar);
+	// precomputed hashes are only valid for the default collation
+	columnar::IteratorHints_t tHints;
+	tHints.m_bNeedStringHashes = m_eCollation==SPH_COLLATION_DEFAULT;
+
+	std::string sError; // fixme! report errors
+	m_pIterator = pColumnar->CreateIterator ( m_sAttrName.cstr(), tHints, sError );
+	m_bHasHashes = tHints.m_bNeedStringHashes && m_pIterator.Ptr() && m_pIterator->HaveStringHashes();
+}
+
+template <typename HASH>
+CSphGrouper * GrouperColumnarString_T<HASH>::Clone() const
+{
+	return new GrouperColumnarString_T<HASH>(*this);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// factory: grouper over a columnar integer attribute
+CSphGrouper * CreateGrouperColumnarInt ( const CSphColumnInfo & tAttr )
+{
+	return new GrouperColumnarInt_c(tAttr);
+}
+
+
+// factory: grouper over a columnar string attribute, specialized by the
+// collation's hash policy
+CSphGrouper * CreateGrouperColumnarString ( const CSphColumnInfo & tAttr, ESphCollation eCollation )
+{
+	switch ( eCollation )
+	{
+	case SPH_COLLATION_UTF8_GENERAL_CI:	return new GrouperColumnarString_T<Utf8CIHash_fn> ( tAttr, eCollation );
+	case SPH_COLLATION_LIBC_CI:			return new GrouperColumnarString_T<LibcCIHash_fn> ( tAttr, eCollation );
+	case SPH_COLLATION_LIBC_CS:			return new GrouperColumnarString_T<LibcCSHash_fn> ( tAttr, eCollation );
+	default:							return new GrouperColumnarString_T<BinaryHash_fn> ( tAttr, eCollation );
+	}
+}
+
+#endif // USE_COLUMNAR

+ 24 - 0
src/columnargrouper.h

@@ -0,0 +1,24 @@
+//
+// Copyright (c) 2020-2021, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _columnargrouper_
+#define _columnargrouper_
+
+#include "sphinx.h"
+
+#if USE_COLUMNAR
+
+class CSphGrouper;
+CSphGrouper * CreateGrouperColumnarInt ( const CSphColumnInfo & tAttr );
+CSphGrouper * CreateGrouperColumnarString ( const CSphColumnInfo & tAttr, ESphCollation eCollation );
+
+#endif // USE_COLUMNAR
+
+#endif // _columnargrouper_

+ 216 - 0
src/columnarlib.cpp

@@ -0,0 +1,216 @@
+//
+// Copyright (c) 2020-2021, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "columnarlib.h"
+#include "sphinx.h"
+#include "libutils.h"
+
+#if USE_COLUMNAR
+
+using CreateStorageReader_fn =	columnar::Columnar_i * (*) ( const std::string & sFilename, uint32_t uTotalDocs, std::string & sError );
+using CreateBuilder_fn =		columnar::Builder_i * (*) ( const columnar::Settings_t & tSettings, const columnar::Schema_t & tSchema, const std::string & sFile, std::string & sError );
+using Setup_fn =				void (*) ( columnar::Malloc_fn, columnar::Free_fn );
+using VersionStr_fn =			const char * (*)();
+
+static void *					g_pColumnarLib = nullptr;
+static CreateStorageReader_fn	g_fnCreateColumnarStorage = nullptr;
+static CreateBuilder_fn 		g_fnCreateColumnarBuilder = nullptr;
+static Setup_fn 				g_fnSetupColumnar = nullptr;
+static VersionStr_fn			g_fnVersionStr = nullptr;
+
+/////////////////////////////////////////////////////////////////////
+
+// Map an internal attribute type (plus its bit width) to the columnar
+// library's storage type; 1-bit integers are stored as booleans.
+static columnar::AttrType_e ToColumnarType ( ESphAttr eAttrType, int iBitCount )
+{
+	switch ( eAttrType )
+	{
+	case SPH_ATTR_NONE:			return columnar::AttrType_e::NONE;
+	case SPH_ATTR_INTEGER:		return iBitCount==1 ? columnar::AttrType_e::BOOLEAN : columnar::AttrType_e::UINT32;
+	case SPH_ATTR_TIMESTAMP:	return columnar::AttrType_e::TIMESTAMP;
+	case SPH_ATTR_BOOL:			return columnar::AttrType_e::BOOLEAN;
+	case SPH_ATTR_FLOAT:		return columnar::AttrType_e::FLOAT;
+	case SPH_ATTR_BIGINT:		return iBitCount==1 ? columnar::AttrType_e::BOOLEAN : columnar::AttrType_e::INT64;
+	case SPH_ATTR_STRING:		return columnar::AttrType_e::STRING;
+	case SPH_ATTR_UINT32SET:	return columnar::AttrType_e::UINT32SET;
+	case SPH_ATTR_INT64SET:		return columnar::AttrType_e::INT64SET;
+	default:
+		assert ( 0 && "Unknown columnar type");
+		return columnar::AttrType_e::NONE;
+	}
+}
+
+
+// Open a columnar storage reader over sFile via the dynamically loaded
+// library. Returns nullptr with sError set when the library is not loaded
+// or the reader cannot be created.
+columnar::Columnar_i * CreateColumnarStorageReader ( const CSphString & sFile, DWORD uNumDocs, CSphString & sError )
+{
+	if ( !g_pColumnarLib )
+	{
+		sError = "columnar library not loaded";
+		return nullptr;
+	}
+
+	std::string sErrorSTL;
+	assert ( g_fnCreateColumnarStorage );
+	columnar::Columnar_i * pColumnar = g_fnCreateColumnarStorage ( sFile.cstr(), uNumDocs, sErrorSTL );
+	if ( !pColumnar )
+		sError = sErrorSTL.c_str();	// convert STL error string to our string type
+
+	return pColumnar;
+}
+
+
+// Create a columnar storage builder for all columnar attributes of tSchema.
+// Returns nullptr with sError set when the library is not loaded or the
+// builder cannot be created.
+columnar::Builder_i * CreateColumnarBuilder ( const ISphSchema & tSchema, const columnar::Settings_t & tSettings, const CSphString & sFilename, CSphString & sError )
+{
+	if ( !g_pColumnarLib )
+	{
+		sError = "columnar library not loaded";
+		return nullptr;
+	}
+
+	columnar::Schema_t tColumnarSchema;
+	std::string sErrorSTL;
+
+	// convert our data types to columnars storage data types
+	for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
+	{
+		const CSphColumnInfo & tAttr = tSchema.GetAttr(i);
+		if ( !tAttr.IsColumnar() )
+			continue;
+
+		columnar::StringHash_fn fnStringCalcHash = nullptr;
+		columnar::AttrType_e eAttrType = ToColumnarType ( tAttr.m_eAttrType, tAttr.m_tLocator.m_iBitCount );
+
+		// fixme! make default collation configurable
+		if ( eAttrType==columnar::AttrType_e::STRING && tAttr.HasStringHashes() )
+			fnStringCalcHash = LibcCIHash_fn::Hash;
+
+		tColumnarSchema.push_back ( { tAttr.m_sName.cstr(), eAttrType, fnStringCalcHash } );
+	}
+
+	assert ( g_fnCreateColumnarBuilder );
+	columnar::Builder_i * pBuilder = g_fnCreateColumnarBuilder ( tSettings, tColumnarSchema, sFilename.cstr(), sErrorSTL );
+	if ( !pBuilder )
+		sError = sErrorSTL.c_str();	// convert STL error string to our string type
+
+	return pBuilder;
+}
+
+
+// Resolve symbol szFunc from the already-opened library handle into pFunc.
+// On failure, sets sError and returns false. The handle is NOT closed here:
+// ownership stays with the caller (ScopedHandle_c in InitColumnar), whose
+// destructor closes it — closing here as well would dlclose() the same
+// handle twice.
+template <typename T>
+static bool LoadFunc ( T & pFunc, void * pHandle, const char * szFunc, const CSphString & sLib, CSphString & sError )
+{
+	pFunc = (T) dlsym ( pHandle, szFunc );
+	if ( !pFunc )
+	{
+		sError.SetSprintf ( "symbol '%s' not found in '%s'", szFunc, sLib.cstr() );
+		return false;
+	}
+
+	return true;
+}
+
+
+// RAII guard over a dlopen() handle: closes it on scope exit unless
+// ownership is handed off via Leak().
+class ScopedHandle_c
+{
+public:
+	ScopedHandle_c ( void * pHandle )
+		: m_pHandle ( pHandle )
+	{}
+
+	~ScopedHandle_c()
+	{
+		if ( m_pHandle )
+			dlclose ( m_pHandle );
+	}
+
+	// transfer ownership to the caller; the destructor becomes a no-op
+	void * Leak()
+	{
+		auto * pRes = m_pHandle;
+		m_pHandle = nullptr;
+		return pRes;
+	}
+
+	void * Get() { return m_pHandle; }
+
+private:
+	void * m_pHandle = nullptr;
+};
+
+#endif // USE_COLUMNAR
+
+
+// Dynamically load the columnar library (path baked in at build time via the
+// COLUMNAR_PATH define), verify its binary interface version and resolve the
+// entry points. Returns false with sError set on any failure; a no-op success
+// when built without columnar support.
+bool InitColumnar ( CSphString & sError )
+{
+#if USE_COLUMNAR
+	CSphString sLibfile;
+
+#ifndef COLUMNAR_PATH
+	sError = "COLUMNAR_DEST not set";	// NOTE(review): message says COLUMNAR_DEST while the macro checked is COLUMNAR_PATH -- confirm the user-facing cmake variable name
+	return false;
+#else
+	sLibfile = COLUMNAR_PATH;
+#endif
+
+	// tHandle closes the library automatically unless we Leak() it on success
+	ScopedHandle_c tHandle ( dlopen ( sLibfile.cstr(), RTLD_LAZY | RTLD_LOCAL ) );
+	if ( !tHandle.Get() )
+	{
+		const char * szDlError = dlerror();
+		sError.SetSprintf ( "dlopen() failed: %s", szDlError ? szDlError : "(null)" );
+		return false;
+	}
+
+	sphLogDebug ( "dlopen(%s)=%p", sLibfile.cstr(), tHandle.Get() );
+
+	// check the library's binary interface version before resolving anything else
+	using GetVersion_fn = int (*)();
+	GetVersion_fn fnGetVersion;
+	if ( !LoadFunc ( fnGetVersion, tHandle.Get(), "GetColumnarLibVersion", sLibfile, sError ) )
+		return false;
+
+	int iLibVersion = fnGetVersion();
+	if ( iLibVersion!=columnar::LIB_VERSION )
+	{
+		sError.SetSprintf ( "daemon requires columnar library v%d (trying to load v%d)", columnar::LIB_VERSION, iLibVersion );
+		return false;
+	}
+
+	if ( !LoadFunc ( g_fnCreateColumnarStorage, tHandle.Get(), "CreateColumnarStorageReader", sLibfile, sError ) )	return false;
+	if ( !LoadFunc ( g_fnCreateColumnarBuilder, tHandle.Get(), "CreateColumnarBuilder", sLibfile, sError ) )		return false;
+	if ( !LoadFunc ( g_fnSetupColumnar, tHandle.Get(), "SetupColumnar", sLibfile, sError ) )						return false;
+	if ( !LoadFunc ( g_fnVersionStr, tHandle.Get(), "GetColumnarLibVersionStr", sLibfile, sError ) )				return false;
+
+	// hand our allocator to the library and keep the handle for the daemon's lifetime
+	g_fnSetupColumnar ( malloc, free );
+	g_pColumnarLib = tHandle.Leak();
+#endif
+
+	return true;
+}
+
+
+// Unload the columnar library. Safe when the library was never loaded.
+// The handle is cleared so that later calls (e.g. GetColumnarVersionStr)
+// don't touch an unloaded module and a repeated shutdown is a no-op.
+void ShutdownColumnar()
+{
+#if USE_COLUMNAR
+	if ( g_pColumnarLib )
+	{
+		dlclose(g_pColumnarLib);
+		g_pColumnarLib = nullptr;
+	}
+#endif
+}
+
+
+// Returns the loaded columnar library's version string, or nullptr when the
+// library is not loaded (or columnar support is compiled out).
+const char * GetColumnarVersionStr()
+{
+#if USE_COLUMNAR
+	if ( !g_pColumnarLib )
+		return nullptr;
+
+	assert ( g_fnVersionStr );
+	return g_fnVersionStr();
+#else
+	return nullptr;
+#endif
+}
+ 32 - 0
src/columnarlib.h

@@ -0,0 +1,32 @@
+//
+// Copyright (c) 2020-2021, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _columnarlib_
+#define _columnarlib_
+
+#include "sphinxstd.h"
+
+#if USE_COLUMNAR
+
+#include "columnar.h"
+#include "builder.h"
+
+class ISphSchema;
+
+columnar::Columnar_i *	CreateColumnarStorageReader ( const CSphString & sFile, DWORD uNumDocs, CSphString & sError );
+columnar::Builder_i *	CreateColumnarBuilder ( const ISphSchema & tSchema, const columnar::Settings_t & tSettings, const CSphString & sFilename, CSphString & sError );
+
+#endif // USE_COLUMNAR
+
+bool			InitColumnar ( CSphString & sError );
+void			ShutdownColumnar();
+const char *	GetColumnarVersionStr();
+
+#endif // _columnarlib_

+ 65 - 28
src/datareader.cpp

@@ -60,6 +60,11 @@ public:
 		m_pPointer = m_pBase + iPos;
 	}
 
+	void		GetBytes ( BYTE * pData, int iSize ) final;
+
+	int			GetBytesZerocopy ( const BYTE *& pData, int iMax ) final;
+	DWORD		GetDword() final;
+	SphOffset_t	GetOffset() final;
 	DWORD		UnzipInt () final;
 	uint64_t	UnzipOffset () final;
 
@@ -100,6 +105,51 @@ private:
 };
 
 
+// Copy iSize bytes from the current position in the mmap into pData and
+// advance the read position. On an out-of-range request only a warning is
+// logged: the destination buffer is left unfilled and the position does
+// not advance.
+void ThinMMapReader_c::GetBytes ( BYTE * pData, int iSize )
+{
+	auto iPos = m_pPointer - m_pBase;
+	if ( iPos>=0 && iPos+iSize<=m_iSize )
+	{
+		memcpy ( pData, m_pPointer, iSize );
+		m_pPointer += iSize;
+		return;
+	}
+
+	sphWarning ( "INTERNAL: out-of-range in ThinMMapReader_c: trying to read %d bytes from '%s' at " INT64_FMT ", from mmap of " INT64_FMT ", query most probably would FAIL; report the fact to dev!",
+		iSize, ( m_szFileName ? m_szFileName : "" ), int64_t(iPos), int64_t(m_iSize) );
+}
+
+
+// Hand out a direct pointer into the mmap (no copying) and advance by iMax.
+// When fewer than iMax bytes remain, pData still points at the current
+// position but 0 is returned and the position does not advance.
+int ThinMMapReader_c::GetBytesZerocopy ( const BYTE *& pData, int iMax )
+{
+	pData = m_pPointer;
+	if ( m_pPointer+iMax > m_pBase+m_iSize )
+		return 0;	// not enough data left in the map
+
+	m_pPointer += iMax;
+	return iMax;
+}
+
+
+// read a DWORD from the map via GetBytes (value stays uninitialized if the
+// read runs past the end -- GetBytes only warns in that case)
+DWORD ThinMMapReader_c::GetDword()
+{
+	DWORD tRes;
+	GetBytes ( (BYTE*)&tRes, sizeof(tRes) );
+	return tRes;
+}
+
+
+// read a file offset from the map via GetBytes (same caveat as GetDword)
+SphOffset_t	ThinMMapReader_c::GetOffset()
+{
+	SphOffset_t tRes;
+	GetBytes ( (BYTE*)&tRes, sizeof(tRes) );
+	return tRes;
+}
+
+
 DWORD ThinMMapReader_c::UnzipInt()
 {
 	SPH_VARINT_DECODE ( DWORD, GetByte() );
@@ -118,30 +168,16 @@ class DirectFileReader_c final : public FileBlockReader_c, protected FileReader_
 	friend class DirectFactory_c;
 
 public:
-	SphOffset_t GetPos () const final
-	{
-		return FileReader_c::GetPos();
-	}
-
-	void SeekTo ( SphOffset_t iPos, int iSizeHint ) final
-	{
-		FileReader_c::SeekTo ( iPos, iSizeHint );
-	}
-
-	DWORD UnzipInt() final
-	{
-		return FileReader_c::UnzipInt();
-	}
-
-	uint64_t UnzipOffset() final
-	{
-		return FileReader_c::UnzipOffset();
-	}
-
-	void Reset() final
-	{
-		FileReader_c::Reset();
-	}
+	void		SeekTo ( SphOffset_t iPos, int iSizeHint ) final		{ FileReader_c::SeekTo ( iPos, iSizeHint ); }
+	void		GetBytes ( BYTE * pData, int iSize ) final				{ FileReader_c::GetBytes ( pData, iSize ); }
+	int 		GetBytesZerocopy ( const BYTE *& pData, int iMax ) final { return FileReader_c::GetBytesZerocopy ( &pData, iMax ); }
+	SphOffset_t GetPos () const final	{ return FileReader_c::GetPos(); }
+	BYTE		GetByte() final			{ return FileReader_c::GetByte(); }
+	DWORD		GetDword () final		{ return FileReader_c::GetDword(); }
+	SphOffset_t	GetOffset() final		{ return FileReader_c::GetOffset(); }
+	DWORD		UnzipInt() final		{ return FileReader_c::UnzipInt(); }
+	uint64_t	UnzipOffset() final		{ return FileReader_c::UnzipOffset(); }
+	void		Reset() final			{ FileReader_c::Reset(); }
 
 protected:
 	explicit DirectFileReader_c ( BYTE * pBuf, int iSize, const char * szFileName )
@@ -205,7 +241,7 @@ public:
 		return pFileReader;
 	}
 
-	void SetProfile ( QueryProfile_c* pProfile ) final
+	void SetProfile ( QueryProfile_c * pProfile ) final
 	{
 		m_dReader.m_pProfile = pProfile;
 	}
@@ -291,7 +327,8 @@ DataReaderFactory_c * NewProxyReader ( const CSphString & sFile, CSphString & sE
 	else
 		pReader = new MMapFactory_c ( sFile, sError, eAccess );
 
-	if ( !pReader->IsValid ())
-		SafeRelease ( pReader )
-		return pReader;
+	if ( !pReader->IsValid() )
+		SafeRelease(pReader);
+
+	return pReader;
 }

+ 8 - 1
src/datareader.h

@@ -21,6 +21,11 @@ class FileBlockReader_i : public ISphRefcountedMT
 public:
 	virtual SphOffset_t	GetPos() const = 0;
 	virtual void		SeekTo ( SphOffset_t iPos, int iSizeHint ) = 0;
+	virtual void		GetBytes ( BYTE * pData, int iSize ) = 0;
+	virtual int			GetBytesZerocopy ( const BYTE *& pData, int iMax ) = 0;
+	virtual BYTE		GetByte() = 0;
+	virtual DWORD		GetDword() = 0;
+	virtual SphOffset_t	GetOffset() = 0;
 	virtual DWORD		UnzipInt() = 0;
 	virtual uint64_t	UnzipOffset() = 0;
 	virtual RowID_t		UnzipRowid() = 0;
@@ -30,6 +35,7 @@ public:
 
 
 using FileBlockReaderPtr_c = CSphRefcountedPtr<FileBlockReader_i>;
+class QueryProfile_c;
 
 // producer of readers from file or filemap
 class DataReaderFactory_c : public ISphRefcountedMT
@@ -38,7 +44,8 @@ public:
 	enum Kind_e
 	{
 		DOCS,
-		HITS
+		HITS,
+		COLUMNAR
 	};
 
 	bool						IsValid () const { return m_bValid; }

+ 2 - 1
src/docstore.cpp

@@ -10,7 +10,8 @@
 
 #include "docstore.h"
 
-#include "sphinxint.h"
+#include "fileio.h"
+#include "memio.h"
 #include "fileutils.h"
 #include "attribute.h"
 #include "indexcheck.h"

+ 23 - 12
src/dynamic_idx.cpp

@@ -11,6 +11,7 @@
 //
 
 #include "dynamic_idx.h"
+#include "sphinxsort.h"
 
 class Feeder_c : public RowBuffer_i
 {
@@ -503,7 +504,7 @@ public:
 	int					Kill ( DocID_t tDocID ) override { return 0; }
 	int					Build ( const CSphVector<CSphSource*> & , int , int ) override { return 0; }
 	bool				Merge ( CSphIndex * , const VecTraits_T<CSphFilterSettings> &, bool ) override { return false; }
-	bool				Prealloc ( bool, FilenameBuilder_i * ) final { return false; }
+	bool				Prealloc ( bool, FilenameBuilder_i *, StrVec_t & ) final { return false; }
 	void				Dealloc () final {}
 	void				Preread () final {}
 	void				SetBase ( const char * ) final {}
@@ -586,17 +587,23 @@ bool GenericTableIndex_c::MultiQueryEx ( int iQueries, const CSphQuery * pQuerie
 	return bResult;
 }
 
-struct DynMatchProcessor_t : ISphMatchProcessor, ISphNoncopyable
+class DynMatchProcessor_c : public MatchProcessor_i, ISphNoncopyable
 {
-	const CSphQueryContext &m_tCtx;
-	int m_iTag;
-
-	DynMatchProcessor_t ( int iTag, const CSphQueryContext &tCtx )
-		: m_tCtx ( tCtx )
-		, m_iTag ( iTag )
+public:
+	DynMatchProcessor_c ( int iTag, const CSphQueryContext &tCtx )
+		: m_iTag ( iTag )
+		, m_tCtx ( tCtx )
 	{}
 
-	void Process ( CSphMatch * pMatch ) final
+	void Process ( CSphMatch * pMatch ) final			{ ProcessMatch(pMatch); }
+	bool ProcessInRowIdOrder() const final				{ return false;	}
+	void Process ( VecTraits_T<CSphMatch *> & dMatches ){ dMatches.for_each ( [this]( CSphMatch * pMatch ){ ProcessMatch(pMatch); } ); }
+
+private:
+	int							m_iTag;
+	const CSphQueryContext &	m_tCtx;
+
+	inline void ProcessMatch ( CSphMatch * pMatch )
 	{
 		if ( pMatch->m_iTag>=0 )
 			return;
@@ -636,8 +643,12 @@ bool GenericTableIndex_c::MultiScan ( CSphQueryResult & tResult, const CSphQuery
 
 	// setup calculations and result schema
 	CSphQueryContext tCtx ( tQuery );
-	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tSchema, nullptr, dSorterSchemas ) )
-		return false;
+
+#if USE_COLUMNAR
+	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tSchema, nullptr, nullptr, dSorterSchemas ) ) return false;
+#else
+	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tSchema, nullptr, dSorterSchemas ) ) return false;
+#endif
 
 	// setup filters
 	CreateFilterContext_t tFlx;
@@ -714,7 +725,7 @@ bool GenericTableIndex_c::MultiScan ( CSphQueryResult & tResult, const CSphQuery
 	// do final expression calculations
 	if ( tCtx.m_dCalcFinal.GetLength () )
 	{
-		DynMatchProcessor_t tFinal ( tArgs.m_iTag, tCtx );
+		DynMatchProcessor_c tFinal ( tArgs.m_iTag, tCtx );
 		dSorters.Apply ( [&tFinal] ( ISphMatchSorter * p ) { p->Finalize ( tFinal, false ); } );
 	}
 

+ 910 - 0
src/fileio.cpp

@@ -0,0 +1,910 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "fileio.h"
+#include "sphinxint.h"
+
+#define SPH_READ_PROGRESS_CHUNK (8192*1024)
+#define SPH_READ_NOPROGRESS_CHUNK (32768*1024)
+
+#if PARANOID
+
+#define SPH_VARINT_DECODE(_type,_getexpr) \
+	register DWORD b = 0; \
+	register _type v = 0; \
+	int it = 0; \
+	do { b = _getexpr; v = ( v<<7 ) + ( b&0x7f ); it++; } while ( b&0x80 ); \
+	assert ( (it-1)*7<=sizeof(_type)*8 ); \
+	return v;
+
+#else
+
+#define SPH_VARINT_DECODE(_type,_getexpr) \
+	register DWORD b = _getexpr; \
+	register _type res = 0; \
+	while ( b & 0x80 ) \
+	{ \
+		res = ( res<<7 ) + ( b & 0x7f ); \
+		b = _getexpr; \
+	} \
+	res = ( res<<7 ) + b; \
+	return res;
+
+#endif // PARANOID
+
+DWORD sphUnzipInt ( const BYTE * & pBuf )			{ SPH_VARINT_DECODE ( DWORD, *pBuf++ ); }
+SphOffset_t sphUnzipOffset ( const BYTE * & pBuf )	{ SPH_VARINT_DECODE ( SphOffset_t, *pBuf++ ); }
+
+//////////////////////////////////////////////////////////////////////////
+
+CSphAutofile::CSphAutofile ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp )
+{
+	Open ( sName, iMode, sError, bTemp );
+}
+
+
+CSphAutofile::~CSphAutofile()
+{
+	Close();
+}
+
+
+static int AutoFileOpen ( const CSphString & sName, int iMode )
+{
+	int iFD = -1;
+#if USE_WINDOWS
+	if ( iMode==SPH_O_READ )
+	{
+		intptr_t tFD = (intptr_t)CreateFile ( sName.cstr(), GENERIC_READ , FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL );
+		iFD = _open_osfhandle ( tFD, 0 );
+	} else
+		iFD = ::open ( sName.cstr(), iMode, 0644 );
+#else
+	iFD = ::open ( sName.cstr(), iMode, 0644 );
+#endif
+
+	return iFD;
+}
+
+
+int CSphAutofile::Open ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp )
+{
+	assert ( m_iFD==-1 && m_sFilename.IsEmpty() );
+	assert ( !sName.IsEmpty() );
+
+	m_iFD = AutoFileOpen ( sName, iMode );
+	m_sFilename = sName; // not exactly sure why is this unconditional. for error reporting later, i suppose
+
+	if ( m_iFD<0 )
+		sError.SetSprintf ( "failed to open %s: %s", sName.cstr(), strerrorm(errno) );
+	else
+	{
+		m_bTemporary = bTemp; // only if we managed to actually open it
+		m_bWouldTemporary = true; // if a shit happen - we could delete the file.
+	}
+
+	return m_iFD;
+}
+
+
+void CSphAutofile::Close()
+{
+	if ( m_iFD>=0 )
+	{
+		::close ( m_iFD );
+		if ( m_bTemporary )
+			::unlink ( m_sFilename.cstr() );
+	}
+
+	m_iFD = -1;
+	m_sFilename = "";
+	m_bTemporary = false;
+	m_bWouldTemporary = false;
+}
+
+void CSphAutofile::SetTemporary()
+{
+	m_bTemporary = m_bWouldTemporary;
+}
+
+
+const char * CSphAutofile::GetFilename() const
+{
+	assert ( m_sFilename.cstr() );
+	return m_sFilename.cstr();
+}
+
+
+SphOffset_t CSphAutofile::GetSize ( SphOffset_t iMinSize, bool bCheckSizeT, CSphString & sError )
+{
+	struct_stat st;
+	if ( stat ( GetFilename(), &st )<0 )
+	{
+		sError.SetSprintf ( "failed to stat %s: %s", GetFilename(), strerrorm(errno) );
+		return -1;
+	}
+	if ( st.st_size<iMinSize )
+	{
+		sError.SetSprintf ( "failed to load %s: bad size " INT64_FMT " (at least " INT64_FMT " bytes expected)",
+			GetFilename(), (int64_t)st.st_size, (int64_t)iMinSize );
+		return -1;
+	}
+	if ( bCheckSizeT )
+	{
+		size_t uCheck = (size_t)st.st_size;
+		if ( st.st_size!=SphOffset_t(uCheck) )
+		{
+			sError.SetSprintf ( "failed to load %s: bad size " INT64_FMT " (out of size_t; 4 GB limit on 32-bit machine hit?)",
+				GetFilename(), (int64_t)st.st_size );
+			return -1;
+		}
+	}
+	return st.st_size;
+}
+
+
+SphOffset_t CSphAutofile::GetSize()
+{
+	CSphString sTmp;
+	return GetSize ( 0, false, sTmp );
+}
+
+
+bool CSphAutofile::Read ( void * pBuf, int64_t iCount, CSphString & sError )
+{
+	assert ( iCount>=0 );
+
+	int64_t iToRead = iCount;
+	BYTE * pCur = (BYTE *)pBuf;
+	while ( iToRead>0 )
+	{
+		int64_t iToReadOnce = ( m_pStat )
+			? Min ( iToRead, SPH_READ_PROGRESS_CHUNK )
+			: Min ( iToRead, SPH_READ_NOPROGRESS_CHUNK );
+		int64_t iGot = sphRead ( GetFD(), pCur, (size_t)iToReadOnce );
+
+		if ( iGot==-1 )
+		{
+			// interrupted by a signal - try again
+			if ( errno==EINTR )
+				continue;
+
+			sError.SetSprintf ( "read error in %s (%s); " INT64_FMT " of " INT64_FMT " bytes read",
+				GetFilename(), strerrorm(errno), iCount-iToRead, iCount );
+			return false;
+		}
+
+		// EOF
+		if ( iGot==0 )
+		{
+			sError.SetSprintf ( "unexpected EOF in %s (%s); " INT64_FMT " of " INT64_FMT " bytes read",
+				GetFilename(), strerrorm(errno), iCount-iToRead, iCount );
+			return false;
+		}
+
+		iToRead -= iGot;
+		pCur += iGot;
+
+		if ( m_pStat )
+		{
+			m_pStat->m_iBytes += iGot;
+			m_pStat->Show ( false );
+		}
+	}
+
+	if ( iToRead!=0 )
+	{
+		sError.SetSprintf ( "read error in %s (%s); " INT64_FMT " of " INT64_FMT " bytes read",
+			GetFilename(), strerrorm(errno), iCount-iToRead, iCount );
+		return false;
+	}
+
+	return true;
+}
+
+
+void CSphAutofile::SetProgressCallback ( CSphIndexProgress * pStat )
+{
+	m_pStat = pStat;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+CSphReader::CSphReader ( BYTE * pBuf, int iSize )
+	: m_pBuff ( pBuf )
+	, m_iBufSize ( iSize )
+	, m_iReadUnhinted ( DEFAULT_READ_UNHINTED )
+{
+	assert ( pBuf==NULL || iSize>0 );
+}
+
+
+CSphReader::~CSphReader()
+{
+	if ( m_bBufOwned )
+		SafeDeleteArray ( m_pBuff );
+}
+
+
+void CSphReader::SetBuffers ( int iReadBuffer, int iReadUnhinted )
+{
+	if ( !m_pBuff )
+		m_iBufSize = iReadBuffer;
+	m_iReadUnhinted = iReadUnhinted;
+}
+
+
+void CSphReader::SetFile ( int iFD, const char * sFilename )
+{
+	m_iFD = iFD;
+	m_iPos = 0;
+	m_iBuffPos = 0;
+	m_iBuffUsed = 0;
+	m_sFilename = sFilename;
+}
+
+
+void CSphReader::SetFile ( const CSphAutofile & tFile )
+{
+	SetFile ( tFile.GetFD(), tFile.GetFilename() );
+}
+
+
+void CSphReader::Reset()
+{
+	SetFile ( -1, "" );
+}
+
+
+/// sizehint > 0 means we expect to read approx that much bytes
+/// sizehint == 0 means no hint, use default (happens later in UpdateCache())
+/// sizehint == -1 means reposition and adjust current hint
+void CSphReader::SeekTo ( SphOffset_t iPos, int iSizeHint )
+{
+	assert ( iPos>=0 );
+	assert ( iSizeHint>=-1 );
+
+#ifndef NDEBUG
+#if PARANOID
+	struct_stat tStat;
+	fstat ( m_iFD, &tStat );
+	if ( iPos > tStat.st_size )
+		sphDie ( "INTERNAL ERROR: seeking past the end of file" );
+#endif
+#endif
+
+	if ( iPos>=m_iPos && iPos<m_iPos+m_iBuffUsed )
+	{
+		m_iBuffPos = (int)( iPos-m_iPos ); // reposition to proper byte
+		m_iSizeHint = iSizeHint - ( m_iBuffUsed - m_iBuffPos ); // we already have some bytes cached, so let's adjust size hint
+		assert ( m_iBuffPos<m_iBuffUsed );
+	} else
+	{
+		m_iPos = iPos;
+		m_iBuffPos = 0; // for GetPos() to work properly, aaaargh
+		m_iBuffUsed = 0;
+
+		if ( iSizeHint==-1 )
+		{
+			// the adjustment bureau
+			// we need to seek but still keep the current hint
+			// happens on a skiplist jump, for instance
+			int64_t iHintLeft = m_iPos + m_iSizeHint - iPos;
+			if ( iHintLeft>0 && iHintLeft<INT_MAX )
+				iSizeHint = (int)iHintLeft;
+			else
+				iSizeHint = 0;
+		}
+
+		// get that hint
+		assert ( iSizeHint>=0 );
+		m_iSizeHint = iSizeHint;
+	}
+}
+
+
+void CSphReader::SkipBytes ( int iCount )
+{
+	// 0 means "no hint", so this clamp works alright
+	SeekTo ( m_iPos+m_iBuffPos+iCount, Max ( m_iSizeHint-m_iBuffPos-iCount, 0 ) );
+}
+
+
+void CSphReader::UpdateCache()
+{
+	CSphScopedProfile tProf ( m_pProfile, m_eProfileState );
+
+	assert ( m_iFD>=0 );
+
+	// alloc buf on first actual read
+	if ( !m_pBuff )
+	{
+		if ( m_iBufSize<=0 )
+			m_iBufSize = DEFAULT_READ_BUFFER;
+
+		m_bBufOwned = true;
+		m_pBuff = new BYTE [ m_iBufSize ];
+	}
+
+	// stream position could be changed externally
+	// so let's just hope that the OS optimizes redundant seeks
+	SphOffset_t iNewPos = m_iPos + Min ( m_iBuffPos, m_iBuffUsed );
+
+	if ( m_iSizeHint<=0 )
+		m_iSizeHint = ( m_iReadUnhinted>0 ) ? m_iReadUnhinted : DEFAULT_READ_UNHINTED;
+	int iReadLen = Min ( m_iSizeHint, m_iBufSize );
+
+	m_iBuffPos = 0;
+	m_iBuffUsed = sphPread ( m_iFD, m_pBuff, iReadLen, iNewPos ); // FIXME! what about throttling?
+
+	if ( m_iBuffUsed<0 )
+	{
+		m_iBuffUsed = m_iBuffPos = 0;
+		m_bError = true;
+		m_sError.SetSprintf ( "pread error in %s: pos=" INT64_FMT ", len=%d, code=%d, msg=%s",
+			m_sFilename.cstr(), (int64_t)iNewPos, iReadLen, errno, strerror(errno) );
+		return;
+	}
+
+	// all fine, adjust offset and hint
+	m_iSizeHint -= m_iBuffUsed;
+	m_iPos = iNewPos;
+}
+
+
+int CSphReader::GetByte()
+{
+	if ( m_iBuffPos>=m_iBuffUsed )
+	{
+		UpdateCache();
+		if ( m_iBuffPos>=m_iBuffUsed )
+			return 0; // unexpected io failure
+	}
+
+	assert ( m_iBuffPos<m_iBuffUsed );
+	return m_pBuff [ m_iBuffPos++ ];
+}
+
+
+void CSphReader::GetBytes ( void * pData, int iSize )
+{
+	BYTE * pOut = (BYTE*) pData;
+
+	while ( iSize>m_iBufSize )
+	{
+		int iLen = m_iBuffUsed - m_iBuffPos;
+		assert ( iLen<=m_iBufSize );
+
+		memcpy ( pOut, m_pBuff+m_iBuffPos, iLen );
+		m_iBuffPos += iLen;
+		pOut += iLen;
+		iSize -= iLen;
+		m_iSizeHint = Max ( m_iReadUnhinted, iSize );
+
+		if ( iSize>0 )
+		{
+			UpdateCache();
+			if ( !m_iBuffUsed )
+			{
+				memset ( pData, 0, iSize );
+				return; // unexpected io failure
+			}
+		}
+	}
+
+	if ( iSize>m_iBuffUsed-m_iBuffPos )
+	{
+		// move old buffer tail to buffer head to avoid losing the data
+		const int iLen = m_iBuffUsed - m_iBuffPos;
+		if ( iLen>0 )
+		{
+			memcpy ( pOut, m_pBuff+m_iBuffPos, iLen );
+			m_iBuffPos += iLen;
+			pOut += iLen;
+			iSize -= iLen;
+		}
+
+		m_iSizeHint = Max ( m_iReadUnhinted, iSize );
+		UpdateCache();
+		if ( iSize>m_iBuffUsed-m_iBuffPos )
+		{
+			memset ( pData, 0, iSize ); // unexpected io failure
+			return;
+		}
+	}
+
+	assert ( (m_iBuffPos+iSize)<=m_iBuffUsed );
+	memcpy ( pOut, m_pBuff+m_iBuffPos, iSize );
+	m_iBuffPos += iSize;
+}
+
+
+int CSphReader::GetLine ( char * sBuffer, int iMaxLen )
+{
+	int iOutPos = 0;
+	iMaxLen--; // reserve space for trailing '\0'
+
+			   // grab as many chars as we can
+	while ( iOutPos<iMaxLen )
+	{
+		// read next chunk if necessary
+		if ( m_iBuffPos>=m_iBuffUsed )
+		{
+			UpdateCache();
+			if ( m_iBuffPos>=m_iBuffUsed )
+			{
+				if ( iOutPos==0 ) return -1; // current line is empty; indicate eof
+				break; // return current line; will return eof next time
+			}
+		}
+
+		// break on CR or LF
+		if ( m_pBuff[m_iBuffPos]=='\r' || m_pBuff[m_iBuffPos]=='\n' )
+			break;
+
+		// one more valid char
+		sBuffer[iOutPos++] = m_pBuff[m_iBuffPos++];
+	}
+
+	// skip everything until the newline or eof
+	while (true)
+	{
+		// read next chunk if necessary
+		if ( m_iBuffPos>=m_iBuffUsed )
+			UpdateCache();
+
+		// eof?
+		if ( m_iBuffPos>=m_iBuffUsed )
+			break;
+
+		// newline?
+		if ( m_pBuff[m_iBuffPos++]=='\n' )
+			break;
+	}
+
+	// finalize
+	sBuffer[iOutPos] = '\0';
+	return iOutPos;
+}
+
+
+void CSphReader::ResetError()
+{
+	m_bError = false;
+	m_sError = "";
+}
+
+
+DWORD CSphReader::UnzipInt()
+{
+	SPH_VARINT_DECODE ( DWORD, GetByte() );
+}
+
+
+uint64_t CSphReader::UnzipOffset()
+{
+	SPH_VARINT_DECODE ( uint64_t, GetByte() );
+}
+
+
+CSphReader & CSphReader::operator = ( const CSphReader & rhs )
+{
+	SetFile ( rhs.m_iFD, rhs.m_sFilename.cstr() );
+	SeekTo ( rhs.m_iPos + rhs.m_iBuffPos, rhs.m_iSizeHint );
+	return *this;
+}
+
+
+DWORD CSphReader::GetDword()
+{
+	DWORD uRes = 0;
+	GetBytes ( &uRes, sizeof(DWORD) );
+	return uRes;
+}
+
+
+SphOffset_t CSphReader::GetOffset()
+{
+	SphOffset_t uRes = 0;
+	GetBytes ( &uRes, sizeof(SphOffset_t) );
+	return uRes;
+}
+
+
+CSphString CSphReader::GetString()
+{
+	CSphString sRes;
+
+	DWORD iLen = GetDword();
+	if ( iLen )
+	{
+		char * sBuf = new char [ iLen ];
+		GetBytes ( sBuf, iLen );
+		sRes.SetBinary ( sBuf, iLen );
+		SafeDeleteArray ( sBuf );
+	}
+
+	return sRes;
+}
+
+bool CSphReader::Tag ( const char * sTag )
+{
+	if ( m_bError )
+		return false;
+
+	assert ( sTag && *sTag ); // empty tags are nonsense
+	assert ( strlen(sTag)<64 ); // huge tags are nonsense
+
+	auto iLen = (int) strlen(sTag);
+	char sBuf[64];
+	GetBytes ( sBuf, iLen );
+	if ( !memcmp ( sBuf, sTag, iLen ) )
+		return true;
+	m_bError = true;
+	m_sError.SetSprintf ( "expected tag %s was not found", sTag );
+	return false;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+bool CSphAutoreader::Open ( const CSphString & sFilename, CSphString & sError )
+{
+	assert ( m_iFD<0 );
+	assert ( !sFilename.IsEmpty() );
+
+	m_iFD = AutoFileOpen ( sFilename, SPH_O_READ );
+	m_iPos = 0;
+	m_iBuffPos = 0;
+	m_iBuffUsed = 0;
+	m_sFilename = sFilename;
+
+	if ( m_iFD<0 )
+		sError.SetSprintf ( "failed to open %s: %s", sFilename.cstr(), strerror(errno) );
+	return ( m_iFD>=0 );
+}
+
+
+void CSphAutoreader::Close()
+{
+	if ( m_iFD>=0 )
+		::close ( m_iFD	);
+	m_iFD = -1;
+}
+
+
+SphOffset_t FileReader_c::GetFilesize() const
+{
+	assert ( m_iFD>=0 );
+
+	return sphGetFileSize ( m_iFD, nullptr );
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+void CSphWriter::SetBufferSize ( int iBufferSize )
+{
+	if ( iBufferSize!=m_iBufferSize )
+	{
+		m_iBufferSize = Max ( iBufferSize, 262144 );
+		SafeDeleteArray ( m_pBuffer );
+	}
+}
+
+
+bool CSphWriter::OpenFile ( const CSphString & sName, CSphString & sErrorBuffer )
+{
+	assert ( !sName.IsEmpty() );
+	assert ( m_iFD<0 && "already open" );
+
+	m_bOwnFile = true;
+	m_sName = sName;
+	m_pError = &sErrorBuffer;
+
+	if ( !m_pBuffer )
+		m_pBuffer = new BYTE [ m_iBufferSize ];
+
+	m_iFD = ::open ( m_sName.cstr(), SPH_O_NEW, 0644 );
+	m_pPool = m_pBuffer;
+	m_iPoolUsed = 0;
+	m_iPos = 0;
+	m_iDiskPos = 0;
+	m_bError = ( m_iFD<0 );
+
+	if ( m_bError )
+		m_pError->SetSprintf ( "failed to create %s: %s" , sName.cstr(), strerror(errno) );
+
+	return !m_bError;
+}
+
+
+void CSphWriter::SetFile ( CSphAutofile & tAuto, SphOffset_t * pSharedOffset, CSphString & sError )
+{
+	assert ( m_iFD<0 && "already open" );
+	m_bOwnFile = false;
+
+	if ( !m_pBuffer )
+		m_pBuffer = new BYTE [ m_iBufferSize ];
+
+	m_iFD = tAuto.GetFD();
+	m_sName = tAuto.GetFilename();
+	m_pPool = m_pBuffer;
+	m_iPoolUsed = 0;
+	m_iPos = 0;
+	m_iDiskPos = 0;
+	m_pSharedOffset = pSharedOffset;
+	m_pError = &sError;
+	assert ( m_pError );
+}
+
+
+CSphWriter::~CSphWriter()
+{
+	CloseFile();
+	SafeDeleteArray ( m_pBuffer );
+}
+
+
+void CSphWriter::CloseFile ( bool bTruncate )
+{
+	if ( m_iFD>=0 )
+	{
+		Flush();
+		if ( bTruncate )
+			sphTruncate ( m_iFD );
+		if ( m_bOwnFile )
+			::close ( m_iFD );
+		m_iFD = -1;
+	}
+}
+
+void CSphWriter::UnlinkFile()
+{
+	if ( m_bOwnFile )
+	{
+		if ( m_iFD>=0 )
+			::close ( m_iFD );
+
+		m_iFD = -1;
+		::unlink ( m_sName.cstr() );
+		m_sName = "";
+	}
+	SafeDeleteArray ( m_pBuffer );
+}
+
+
+void CSphWriter::UpdatePoolUsed()
+{
+	if ( m_pPool-m_pBuffer > m_iPoolUsed )
+		m_iPoolUsed = m_pPool-m_pBuffer;
+}
+
+
+void CSphWriter::PutByte ( BYTE uValue )
+{
+	assert ( m_pPool );
+	if ( m_iPoolUsed==m_iBufferSize )
+		Flush();
+	*m_pPool++ = uValue;
+	UpdatePoolUsed();
+	m_iPos++;
+}
+
+
+void CSphWriter::PutBytes ( const void * pData, int64_t iSize )
+{
+	assert ( m_pPool );
+	const BYTE * pBuf = (const BYTE *) pData;
+	while ( iSize>0 )
+	{
+		int iPut = ( iSize<m_iBufferSize ? int(iSize) : m_iBufferSize ); // comparison int64 to int32
+		if ( m_iPoolUsed+iPut>m_iBufferSize )
+			Flush();
+		assert ( m_iPoolUsed+iPut<=m_iBufferSize );
+
+		memcpy ( m_pPool, pBuf, iPut );
+		m_pPool += iPut;
+		UpdatePoolUsed();
+		m_iPos += iPut;
+
+		pBuf += iPut;
+		iSize -= iPut;
+	}
+}
+
+
+void CSphWriter::ZipInt ( DWORD uValue )
+{
+	sphZipValue ( [this] ( BYTE b ) { PutByte ( b ); }, uValue );
+}
+
+
+void CSphWriter::ZipOffset ( uint64_t uValue )
+{
+	sphZipValue ( [this] ( BYTE b ) { PutByte ( b ); }, uValue );
+}
+
+
+void CSphWriter::Flush()
+{
+	if ( m_pSharedOffset && *m_pSharedOffset!=m_iDiskPos )
+	{
+		auto uMoved = sphSeek ( m_iFD, m_iDiskPos, SEEK_SET );
+		if ( uMoved!= m_iDiskPos )
+		{
+			m_bError = true;
+			return;
+		}
+	}
+
+	if ( !sphWriteThrottled ( m_iFD, m_pBuffer, m_iPoolUsed, m_sName.cstr(), *m_pError ) )
+		m_bError = true;
+
+	m_iDiskPos += m_iPoolUsed;
+	m_iPoolUsed = 0;
+	m_pPool = m_pBuffer;
+
+	if ( m_pSharedOffset )
+		*m_pSharedOffset = m_iDiskPos;
+}
+
+
+void CSphWriter::PutString ( const char * szString )
+{
+	int iLen = szString ? (int) strlen ( szString ) : 0;
+	PutDword ( iLen );
+	if ( iLen )
+		PutBytes ( szString, iLen );
+}
+
+
+void CSphWriter::PutString ( const CSphString & sString )
+{
+	int iLen = sString.Length();
+	PutDword ( iLen );
+	if ( iLen )
+		PutBytes ( sString.cstr(), iLen );
+}
+
+
+void CSphWriter::Tag ( const char * sTag )
+{
+	assert ( sTag && *sTag ); // empty tags are nonsense
+	assert ( strlen(sTag)<64 ); // huge tags are nonsense
+	PutBytes ( sTag, strlen(sTag) );
+}
+
+
+bool SeekAndWarn ( int iFD, SphOffset_t iPos, const char * szWarnPrefix )
+{
+	assert ( szWarnPrefix );
+	auto iSeek = sphSeek ( iFD, iPos, SEEK_SET );
+	if ( iSeek!=iPos )
+	{
+		if ( iSeek<0 )
+			sphWarning ( "%s : seek error. Error: %d '%s'", szWarnPrefix, errno, strerrorm (errno) );
+		else
+			sphWarning ( "%s : seek error. Expected: " INT64_FMT ", got " INT64_FMT, szWarnPrefix, (int64_t) iPos, (int64_t) iSeek );
+		return false;
+	}
+
+	assert ( iSeek==iPos );
+	return true;
+}
+
+
+void CSphWriter::SeekTo ( SphOffset_t iPos, bool bTruncate )
+{
+	assert ( iPos>=0 );
+
+	if ( iPos>=m_iDiskPos && iPos<=( m_iDiskPos + m_iPoolUsed ) )
+	{
+		// seeking inside the buffer
+		// m_iPoolUsed should be always in sync with m_iPos
+		// or it breaks seek back at cidxHit
+		m_iPoolUsed = (int)( iPos - m_iDiskPos );
+		m_pPool = m_pBuffer + m_iPoolUsed;
+	} else
+	{
+		Flush();
+		SeekAndWarn ( m_iFD, iPos, "CSphWriter::SeekTo" );
+
+		if ( bTruncate )
+			sphTruncate(m_iFD);
+
+		m_pPool = m_pBuffer;
+		m_iPoolUsed = 0;
+		m_iDiskPos = iPos;
+	}
+	m_iPos = iPos;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+#if USE_WINDOWS
+
+// atomic seek+read for Windows
+int sphPread ( int iFD, void * pBuf, int iBytes, SphOffset_t iOffset )
+{
+	if ( iBytes==0 )
+		return 0;
+
+	CSphIOStats * pIOStats = GetIOStats();
+	int64_t tmStart = 0;
+	if ( pIOStats )
+		tmStart = sphMicroTimer();
+
+	HANDLE hFile;
+	hFile = (HANDLE) _get_osfhandle ( iFD );
+	if ( hFile==INVALID_HANDLE_VALUE )
+		return -1;
+
+	STATIC_SIZE_ASSERT ( SphOffset_t, 8 );
+	OVERLAPPED tOverlapped = { 0 };
+	tOverlapped.Offset = (DWORD)( iOffset & I64C(0xffffffff) );
+	tOverlapped.OffsetHigh = (DWORD)( iOffset>>32 );
+
+	DWORD uRes;
+	if ( !ReadFile ( hFile, pBuf, iBytes, &uRes, &tOverlapped ) )
+	{
+		DWORD uErr = GetLastError();
+		if ( uErr==ERROR_HANDLE_EOF )
+			return 0;
+
+		errno = uErr; // FIXME! should remap from Win to POSIX
+		return -1;
+	}
+
+	if ( pIOStats )
+	{
+		pIOStats->m_iReadTime += sphMicroTimer() - tmStart;
+		pIOStats->m_iReadOps++;
+		pIOStats->m_iReadBytes += iBytes;
+	}
+
+	return uRes;
+}
+
+#else
+#if HAVE_PREAD
+
+// atomic seek+read for non-Windows systems with pread() call
+int sphPread ( int iFD, void * pBuf, int iBytes, SphOffset_t iOffset )
+{
+	CSphIOStats * pIOStats = GetIOStats();
+	if ( !pIOStats )
+		return ::pread ( iFD, pBuf, iBytes, iOffset );
+
+	int64_t tmStart = sphMicroTimer();
+	int iRes = (int) ::pread ( iFD, pBuf, iBytes, iOffset );
+	if ( pIOStats )
+	{
+		pIOStats->m_iReadTime += sphMicroTimer() - tmStart;
+		pIOStats->m_iReadOps++;
+		pIOStats->m_iReadBytes += iBytes;
+	}
+	return iRes;
+}
+
+#else
+
+// generic fallback; prone to races between seek and read
+int sphPread ( int iFD, void * pBuf, int iBytes, SphOffset_t iOffset )
+{
+	if ( sphSeek ( iFD, iOffset, SEEK_SET )==-1 )
+		return -1;
+
+	return sphReadThrottled ( iFD, pBuf, iBytes, &g_tThrottle );
+}
+
+#endif // HAVE_PREAD
+#endif // USE_WINDOWS

+ 213 - 0
src/fileio.h

@@ -0,0 +1,213 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _fileio_
+#define _fileio_
+
+#include "queryprofile.h"
+
+/// file which closes automatically when going out of scope
+class CSphAutofile : ISphNoncopyable
+{
+public:
+					CSphAutofile() = default;
+					CSphAutofile ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp=false );
+					~CSphAutofile();
+
+	int				Open ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp=false );
+	void			Close ();
+	void			SetTemporary(); ///< would be set if a shit happened and the file is not actual.
+	int				GetFD () const { return m_iFD; }
+	const char *	GetFilename () const;
+	SphOffset_t		GetSize ( SphOffset_t iMinSize, bool bCheckSizeT, CSphString & sError );
+	SphOffset_t		GetSize ();
+
+	bool			Read ( void * pBuf, int64_t iCount, CSphString & sError );
+	void			SetProgressCallback ( CSphIndexProgress * pStat );
+
+protected:
+	int			m_iFD = -1;					///< my file descriptor
+	CSphString	m_sFilename;				///< my file name
+	bool		m_bTemporary = false;		///< whether to unlink this file on Close()
+	bool		m_bWouldTemporary = false;	///< backup of the m_bTemporary
+
+	CSphIndexProgress *	m_pStat = nullptr;
+};
+
+
+/// file reader with read buffering and int decoder
+class CSphReader
+{
+public:
+	QueryProfile_c *	m_pProfile = nullptr;
+	ESphQueryState		m_eProfileState { SPH_QSTATE_IO };
+
+
+				CSphReader ( BYTE * pBuf=NULL, int iSize=0 );
+	virtual		~CSphReader ();
+
+	CSphReader & operator = ( const CSphReader & rhs );
+
+	void		SetBuffers ( int iReadBuffer, int iReadUnhinted );
+	void		SetFile ( int iFD, const char * sFilename );
+	void		SetFile ( const CSphAutofile & tFile );
+	void		Reset ();
+	void		SeekTo ( SphOffset_t iPos, int iSizeHint );
+
+	void		SkipBytes ( int iCount );
+	SphOffset_t	GetPos () const { return m_iPos+m_iBuffPos; }
+
+	void		GetBytes ( void * pData, int iSize );
+
+	int			GetByte ();
+	DWORD		GetDword ();
+	SphOffset_t	GetOffset ();
+	CSphString	GetString ();
+	int			GetLine ( char * sBuffer, int iMaxLen );
+	bool		Tag ( const char * sTag );
+
+	DWORD		UnzipInt ();
+	uint64_t	UnzipOffset ();
+
+	bool					GetErrorFlag () const		{ return m_bError; }
+	const CSphString &		GetErrorMessage () const	{ return m_sError; }
+	const CSphString &		GetFilename() const			{ return m_sFilename; }
+	int						GetBufferSize() const		{ return m_iBufSize; }
+	void					ResetError();
+
+	inline RowID_t		UnzipRowid ()	{ return UnzipInt(); }
+	inline SphWordID_t	UnzipWordid ()	{ return UnzipOffset(); }
+
+	///< zerocopy method; returns actual length present in buffer (upto iMax)
+	inline int GetBytesZerocopy ( const BYTE ** ppData, int64_t iMax )
+	{
+		if ( m_iBuffPos>=m_iBuffUsed )
+		{
+			UpdateCache();
+			if ( m_iBuffPos>=m_iBuffUsed )
+				return 0; // unexpected io failure
+		}
+
+		int iChunk = Min ( m_iBuffUsed-m_iBuffPos, iMax );
+		*ppData = m_pBuff + m_iBuffPos;
+		m_iBuffPos += iChunk;
+		return iChunk;
+	}
+
+protected:
+	int			m_iFD = -1;
+	CSphString m_sFilename;
+	int			m_iBuffUsed = 0;	///< how many bytes in buffer are valid
+
+	SphOffset_t	m_iPos = 0;			///< position in the file from witch m_pBuff starts
+	BYTE *		m_pBuff;            ///< the buffer
+	int			m_iBuffPos = 0;		///< position in the buffer. (so pos in file is m_iPos + m_iBuffPos)
+
+	virtual void		UpdateCache ();
+
+private:
+	int			m_iSizeHint = 0;	///< how much do we expect to read (>=m_iReadUnhinted)
+
+	int			m_iBufSize;
+	bool		m_bBufOwned = false;
+	int			m_iReadUnhinted;	///< how much to read if no hint provided.
+
+	bool		m_bError = false;
+	CSphString	m_sError;
+};
+
+
+class FileReader_c: public CSphReader
+{
+public:
+	explicit FileReader_c ( BYTE* pBuf = nullptr, int iSize = 0 )
+		: CSphReader ( pBuf, iSize )
+	{}
+
+	SphOffset_t GetFilesize () const;
+
+	// added for DebugCheck()
+	int GetFD () const { return m_iFD; }
+};
+
+/// scoped file reader
+class CSphAutoreader : public FileReader_c
+{
+public:
+	CSphAutoreader ( BYTE * pBuf=nullptr, int iSize=0 ) : FileReader_c ( pBuf, iSize ) {}
+	~CSphAutoreader () override { Close(); }
+
+	bool		Open ( const CSphString & sFilename, CSphString & sError );
+	void		Close ();
+};
+
+
+/// file writer with write buffering and int encoder
+class CSphWriter : ISphNoncopyable
+{
+public:
+	virtual			~CSphWriter ();
+
+	void			SetBufferSize ( int iBufferSize );	///< tune write cache size; must be called before OpenFile() or SetFile()
+
+	bool			OpenFile ( const CSphString & sName, CSphString & sError );
+	void			SetFile ( CSphAutofile & tAuto, SphOffset_t * pSharedOffset, CSphString & sError );
+	void			CloseFile ( bool bTruncate = false );	///< note: calls Flush(), ie. IsError() might get true after this call
+	void			UnlinkFile (); /// some shit happened (outside) and the file is no more actual.
+
+	void			PutByte ( BYTE uValue );
+	void			PutBytes ( const void * pData, int64_t iSize );
+	void			PutWord ( WORD uValue ) { PutBytes ( &uValue, sizeof(WORD) ); }
+	void			PutDword ( DWORD uValue ) { PutBytes ( &uValue, sizeof(DWORD) ); }
+	void			PutOffset ( SphOffset_t uValue ) { PutBytes ( &uValue, sizeof(SphOffset_t) ); }
+	void			PutString ( const char * szString );
+	void			PutString ( const CSphString & sString );
+	void			Tag ( const char * sTag );
+
+	void			SeekTo ( SphOffset_t iPos, bool bTruncate = false );
+
+	void			ZipInt ( DWORD uValue );
+	void			ZipOffset ( uint64_t uValue );
+
+	bool			IsError () const	{ return m_bError; }
+	SphOffset_t		GetPos () const		{ return m_iPos; }
+	CSphString		GetFilename() const	{ return m_sName; }
+
+	virtual void	Flush ();
+
+protected:
+	CSphString		m_sName;
+	SphOffset_t		m_iPos = -1;
+	SphOffset_t		m_iDiskPos = 0;
+
+	int				m_iFD = -1;
+	int				m_iPoolUsed = 0;
+	BYTE *			m_pBuffer = nullptr;
+	BYTE *			m_pPool = nullptr;
+	bool			m_bOwnFile = false;
+	SphOffset_t	*	m_pSharedOffset = nullptr;
+	int				m_iBufferSize = 262144;
+
+	bool			m_bError = false;
+	CSphString *	m_pError = nullptr;
+
+private:
+	void			UpdatePoolUsed();
+};
+
+
+bool SeekAndWarn ( int iFD, SphOffset_t iPos, const char * szWarnPrefix );
+
+// atomic seek+read wrapper
+int sphPread ( int iFD, void * pBuf, int iBytes, SphOffset_t iOffset );
+
+#endif // _sphinxint_

+ 14 - 2
src/fileutils.cpp

@@ -506,7 +506,7 @@ bool MkDir ( const char * szDir )
 }
 
 
-bool CopyFile ( const CSphString & sSource, const CSphString & sDest, CSphString & sError )
+bool CopyFile ( const CSphString & sSource, const CSphString & sDest, CSphString & sError, int iMode )
 {
 	const int BUFFER_SIZE = 1048576;
 	CSphFixedVector<BYTE> dBuffer(BUFFER_SIZE);
@@ -517,7 +517,7 @@ bool CopyFile ( const CSphString & sSource, const CSphString & sDest, CSphString
 		return false;
 
 	CSphAutofile tDest;
-	int iDstFD = tDest.Open ( sDest, SPH_O_NEW, sError );
+	int iDstFD = tDest.Open ( sDest, iMode, sError );
 	if ( iDstFD<0 )
 		return false;
 
@@ -706,4 +706,16 @@ const char * GetExtension ( const CSphString & sFullPath )
 		return nullptr;
 
 	return pDot+1;
+}
+
+
+void SeekAndPutOffset ( CSphWriter & tWriter, SphOffset_t tOffset, SphOffset_t tValue ) // patch a previously reserved offset slot, then restore the write position
+{
+	SphOffset_t tTotalSize = tWriter.GetPos(); // remember the current end-of-data position to restore later
+
+	// order matters here: flush buffered data first, as SeekTo() may discard the write buffer collected so far
+	tWriter.Flush(); // store collected data as SeekTo may get rid of buffer collected so far
+	tWriter.SeekTo(tOffset); 
+	tWriter.PutOffset(tValue);
+	tWriter.SeekTo(tTotalSize); // jump back so subsequent writes continue at the old end
 }

+ 7 - 3
src/fileutils.h

@@ -38,8 +38,9 @@
 	#define SPH_O_BINARY 0
 #endif
 
-#define SPH_O_READ	( O_RDONLY | SPH_O_BINARY )
-#define SPH_O_NEW	( O_CREAT | O_RDWR | O_TRUNC | SPH_O_BINARY )
+#define SPH_O_READ		( O_RDONLY | SPH_O_BINARY )
+#define SPH_O_NEW		( O_CREAT | O_RDWR | O_TRUNC | SPH_O_BINARY )
+#define SPH_O_APPEND	( O_CREAT | O_RDWR | O_APPEND | SPH_O_BINARY )
 
 class CSphIOStats
 {
@@ -122,7 +123,7 @@ bool			sphWrite ( int iFD, const Str_t& dBuf );
 
 StrVec_t		FindFiles ( const char * szPath, bool bNeedDirs=false );
 bool			MkDir ( const char * szDir );
-bool			CopyFile ( const CSphString & sSource, const CSphString & sDest, CSphString & sError );
+bool			CopyFile ( const CSphString & sSource, const CSphString & sDest, CSphString & sError, int iMode=SPH_O_NEW );
 bool			RenameFiles ( const StrVec_t & dSrc, const StrVec_t & dDst, CSphString & sError );
 bool			RenameWithRollback ( const StrVec_t & dSrc, const StrVec_t & dDst, CSphString & sError );
 
@@ -135,6 +136,9 @@ CSphString &	StripPath ( CSphString & sPath );
 CSphString		GetPathOnly ( const CSphString & sFullPath );
 const char *	GetExtension ( const CSphString & sFullPath );
 
+class CSphWriter;
+void			SeekAndPutOffset ( CSphWriter & tWriter, SphOffset_t tOffset, SphOffset_t tValue );
+
 // FIXME! unify this weird zoo of file function naming
 namespace sph
 {

+ 90 - 1
src/gtests/gtests_functions.cpp

@@ -17,6 +17,7 @@
 #include "json/cJSON.h"
 #include "threadutils.h"
 #include <cmath>
+#include "histogram.h"
 
 // Miscelaneous short functional tests: TDigest, SpanSearch,
 // stringbuilder, CJson, TaggedHash, Log2
@@ -3370,4 +3371,92 @@ TEST ( functions, partition_descending )
 	lazy_partition ( dValues, -1, COEFF );
 	pr ( dValues, 0, 99 );
 	ASSERT_TRUE ( CheckData ( dValues, COEFF ));
-}
+}
+
+static CSphString GetHist ( const Histogram_i * pHist ) // dump histogram and strip the first line (attr-name header) for comparison against refs
+{
+	StringBuilder_c tOut;
+	pHist->Dump ( tOut );
+	const char * pFull = tOut.cstr();
+
+	const char * sDel = strchr ( pFull, '\n' ); // header/body delimiter
+	int iLen = tOut.GetLength() - ( sDel - pFull ) + 1; // NOTE(review): looks off by 2 vs. chars after '\n' — verify SetBinary tolerates the extra length
+
+	CSphString sDump;
+	sDump.SetBinary ( sDel+1, iLen );
+
+	return sDump;
+}
+
+static const float g_dHistSrc[] = {0.0f,41.0f,50.0f,54.0f,60.0f,61.0f,63.0f,64.0f,65.0f,67.0f,68.0f,69.0f,71.0f,72.0f,73.0f,74.0f,75.0f,76.0f,77.0f,78.0f,79.0f,80.0f,81.0f,
+	83.0f,84.0f,96.0f,107.0f,143.0f,147.0f,148.0f,149.0f,150.0f,151.0f,152.0f,153.0f,154.0f,155.0f,156.0f,157.0f,158.0f,159.0f,160.0f,162.0f,165.0f,
+	166.0f,167.0f,168.0f,169.0f,170.0f,171.0f,175.0f,178.0f,180.0f,181.0f,182.0f,183.0f,184.0f,185.0f,186.0f,188.0f,189.0f,190.0f,192.0f,193.0f,
+	195.0f,197.0f,198.0f};
+
+struct HistCase_t // one histogram test case: how many times to feed g_dHistSrc, bin cap, and the expected dump
+{
+	int m_iLoop = 0; // number of passes over g_dHistSrc
+	int m_iSize = 0; // histogram bin limit passed to CreateHistogram
+	const char * m_sRef = nullptr; // expected GetHist() output
+};
+
+static Histogram_i * PopulateHist ( const HistCase_t & tCase ) // build a float histogram from g_dHistSrc per the test case; caller owns the result
+{
+	CSphScopedPtr<Histogram_i> pHist ( CreateHistogram ( "dyn", SPH_ATTR_FLOAT, tCase.m_iSize ) );
+
+	for ( int i=0; i<tCase.m_iLoop; i++ )
+	{
+		for ( float fVal : g_dHistSrc )
+		{
+			SphAttr_t tVal = sphF2DW ( fVal ); // floats are inserted via their DWORD bit pattern
+			pHist->Insert ( tVal );
+		}
+	}
+
+	pHist->Finalize();
+
+	return pHist.LeakPtr();
+}
+
+TEST ( functions, histogram ) // streamed histogram: dump output for varying bin caps + range-estimate check
+{
+	HistCase_t dCases[] = {
+		{1, 35, R"(values:35
+0.000,1;41.000,1;50.000,1;54.000,1;60.500,2;64.000,3;68.000,3;72.500,4;75.500,2;77.500,2;80.000,3;83.500,2;96.000,1;107.000,1;143.000,1;147.500,2;149.500,2;151.500,2;153.500,2;155.500,2;157.500,2;159.500,2;162.000,1;165.500,2;167.500,2;170.000,3;175.000,1;178.000,1;180.500,2;182.500,2;185.000,3;189.000,3;192.500,2;195.000,1;197.500,2)"},
+		{2, 35, R"(values:35
+0.000,2;41.000,2;50.000,2;54.000,2;60.500,4;64.000,6;68.000,6;72.286,7;75.200,5;77.500,4;80.000,6;83.500,4;96.000,2;107.000,2;143.000,2;147.500,4;149.500,4;151.500,4;153.500,4;155.500,4;157.500,4;159.500,4;162.000,2;165.500,4;167.500,4;170.000,6;175.000,2;178.000,2;180.500,4;182.500,4;185.000,6;189.000,6;192.500,4;195.000,2;197.500,4)"},
+		{1, 65, R"(values:65
+0.000,1;41.000,1;50.000,1;54.000,1;60.500,2;63.500,2;65.000,1;67.000,1;68.000,1;69.000,1;71.000,1;72.000,1;73.000,1;74.000,1;75.000,1;76.000,1;77.000,1;78.000,1;79.000,1;80.000,1;81.000,1;83.000,1;84.000,1;96.000,1;107.000,1;143.000,1;147.000,1;148.000,1;149.000,1;150.000,1;151.000,1;152.000,1;153.000,1;154.000,1;155.000,1;156.000,1;157.000,1;158.000,1;159.000,1;160.000,1;162.000,1;165.000,1;166.000,1;167.000,1;168.000,1;169.000,1;170.000,1;171.000,1;175.000,1;178.000,1;180.000,1;181.000,1;182.000,1;183.000,1;184.000,1;185.000,1;186.000,1;188.000,1;189.000,1;190.000,1;192.000,1;193.000,1;195.000,1;197.000,1;198.000,1)"},
+		{1, 70, R"(values:67
+0.000,1;41.000,1;50.000,1;54.000,1;60.000,1;61.000,1;63.000,1;64.000,1;65.000,1;67.000,1;68.000,1;69.000,1;71.000,1;72.000,1;73.000,1;74.000,1;75.000,1;76.000,1;77.000,1;78.000,1;79.000,1;80.000,1;81.000,1;83.000,1;84.000,1;96.000,1;107.000,1;143.000,1;147.000,1;148.000,1;149.000,1;150.000,1;151.000,1;152.000,1;153.000,1;154.000,1;155.000,1;156.000,1;157.000,1;158.000,1;159.000,1;160.000,1;162.000,1;165.000,1;166.000,1;167.000,1;168.000,1;169.000,1;170.000,1;171.000,1;175.000,1;178.000,1;180.000,1;181.000,1;182.000,1;183.000,1;184.000,1;185.000,1;186.000,1;188.000,1;189.000,1;190.000,1;192.000,1;193.000,1;195.000,1;197.000,1;198.000,1)"},
+		{20, 15, R"(values:15
+0.000,20;41.000,20;52.000,40;62.600,100;70.587,138;79.115,182;96.000,20;107.000,20;143.000,20;150.126,143;157.656,157;168.000,140;176.585,41;183.057,140;192.780,159)"}
+	};
+
+	for ( const HistCase_t & tCase : dCases )
+	{
+		CSphScopedPtr<Histogram_i> pHist ( PopulateHist ( tCase ) );
+		ASSERT_STREQ( GetHist ( pHist.Ptr() ).cstr(), tCase.m_sRef );
+	}
+
+	// estimate of merged values
+	{
+		HistCase_t tCase;
+		tCase.m_iLoop = 1;
+		tCase.m_iSize = 10;
+		CSphScopedPtr<Histogram_i> pHist ( PopulateHist ( tCase ) );
+		for ( int i=0; i<20; i++) // 20 extra values at 10.0 get merged into an early bucket
+			pHist->Insert ( sphF2DW ( 10.0f ) );
+
+		pHist->Finalize();
+
+		CSphFilterSettings tFilter;
+		tFilter.m_eType = SPH_FILTER_FLOATRANGE;
+		tFilter.m_fMinValue = 0.0f;
+		tFilter.m_fMaxValue = 10.0f;
+
+		int64_t iRes = 0;
+		pHist->EstimateRsetSize ( tFilter, iRes );
+		ASSERT_EQ( iRes, 3 );
+	}
+}

+ 6 - 6
src/gtests/gtests_json.cpp

@@ -425,12 +425,12 @@ TEST_F ( TJson, bson_ScientificDouble )
 {
 	auto tst = Bsons ( R"([1e-5, 1e5, -1e-5, -1e5, 6.022e+3, 1.4738223E-1])" );
 
-	ASSERT_FLOAT_EQ ( tst[0].Double (), 0.00001 );
-	ASSERT_FLOAT_EQ ( tst[1].Double (), 100000.0 );
-	ASSERT_FLOAT_EQ ( tst[2].Double (), -0.00001 );
-	ASSERT_FLOAT_EQ ( tst[3].Double (), -100000.0 );
-	ASSERT_FLOAT_EQ ( tst[4].Double (), 6022.0 );
-	ASSERT_FLOAT_EQ ( tst[5].Double (), 0.14738223 );
+	ASSERT_DOUBLE_EQ ( tst[0].Double (), 0.00001 );
+	ASSERT_DOUBLE_EQ ( tst[1].Double (), 100000.0 );
+	ASSERT_DOUBLE_EQ ( tst[2].Double (), -0.00001 );
+	ASSERT_DOUBLE_EQ ( tst[3].Double (), -100000.0 );
+	ASSERT_DOUBLE_EQ ( tst[4].Double (), 6022.0 );
+	ASSERT_DOUBLE_EQ ( tst[5].Double (), 0.14738223 );
 }
 
 // test bson::String

+ 7 - 3
src/gtests/gtests_rtstuff.cpp

@@ -15,6 +15,7 @@
 #include "sphinxint.h"
 #include "attribute.h"
 #include "sphinxrt.h"
+#include "sphinxsort.h"
 #include "searchdaemon.h"
 
 #include <gmock/gmock.h>
@@ -286,7 +287,8 @@ TEST_P ( RTN, WeightBoundary )
 	pIndex->SetTokenizer ( pTok->Clone ( SPH_CLONE_INDEX ) );
 	pIndex->SetDictionary ( pDict->Clone () );
 	pIndex->PostSetup ();
-	EXPECT_TRUE ( pIndex->Prealloc ( false, nullptr ) );
+	StrVec_t dWarnings;
+	EXPECT_TRUE ( pIndex->Prealloc ( false, nullptr, dWarnings ) );
 
 	CSphVector<int64_t> dMvas;
 	CSphString sFilter;
@@ -387,7 +389,8 @@ TEST_F ( RT, RankerFactors )
 	pIndex->SetTokenizer ( pTok ); // index will own this pair from now on
 	pIndex->SetDictionary ( sphCreateDictionaryCRC ( tDictSettings, NULL, pTok, "rt", false, 32, nullptr, sError ) );
 	pIndex->PostSetup ();
-	Verify ( pIndex->Prealloc ( false, nullptr ) );
+	StrVec_t dWarnings;
+	Verify ( pIndex->Prealloc ( false, nullptr, dWarnings ) );
 
 	CSphString sFilter;
 	CSphVector<int64_t> dMvas;
@@ -566,7 +569,8 @@ TEST_F ( RT, SendVsMerge )
 	pIndex->SetTokenizer ( pTok ); // index will own this pair from now on
 	pIndex->SetDictionary ( pDict );
 	pIndex->PostSetup ();
-	ASSERT_TRUE ( pIndex->Prealloc ( false, nullptr ) );
+	StrVec_t dWarnings;
+	ASSERT_TRUE ( pIndex->Prealloc ( false, nullptr, dWarnings ) );
 
 	CSphQuery tQuery;
 	AggrResult_t tResult;

+ 1 - 1
src/gtests/gtests_tokenizer.cpp

@@ -812,7 +812,7 @@ public:
 
 	int					Build ( const CSphVector<CSphSource*> & , int , int ) override { return 0; }
 	bool				Merge ( CSphIndex * , const VecTraits_T<CSphFilterSettings> &, bool ) override { return false; }
-	bool				Prealloc ( bool, FilenameBuilder_i * ) override { return false; }
+	bool				Prealloc ( bool, FilenameBuilder_i *, StrVec_t & ) override { return false; }
 	void				Dealloc () override {}
 	void				Preread () override {}
 	void				SetBase ( const char * ) override {}

+ 901 - 0
src/histogram.cpp

@@ -0,0 +1,901 @@
+//
+// Copyright (c) 2018-2020, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "histogram.h"
+
+#include "attribute.h"
+#include "sphinxint.h"
+#include <math.h>
+
+template <typename T>
+struct HSBucket_T // one histogram bucket: a centroid and the count of values merged into it
+{
+    T	m_tCentroid;
+    int m_iCount;
+
+	HSBucket_T Merge ( const HSBucket_T & tB ) // merge two buckets: centroid = count-weighted mean, counts summed
+	{
+		T tDeltaCentroid = tB.m_tCentroid - m_tCentroid;
+
+		HSBucket_T<T> tVal;
+		tVal.m_tCentroid = m_tCentroid + tDeltaCentroid * tB.m_iCount / ( m_iCount + tB.m_iCount );
+		tVal.m_iCount = m_iCount + tB.m_iCount;
+		return tVal;
+	}
+
+	bool IsCenterEq ( const HSBucket_T & tB ) const // exact centroid equality (float version is specialized below)
+	{
+		return ( m_tCentroid==tB.m_tCentroid );
+	}
+};
+
+template<> bool HSBucket_T<float>::IsCenterEq ( const HSBucket_T & tB ) const // float: compare centroids with epsilon tolerance
+{
+	return ( fabs ( m_tCentroid - tB.m_tCentroid )<=FLT_EPSILON );
+}
+
+// comparison operators so FindSpan() can binary-search a centroid-sorted bucket array by raw value
+
+template <typename T>
+bool operator < ( const HSBucket_T<T> & a, T b )
+{
+	return ( a.m_tCentroid<b );
+}
+
+template <typename T>
+bool operator < ( T a, const HSBucket_T<T> & b )
+{
+	return ( a<b.m_tCentroid);
+}
+
+template <typename T>
+bool operator == ( const HSBucket_T<T> & a, T b )
+{
+	return ( a.m_tCentroid==b );
+}
+
+template<> bool operator == ( const HSBucket_T<float> & a, float b ) // float: epsilon-tolerant equality
+{
+	return ( fabs ( a.m_tCentroid - b )<=FLT_EPSILON );
+}
+
+
+template <typename T>
+struct HSQueueItem_T // priority-queue entry used by Aggregate(): gap to the next bucket + owning list-node id
+{
+	T	m_tDelta { 0 };
+	int m_iId { 0 };
+
+	HSQueueItem_T() = default;
+	HSQueueItem_T ( T tDelta, int iId )
+		: m_tDelta ( tDelta )
+		, m_iId ( iId )
+	{}
+
+	static inline bool IsLess ( const HSQueueItem_T & tA, const HSQueueItem_T & tB ) // smallest gap first; id breaks ties for determinism
+	{
+		if ( tA.m_tDelta==tB.m_tDelta )
+			return ( tA.m_iId<tB.m_iId );
+
+		return ( tA.m_tDelta<tB.m_tDelta );
+	}
+	};
+
+template<> bool HSQueueItem_T<float>::IsLess ( const HSQueueItem_T & tA, const HSQueueItem_T & tB ) // float: epsilon-tolerant tie detection
+{
+	if ( fabs ( tA.m_tDelta-tB.m_tDelta )<=FLT_EPSILON )
+		return ( tA.m_iId<tB.m_iId );
+
+	return ( tA.m_tDelta<tB.m_tDelta );
+}
+
+template <typename T>
+struct HSListItem_T // doubly-linked-list node over the bucket array; supports O(1) unlink while merging
+{
+	// list items
+	HSListItem_T *	m_pPrev { nullptr };
+	HSListItem_T *	m_pNext { nullptr };
+	bool			m_bAlive { true }; // false once this node was merged into its left neighbor
+	HSBucket_T<T> *	m_pItem { nullptr };
+	int				m_iId { 0 };
+
+	static T Delta ( const HSListItem_T & tCur ) // centroid gap between this bucket and the next one
+	{
+		return ( tCur.m_pNext->m_pItem->m_tCentroid - tCur.m_pItem->m_tCentroid );
+	}
+
+	static void Delete ( HSListItem_T * pNode ) // unlink node from the list and mark it dead (no memory is freed)
+	{
+		if ( !pNode )
+			return;
+
+		if ( pNode->m_pNext )
+			pNode->m_pNext->m_pPrev = pNode->m_pPrev;
+		if ( pNode->m_pPrev )
+			pNode->m_pPrev->m_pNext = pNode->m_pNext;
+
+		pNode->m_pNext = nullptr;
+		pNode->m_pPrev = nullptr;
+
+		pNode->m_bAlive = false;
+	}
+};
+
+struct HSBucketTrait_t // lookup result: bucket index + (possibly interpolated) counter
+{
+	int m_iBucket = 0;
+	int m_iCount = 0;
+
+	HSBucketTrait_t() = default;
+	HSBucketTrait_t ( int iBucket, int iCount )
+		: m_iBucket ( iBucket )
+		, m_iCount ( iCount )
+	{}
+};
+
+//////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+class HistogramStreamed_T : public Histogram_i // streaming centroid-merging histogram; buckets kept sorted by centroid
+{
+public:
+						HistogramStreamed_T ( const CSphString & sAttr, int iBins );
+
+    void				Insert ( SphAttr_t tAttrVal ) override;
+    void				Finalize() override;
+	bool				Save ( CSphWriter & tWriter ) const override;
+	bool				Load ( CSphReader & tReader, CSphString & sError ) override;
+	DWORD				GetNumValues() const override { return m_uValues; }
+	HistogramType_e		GetType() const override { return TYPE; }
+	const CSphString &	GetAttrName() const override { return m_sAttr; }
+	void				Delete ( SphAttr_t tAttr ) override;
+	void				UpdateCounter ( SphAttr_t tAttr ) override;
+	bool				IsOutdated() const override;
+	void				Dump ( StringBuilder_c & tOut ) const override;
+
+	bool				EstimateRsetSize ( const CSphFilterSettings & tFilter, int64_t & iEstimate ) const override;
+
+private:
+	static const HistogramType_e TYPE;
+	static const T		MIN_BY_TYPE;
+	static const T		MAX_BY_TYPE;
+	static const int	m_iKbufferFactor = 2; // working buffer holds iBins*factor entries; compacted back to iBins on overflow
+	static const DWORD	VERSION = 3; // on-disk format version (see Save/Load)
+
+	CSphString			m_sAttr;
+	T					m_tMinValue; // min/max centroids captured at Finalize/Load time
+	T					m_tMaxValue;
+
+	int					m_iMaxBins = 0;
+	int					m_iSize = 0; // currently used entries in m_dBuckets
+	DWORD				m_uValues = 0; // total values represented by the histogram
+	DWORD				m_uOutdated = 0; // values updated outside the captured [min,max] range
+	CSphFixedVector<HSBucket_T<T>> m_dBuckets { 0 };
+
+	bool				m_bUpdateMode = false; // set after Load(): counter updates only, no new inserts
+
+	DWORD				GetLength() const { return Min ( m_iSize, m_iMaxBins ); }
+	void				DumpValue ( const HSBucket_T < T > & tVal, StringBuilder_c & tBuf ) const;
+    void				Push ( T tValue, int iCount );
+	void				Aggregate ( int iBins );
+	int					GetBucket ( T tValue, bool bCounterLess ) const;
+	int					LerpCounter ( int iBucket, T tVal ) const;
+	HSBucketTrait_t		GetBucket ( T tValue ) const;
+
+	DWORD				EstimateValues ( const SphAttr_t * pValues, int nValues ) const;
+	DWORD				EstimateRangeFilter ( bool bExclude, bool bHasEqualMin, bool bHasEqualMax, bool bOpenLeft, bool bOpenRight, T tMinValue, T tMaxValue ) const;
+	T					Saturate ( T tVal ) const;
+	DWORD				EstimateInterval ( T tMin, T tMax, bool bHasEqualMin, bool bHasEqualMax, bool bOpenLeft, bool bOpenRight ) const;
+	bool				IsOutdated ( SphAttr_t tAttr ) const;
+	void				UpdateMinMax();
+};
+
+
+template<typename T>
+HistogramStreamed_T<T>::HistogramStreamed_T ( const CSphString & sAttr, int iBins ) // working buffer is iBins*m_iKbufferFactor entries
+	: m_sAttr ( sAttr )
+	, m_iMaxBins ( iBins )
+	, m_dBuckets ( iBins * m_iKbufferFactor )
+{
+	UpdateMinMax();
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::Insert ( SphAttr_t tAttrVal ) // index-time insert only; forbidden after Load() (m_bUpdateMode)
+{
+	assert ( !m_bUpdateMode );
+	Push ( ConvertType<T>(tAttrVal), 1 );
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::Finalize() // shrink to m_iMaxBins, dedupe equal centroids, refresh min/max
+{
+	Aggregate(m_iMaxBins);
+
+	// collapse adjacent buckets with (nearly) equal centroids - like uniq, but merging the counters
+	if ( m_iSize )
+	{
+		int iSrc = 1;
+		int iDst = 1;
+		while ( iSrc<m_iSize )
+		{
+			if ( m_dBuckets[iDst-1].IsCenterEq ( m_dBuckets[iSrc] ) )
+			{
+				m_dBuckets[iDst-1] = m_dBuckets[iDst-1].Merge ( m_dBuckets[iSrc] );
+				iSrc++;
+			} else
+			{
+				m_dBuckets[iDst] = m_dBuckets[iSrc];
+				iDst++;
+				iSrc++;
+			}
+		}
+		m_iSize = iDst;
+	}
+
+	UpdateMinMax();
+}
+
+template<typename T>
+bool HistogramStreamed_T<T>::Save ( CSphWriter & tWriter ) const // serialize in VERSION format: min/max, sizes, counters, raw bucket array
+{
+	tWriter.PutDword ( VERSION );
+	tWriter.PutBytes ( &m_tMinValue, sizeof(T) );
+	tWriter.PutBytes ( &m_tMaxValue, sizeof(T) );
+	tWriter.PutDword ( m_iMaxBins );
+	tWriter.PutDword ( m_iSize );
+	tWriter.PutDword ( m_uValues );
+	tWriter.PutDword ( m_uOutdated );
+	tWriter.PutBytes ( m_dBuckets.Begin(), m_iSize*sizeof(m_dBuckets[0]) );
+
+	return true;
+}
+
+template<typename T>
+bool HistogramStreamed_T<T>::Load ( CSphReader & tReader, CSphString & sError ) // deserialize; rejects newer-than-code and pre-streamed (v<=1) formats
+{
+	DWORD uVersion = tReader.GetDword();
+	if ( uVersion > VERSION )
+	{
+		sError.SetSprintf ( "index histogram version (%u) greater than code histogram version (%u)", uVersion, VERSION );
+		return false;
+	}
+
+	if ( uVersion<=1 )
+	{
+		sError.SetSprintf ( "non-streamed histograms are no longer supported" );
+		return false;
+	}
+
+	tReader.GetBytes ( &m_tMinValue, sizeof(T) );
+	tReader.GetBytes ( &m_tMaxValue, sizeof(T) );
+	m_iMaxBins = tReader.GetDword();
+	m_iSize = tReader.GetDword();
+	m_uValues = tReader.GetDword();
+	m_dBuckets.Reset ( m_iSize );
+	if ( uVersion>=3 ) // m_uOutdated was added in v3; older files keep the default 0
+		m_uOutdated = tReader.GetDword();
+	tReader.GetBytes ( m_dBuckets.Begin(), m_iSize*sizeof(m_dBuckets[0]) );
+
+	if ( tReader.GetErrorFlag() )
+	{
+		sError = tReader.GetErrorMessage();
+		return false;
+	}
+
+	// cannot insert new values after load - only update existing counters
+	m_bUpdateMode = true;
+
+	return true;
+}
+
+template<typename T>
+bool HistogramStreamed_T<T>::IsOutdated ( SphAttr_t tAttr ) const // value lies outside the range captured at build time
+{
+	T tVal = ConvertType<T> ( tAttr );
+	return ( tVal<m_tMinValue || m_tMaxValue<tVal );
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::Delete ( SphAttr_t tAttr ) // run-time delete: decrement a matching bucket counter (bucket set stays fixed)
+{
+	// select bucket with larger counter to decrease
+	int iBucket = GetBucket ( tAttr, false );
+	assert ( iBucket>=0 && iBucket<m_iSize && m_uValues>0 );
+	if ( m_dBuckets[iBucket].m_iCount )
+		m_dBuckets[iBucket].m_iCount--;
+
+	m_uValues--;
+	if ( IsOutdated ( tAttr ) )
+		m_uOutdated--;
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::UpdateCounter ( SphAttr_t tAttr ) // run-time insert: bump a matching bucket counter (no new buckets are created)
+{
+	// select bucket with smaller counter to update
+	int iBucket = GetBucket ( tAttr, true );
+	assert ( iBucket>=0 && iBucket<m_iSize );
+	m_dBuckets[iBucket].m_iCount++;
+	m_uValues++;
+	if ( IsOutdated ( tAttr ) )
+		m_uOutdated++;
+}
+
+template<typename T>
+bool HistogramStreamed_T<T>::IsOutdated() const // histogram no longer trustworthy for estimation
+{
+	if ( !m_uValues )
+		return true;
+	
+	// outdated values should be less than 30% for histogram to estimate properly
+	const float	MAX_OUT_OF_RANGE = 0.3f;
+	return ( ( (float)m_uOutdated / (float)m_uValues )>=MAX_OUT_OF_RANGE );
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::Dump ( StringBuilder_c & tOut ) const // debug dump: "<attr> hist-streamed\nvalues:<n>\n<centroid,count;...>"
+{
+	StringBuilder_c tBuf ( ";" );
+	for ( int i=0; i<m_iSize; i++ )
+		DumpValue ( m_dBuckets[i], tBuf );
+
+	tOut.Appendf ( "%s hist-streamed\nvalues:%d\n%s", m_sAttr.cstr(), m_iSize, tBuf.cstr() );
+}
+
+
+template<typename T>
+void HistogramStreamed_T<T>::Push ( T tValue, int iCount ) // append a raw bucket; compact to m_iMaxBins when the k-buffer fills up
+{
+	m_dBuckets[m_iSize] = { tValue, iCount };
+	m_iSize++;
+	m_uValues++;
+
+	if ( m_iSize>=m_dBuckets.GetLength() )
+		Aggregate(m_iMaxBins);
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::UpdateMinMax() // first/last centroid of the sorted bucket array; inverted sentinels when empty
+{
+	if ( m_iSize )
+	{
+		m_tMinValue = m_dBuckets[0].m_tCentroid;
+		m_tMaxValue = m_dBuckets[m_iSize-1].m_tCentroid;
+	} else
+	{
+		m_tMinValue = MAX_BY_TYPE;
+		m_tMaxValue = MIN_BY_TYPE;
+	}
+}
+
+template<typename T>
+void HistogramStreamed_T<T>::Aggregate ( int iBins ) // sort buckets by centroid, then repeatedly merge the closest pair until <=iBins remain
+{
+	// order by centers
+	if ( m_iSize )
+		m_dBuckets.Sort ( bind ( &HSBucket_T<T>::m_tCentroid ), 0, m_iSize-1 );
+
+	int iSize = m_iSize;
+	if ( m_iSize<=iBins )
+		return;
+
+	// linked list over the buckets, so merged neighbors can be unlinked in O(1)
+	using ListItem_t = HSListItem_T < T >;
+	CSphFixedVector<ListItem_t> dList ( m_iSize );
+	for (int i = 0; i<m_iSize; ++i)
+	{
+		dList[i].m_pPrev = dList.Begin() + i - 1;
+		dList[i].m_pNext = dList.Begin() + i + 1;
+		dList[i].m_iId = i;
+		dList[i].m_pItem = m_dBuckets.Begin() + i;
+	}
+	dList[0].m_pPrev = nullptr;
+	dList[m_iSize-1].m_pNext = nullptr;
+
+	using QItem_t = HSQueueItem_T<T>;
+	CSphQueue<QItem_t, QItem_t> tQueue ( 2 * m_iSize - iBins );
+	// do not add the last item as its delta (from tail back to head) would be wrong
+	for (int i = 0; i<m_iSize-1; ++i)
+	{
+		const ListItem_t & tItem = dList[i];
+		Verify ( tQueue.Push ( QItem_t ( ListItem_t::Delta ( tItem ), tItem.m_iId ) ) );
+	}
+
+	while ( iSize>iBins && tQueue.GetLength() )
+	{
+		QItem_t tMin = tQueue.Root();
+		tQueue.Pop();
+		ListItem_t & tItem = dList[tMin.m_iId];
+
+		// reject stale queue entries: node or its neighbor already merged, or the queued delta no longer matches the actual one
+		if ( !tItem.m_bAlive || !tItem.m_pNext || !tItem.m_pNext->m_bAlive || tMin.m_tDelta<ListItem_t::Delta ( tItem ) )
+			continue;
+
+		*tItem.m_pItem = tItem.m_pItem->Merge ( *tItem.m_pNext->m_pItem );
+		ListItem_t::Delete ( tItem.m_pNext );
+
+		// check and add current -> next due changed delta from current, could be dupe and will be rejected above
+		if ( tItem.m_pNext && tItem.m_pNext->m_bAlive )
+			Verify ( tQueue.Push ( QItem_t ( ListItem_t::Delta ( tItem ), tMin.m_iId ) ) );
+
+		// check and add prev -> current due changed delta to current, could be dupe and will be rejected above
+		if ( tItem.m_pPrev && tItem.m_pPrev->m_bAlive )
+			Verify ( tQueue.Push ( QItem_t ( ListItem_t::Delta ( *tItem.m_pPrev ), tItem.m_pPrev->m_iId ) ) );
+
+		iSize--;
+	}
+
+	// compact the bucket array: keep only buckets whose list nodes survived the merging
+	int iSrc = 0;
+	int iDst = 0;
+	while ( iSrc<m_iSize )
+	{
+		if ( dList[iSrc].m_bAlive )
+		{
+			m_dBuckets[iDst] = m_dBuckets[iSrc];
+			iDst++;
+		}
+		iSrc++;
+	}
+	m_iSize = iSize;
+}
+
+template<typename T>
+int HistogramStreamed_T<T>::GetBucket ( T tValue, bool bCounterLess ) const // pick the bucket to mutate for a value; ties near the midpoint use counter size
+{
+	T tVal = ConvertType<T> ( tValue );
+	if ( tVal<m_tMinValue )
+		return 0;
+	if ( tVal>m_tMaxValue )
+		return ( m_iSize - 1 );
+
+	// m_dBuckets is larger than m_iSize
+	int iBestBucket = FindSpan ( m_dBuckets.Slice ( 0, m_iSize ), tVal );
+
+	assert ( iBestBucket>=0 && iBestBucket<m_iSize );
+
+	if ( iBestBucket<m_iSize-1 )
+	{
+		const HSBucket_T<T> & tBucketL = m_dBuckets[iBestBucket];
+		const HSBucket_T<T> & tBucketR = m_dBuckets[iBestBucket+1];
+		T tDistL = tVal - tBucketL.m_tCentroid;
+		T tDist = tBucketR.m_tCentroid - tBucketL.m_tCentroid;
+
+		if ( tDistL>( tDist/3 ) && tDistL<( tDist*2/3 ) ) // center case (middle third of the gap) - select bucket with smaller / larger counter
+		{
+			if ( bCounterLess )
+				iBestBucket = ( tBucketL.m_iCount<tBucketR.m_iCount ? iBestBucket : iBestBucket+1 );
+			else
+				iBestBucket = ( tBucketL.m_iCount>tBucketR.m_iCount ? iBestBucket : iBestBucket+1 );
+
+		} else // select closest bucket
+			iBestBucket = ( tDistL<( tDist-tDistL ) ? iBestBucket : iBestBucket+1 );
+	}
+
+	return iBestBucket;
+
+}
+
+template<typename T>
+int HistogramStreamed_T<T>::LerpCounter ( int iBucket, T tVal ) const // linearly interpolate a counter between two neighbor buckets
+{
+	const HSBucket_T<T> & tBucketL = m_dBuckets[iBucket];
+	const HSBucket_T<T> & tBucketR = m_dBuckets[iBucket+1];
+	assert ( tBucketL.m_tCentroid<=tVal && tVal<=tBucketR.m_tCentroid );
+
+	T tDistL = tVal - tBucketL.m_tCentroid;
+	T tDist = tBucketR.m_tCentroid - tBucketL.m_tCentroid;
+
+	float fLerp = (float)tDistL / (float)tDist;
+	assert ( fLerp>=0.0f && fLerp<=1.0f );
+
+	int iCount = fLerp * tBucketL.m_iCount + ( 1.0f - fLerp ) * tBucketR.m_iCount; // NOTE(review): left bucket is weighted by fLerp (grows toward the right) - confirm the weights are intended this way round
+
+	return iCount;
+}
+
+template<typename T>
+HSBucketTrait_t HistogramStreamed_T<T>::GetBucket ( T tValue ) const // locate bucket for a value plus an interpolated counter for estimation
+{
+	T tVal = ConvertType<T> ( tValue );
+	if ( tVal<m_tMinValue )
+		return HSBucketTrait_t ( 0, m_dBuckets[0].m_iCount );
+
+	if ( tVal>m_tMaxValue )
+		return HSBucketTrait_t (  m_iSize - 1, m_dBuckets[m_iSize-1].m_iCount );
+
+	int iItem = FindSpan ( m_dBuckets.Slice ( 0, m_iSize ), tVal );
+	int iCount = 0;
+
+	assert ( iItem>=0 && iItem<m_iSize );
+
+	if ( iItem==m_iSize-1 ) // last bucket has no right neighbor to interpolate against
+		iCount = m_dBuckets[iItem].m_iCount;
+	else
+		iCount = LerpCounter ( iItem, tVal );
+
+	return HSBucketTrait_t ( iItem, iCount );
+}
+
+template<typename T>
+bool HistogramStreamed_T<T>::EstimateRsetSize ( const CSphFilterSettings & tFilter, int64_t & iEstimate ) const // estimate result-set size for a filter; false if the filter type is unsupported
+{
+	if ( !m_iSize )
+		return false;
+
+	iEstimate = GetNumValues(); // pessimistic default: everything matches
+
+	CommonFilterSettings_t tFixedSettings = tFilter;
+	if ( TYPE==HISTOGRAM_STREAMED_FLOAT ) // int-style range filters over float attrs need value conversion
+		FixupFilterSettings ( tFilter, SPH_ATTR_FLOAT, tFixedSettings );
+
+	switch ( tFixedSettings.m_eType )
+	{
+	case SPH_FILTER_VALUES:
+		assert ( TYPE==HISTOGRAM_STREAMED_UINT32 || TYPE==HISTOGRAM_STREAMED_INT64 );
+
+		if ( tFilter.m_bExclude ) // NOT IN () estimation is not supported
+			return false;
+
+		iEstimate = EstimateValues ( tFilter.GetValueArray(), tFilter.GetNumValues() );
+		return true;
+
+	case SPH_FILTER_RANGE:
+		assert ( TYPE==HISTOGRAM_STREAMED_UINT32 || TYPE==HISTOGRAM_STREAMED_INT64 );
+		iEstimate = EstimateRangeFilter ( tFilter.m_bExclude, tFilter.m_bHasEqualMin, tFilter.m_bHasEqualMax, tFilter.m_bOpenLeft, tFilter.m_bOpenRight, (T)tFixedSettings.m_iMinValue, (T)tFixedSettings.m_iMaxValue );
+		return true;
+
+	case SPH_FILTER_FLOATRANGE:
+		assert ( TYPE==HISTOGRAM_STREAMED_FLOAT );
+		iEstimate = EstimateRangeFilter ( tFilter.m_bExclude, tFilter.m_bHasEqualMin, tFilter.m_bHasEqualMax, tFilter.m_bOpenLeft, tFilter.m_bOpenRight, tFixedSettings.m_fMinValue, tFixedSettings.m_fMaxValue );
+		return true;
+
+	default:
+		break;
+	}
+
+	return false;
+}
+
+template<typename T>
+DWORD HistogramStreamed_T<T>::EstimateValues ( const SphAttr_t * pValues, int nValues ) const // estimate IN() filter: sum bucket counters, counting each consecutive bucket hit once
+{
+	DWORD uTotal = 0;
+	int iPrevBucket = INT_MIN;
+	for ( int i = 0; i < nValues; i++ )
+	{
+		HSBucketTrait_t tItem = GetBucket ( pValues[i] );
+		if ( tItem.m_iBucket!=iPrevBucket ) // consecutive values mapping to the same bucket add it only once
+		{
+			uTotal += tItem.m_iCount;
+			iPrevBucket = tItem.m_iBucket;
+		}
+	}
+
+	return uTotal;
+}
+
+template<typename T>
+DWORD HistogramStreamed_T<T>::EstimateRangeFilter ( bool bExclude, bool bHasEqualMin, bool bHasEqualMax, bool bOpenLeft, bool bOpenRight, T tMinValue, T tMaxValue ) const // range estimate; exclude is modeled as the complement interval(s)
+{
+	if ( !bExclude )
+		return EstimateInterval ( tMinValue, tMaxValue, bHasEqualMin, bHasEqualMax, bOpenLeft, bOpenRight );
+
+	assert ( !bOpenLeft || !bOpenRight ); // excluding an all-open range would match nothing
+
+	DWORD uEstimate = 0;
+	if ( bOpenRight ) // NOT (>= min): estimate (-inf, min)
+		uEstimate = EstimateInterval ( (T)0, tMinValue, false, !bHasEqualMin, true, false );
+	else if ( bOpenLeft ) // NOT (<= max): estimate (max, +inf)
+		uEstimate = EstimateInterval ( tMaxValue, (T)0, !bHasEqualMax, false, false, true );
+	else // NOT [min,max]: both tails
+	{
+		uEstimate = EstimateInterval ( (T)0, tMinValue, false, !bHasEqualMin, true, false );
+		uEstimate += EstimateInterval ( tMaxValue, (T)0, !bHasEqualMax, false, false, true );
+	}
+
+	return uEstimate;
+}
+
+template<typename T>
+T HistogramStreamed_T<T>::Saturate ( T tVal ) const // clamp to the histogram's captured [min,max] range
+{
+	if ( tVal<m_tMinValue )
+		return m_tMinValue;
+	if ( tVal>m_tMaxValue )
+		return m_tMaxValue;
+
+	return tVal;
+}
+
+template<typename T>
+DWORD HistogramStreamed_T<T>::EstimateInterval ( T tMin, T tMax, bool bHasEqualMin, bool bHasEqualMax, bool bOpenLeft, bool bOpenRight ) const // estimate matches in one (possibly open-ended) interval
+{
+	if ( TYPE==HISTOGRAM_STREAMED_UINT32 || TYPE==HISTOGRAM_STREAMED_INT64 ) // integers: turn strict bounds into inclusive ones
+	{
+		if ( !bOpenLeft && !bHasEqualMin && tMin < MAX_BY_TYPE )
+			tMin++;
+
+		if ( !bOpenRight && !bHasEqualMax && tMax > MIN_BY_TYPE )
+			tMax--;
+	}
+
+	tMin = Saturate ( tMin );
+	tMax = Saturate ( tMax );
+
+	DWORD uTotal = 0;
+
+	// open left means to process all buckets from start
+	int iStartBucket = 0;
+	if ( !bOpenLeft )
+		iStartBucket = FindSpan ( m_dBuckets.Slice ( 0, m_iSize ), tMin );
+
+	int iChecked = 0;
+	for ( int iBucket=iStartBucket+1; iBucket<m_iSize; iBucket++ )
+	{
+		const HSBucket_T<T> & tBucket = m_dBuckets[iBucket];
+		// open right means to process all buckets till end
+		if ( !bOpenRight && tBucket.m_tCentroid>tMax )
+			break;
+
+		uTotal += tBucket.m_iCount;
+		iChecked++;
+	}
+
+	if ( !iChecked ) // interval inside single bucket
+	{
+		uTotal = m_dBuckets[iStartBucket].m_iCount;
+		if ( iStartBucket+1<m_iSize )
+		{
+			DWORD uMinCount = 0;
+			DWORD uMaxCount = 0;
+			if ( m_dBuckets[iStartBucket].m_tCentroid<tMin && tMin<m_dBuckets[iStartBucket+1].m_tCentroid )
+				uMinCount = LerpCounter ( iStartBucket, tMin );
+			if ( m_dBuckets[iStartBucket].m_tCentroid<tMax && tMax<m_dBuckets[iStartBucket+1].m_tCentroid )
+				uMaxCount = LerpCounter ( iStartBucket, tMax );
+			if ( uMinCount || uMaxCount ) // refine with interpolated counters when the bounds fall inside the bucket gap
+				uTotal = Max ( uMinCount, uMaxCount );
+		}
+	} else // count head bucket interval
+	{
+		if ( bOpenLeft )
+			uTotal += m_dBuckets[iStartBucket].m_iCount;
+		else
+		{
+			int iMinCount = LerpCounter ( iStartBucket, tMin );
+			// subtract the coarse half-bucket estimate for the tMin bucket and add the more precise interpolated counter
+			uTotal = uTotal - m_dBuckets[iStartBucket+1].m_iCount / 2 + iMinCount;
+		}
+	}
+
+	return uTotal;
+}
+
+template<> const HistogramType_e HistogramStreamed_T<DWORD>::TYPE = HISTOGRAM_STREAMED_UINT32;
+template<> const DWORD HistogramStreamed_T<DWORD>::MIN_BY_TYPE = 0;
+template<> const DWORD HistogramStreamed_T<DWORD>::MAX_BY_TYPE = UINT32_MAX;
+
+template<> const HistogramType_e HistogramStreamed_T<int64_t>::TYPE = HISTOGRAM_STREAMED_INT64;
+template<> const int64_t HistogramStreamed_T<int64_t>::MIN_BY_TYPE = 0; // NOTE(review): 0 rather than INT64_MIN - confirm negative bigint values are intentionally excluded
+template<> const int64_t HistogramStreamed_T<int64_t>::MAX_BY_TYPE = INT64_MAX;
+
+template<> const HistogramType_e HistogramStreamed_T<float>::TYPE = HISTOGRAM_STREAMED_FLOAT;
+template<> const float HistogramStreamed_T<float>::MIN_BY_TYPE = FLT_MIN; // NOTE(review): FLT_MIN is the smallest positive float, not the most negative - verify this sentinel is intended
+template<> const float HistogramStreamed_T<float>::MAX_BY_TYPE = FLT_MAX;
+
+template<> void HistogramStreamed_T<DWORD>::DumpValue ( const HSBucket_T < DWORD > & tVal, StringBuilder_c & tBuf ) const // "centroid,count" per bucket
+{
+	tBuf.Sprintf ( "%u,%d", tVal.m_tCentroid, tVal.m_iCount );
+}
+
+template<> void HistogramStreamed_T<int64_t>::DumpValue ( const HSBucket_T < int64_t > & tVal, StringBuilder_c & tBuf ) const
+{
+	tBuf.Sprintf ( INT64_FMT ",%d", tVal.m_tCentroid, tVal.m_iCount );
+}
+
+template<> void HistogramStreamed_T<float>::DumpValue ( const HSBucket_T < float > & tVal, StringBuilder_c & tBuf ) const
+{
+	tBuf.Sprintf ( "%.3f,%d", tVal.m_tCentroid, tVal.m_iCount );
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+static Histogram_i * CreateHistogram ( const CSphString & sAttr, HistogramType_e eType, int iSize ); // forward decl; defined below
+
+HistogramContainer_c::~HistogramContainer_c()
+{
+	Reset();
+}
+
+
+void HistogramContainer_c::Reset() // delete all owned histograms and clear the hash
+{
+	m_dHistogramHash.IterateStart();
+	while ( m_dHistogramHash.IterateNext() )
+		SafeDelete ( m_dHistogramHash.IterateGet() );
+
+	m_dHistogramHash.Reset();
+}
+
+
+bool HistogramContainer_c::Save ( const CSphString & sFile, CSphString & sError ) // serialize all histograms: count, then per-entry attr name, type, payload
+{
+	CSphWriter tWriter;
+	if ( !tWriter.OpenFile ( sFile, sError ) )
+		return false;
+
+	tWriter.PutDword ( m_dHistogramHash.GetLength() );
+
+	m_dHistogramHash.IterateStart();
+	while ( m_dHistogramHash.IterateNext() )
+	{
+		Histogram_i * pHistogram = m_dHistogramHash.IterateGet();
+		assert ( pHistogram );
+		pHistogram->Finalize(); // compact before writing
+		tWriter.PutString ( pHistogram->GetAttrName() );
+		tWriter.PutDword ( pHistogram->GetType() );
+
+		if ( !pHistogram->Save ( tWriter ) )
+		{
+			sError.SetSprintf ( "error saving histograms to %s", sFile.cstr() );
+			return false;
+		}
+	}
+
+	tWriter.CloseFile();
+	if ( tWriter.IsError() )
+	{
+		sError.SetSprintf ( "error saving histograms to %s", sFile.cstr() );
+		return false;
+	}
+
+	return true;
+}
+
+
+bool HistogramContainer_c::Load ( const CSphString & sFile, CSphString & sError ) // inverse of Save(); replaces any current content
+{
+	Reset();
+
+	CSphAutoreader tReader;
+	if ( !tReader.Open ( sFile, sError ) )
+		return false;
+
+	int nHistograms = tReader.GetDword();
+	for ( int i = 0; i < nHistograms; i++ )
+	{
+		CSphString sAttr = tReader.GetString();
+		HistogramType_e eType = (HistogramType_e)tReader.GetDword();
+		CSphScopedPtr<Histogram_i> pHistogram ( CreateHistogram ( sAttr, eType, 0 ) ); // 0 = default bin count
+		if ( !pHistogram.Ptr() )
+		{
+			sError.SetSprintf ( "error loading histograms from %s", sFile.cstr() );
+			return false;
+		}
+
+		if ( !pHistogram->Load ( tReader, sError ) )
+			return false;
+
+		if ( !m_dHistogramHash.Add ( pHistogram.Ptr(), sAttr ) )
+		{
+			sError.SetSprintf ( "duplicate histograms found in %s", sFile.cstr() );
+			return false;
+		}
+
+		pHistogram.LeakPtr(); // ownership transferred to the hash
+	}
+
+	if ( tReader.GetErrorFlag() )
+	{
+		sError = tReader.GetErrorMessage();
+		return false;
+	}
+
+	return true;
+}
+
+
+bool HistogramContainer_c::Add ( Histogram_i * pHistogram ) // takes ownership; false on duplicate attr name
+{
+	assert ( pHistogram );
+	return m_dHistogramHash.Add ( pHistogram, pHistogram->GetAttrName() );
+}
+
+
+void HistogramContainer_c::Remove ( const CSphString & sAttr ) // delete the histogram for the attr (no-op if absent)
+{
+	Histogram_i * pHistogram = Get(sAttr);
+	if ( !pHistogram )
+		return;
+
+	SafeDelete ( pHistogram );
+	m_dHistogramHash.Delete(sAttr);
+}
+
+
+Histogram_i * HistogramContainer_c::Get ( const CSphString & sAttr ) const // nullptr when no histogram exists for the attr
+{
+	Histogram_i ** ppHistogram = m_dHistogramHash(sAttr);
+	return ppHistogram ? *ppHistogram : nullptr;
+}
+
+
+DWORD HistogramContainer_c::GetNumValues() const
+{
+	// all histograms should have the same amount of values
+	m_dHistogramHash.IterateStart();
+	if ( !m_dHistogramHash.IterateNext() )
+		return 0;
+
+	return m_dHistogramHash.IterateGet()->GetNumValues();
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+static bool CanCreateHistogram ( const CSphString sAttrName, ESphAttr eAttrType ) // NOTE(review): string passed by value; consider const ref
+{
+	if ( sphIsInternalAttr ( sAttrName ) ) // internal attrs (e.g. service columns) never get histograms
+		return false;
+
+	return eAttrType==SPH_ATTR_INTEGER || eAttrType==SPH_ATTR_BIGINT || eAttrType==SPH_ATTR_BOOL || eAttrType==SPH_ATTR_FLOAT || eAttrType==SPH_ATTR_TIMESTAMP;
+}
+
+
+static Histogram_i * CreateHistogram ( const CSphString & sAttr, HistogramType_e eType, int iSize ) // factory by histogram type; iSize==0 means default bucket cap
+{
+	const int MAX_BUCKETS = 1024;
+
+	if ( !iSize )
+		iSize = MAX_BUCKETS;
+
+	switch ( eType )
+	{
+		case HISTOGRAM_STREAMED_UINT32:	return new HistogramStreamed_T<DWORD> ( sAttr, iSize );
+		case HISTOGRAM_STREAMED_INT64:	return new HistogramStreamed_T<int64_t> ( sAttr, iSize );
+		case HISTOGRAM_STREAMED_FLOAT:	return new HistogramStreamed_T<float> ( sAttr, iSize );
+		default:						return nullptr;
+	}
+}
+
+
+Histogram_i * CreateHistogram ( const CSphString & sAttr, ESphAttr eAttrType, int iSize ) // public factory: map attr type to histogram type; nullptr if unsupported
+{
+	if ( !CanCreateHistogram ( sAttr, eAttrType ) )
+		return nullptr;
+
+	switch ( eAttrType )
+	{
+	case SPH_ATTR_INTEGER:
+	case SPH_ATTR_TIMESTAMP:
+	case SPH_ATTR_BOOL:
+		return CreateHistogram ( sAttr, HISTOGRAM_STREAMED_UINT32, iSize );
+
+	case SPH_ATTR_BIGINT:	return CreateHistogram ( sAttr, HISTOGRAM_STREAMED_INT64, iSize );
+	case SPH_ATTR_FLOAT:	return CreateHistogram ( sAttr, HISTOGRAM_STREAMED_FLOAT, iSize );
+	default:				return nullptr;
+	}
+}
+
+
+int64_t EstimateFilterSelectivity ( const CSphFilterSettings & tSettings, const HistogramContainer_c * pHistogramContainer ) // INT64_MAX means "no estimate available"
+{
+	if ( !pHistogramContainer )
+		return INT64_MAX;
+
+	Histogram_i * pHistogram = pHistogramContainer->Get ( tSettings.m_sAttrName );
+	if ( !pHistogram || pHistogram->IsOutdated() ) // stale histograms (>=30% out-of-range updates) are ignored
+		return INT64_MAX;
+
+	int64_t iEstimate = INT64_MAX;
+	if ( !pHistogram->EstimateRsetSize ( tSettings, iEstimate ) )
+		return INT64_MAX;
+
+	return iEstimate;
+}

+ 72 - 0
src/histogram.h

@@ -0,0 +1,72 @@
+//
+// Copyright (c) 2018-2020, Manticore Software LTD (http://manticoresearch.com)
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _histogram_
+#define _histogram_
+
+#include "sphinx.h"
+
+class CSphReader;
+class CSphWriter;
+
+enum HistogramType_e
+{
+	HISTOGRAM_STREAMED_UINT32,
+	HISTOGRAM_STREAMED_INT64,
+	HISTOGRAM_STREAMED_FLOAT
+};
+
+class Histogram_i
+{
+public:
+	virtual			~Histogram_i() {}
+
+	virtual void	Insert ( SphAttr_t tAttrVal ) = 0;		// index-time insert of an element; both values and counters may be updated
+	virtual void	UpdateCounter ( SphAttr_t tAttr ) = 0;	// run-time update of counters only; element values stay unchanged
+	virtual void	Delete ( SphAttr_t tAttrVal ) = 0;
+	virtual bool	EstimateRsetSize ( const CSphFilterSettings & tFilter, int64_t & iEstimate ) const = 0;
+	virtual DWORD	GetNumValues() const = 0;
+	virtual bool	IsOutdated() const = 0;
+
+	virtual HistogramType_e		GetType() const = 0;
+	virtual const CSphString &	GetAttrName() const = 0;
+
+	virtual void	Finalize() {}
+	virtual bool	Save ( CSphWriter & tWriter ) const = 0;
+	virtual bool	Load ( CSphReader & tReader, CSphString & sError ) = 0;
+
+	virtual void	Dump ( StringBuilder_c & tOut ) const = 0;
+};
+
+
+class HistogramContainer_c
+{
+public:
+					~HistogramContainer_c();
+
+	bool			Save ( const CSphString & sFile, CSphString & sError );
+	bool			Load ( const CSphString & sFile, CSphString & sError );
+	bool			Add ( Histogram_i * pHistogram );
+	void			Remove ( const CSphString & sAttr );
+	Histogram_i *	Get ( const CSphString & sAttr ) const;
+	DWORD			GetNumValues() const;
+
+private:
+	SmallStringHash_T<Histogram_i*>	m_dHistogramHash;
+
+	void			Reset();
+};
+
+
+Histogram_i *	CreateHistogram ( const CSphString & sAttr, ESphAttr eAttrType, int iSize=0 );
+
+int64_t			EstimateFilterSelectivity ( const CSphFilterSettings & tSettings, const HistogramContainer_c * pHistogramContainer );
+
+#endif // _histogram_

+ 5 - 9
src/index_converter.cpp

@@ -18,6 +18,7 @@
 #include "attribute.h"
 #include "sphinxsearch.h"
 #include "secondaryindex.h"
+#include "histogram.h"
 #include "sphinxstem.h"
 #include "sphinxpq.h"
 #include "accumulator.h"
@@ -968,7 +969,7 @@ private:
 	OpenHash_T<RowID_t, SphDocID_t, HashFunc_Int64_t> m_hDoc2Row;
 	OpenHash_T<DoclistOffsets_t, SphOffset_t, HashFunc_Int64_t> m_hDoclist;
 
-	bool WriteLookup ( Index_t & tIndex, const AttrIndexBuilder_c & tMinMax, CSphString & sError );
+	bool WriteLookup ( Index_t & tIndex, CSphString & sError );
 	bool WriteAttributes ( Index_t & tIndex, CSphString & sError );
 	void WriteCheckpoints ( const Index_t & tIndex, CSphWriter & tWriterDict );
 	bool WriteKillList ( const Index_t & tIndex, bool bIgnoreKlist, CSphString & sError );
@@ -991,7 +992,7 @@ struct CmpDocidLookup_fn
 	}
 };
 
-bool ConverterPlain_t::WriteLookup ( Index_t & tIndex, const AttrIndexBuilder_c & tMinMax, CSphString & sError )
+bool ConverterPlain_t::WriteLookup ( Index_t & tIndex, CSphString & sError )
 {
 	CSphString sSPA = tIndex.GetFilename(SPH_EXT_SPA);
 	CSphAutofile tSPA ( sSPA.cstr(), SPH_O_READ, sError );
@@ -1001,11 +1002,6 @@ bool ConverterPlain_t::WriteLookup ( Index_t & tIndex, const AttrIndexBuilder_c
 	CSphReader tSPAReader;
 	tSPAReader.SetFile(tSPA);
 
-	const CSphTightVector<CSphRowitem> & dMinMaxRows = tMinMax.GetCollected();
-	int iStride = m_tSchema.GetRowSize();
-	const CSphRowitem * pMinRow = dMinMaxRows.Begin()+dMinMaxRows.GetLength()-iStride*2;
-	const CSphRowitem * pMaxRow = pMinRow+iStride;
-
 	HistogramContainer_c tHistogramContainer;
 	CSphVector<Histogram_i *> dHistograms;
 	CSphVector<CSphColumnInfo> dPOD;
@@ -1018,10 +1014,10 @@ bool ConverterPlain_t::WriteLookup ( Index_t & tIndex, const AttrIndexBuilder_c
 			Verify ( tHistogramContainer.Add ( pHistogram ) );
 			dHistograms.Add ( pHistogram );
 			dPOD.Add ( tAttr );
-			pHistogram->Setup ( sphGetRowAttr ( pMinRow, tAttr.m_tLocator ), sphGetRowAttr ( pMaxRow, tAttr.m_tLocator ) );
 		}
 	}
 
+	int iStride = m_tSchema.GetRowSize();
 	CSphVector<CSphRowitem> dRow ( iStride );
 	CSphRowitem * pRow = dRow.Begin();
 
@@ -1121,7 +1117,7 @@ bool ConverterPlain_t::WriteAttributes ( Index_t & tIndex, CSphString & sError )
 	tIndex.m_iTotalDocuments = tNextRowID;
 	m_tDocinfoIndex = ( dMinMaxRows.GetLength() / m_tSchema.GetRowSize() / 2 ) - 1;
 
-	if ( !WriteLookup ( tIndex, tMinMaxBuilder, sError ) )
+	if ( !WriteLookup ( tIndex, sError ) )
 		return false;
 
 	return true;

+ 1 - 1
src/indexcheck.cpp

@@ -183,7 +183,7 @@ void DebugCheckHelper_c::DebugCheck_Attributes ( DebugCheckReader_i & tAttrs, De
 			tReporter.Fail ( "schema has blob attrs, but blob file is empty" );
 
 		for ( int i = 0; i < tSchema.GetAttrsCount(); i++ )
-			if ( sphIsBlobAttr(  tSchema.GetAttr(i).m_eAttrType ) )
+			if ( sphIsBlobAttr ( tSchema.GetAttr(i) ) )
 				nBlobAttrs++;
 	} else
 	{

+ 14 - 3
src/indexer.cpp

@@ -1617,11 +1617,18 @@ bool SendRotate ( const CSphConfig & hConf, bool bForce )
 	return true;
 }
 
-static void ShowVersion ()
+
+static void ShowVersion()
 {
-	fprintf ( stdout, "%s", szMANTICORE_BANNER );
+	const char * szColumnarVer = GetColumnarVersionStr();
+	CSphString sColumnar = "";
+	if ( szColumnarVer )
+		sColumnar.SetSprintf ( " (columnar %s)", szColumnarVer );
+
+	fprintf ( stdout, "%s%s%s",  szMANTICORE_NAME, sColumnar.cstr(), szMANTICORE_BANNER_TEXT );
 }
 
+
 static void ShowHelp ()
 {
 	fprintf ( stdout,
@@ -1808,6 +1815,10 @@ int main ( int argc, char ** argv )
 			break;
 	}
 
+	CSphString sError;
+	if ( !InitColumnar ( sError ) )
+		sphWarning ( "Error initializing columnar storage: %s", sError.cstr() );
+
 	if ( !g_bQuiet )
 		ShowVersion();
 
@@ -1846,7 +1857,6 @@ int main ( int argc, char ** argv )
 	// load config
 	///////////////
 
-	CSphString sError;
 	if ( !sphInitCharsetAliasTable ( sError ) )
 		sphDie ( "failed to init charset alias table: %s", sError.cstr() );
 
@@ -1998,6 +2008,7 @@ int main ( int argc, char ** argv )
 	}
 
 	sphShutdownWordforms ();
+	ShutdownColumnar();
 
 	if ( !g_bQuiet )
 	{

+ 33 - 4
src/indexsettings.cpp

@@ -619,6 +619,31 @@ void CSphIndexSettings::ParseStoredFields ( const CSphConfigSection & hIndex )
 }
 
 
+#if USE_COLUMNAR
+void CSphIndexSettings::ParseColumnarSettings ( const CSphConfigSection & hIndex )
+{
+	{
+		CSphString sAttrs = hIndex.GetStr ( "columnar_attrs" );
+		sAttrs.ToLower();
+		sphSplit ( m_dColumnarAttrs, sAttrs.cstr() );
+		m_dColumnarAttrs.Uniq();
+	}
+
+	{
+		CSphString sAttrs = hIndex.GetStr ( "columnar_strings_no_hash" );
+		sAttrs.ToLower();
+		sphSplit ( m_dColumnarStringsNoHash, sAttrs.cstr() );
+		m_dColumnarStringsNoHash.Uniq();
+	}
+
+	m_sCompressionUINT32 = hIndex.GetStr ( "columnar_compression_uint32", m_sCompressionUINT32.c_str() ).cstr();
+	m_sCompressionUINT64 = hIndex.GetStr ( "columnar_compression_int64", m_sCompressionUINT64.c_str() ).cstr();
+	m_iSubblockSize = hIndex.GetInt ( "columnar_subblock", 128 );
+	m_iSubblockSizeMva = hIndex.GetInt ( "columnar_subblock_mva", 128 );
+	m_iMinMaxLeafSize = hIndex.GetInt ( "columnar_minmax_leaf", 128 );
+}
+#endif
+
 bool CSphIndexSettings::ParseDocstoreSettings ( const CSphConfigSection & hIndex, CSphString & sWarning, CSphString & sError )
 {
 	m_uBlockSize = hIndex.GetSize ( "docstore_block_size", DEFAULT_DOCSTORE_BLOCK );
@@ -681,6 +706,10 @@ bool CSphIndexSettings::Setup ( const CSphConfigSection & hIndex, const char * s
 
 	ParseStoredFields(hIndex);
 
+#if USE_COLUMNAR
+	ParseColumnarSettings(hIndex);
+#endif
+
 	if ( RawMinPrefixLen()==0 && m_dPrefixFields.GetLength()!=0 )
 	{
 		sWarning = "min_prefix_len=0, prefix_fields ignored";
@@ -883,10 +912,10 @@ void FileAccessSettings_t::Format ( SettingsFormatter_c & tOut, FilenameBuilder_
 	tOut.Add ( "read_buffer_docs",		m_iReadBufferDocList,		m_iReadBufferDocList!=tDefault.m_iReadBufferDocList );
 	tOut.Add ( "read_buffer_hits",		m_iReadBufferHitList,		m_iReadBufferHitList!=tDefault.m_iReadBufferHitList );
 
-	tOut.Add ( "access_doclists",		FileAccessName(m_eDoclist),	m_eDoclist!=tDefault.m_eDoclist );
-	tOut.Add ( "access_hitlists",		FileAccessName(m_eHitlist),	m_eHitlist!=tDefault.m_eHitlist );
-	tOut.Add ( "access_plain_attrs",	FileAccessName(m_eAttr) ,	m_eAttr!=tDefault.m_eAttr );
-	tOut.Add ( "access_blob_attrs",		FileAccessName(m_eBlob) ,	m_eBlob!=tDefault.m_eBlob );
+	tOut.Add ( "access_doclists",		FileAccessName(m_eDoclist),		m_eDoclist!=tDefault.m_eDoclist );
+	tOut.Add ( "access_hitlists",		FileAccessName(m_eHitlist),		m_eHitlist!=tDefault.m_eHitlist );
+	tOut.Add ( "access_plain_attrs",	FileAccessName(m_eAttr) ,		m_eAttr!=tDefault.m_eAttr );
+	tOut.Add ( "access_blob_attrs",		FileAccessName(m_eBlob) ,		m_eBlob!=tDefault.m_eBlob );
 }
 
 //////////////////////////////////////////////////////////////////////////

+ 12 - 0
src/indexsettings.h

@@ -17,6 +17,7 @@
 #include "sphinxutils.h"
 #include "fileutils.h"
 #include "sphinxexpr.h"
+#include "columnarlib.h"
 
 typedef uint64_t SphWordID_t;
 STATIC_SIZE_ASSERT ( SphWordID_t, 8 );
@@ -169,6 +170,9 @@ public:
 	StrVec_t m_dStoredFields;		///< list of stored fields
 	StrVec_t m_dStoredOnlyFields;	///< list of "fields" that are stored but not indexed
 
+	StrVec_t m_dColumnarAttrs;			///< list of attributes to place in columnar store
+	StrVec_t m_dColumnarStringsNoHash;	///< list of columnar string attributes that don't need pregenerated hashes
+
 	ESphWordpart GetWordpart ( const char * sField, bool bWordDict );
 	int GetMinPrefixLen ( bool bWordDict ) const;
 	void SetMinPrefixLen ( int iMinPrefixLen );
@@ -232,6 +236,9 @@ enum ESphBigram
 
 
 class CSphIndexSettings : public CSphSourceSettings, public DocstoreSettings_t
+#if USE_COLUMNAR
+	, public columnar::Settings_t
+#endif
 {
 public:
 	ESphHitFormat	m_eHitFormat = SPH_HIT_FORMAT_PLAIN;
@@ -261,6 +268,11 @@ public:
 
 private:
 	void			ParseStoredFields ( const CSphConfigSection & hIndex );
+
+#if USE_COLUMNAR
+	void			ParseColumnarSettings ( const CSphConfigSection & hIndex );
+#endif
+
 	bool			ParseDocstoreSettings ( const CSphConfigSection & hIndex, CSphString & sWarning, CSphString & sError );
 };
 

+ 15 - 4
src/indextool.cpp

@@ -909,12 +909,16 @@ void ApplyKilllists ( CSphConfig & hConf )
 				continue;
 			}
 
-			if ( !pIndex->Prealloc ( false, nullptr ) )
+			StrVec_t dWarnings;
+			if ( !pIndex->Prealloc ( false, nullptr, dWarnings ) )
 			{
 				fprintf ( stdout, "WARNING: unable to prealloc index %s: %s\n", tIndex.m_sName.cstr(), sError.cstr() );
 				continue;
 			}
 
+			for ( const auto & i : dWarnings )
+				fprintf ( stdout, "WARNING: index %s: %s\n", tIndex.m_sName.cstr(), i.cstr() );
+
 			tIndex.m_nDocs = pIndex->GetStats().m_iTotalDocuments;
 		}
 
@@ -1179,8 +1183,12 @@ static CSphIndex * CreateIndex ( CSphConfig & hConf, const CSphString & sIndex,
 static void PreallocIndex ( const CSphString & sIndex, bool bStripPath, CSphIndex * pIndex )
 {
 	CSphScopedPtr<FilenameBuilder_i> pFilenameBuilder ( CreateFilenameBuilder ( sIndex.cstr() ) );
-	if ( !pIndex->Prealloc ( bStripPath, pFilenameBuilder.Ptr() ) )
+	StrVec_t dWarnings;
+	if ( !pIndex->Prealloc ( bStripPath, pFilenameBuilder.Ptr(), dWarnings ) )
 		sphDie ( "index '%s': prealloc failed: %s\n", sIndex.cstr(), pIndex->GetLastError().cstr() );
+
+	for ( const auto & i : dWarnings )
+		fprintf ( stdout, "WARNING: index %s: %s\n", sIndex.cstr(), i.cstr() );
 }
 
 int main ( int argc, char ** argv )
@@ -1526,10 +1534,13 @@ int main ( int argc, char ** argv )
 				if ( !pIndex )
 					sphDie ( "index '%s': failed to create (%s)", sIndex.cstr(), sError.cstr() );
 
-				CSphString sWarn;
-				if ( !pIndex->Prealloc ( bStripPath, nullptr ) )
+				StrVec_t dWarnings;
+				if ( !pIndex->Prealloc ( bStripPath, nullptr, dWarnings ) )
 					sphDie ( "index '%s': prealloc failed: %s\n", sIndex.cstr(), pIndex->GetLastError().cstr() );
 
+				for ( const auto & i : dWarnings )
+					fprintf ( stdout, "WARNING: index %s: %s\n", sIndex.cstr(), i.cstr() );
+
 				pIndex->Preread();
 			} else
 				fprintf ( stdout, "dumping dictionary for index '%s'...\n", sIndex.cstr() );

+ 45 - 0
src/libutils.cpp

@@ -0,0 +1,45 @@
+//
+// Copyright (c) 2017-2021, Manticore Software LTD (https://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "libutils.h"
+
+#if USE_WINDOWS
+
+void * dlsym ( void * lib, const char * name )
+{
+	return GetProcAddress ( (HMODULE)lib, name );
+}
+
+
+void * dlopen ( const char * libname, int )
+{
+	return LoadLibraryEx ( libname, NULL, 0 );
+}
+
+
+int dlclose ( void * lib )
+{
+	return FreeLibrary ( (HMODULE)lib )
+		? 0
+		: GetLastError();
+}
+
+
+const char * dlerror()
+{
+	static char sError[256];
+	DWORD uError = GetLastError();
+	FormatMessage ( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, uError, LANG_SYSTEM_DEFAULT, (LPTSTR)sError, sizeof(sError), NULL );
+	return sError;
+}
+
+#endif // USE_WINDOWS

+ 45 - 0
src/libutils.h

@@ -0,0 +1,45 @@
+//
+// Copyright (c) 2017-2021, Manticore Software LTD (https://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _libutils_
+#define _libutils_
+
+#include "sphinxstd.h"
+
+#if !USE_WINDOWS
+	#include <unistd.h>
+	#include <sys/time.h>
+	#if HAVE_DLOPEN
+		#include <dlfcn.h>
+	#endif // HAVE_DLOPEN
+#endif // !USE_WINDOWS
+
+#if !USE_WINDOWS
+	#ifndef HAVE_DLERROR
+		#define dlerror() ""
+	#endif // HAVE_DLERROR
+#endif // !USE_WINDOWS
+
+#if USE_WINDOWS
+#undef HAVE_DLOPEN
+#define HAVE_DLOPEN		1
+#define RTLD_LAZY		0
+#define RTLD_LOCAL		0
+
+void *			dlsym ( void * lib, const char * name );
+void *			dlopen ( const char * libname, int );
+int				dlclose ( void * lib );
+const char *	dlerror();
+
+#endif // USE_WINDOWS
+
+#endif // _libutils_

+ 217 - 0
src/memio.cpp

@@ -0,0 +1,217 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "memio.h"
+
+
+MemoryReader_c::MemoryReader_c ( const BYTE * pData, int iLen )
+	: m_pData ( pData )
+	, m_iLen ( iLen )
+	, m_pCur ( pData )
+{}
+
+
+MemoryReader_c::MemoryReader_c ( ByteBlob_t dData )
+	: m_pData ( dData.first )
+	, m_iLen ( dData.second )
+	, m_pCur ( dData.first )
+{}
+
+
+int MemoryReader_c::GetPos()
+{
+	return ( m_pCur - m_pData );
+}
+
+
+void MemoryReader_c::SetPos ( int iOff )
+{
+	assert ( iOff>=0 && iOff<=m_iLen );
+	m_pCur = m_pData + iOff;
+}
+
+
+CSphString MemoryReader_c::GetString()
+{
+	CSphString sRes;
+	DWORD iLen = GetDword();
+	if ( iLen )
+	{
+		sRes.Reserve ( iLen );
+		GetBytes ( (BYTE *)sRes.cstr(), iLen );
+	}
+
+	return sRes;
+}
+
+
+DWORD MemoryReader_c::GetDword()
+{
+	DWORD uRes = 0;
+	GetBytes ( &uRes, sizeof(uRes) );
+	return uRes;
+}
+
+
+WORD MemoryReader_c::GetWord()
+{
+	WORD uRes = 0;
+	GetBytes ( &uRes, sizeof(uRes) );
+	return uRes;
+}
+
+
+void MemoryReader_c::GetBytes ( void * pData, int iLen )
+{
+	if ( !iLen )
+		return;
+
+	assert ( m_pCur );
+	assert ( m_pCur<m_pData+m_iLen );
+	assert ( m_pCur+iLen<=m_pData+m_iLen );
+	memcpy ( pData, m_pCur, iLen );
+	m_pCur += iLen;
+}
+
+
+BYTE MemoryReader_c::GetByte()
+{
+	BYTE uVal = 0;
+	GetBytes ( &uVal, sizeof(uVal) );
+	return uVal;
+}
+
+
+uint64_t MemoryReader_c::GetUint64()
+{
+	uint64_t uVal;
+	GetBytes ( &uVal, sizeof(uVal) );
+	return uVal;
+}
+
+
+const BYTE * MemoryReader_c::Begin() const
+{
+	return m_pData;
+}
+
+
+int MemoryReader_c::GetLength() const
+{
+	return m_iLen;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+MemoryWriter_c::MemoryWriter_c ( CSphVector<BYTE> & dBuf )
+	: m_dBuf ( dBuf )
+{}
+
+
+int MemoryWriter_c::GetPos()
+{
+	return m_dBuf.GetLength();
+}
+
+
+void MemoryWriter_c::PutString ( const CSphString & sVal )
+{
+	int iLen = sVal.Length();
+	PutDword ( iLen );
+	if ( iLen )
+		PutBytes ( (const BYTE *)sVal.cstr(), iLen );
+}
+
+
+void MemoryWriter_c::PutString ( const char * sVal )
+{
+	int iLen = 0;
+	if ( sVal )
+		iLen = (int) strlen ( sVal );
+	PutDword ( iLen );
+	if ( iLen )
+		PutBytes ( (const BYTE *)sVal, iLen );
+}
+
+
+void MemoryWriter_c::PutDword ( DWORD uVal )
+{
+	PutBytes ( (BYTE *)&uVal, sizeof(uVal) );
+}
+
+
+void MemoryWriter_c::PutOffset ( SphOffset_t uValue )
+{
+	PutBytes ( &uValue, sizeof(SphOffset_t) );
+}
+
+
+void MemoryWriter_c::PutWord ( WORD uVal )
+{
+	PutBytes ( (BYTE *)&uVal, sizeof(uVal) );
+}
+
+
+void MemoryWriter_c::PutBytes ( const void * pData, int iLen )
+{
+	if ( !iLen )
+		return;
+
+	BYTE * pCur = m_dBuf.AddN ( iLen );
+	memcpy ( pCur, pData, iLen );
+}
+
+
+void MemoryWriter_c::PutByte ( BYTE uVal )
+{
+	m_dBuf.Add ( uVal );
+}
+
+
+void MemoryWriter_c::PutUint64 ( uint64_t uVal )
+{
+	PutBytes ( (BYTE *)&uVal, sizeof(uVal) );
+}
+
+//////////////////////////////////////////////////////////////////////////
+MemoryReader2_c::MemoryReader2_c ( const BYTE * pData, int iLen )
+	: MemoryReader_c ( pData, iLen )
+{}
+
+
+uint64_t MemoryReader2_c::UnzipInt()
+{
+	return sphUnzipInt(m_pCur);
+}
+
+
+uint64_t MemoryReader2_c::UnzipOffset()
+{
+	return sphUnzipOffset(m_pCur);
+}
+
+//////////////////////////////////////////////////////////////////////////
+MemoryWriter2_c::MemoryWriter2_c ( CSphVector<BYTE> & dBuf )
+	: MemoryWriter_c ( dBuf )
+{}
+
+
+void MemoryWriter2_c::ZipOffset ( uint64_t uVal )
+{
+	sphZipValue ( [this] ( BYTE b ) { PutByte ( b ); }, uVal ); 
+}
+
+
+void MemoryWriter2_c::ZipInt ( DWORD uVal )
+{
+	sphZipValue ( [this] ( BYTE b ) { PutByte ( b ); }, uVal );
+}

+ 86 - 0
src/memio.h

@@ -0,0 +1,86 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _memio_
+#define _memio_
+
+#include "sphinx.h"
+
+class MemoryReader_c
+{
+public:
+					MemoryReader_c ( const BYTE * pData, int iLen );
+					MemoryReader_c ( ByteBlob_t dData );
+
+	int				GetPos();
+	void			SetPos ( int iOff );
+	uint64_t		UnzipOffset();
+	DWORD			UnzipInt();
+	CSphString		GetString();
+	DWORD			GetDword();
+	WORD			GetWord();
+	void			GetBytes ( void * pData, int iLen );
+	BYTE			GetByte();
+	uint64_t		GetUint64();
+	const BYTE *	Begin() const;
+	int				GetLength() const;
+
+protected:
+	const BYTE *	m_pData = nullptr;
+	const int		m_iLen = 0;
+	const BYTE *	m_pCur = nullptr;
+};
+
+
+class MemoryWriter_c
+{
+public:
+			MemoryWriter_c ( CSphVector<BYTE> & dBuf );
+
+	int		GetPos();
+	void	ZipOffset ( uint64_t uVal );
+	void	ZipInt ( DWORD uVal );
+	void	PutString ( const CSphString & sVal );
+	void	PutString ( const char * szVal );
+	void	PutDword ( DWORD uVal );
+	void	PutOffset ( SphOffset_t uValue );
+	void	PutWord ( WORD uVal );
+	void	PutBytes ( const void * pData, int iLen );
+	void	PutByte ( BYTE uVal );
+	void	PutUint64 ( uint64_t uVal );
+
+protected:
+	CSphVector<BYTE> & m_dBuf;
+};
+
+// fixme: get rid of this
+class MemoryReader2_c : public MemoryReader_c
+{
+public:
+				MemoryReader2_c ( const BYTE * pData, int iLen );
+		
+
+	uint64_t	UnzipInt();
+	uint64_t	UnzipOffset();
+};
+
+// fixme: get rid of this
+class MemoryWriter2_c : public MemoryWriter_c
+{
+public:
+			MemoryWriter2_c ( CSphVector<BYTE> & dBuf );
+
+	void	ZipOffset ( uint64_t uVal );
+	void	ZipInt ( DWORD uVal );
+};
+
+#endif // _memio_

+ 74 - 0
src/queryprofile.cpp

@@ -0,0 +1,74 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "queryprofile.h"
+
+QueryProfile_c::QueryProfile_c()
+{
+	Start ( SPH_QSTATE_TOTAL );
+}
+
+
+ESphQueryState QueryProfile_c::Switch ( ESphQueryState eNew )
+{
+	int64_t tmNow = sphMicroTimer();
+	ESphQueryState eOld = m_eState;
+	m_dSwitches [ eOld ]++;
+	m_tmTotal [ eOld ] += tmNow - m_tmStamp;
+	m_eState = eNew;
+	m_tmStamp = tmNow;
+	return eOld;
+}
+
+
+void QueryProfile_c::Start ( ESphQueryState eNew )
+{
+	memset ( m_dSwitches, 0, sizeof(m_dSwitches) );
+	memset ( m_tmTotal, 0, sizeof(m_tmTotal) );
+	m_eState = eNew;
+	m_tmStamp = sphMicroTimer();
+}
+
+
+void QueryProfile_c::AddMetric ( const QueryProfile_c & tData )
+{
+	// FIXME: maybe invent a way to display data from different profilers using some kind of multiplier?
+	for ( int i = 0; i<SPH_QSTATE_TOTAL; ++i )
+	{
+		m_dSwitches[i] += tData.m_dSwitches[i];
+		m_tmTotal[i] += tData.m_tmTotal[i];
+	}
+}
+
+
+/// stop profiling
+void QueryProfile_c::Stop()
+{
+	Switch ( SPH_QSTATE_TOTAL );
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+CSphScopedProfile::CSphScopedProfile ( QueryProfile_c * pProfile, ESphQueryState eNewState )
+{
+	m_pProfile = pProfile;
+	m_eOldState = SPH_QSTATE_UNKNOWN;
+	if ( m_pProfile )
+		m_eOldState = m_pProfile->Switch ( eNewState );
+}
+
+
+CSphScopedProfile::~CSphScopedProfile()
+{
+	if ( m_pProfile )
+		m_pProfile->Switch ( m_eOldState );
+}

+ 118 - 0
src/queryprofile.h

@@ -0,0 +1,118 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _queryprofile_
+#define _queryprofile_
+
+#include "sphinx.h"
+
+#define SPH_QUERY_STATES \
+	SPH_QUERY_STATE ( UNKNOWN,		"unknown" ) \
+	SPH_QUERY_STATE ( NET_READ,		"net_read" ) \
+	SPH_QUERY_STATE ( IO,			"io" ) \
+	SPH_QUERY_STATE ( DIST_CONNECT,	"dist_connect" ) \
+	SPH_QUERY_STATE ( LOCAL_DF,		"local_df" ) \
+	SPH_QUERY_STATE ( LOCAL_SEARCH,	"local_search" ) \
+	SPH_QUERY_STATE ( SQL_PARSE,	"sql_parse" ) \
+	SPH_QUERY_STATE ( FULLSCAN,		"fullscan" ) \
+	SPH_QUERY_STATE ( DICT_SETUP,	"dict_setup" ) \
+	SPH_QUERY_STATE ( PARSE,		"parse" ) \
+	SPH_QUERY_STATE ( TRANSFORMS,	"transforms" ) \
+	SPH_QUERY_STATE ( INIT,			"init" ) \
+	SPH_QUERY_STATE ( INIT_SEGMENT,	"init_segment" ) \
+	SPH_QUERY_STATE ( OPEN,			"open" ) \
+	SPH_QUERY_STATE ( READ_DOCS,	"read_docs" ) \
+	SPH_QUERY_STATE ( READ_HITS,	"read_hits" ) \
+	SPH_QUERY_STATE ( GET_DOCS,		"get_docs" ) \
+	SPH_QUERY_STATE ( GET_HITS,		"get_hits" ) \
+	SPH_QUERY_STATE ( FILTER,		"filter" ) \
+	SPH_QUERY_STATE ( RANK,			"rank" ) \
+	SPH_QUERY_STATE ( QCACHE_UP,	"qcache_update" ) \
+	SPH_QUERY_STATE ( QCACHE_FINAL,	"qcache_final" ) \
+	SPH_QUERY_STATE ( SORT,			"sort" ) \
+	SPH_QUERY_STATE ( FINALIZE,		"finalize" ) \
+	SPH_QUERY_STATE ( DYNAMIC,		"clone_attrs" ) \
+	SPH_QUERY_STATE ( DIST_WAIT,	"dist_wait" ) \
+	SPH_QUERY_STATE ( AGGREGATE,	"aggregate" ) \
+	SPH_QUERY_STATE ( NET_WRITE,	"net_write" ) \
+	SPH_QUERY_STATE ( EVAL_POST,	"eval_post" ) \
+	SPH_QUERY_STATE ( EVAL_GETFIELD,"eval_getfield" ) \
+	SPH_QUERY_STATE ( SNIPPET,		"eval_snippet" ) \
+	SPH_QUERY_STATE ( EVAL_UDF,		"eval_udf" ) \
+	SPH_QUERY_STATE ( TABLE_FUNC,	"table_func" )
+
+
+/// possible query states, used for profiling
+enum ESphQueryState
+{
+	SPH_QSTATE_INFINUM = -1,
+
+#define SPH_QUERY_STATE(_name,_desc) SPH_QSTATE_##_name,
+	SPH_QUERY_STATES
+#undef SPH_QUERY_STATE
+
+	SPH_QSTATE_TOTAL
+};
+STATIC_ASSERT ( SPH_QSTATE_UNKNOWN==0, BAD_QUERY_STATE_ENUM_BASE );
+
+struct XQNode_t;
+
+/// search query profile
+class QueryProfile_c
+{
+public:
+	ESphQueryState	m_eState;							///< current state
+	int64_t			m_tmStamp;							///< timestamp when we entered the current state
+
+	int				m_dSwitches [ SPH_QSTATE_TOTAL+1 ];	///< number of switches to given state
+	int64_t			m_tmTotal [ SPH_QSTATE_TOTAL+1 ];	///< total time spent per state
+	CSphVector<BYTE> m_dPlan; 							///< bson with plan
+
+														/// create empty and stopped profile
+					QueryProfile_c();
+	virtual 		~QueryProfile_c() {};
+
+	/// switch to a new query state, and record a timestamp
+	/// returns previous state, to simplify Push/Pop like scenarios
+	ESphQueryState Switch ( ESphQueryState eNew );
+
+	/// reset everything and start profiling from a given state
+	void			Start ( ESphQueryState eNew );
+	/// stop profiling
+	void			Stop();
+	void			AddMetric ( const QueryProfile_c & tData );
+
+	void			BuildResult ( XQNode_t * pRoot, const CSphSchema & tSchema, const StrVec_t & dZones );
+};
+
+
+class CSphScopedProfile
+{
+public:
+						CSphScopedProfile ( QueryProfile_c * pProfile, ESphQueryState eNewState );
+						~CSphScopedProfile();
+
+private:
+	QueryProfile_c *	m_pProfile;
+	ESphQueryState		m_eOldState;
+};
+
+
+// acquire common pattern 'check, then switch if not null'
+inline void SwitchProfile ( QueryProfile_c * pProfile, ESphQueryState eState )
+{
+	if ( pProfile )
+		pProfile->Switch ( eState );
+}
+
+
+#endif // _queryprofile_

+ 204 - 161
src/searchd.cpp

@@ -18,6 +18,7 @@
 #include "sphinxpq.h"
 #include "sphinxint.h"
 #include "sphinxquery.h"
+#include "sphinxsort.h"
 #include "sphinxjson.h"
 #include "sphinxjsonquery.h"
 #include "sphinxplugin.h"
@@ -179,6 +180,9 @@ bool					g_bHostnameLookup = false;
 CSphString				g_sMySQLVersion = szMANTICORE_VERSION;
 CSphString				g_sDbName = "Manticore";
 
+CSphString				g_sBanner;
+CSphString				g_sStatusVersion = szMANTICORE_VERSION;
+
 // for CLang thread-safety analysis
 ThreadRole MainThread; // functions which called only from main thread
 ThreadRole HandlerThread; // thread which serves clients
@@ -745,6 +749,7 @@ void Shutdown () REQUIRES ( MainThread ) NO_THREAD_SAFETY_ANALYSIS
 	sphShutdownWordforms ();
 	sph::ShutdownGlobalIDFs ();
 	sphAotShutdown ();
+	ShutdownColumnar();
 
 	for ( auto& dListener : g_dListeners )
 		if ( dListener.m_iSock>=0 )
@@ -762,7 +767,7 @@ void Shutdown () REQUIRES ( MainThread ) NO_THREAD_SAFETY_ANALYSIS
 		::unlink ( g_sPidFile.cstr() );
 
 	hazard::Shutdown ();
-	sphInfo ( "shutdown daemon version '%s' ...", szMANTICORE_VERSION );
+	sphInfo ( "shutdown daemon version '%s' ...", g_sStatusVersion.cstr() );
 	sphInfo ( "shutdown complete" );
 
 	Threads::Done ( g_iLogFile );
@@ -1508,7 +1513,7 @@ private:
 /// values are communicated over network between searchds and APIs and MUST NOT CHANGE
 enum
 {
-	QFLAG_REVERSE_SCAN			= 1UL << 0,
+	QFLAG_REVERSE_SCAN			= 1UL << 0,		// deprecated
 	QFLAG_SORT_KBUFFER			= 1UL << 1,
 	QFLAG_MAX_PREDICTED_TIME	= 1UL << 2,
 	QFLAG_SIMPLIFY				= 1UL << 3,
@@ -1541,7 +1546,6 @@ void SearchRequestBuilder_c::SendQuery ( const char * sIndexes, ISphOutputBuffer
 	// starting with command version 1.27, flags go first
 	// reason being, i might add flags that affect *any* of the subsequent data (eg. qflag_pack_ints)
 	DWORD uFlags = 0;
-	uFlags |= QFLAG_REVERSE_SCAN * q.m_bReverseScan;
 	uFlags |= QFLAG_SORT_KBUFFER * q.m_bSortKbuffer;
 	uFlags |= QFLAG_MAX_PREDICTED_TIME * ( q.m_iMaxPredictedMsec > 0 );
 	uFlags |= QFLAG_SIMPLIFY * q.m_bSimplify;
@@ -2294,6 +2298,24 @@ static bool ParseSearchFilter ( CSphFilterSettings & tFilter, InputBuffer_c & tR
 }
 
 
+static void AddDocids ( CSphVector<CSphQueryItem> & dItems )
+{
+	if ( !dItems.GetLength() )
+		return;
+
+	bool bHaveDocID = false;
+	for ( const auto & i : dItems )
+		bHaveDocID |= i.m_sAlias==sphGetDocidName() || i.m_sExpr=="*";
+
+	if ( !bHaveDocID )
+	{
+		CSphQueryItem tId;
+		tId.m_sExpr = tId.m_sAlias = sphGetDocidName();
+		dItems.Insert ( 0, tId );
+	}
+}
+
+
 bool ParseSearchQuery ( InputBuffer_c & tReq, ISphOutputBuffer & tOut, CSphQuery & tQuery, WORD uVer, WORD uMasterVer )
 {
 	// daemon-level defaults
@@ -2434,7 +2456,6 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, ISphOutputBuffer & tOut, CSphQuery
 	if ( uVer>=0x11B )
 	{
 		// parse simple flags
-		tQuery.m_bReverseScan = !!( uFlags & QFLAG_REVERSE_SCAN );
 		tQuery.m_bSortKbuffer = !!( uFlags & QFLAG_SORT_KBUFFER );
 		tQuery.m_bSimplify = !!( uFlags & QFLAG_SIMPLIFY );
 		tQuery.m_bPlainIDF = !!( uFlags & QFLAG_PLAIN_IDF );
@@ -2541,19 +2562,8 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, ISphOutputBuffer & tOut, CSphQuery
 
 	// queries coming from API may not request docids
 	// but we still need docids when sending result sets
-	if ( tQuery.m_dItems.GetLength() )
-	{
-		bool bHaveDocID = false;
-		for ( const auto & i : tQuery.m_dItems )
-			bHaveDocID |= i.m_sExpr==sphGetDocidName() || i.m_sExpr=="*";
-
-		if ( !bHaveDocID )
-		{
-			CSphQueryItem tId;
-			tId.m_sExpr = sphGetDocidName();
-			tQuery.m_dItems.Insert ( 0, tId );
-		}
-	}
+	AddDocids ( tQuery.m_dItems );
+	AddDocids ( tQuery.m_dRefItems );
 
 	if ( tReq.GetError() )
 	{
@@ -3833,30 +3843,31 @@ class MatchIterator_c
 	}
 
 public:
-	OneResultset_t&			m_dResult;
+	OneResultset_t&			m_tResult;
 	DocID_t					m_tDocID;
 	int						m_iIdx;		// ordering index (each step gives matches in sorted by Docid order)
 
-	explicit MatchIterator_c ( OneResultset_t& dResult )
-		: m_dResult ( dResult )
+	explicit MatchIterator_c ( OneResultset_t & tResult )
+		: m_tResult ( tResult )
 	{
-		auto& dMatches = dResult.m_dMatches;
+		auto& dMatches = tResult.m_dMatches;
 		m_iLimit = dMatches.GetLength();
 
 		if ( MaybeUseWordOrder ( dMatches ) )
-			m_fnOrder = [pData = (WORD *) m_dResult.m_dMatches.end ()] ( int i ) { return pData[i]; };
+			m_fnOrder = [pData = (WORD *) m_tResult.m_dMatches.end ()] ( int i ) { return pData[i]; };
 		else if ( MaybeUseDwordOrder ( dMatches ) )
-			m_fnOrder = [pData = (DWORD *) m_dResult.m_dMatches.end ()] ( int i ) { return pData[i]; };
+			m_fnOrder = [pData = (DWORD *) m_tResult.m_dMatches.end ()] ( int i ) { return pData[i]; };
 		else
 		{
 			UseTags ( dMatches );
-			m_fnOrder = [this] ( int i ) { return m_dResult.m_dMatches[m_iRawIdx].m_iTag; };
-		};
-
+			m_fnOrder = [this] ( int i ) { return m_tResult.m_dMatches[m_iRawIdx].m_iTag; };
+		}
 
 		m_iRawIdx = 0;
 		m_iIdx = m_fnOrder(0);
-		m_tDocID = sphGetDocID ( m_dResult.m_dMatches[m_iIdx].m_pDynamic );
+
+		assert ( m_tResult.m_tSchema.GetAttr ( sphGetDocidName() ) );
+		m_tDocID = sphGetDocID ( m_tResult.m_dMatches[m_iIdx].m_pDynamic );
 	}
 
 	~MatchIterator_c()
@@ -3869,7 +3880,7 @@ public:
 		int iDirtyMatches = m_iLimit>0x10000 ? m_iLimit * sizeof ( DWORD ) : m_iLimit * sizeof ( WORD );
 		iDirtyMatches = ( iDirtyMatches+sizeof ( CSphMatch )-1 ) / sizeof ( CSphMatch );
 		for ( int i = 0; i<iDirtyMatches; ++i )
-			( m_dResult.m_dMatches.end ()+i )->CleanGarbage();
+			( m_tResult.m_dMatches.end ()+i )->CleanGarbage();
 	}
 
 	inline bool Step()
@@ -3878,7 +3889,7 @@ public:
 		if ( m_iRawIdx>=m_iLimit )
 			return false;
 		m_iIdx = m_fnOrder ( m_iRawIdx );
-		m_tDocID = sphGetDocID ( m_dResult.m_dMatches[m_iIdx].m_pDynamic );
+		m_tDocID = sphGetDocID ( m_tResult.m_dMatches[m_iIdx].m_pDynamic );
 		return true;
 	}
 
@@ -3891,7 +3902,7 @@ public:
 //		if ( !a->m_dResult.m_bTag && b->m_dResult.m_bTag )
 //			return true;
 
-		return a->m_dResult.m_iTag>b->m_dResult.m_iTag;
+		return a->m_tResult.m_iTag>b->m_tResult.m_iTag;
 	}
 };
 
@@ -3907,11 +3918,11 @@ int KillPlainDupes ( ISphMatchSorter * pSorter, AggrResult_t & tRes, const VecTr
 	dIterators.Reserve_static ( dResults.GetLength () );
 	CSphQueue<MatchIterator_c *, MatchIterator_c> qMatches ( dResults.GetLength () );
 
-	for ( auto& dResult : dResults )
-		if ( !dResult.m_dMatches.IsEmpty() )
+	for ( auto & tResult : dResults )
+		if ( !tResult.m_dMatches.IsEmpty() )
 		{
-			dIterators.Emplace_back ( dResult );
-			qMatches.Push ( &dIterators.Last () );
+			dIterators.Emplace_back(tResult);
+			qMatches.Push ( &dIterators.Last() );
 		}
 
 	DocID_t tPrevDocID = DOCID_MIN;
@@ -3921,9 +3932,9 @@ int KillPlainDupes ( ISphMatchSorter * pSorter, AggrResult_t & tRes, const VecTr
 		DocID_t tDocID = pMin->m_tDocID;
 		if ( tDocID!=tPrevDocID ) // by default, simply remove dupes (select first by tag)
 		{
-			CSphMatch & tMatch = pMin->m_dResult.m_dMatches[pMin->m_iIdx];
+			CSphMatch & tMatch = pMin->m_tResult.m_dMatches[pMin->m_iIdx];
 			auto iTag = tMatch.m_iTag;	// as we may use tag for ordering
-			tMatch.m_iTag = pMin->m_dResult.m_iTag; // that will link us back to docstore
+			tMatch.m_iTag = pMin->m_tResult.m_iTag; // that will link us back to docstore
 			pSorter->Push ( tMatch );
 			tMatch.m_iTag = iTag;	// restore tag
 			tPrevDocID = tDocID;
@@ -4051,24 +4062,6 @@ struct GenericMatchSort_fn : public CSphMatchComparatorState
 					continue;
 				return ( ( m_uAttrDesc>>i ) & 1 ) ^ ( a->m_tRowID < b->m_tRowID );
 
-			case SPH_KEYPART_DOCID_S:
-			{
-				register DocID_t aa = sphGetDocID ( a->m_pStatic );
-				register DocID_t bb = sphGetDocID ( b->m_pStatic );
-				if ( aa==bb )
-					continue;
-				return ( ( m_uAttrDesc>>i ) & 1 ) ^ ( aa < bb );
-			}
-
-			case SPH_KEYPART_DOCID_D:
-			{
-				register DocID_t aa = sphGetDocID ( a->m_pDynamic );
-				register DocID_t bb = sphGetDocID ( b->m_pDynamic );
-				if ( aa==bb )
-					continue;
-				return ( ( m_uAttrDesc>>i ) & 1 ) ^ ( aa < bb );
-			}
-
 			case SPH_KEYPART_WEIGHT:
 				if ( a->m_iWeight==b->m_iWeight )
 					continue;
@@ -4099,7 +4092,8 @@ struct GenericMatchSort_fn : public CSphMatchComparatorState
 				break;
 			}
 		}
-		return false;
+
+		return a->m_tRowID<b->m_tRowID;
 	}
 };
 
@@ -4267,9 +4261,9 @@ void ProcessSinglePostlimit ( OneResultset_t & tRes, VecTraits_T<const CSphColum
 	for ( const auto & pCol : dPostlimit )
 		SetupPostlimitExprs ( tRes.Docstore (), pCol, sQuery, iSessionUID );
 
-	for ( auto & dMatch : dMatches )
+	for ( auto & tMatch : dMatches )
 		for ( const auto & pCol : dPostlimit )
-			EvalPostlimitExprs ( dMatch, pCol );
+			EvalPostlimitExprs ( tMatch, pCol );
 }
 
 void ProcessLocalPostlimit ( AggrResult_t & tRes, const CSphQuery & tQuery, bool bMaster )
@@ -4749,8 +4743,9 @@ bool ApplyOuterOrder ( AggrResult_t & tRes, const CSphQuery & tQuery )
 	// reorder (aka outer order)
 	ESphSortFunc eFunc;
 	GenericMatchSort_fn tReorder;
+	CSphVector<ExtraSortExpr_t> dExtraExprs;
 
-	ESortClauseParseResult eRes = sphParseSortClause ( &tQuery, tQuery.m_sOuterOrderBy.cstr(), tRes.m_tSchema, eFunc, tReorder, true, tRes.m_sError );
+	ESortClauseParseResult eRes = sphParseSortClause ( tQuery, tQuery.m_sOuterOrderBy.cstr(), tRes.m_tSchema, eFunc, tReorder, dExtraExprs, true, tRes.m_sError );
 	if ( eRes==SORT_CLAUSE_RANDOM )
 		tRes.m_sError = "order by rand() not supported in outer select";
 
@@ -5033,6 +5028,8 @@ protected:
 	QueryType_e						m_eQueryType {QUERY_API}; ///< queries from sphinxql require special handling
 	const QueryParser_i *			m_pQueryParser;	///< parser used for queries in this handler. e.g. plain or json-style
 
+	bool							m_bNeedDocIDs = false;	///< do we need docids returned from local searches (remotes return them anyway)?
+
 	// FIXME!!! breaks for dist threads with SNIPPETS expressions for queries to multiple indexes
 	mutable ExprHook_c				m_tHook;
 
@@ -5062,24 +5059,15 @@ private:
 	bool							BuildIndexList ( int & iDivideLimits, VecRefPtrsAgentConn_t & dRemotes, CSphVector<DistrServedByAgent_t> & dDistrServedByAgent ); // fixme!
 	void							CalcTimeStats ( int64_t tmCpu, int64_t tmSubset, const CSphVector<DistrServedByAgent_t> & dDistrServedByAgent );
 	void							CalcPerIndexStats ( const CSphVector<DistrServedByAgent_t> & dDistrServedByAgent ) const;
-	void							CalcGlobalStats ( int64_t tmCpu, int64_t tmSubset, int64_t tmLocal, const CSphIOStats & tIO,
-			const VecRefPtrsAgentConn_t & dRemotes ) const;
-	int								CreateSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter*> & dSorters,
-			VecTraits_T<CSphString> & dErrors, StrVec_t* pExtra, SphQueueRes_t & tQueueRes, ISphExprHook* pHook ) const;
-	int								CreateSingleSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter*> & dSorters,
-			VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook* pHook ) const;
-	int								CreateMultiQueryOrFacetSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter*> & dSorters,
-			VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const;
+	void							CalcGlobalStats ( int64_t tmCpu, int64_t tmSubset, int64_t tmLocal, const CSphIOStats & tIO, const VecRefPtrsAgentConn_t & dRemotes ) const;
+	int								CreateSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter*> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const;
+	int								CreateSingleSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter*> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const;
+	int								CreateMultiQueryOrFacetSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter*> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const;
 
 	SphQueueSettings_t				MakeQueueSettings ( const CSphIndex * pIndex, int iMaxMatches, ISphExprHook * pHook ) const;
-
-	const ServedDesc_t * CheckIndexSuitable ( const char* szLocal, const char* szParent,
-		VecTraits_T<SearchFailuresLog_c> & dNFailuresSet ) const;
-
-	bool CreateValidSorters ( VecTraits_T<ISphMatchSorter *> & dSrt, SphQueueRes_t* pQueueRes,
-		VecTraits_T<SearchFailuresLog_c> & dFlr, StrVec_t * pExtra, const ServedDesc_t * pServed,
-		const char * szLocal, const char * szParent, ISphExprHook * pHook );
-
+	const ServedDesc_t *			CheckIndexSuitable ( const char * szLocal, const char * szParent, VecTraits_T<SearchFailuresLog_c> & dNFailuresSet ) const;
+	bool							CreateValidSorters ( VecTraits_T<ISphMatchSorter *> & dSrt, SphQueueRes_t * pQueueRes, VecTraits_T<SearchFailuresLog_c> & dFlr, StrVec_t * pExtra, const ServedDesc_t * pServed,
+										const char * szLocal, const char * szParent, ISphExprHook * pHook );
 };
 
 PubSearchHandler_c::PubSearchHandler_c ( int iQueries, const QueryParser_i * pQueryParser, QueryType_e eQueryType, bool bMaster )
@@ -5388,13 +5376,13 @@ SphQueueSettings_t SearchHandler_c::MakeQueueSettings ( const CSphIndex * pIndex
 	tQueueSettings.m_pCollection = m_pDelDocs;
 	tQueueSettings.m_pHook = pHook;
 	tQueueSettings.m_iMaxMatches = GetMaxMatches ( iMaxMatches, pIndex );
+	tQueueSettings.m_bNeedDocids = m_bNeedDocIDs;	// need docids to merge results from indexes
 	return tQueueSettings;
 }
 
 
-int SearchHandler_c::CreateMultiQueryOrFacetSorters ( const CSphIndex * pIndex,
-		VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra,
-		SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const
+int SearchHandler_c::CreateMultiQueryOrFacetSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes,
+	ISphExprHook * pHook ) const
 {
 	int iValidSorters = 0;
 
@@ -5414,8 +5402,8 @@ int SearchHandler_c::CreateMultiQueryOrFacetSorters ( const CSphIndex * pIndex,
 	return iValidSorters;
 }
 
-int SearchHandler_c::CreateSingleSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter *> & dSorters,
-		VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const
+
+int SearchHandler_c::CreateSingleSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const
 {
 	int iValidSorters = 0;
 	tQueueRes.m_bAlowMulti = false;
@@ -5437,8 +5425,8 @@ int SearchHandler_c::CreateSingleSorters ( const CSphIndex * pIndex, VecTraits_T
 	return iValidSorters;
 }
 
-int SearchHandler_c::CreateSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter *> & dSorters
-		, VecTraits_T<CSphString> & dErrors, StrVec_t * pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const
+
+int SearchHandler_c::CreateSorters ( const CSphIndex * pIndex, VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, StrVec_t* pExtra, SphQueueRes_t & tQueueRes, ISphExprHook * pHook ) const
 {
 	if ( m_bMultiQueue || m_bFacetQueue )
 		return CreateMultiQueryOrFacetSorters ( pIndex, dSorters, dErrors, pExtra, tQueueRes, pHook );
@@ -5557,8 +5545,7 @@ struct LocalSearchClone_t
 	}
 };
 
-const ServedDesc_t * SearchHandler_c::CheckIndexSuitable ( const char* szLocal, const char* szParent,
-		VecTraits_T<SearchFailuresLog_c> & dNFailuresSet ) const
+const ServedDesc_t * SearchHandler_c::CheckIndexSuitable ( const char * szLocal, const char* szParent, VecTraits_T<SearchFailuresLog_c> & dNFailuresSet ) const
 {
 	const auto * pServed = m_dLocked.Get ( szLocal );
 	if ( !pServed )
@@ -5580,9 +5567,9 @@ const ServedDesc_t * SearchHandler_c::CheckIndexSuitable ( const char* szLocal,
 	return pServed;
 }
 
-bool SearchHandler_c::CreateValidSorters ( VecTraits_T<ISphMatchSorter *> & dSrt, SphQueueRes_t * pQueueRes,
-		VecTraits_T<SearchFailuresLog_c> & dFlr, StrVec_t * pExtra, const ServedDesc_t * pServed, const char * szLocal,
-		const char * szParent, ISphExprHook * pHook )
+
+bool SearchHandler_c::CreateValidSorters ( VecTraits_T<ISphMatchSorter *> & dSrt, SphQueueRes_t * pQueueRes, VecTraits_T<SearchFailuresLog_c> & dFlr, StrVec_t * pExtra, const ServedDesc_t * pServed,
+	const char * szLocal, const char * szParent, ISphExprHook * pHook )
 {
 	auto iQueries = dSrt.GetLength();
 	#if PARANOID
@@ -5591,9 +5578,8 @@ bool SearchHandler_c::CreateValidSorters ( VecTraits_T<ISphMatchSorter *> & dSrt
 	#endif
 
 	CSphFixedVector<CSphString> dErrors ( iQueries );
-
 	int iValidSorters = CreateSorters ( pServed->m_pIndex, dSrt, dErrors, pExtra, *pQueueRes, pHook );
-	if ( iValidSorters<dSrt.GetLength () )
+	if ( iValidSorters<dSrt.GetLength() )
 	{
 		ARRAY_FOREACH ( i, dErrors )
 		{
@@ -6668,6 +6654,7 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd )
 	{
 		SetupLocalDF();
 		SwitchProfile ( m_pProfile, SPH_QSTATE_LOCAL_SEARCH );
+		m_bNeedDocIDs = m_dLocal.GetLength()+dRemotes.GetLength()>1;
 		tmLocal = -sphMicroTimer();
 		tmCpu -= sphTaskCpuTimer ();
 		RunLocalSearches();
@@ -7212,9 +7199,11 @@ public:
 			case SPH_ATTR_TOKENCOUNT:
 				CSphMatch::SetAttr ( tLoc, ToInt(tVal) );
 				break;
+
 			case SPH_ATTR_BIGINT:
 				CSphMatch::SetAttr ( tLoc, ToBigInt(tVal) );
 				break;
+
 			case SPH_ATTR_FLOAT:
 				if ( tVal.m_iType==SqlInsert_t::QUOTED_STRING )
 					SetAttrFloat ( tLoc, (float)strtod ( tVal.m_sVal.cstr(), NULL ) ); // FIXME? report conversion error?
@@ -7223,13 +7212,17 @@ public:
 				else if ( tVal.m_iType==SqlInsert_t::CONST_FLOAT )
 					SetAttrFloat ( tLoc, tVal.m_fVal );
 				break;
-			case SPH_ATTR_STRING:
+
 			case SPH_ATTR_STRINGPTR:
+				CSphMatch::SetAttr ( tLoc, 0 );
+				break;
+
+			case SPH_ATTR_STRING:
 			case SPH_ATTR_UINT32SET:
 			case SPH_ATTR_INT64SET:
 			case SPH_ATTR_JSON:
-				CSphMatch::SetAttr ( tLoc, 0 );
 				break;
+
 			default:
 				return false;
 		};
@@ -8365,8 +8358,8 @@ void BuildStatus ( VectorLike & dStatus )
 	dStatus.MatchTupletf ( "uptime", "%u", (DWORD) time ( nullptr )-g_tStats.m_uStarted );
 	dStatus.MatchTupletf ( "connections", "%l", g_tStats.m_iConnections.load ( std::memory_order_relaxed ) );
 	dStatus.MatchTupletf ( "maxed_out", "%l", g_tStats.m_iMaxedOut.load ( std::memory_order_relaxed ) );
-	dStatus.MatchTuplet ( "version" , szMANTICORE_VERSION );
-	dStatus.MatchTuplet ( "mysql_version", g_sMySQLVersion.cstr () );
+	dStatus.MatchTuplet ( "version" , g_sStatusVersion.cstr() );
+	dStatus.MatchTuplet ( "mysql_version", g_sMySQLVersion.cstr() );
 
 	for ( auto i=0; i<SEARCHD_COMMAND_TOTAL; ++i)
 		dStatus.MatchTupletf ( szCommand ( i ), "%l", g_tStats.m_iCommandCount[i].load ( std::memory_order_relaxed ) );
@@ -11102,45 +11095,61 @@ void HandleMysqlCallSuggest ( RowBuffer_i & tOut, SqlStmt_t & tStmt, bool bQuery
 	tOut.Eof();
 }
 
-void DescribeLocalSchema ( VectorLike& dOut, const CSphSchema& tSchema, bool bIsTemplate )
+
+static void AddFieldDesc ( VectorLike & dOut, const CSphColumnInfo & tField )
+{
+	if ( !dOut.MatchAdd ( tField.m_sName.cstr() ) )
+		return;
+
+	dOut.Add ( "text" );
+	StringBuilder_c sProperties ( " " );
+	DWORD uFlags = tField.m_uFieldFlags;
+	if ( uFlags & CSphColumnInfo::FIELD_INDEXED )
+		sProperties << "indexed";
+
+	if ( uFlags & CSphColumnInfo::FIELD_STORED )
+		sProperties << "stored";
+	dOut.Add ( sProperties.cstr () );
+}
+
+
+static void AddAttributeDesc ( VectorLike & dOut, const CSphColumnInfo & tAttr )
+{
+	if ( sphIsInternalAttr ( tAttr ) )
+		return;
+
+	if ( dOut.MatchAdd ( tAttr.m_sName.cstr() ) )
+	{
+		if ( tAttr.m_eAttrType==SPH_ATTR_INTEGER && tAttr.m_tLocator.m_iBitCount!=ROWITEM_BITS && tAttr.m_tLocator.m_iBitCount>0 )
+		{
+			StringBuilder_c sName;
+			sName.Sprintf ( "%s:%d", sphTypeName ( tAttr.m_eAttrType ), tAttr.m_tLocator.m_iBitCount );
+			dOut.Add ( sName.cstr() );
+		} else
+			dOut.Add ( sphTypeName ( tAttr.m_eAttrType ) );
+
+		dOut.Add ( tAttr.IsColumnar() ? "columnar" : "" );
+	}
+}
+
+
+void DescribeLocalSchema ( VectorLike & dOut, const CSphSchema & tSchema, bool bIsTemplate )
 {
 	// result set header packet
 	dOut.SetColNames ( { "Field", "Type", "Properties" } );
 
 	// id comes before fields
 	if ( !bIsTemplate )
-		dOut.MatchTuplet ( "id", "bigint" );
+	{
+		assert ( tSchema.GetAttr(0).m_sName==sphGetDocidName() );
+		AddAttributeDesc ( dOut, tSchema.GetAttr(0) );
+	}
 
 	for ( int i = 0; i<tSchema.GetFieldsCount (); ++i )
-		if ( dOut.MatchAdd ( tSchema.GetFieldName ( i ) ) )
-		{
-			dOut.Add ( "text" );
-			StringBuilder_c sProperties ( " " );
-			DWORD uFlags = tSchema.GetField ( i ).m_uFieldFlags;
-			if ( uFlags & CSphColumnInfo::FIELD_INDEXED )
-				sProperties << "indexed";
-
-			if ( uFlags & CSphColumnInfo::FIELD_STORED )
-				sProperties << "stored";
-			dOut.Add ( sProperties.cstr () );
-		}
+		AddFieldDesc ( dOut, tSchema.GetField(i) );
 
 	for ( int i = 1; i<tSchema.GetAttrsCount (); ++i )
-	{
-		const CSphColumnInfo & tCol = tSchema.GetAttr ( i );
-		if ( sphIsInternalAttr ( tCol ) )
-			continue;
-
-		dOut.MatchTupletFn ( tCol.m_sName.cstr (), [&tCol] {
-			if ( tCol.m_eAttrType==SPH_ATTR_INTEGER && tCol.m_tLocator.m_iBitCount!=ROWITEM_BITS )
-			{
-				StringBuilder_c sName;
-				sName.Sprintf ( "%s:%d", sphTypeName ( tCol.m_eAttrType ), tCol.m_tLocator.m_iBitCount );
-				return CSphString ( sName );
-			} else
-				return CSphString ( sphTypeName ( tCol.m_eAttrType ) );
-		} );
-	}
+		AddAttributeDesc ( dOut, tSchema.GetAttr(i) );
 }
 
 
@@ -12754,14 +12763,15 @@ struct SessionVars_t
 	int				m_iDistThreads = 0;
 	CSphVector<int64_t> m_dLastIds;
 
-	bool bProfile () const { return m_eProfile!=Profile_e::NONE; };
+	bool IsProfile () const { return m_eProfile!=Profile_e::NONE; };
 };
 
 // fwd
 void HandleMysqlShowProfile ( RowBuffer_i & tOut, const QueryProfile_c & p, bool bMoreResultsFollow );
+
 static void HandleMysqlShowPlan ( RowBuffer_i & tOut, const QueryProfile_c & p, bool bMoreResultsFollow, bool bDot );
 
-bool bDot ( const SqlStmt_t & tStmt, const SessionVars_t & tVars )
+bool IsDot ( const SqlStmt_t & tStmt, const SessionVars_t & tVars )
 {
 	if ( tStmt.m_sThreadFormat=="dot" )
 		return true;
@@ -12819,7 +12829,7 @@ void HandleMysqlMultiStmt ( const CSphVector<SqlStmt_t> & dStmt, CSphQueryResult
 
 	// use first meta for faceted search
 	bool bUseFirstMeta = ( tHandler.m_dQueries.GetLength()>1 && !tHandler.m_dQueries[0].m_bFacet && tHandler.m_dQueries[1].m_bFacet );
-	if ( tVars.bProfile() )
+	if ( tVars.IsProfile() )
 		tHandler.SetProfile ( &tProfile );
 
 	// do search
@@ -12864,7 +12874,7 @@ void HandleMysqlMultiStmt ( const CSphVector<SqlStmt_t> & dStmt, CSphQueryResult
 			AggrResult_t & tRes = tHandler.m_dAggrResults[iSelect++];
 			if ( !sWarning.IsEmpty() )
 				tRes.m_sWarning = sWarning;
-			SendMysqlSelectResult ( dRows, tRes, bMoreResultsFollow, false, nullptr, ( tVars.bProfile() ? &tProfile : nullptr ) );
+			SendMysqlSelectResult ( dRows, tRes, bMoreResultsFollow, false, nullptr, ( tVars.IsProfile() ? &tProfile : nullptr ) );
 			// mysql server breaks send on error
 			bBreak = !tRes.m_iSuccesses;
 			break;
@@ -12886,7 +12896,7 @@ void HandleMysqlMultiStmt ( const CSphVector<SqlStmt_t> & dStmt, CSphQueryResult
 			HandleMysqlShowProfile ( dRows, tProfile, bMoreResultsFollow );
 			break;
 		case STMT_SHOW_PLAN:
-			HandleMysqlShowPlan ( dRows, tProfile, bMoreResultsFollow, ::bDot ( dStmt[i], tVars ) );
+			HandleMysqlShowPlan ( dRows, tProfile, bMoreResultsFollow, ::IsDot ( dStmt[i], tVars ) );
 		default:
 			break;
 		}
@@ -13153,7 +13163,7 @@ void HandleMysqlSet ( RowBuffer_i & tOut, SqlStmt_t & tStmt, SessionVars_t & tVa
 }
 
 
-void HandleMysqlAttach ( RowBuffer_i & tOut, const SqlStmt_t & tStmt )
+void HandleMysqlAttach ( RowBuffer_i & tOut, const SqlStmt_t & tStmt, CSphString & sWarning )
 {
 	const CSphString & sFrom = tStmt.m_sIndex;
 	const CSphString & sTo = tStmt.m_sStringParam;
@@ -13198,7 +13208,8 @@ void HandleMysqlAttach ( RowBuffer_i & tOut, const SqlStmt_t & tStmt )
 	auto * pRtTo = ( RtIndex_i * ) pTo->m_pIndex;
 
 	bool bFatal = false;
-	if ( !pRtTo->AttachDiskIndex ( pFrom->m_pIndex, bTruncate, bFatal, sError ) )
+	StrVec_t dWarnings;
+	if ( !pRtTo->AttachDiskIndex ( pFrom->m_pIndex, bTruncate, bFatal, dWarnings, sError ) )
 	{
 		if ( bFatal )
 			g_pLocalIndexes->Delete(sFrom);
@@ -13207,6 +13218,8 @@ void HandleMysqlAttach ( RowBuffer_i & tOut, const SqlStmt_t & tStmt )
 		return;
 	}
 
+	sWarning = ConcatWarnings(dWarnings);
+
 	// after a successfull Attach() RT index owns it
 	pFrom->m_pIndex = nullptr;
 	g_pLocalIndexes->Delete ( sFrom );
@@ -13838,12 +13851,14 @@ void HandleMysqlOptimize ( RowBuffer_i & tOut, const SqlStmt_t & tStmt )
 // STMT_SELECT_SYSVAR: SELECT @@sysvar1 [ as alias] [@@sysvarN [ as alias]] [limit M]
 void HandleMysqlSelectSysvar ( RowBuffer_i & tOut, const SqlStmt_t & tStmt, const SessionVars_t & tVars )
 {
-	const struct SysVar_t
+	struct SysVar_t
 	{
 		const MysqlColumnType_e m_eType;
 		const char * m_sName;
 		std::function<CSphString ( void )> m_fnValue;
-	} dSysvars[] =
+	};
+	
+	const SysVar_t dSysvars[] =
 	{	{ MYSQL_COL_STRING,	nullptr, [] {return "";}}, // stub
 		{ MYSQL_COL_LONG,	"@@session.auto_increment_increment",	[] {return "1";}},
 		{ MYSQL_COL_STRING,	"@@character_set_client", [] {return "utf8";}},
@@ -13859,10 +13874,12 @@ void HandleMysqlSelectSysvar ( RowBuffer_i & tOut, const SqlStmt_t & tStmt, cons
 			}},
 	};
 
-	auto fnVar = [&dSysvars] ( const CSphString & sVar )->const SysVar_t& {
-		for ( const auto& dVar : dSysvars )
-			if ( sVar==dVar.m_sName )
-				return dVar;
+	auto fnVar = [&dSysvars] ( const CSphString & sVar )->const SysVar_t &
+	{
+		for ( const auto & tVar : dSysvars )
+			if ( sVar==tVar.m_sName )
+				return tVar;
+
 		return dSysvars[0];
 	};
 
@@ -13873,8 +13890,8 @@ void HandleMysqlSelectSysvar ( RowBuffer_i & tOut, const SqlStmt_t & tStmt, cons
 	tOut.HeadEnd ();
 
 	// fill values
-	for ( const auto & dItem : tStmt.m_tQuery.m_dItems )
-		tOut.PutString ( fnVar ( dItem.m_sExpr ).m_fnValue().cstr() );
+	for ( const auto & tItem : tStmt.m_tQuery.m_dItems )
+		tOut.PutString ( fnVar ( tItem.m_sExpr ).m_fnValue().cstr() );
 
 	// finalize
 	tOut.Commit ();
@@ -15095,14 +15112,14 @@ void HandleMysqlImportTable ( RowBuffer_i & tOut, const SqlStmt_t & tStmt, CSphS
 	}
 
 	bool bPQ = false;
-	if ( !CopyIndexFiles ( tStmt.m_sIndex, tStmt.m_sStringParam, bPQ, sError ) )
+	StrVec_t dWarnings;
+	if ( !CopyIndexFiles ( tStmt.m_sIndex, tStmt.m_sStringParam, bPQ, dWarnings, sError ) )
 	{
 		sError.SetSprintf ( "unable to import index '%s': %s", tStmt.m_sIndex.cstr(), sError.cstr() );
 		tOut.Error ( tStmt.m_sStmt, sError.cstr() );
 		return;
 	}
 
-	StrVec_t dWarnings;
 	if ( !AddExistingIndexInt ( tStmt.m_sIndex, bPQ ? IndexType_e::PERCOLATE : IndexType_e::RT, dWarnings, sError ) )
 	{
 		sError.SetSprintf ( "unable to import index '%s': %s", tStmt.m_sIndex.cstr(), sError.cstr() );
@@ -15163,9 +15180,9 @@ private:
 	bool				m_bFederatedUser = false;
 	CSphString			m_sFederatedQuery;
 
-	bool bDot ( const SqlStmt_t & tStmt ) const
+	bool IsDot ( const SqlStmt_t & tStmt ) const
 	{
-		return ::bDot ( tStmt, m_tVars );
+		return ::IsDot ( tStmt, m_tVars );
 	}
 
 public:
@@ -15200,7 +15217,7 @@ public:
 		}
 
 		// parse SQL query
-		if ( m_tVars.bProfile() )
+		if ( m_tVars.IsProfile() )
 			m_tProfile.Switch ( SPH_QSTATE_SQL_PARSE );
 
 		m_sError = "";
@@ -15208,7 +15225,7 @@ public:
 		CSphVector<SqlStmt_t> dStmt;
 		bool bParsedOK = sphParseSqlQuery ( sQuery.first, sQuery.second, dStmt, m_sError, m_tVars.m_eCollation );
 
-		if ( m_tVars.bProfile() )
+		if ( m_tVars.IsProfile() )
 			m_tProfile.Switch ( SPH_QSTATE_UNKNOWN );
 
 		SqlStmt_e eStmt = STMT_PARSE_ERROR;
@@ -15268,7 +15285,7 @@ public:
 				dStmt.Begin()->m_pTableFunc = nullptr;
 				tHandler.m_pStmt = pStmt;
 
-				if ( m_tVars.bProfile() )
+				if ( m_tVars.IsProfile() )
 					tHandler.SetProfile ( &m_tProfile );
 				if ( m_bFederatedUser )
 					tHandler.SetFederatedUser();
@@ -15278,7 +15295,7 @@ public:
 					// query just completed ok; reset out error message
 					m_sError = "";
 					AggrResult_t & tLast = tHandler.m_dAggrResults.Last();
-					SendMysqlSelectResult ( tOut, tLast, false, m_bFederatedUser, &m_sFederatedQuery, ( m_tVars.bProfile() ? &m_tProfile : nullptr ) );
+					SendMysqlSelectResult ( tOut, tLast, false, m_bFederatedUser, &m_sFederatedQuery, ( m_tVars.IsProfile() ? &m_tProfile : nullptr ) );
 				}
 
 				// save meta for SHOW META (profile is saved elsewhere)
@@ -15507,7 +15524,8 @@ public:
 			return true;
 
 		case STMT_ATTACH_INDEX:
-			HandleMysqlAttach ( tOut, *pStmt );
+			m_tLastMeta.m_sWarning = "";
+			HandleMysqlAttach ( tOut, *pStmt, m_tLastMeta.m_sWarning );
 			return true;
 
 		case STMT_FLUSH_RTINDEX:
@@ -15563,7 +15581,7 @@ public:
 			return true;
 
 		case STMT_SHOW_PLAN:
-			HandleMysqlShowPlan ( tOut, m_tLastProfile, false, bDot ( *pStmt ));
+			HandleMysqlShowPlan ( tOut, m_tLastProfile, false, IsDot ( *pStmt ));
 			return false; // do not profile this call, keep last query profile
 
 		case STMT_SELECT_DUAL:
@@ -15673,7 +15691,7 @@ public:
 			return true;
 
 		case STMT_EXPLAIN:
-			HandleMysqlExplain ( tOut, *pStmt, bDot ( *pStmt ) );
+			HandleMysqlExplain ( tOut, *pStmt, IsDot ( *pStmt ) );
 			return true;
 
 		case STMT_IMPORT_TABLE:
@@ -15736,7 +15754,7 @@ QueryProfile_c * SphinxqlSessionPublic::StartProfiling ( ESphQueryState eState )
 {
 	assert ( m_pImpl );
 	QueryProfile_c * pProfile = nullptr;
-	if ( m_pImpl->m_tVars.bProfile() ) // the current statement might change it
+	if ( m_pImpl->m_tVars.IsProfile() ) // the current statement might change it
 	{
 		pProfile = &m_pImpl->m_tProfile;
 		pProfile->Start ( eState );
@@ -16036,7 +16054,7 @@ RotateFrom_e CheckIndexHeaderRotate ( const ServedDesc_t & tServed )
 }
 
 /// returns true if any version of the index (old or new one) has been preread
-bool RotateIndexGreedy (ServedDesc_t &tWlockedIndex, const char * szIndex, CSphString & sError )
+bool RotateIndexGreedy ( ServedDesc_t & tWlockedIndex, const char * szIndex, CSphString & sError )
 {
 	sphLogDebug ( "RotateIndexGreedy for '%s' invoked", szIndex );
 	IndexFiles_c dFiles ( tWlockedIndex.m_sIndexPath, szIndex );
@@ -16100,7 +16118,8 @@ bool RotateIndexGreedy (ServedDesc_t &tWlockedIndex, const char * szIndex, CSphS
 	DictRefPtr_c		pDictionary { tWlockedIndex.m_pIndex->LeakDictionary () };
 
 //	bool bRolledBack = false;
-	bool bPreallocSuccess = tWlockedIndex.m_pIndex->Prealloc ( g_bStripPath, nullptr );
+	StrVec_t dWarnings;
+	bool bPreallocSuccess = tWlockedIndex.m_pIndex->Prealloc ( g_bStripPath, nullptr, dWarnings );
 	if ( !bPreallocSuccess )
 	{
 		if ( tWlockedIndex.m_bOnlyNew )
@@ -16116,12 +16135,15 @@ bool RotateIndexGreedy (ServedDesc_t &tWlockedIndex, const char * szIndex, CSphS
 			sphFatal ( "%s", dFiles.FatalMsg ( "rotating" ).cstr () );
 
 		sphLogDebug ( "RotateIndexGreedy: has recovered. Prealloc it." );
-		bPreallocSuccess = tWlockedIndex.m_pIndex->Prealloc ( g_bStripPath, nullptr );
+		bPreallocSuccess = tWlockedIndex.m_pIndex->Prealloc ( g_bStripPath, nullptr, dWarnings );
 		if ( !bPreallocSuccess )
 			sError.SetSprintf ( "rotating index '%s': .new preload failed; ROLLBACK FAILED; INDEX UNUSABLE", szIndex );
 //		bRolledBack = true;
 	}
 
+	for ( const auto & i : dWarnings )
+		sphWarning ( "rotating index '%s': %s", szIndex, i.cstr() );
+
 	if ( !tWlockedIndex.m_pIndex->GetLastWarning().IsEmpty() )
 		sphWarning ( "rotating index '%s': %s", szIndex, tWlockedIndex.m_pIndex->GetLastWarning().cstr() );
 
@@ -16214,7 +16236,7 @@ void CheckLeaks () REQUIRES ( MainThread )
 bool PreallocNewIndex ( ServedDesc_t & tIdx, const CSphConfigSection * pConfig, const char * szIndexName, StrVec_t & dWarnings, CSphString & sError )
 {
 	CSphScopedPtr<FilenameBuilder_i> pFilenameBuilder ( CreateFilenameBuilder(szIndexName) );
-	if ( !tIdx.m_pIndex->Prealloc ( g_bStripPath, pFilenameBuilder.Ptr() ) )
+	if ( !tIdx.m_pIndex->Prealloc ( g_bStripPath, pFilenameBuilder.Ptr(), dWarnings ) )
 	{
 		sError.SetSprintf ( "prealloc: %s", tIdx.m_pIndex->GetLastError().cstr() );
 		return false;
@@ -16772,7 +16794,7 @@ static bool ConfigureRTPercolate ( CSphSchema & tSchema, CSphIndexSettings & tSe
 		tSettings.m_iMinInfixLen = 2;
 	}
 
-	tSchema.SetupStoredFields ( tSettings.m_dStoredFields, tSettings.m_dStoredOnlyFields );
+	tSchema.SetupFlags ( tSettings );
 
 	return true;
 }
@@ -17726,7 +17748,7 @@ void ServiceInstall ( int argc, char ** argv )
 	}
 
 	CSphString sDesc;
-	sDesc.SetSprintf ( "%s-%s", g_sServiceName, szMANTICORE_VERSION );
+	sDesc.SetSprintf ( "%s-%s", g_sServiceName, g_sStatusVersion.cstr() );
 
 	SERVICE_DESCRIPTION tDesc;
 	tDesc.lpDescription = (LPSTR) sDesc.cstr();
@@ -18442,7 +18464,8 @@ void ConfigureSearchd ( const CSphConfig & hConf, bool bOptPIDFile, bool bTestMo
 			sphWarning ( "server_id out of range 0 - 127, clamped to %d", g_iServerID );
 		}
 	}
-	g_sMySQLVersion = hSearchd.GetStr ( "mysql_version_string", szMANTICORE_VERSION );
+
+	g_sMySQLVersion = hSearchd.GetStr ( "mysql_version_string", g_sMySQLVersion.cstr() );
 
 	AllowOnlyNot ( hSearchd.GetInt ( "not_terms_only_allowed", 0 )!=0 );
 	ConfigureDaemonLog ( hSearchd.GetStr ( "query_log_commands" ) );
@@ -18901,6 +18924,20 @@ void StopOrStopWaitAnother ( CSphVariant * v, bool bWait ) REQUIRES ( MainThread
 }
 } // static namespace
 
+
+static void InitBanner()
+{
+	const char * szColumnarVer = GetColumnarVersionStr();
+	CSphString sColumnar = "";
+	if ( szColumnarVer )
+		sColumnar.SetSprintf ( " (columnar %s)", szColumnarVer );
+
+	g_sBanner.SetSprintf ( "%s%s%s",  szMANTICORE_NAME, sColumnar.cstr(), szMANTICORE_BANNER_TEXT );
+	g_sMySQLVersion.SetSprintf ( "%s%s", szMANTICORE_NAME, sColumnar.cstr() );
+	g_sStatusVersion.SetSprintf ( "%s%s", szMANTICORE_VERSION, sColumnar.cstr() );
+}
+
+
 int WINAPI ServiceMain ( int argc, char **argv ) REQUIRES (!MainThread)
 {
 	ScopedRole_c thMain (MainThread);
@@ -18943,12 +18980,19 @@ int WINAPI ServiceMain ( int argc, char **argv ) REQUIRES (!MainThread)
 
 	tzset();
 
+	CSphString sError;
+	// initialize it before other code to fetch version string for banner
+	if ( !InitColumnar ( sError ) )
+		sphWarning ( "Error initializing columnar storage: %s", sError.cstr() );
+
+	InitBanner();
+
 	if ( !g_bService )
-		fprintf ( stdout, "%s", szMANTICORE_BANNER );
+		fprintf ( stdout, "%s",  g_sBanner.cstr() );
 
-	const char* sEndian = sphCheckEndian();
-	if ( sEndian )
-		sphDie ( "%s", sEndian );
+	const char * szEndian = sphCheckEndian();
+	if ( szEndian )
+		sphDie ( "%s", szEndian );
 
 	//////////////////////
 	// parse command line
@@ -19100,7 +19144,6 @@ int WINAPI ServiceMain ( int argc, char **argv ) REQUIRES (!MainThread)
 
 	const CSphConfigSection & hSearchdpre = hConf["searchd"]["searchd"];
 
-	CSphString sError;
 	if ( !sphInitCharsetAliasTable ( sError ) )
 		sphFatal ( "failed to init charset alias table: %s", sError.cstr() );
 
@@ -19250,7 +19293,7 @@ int WINAPI ServiceMain ( int argc, char **argv ) REQUIRES (!MainThread)
 	// since that moment any 'fatal' will assume calling 'shutdown' function.
 	sphSetDieCallback ( DieOrFatalWithShutdownCb );
 
-	sphInfo( "starting daemon version '%s' ...", szMANTICORE_VERSION );
+	sphInfo( "starting daemon version '%s' ...", g_sStatusVersion.cstr() );
 
 	////////////////////
 	// network startup

+ 2 - 1
src/searchdaemon.h

@@ -18,6 +18,7 @@
 #define _searchdaemon_
 
 #include "searchdconfig.h"
+#include "memio.h"
 
 /////////////////////////////////////////////////////////////////////////////
 // MACHINE-DEPENDENT STUFF
@@ -1280,7 +1281,7 @@ public:
 	bool IsAutoCommit () const;
 	bool IsInTrans() const;
 
-	QueryProfile_c* StartProfiling ( ESphQueryState );
+	QueryProfile_c * StartProfiling ( ESphQueryState );
 	void SaveLastProfile();
 
 	// manage backend's timeout and variables

+ 9 - 9
src/searchdconfig.cpp

@@ -848,11 +848,11 @@ bool CopyExternalIndexFiles ( const StrVec_t & dFiles, const CSphString & sDestP
 }
 
 
-static CSphIndex * TryToPreallocRt ( const CSphString & sIndex, const CSphString & sNewIndexPath, CSphString & sError )
+static CSphIndex * TryToPreallocRt ( const CSphString & sIndex, const CSphString & sNewIndexPath, StrVec_t & dWarnings, CSphString & sError )
 {
 	CSphSchema tSchemaStub;
 	CSphScopedPtr<RtIndex_i> pRT ( sphCreateIndexRT ( tSchemaStub, sIndex.cstr(), 32*1024*1024, sNewIndexPath.cstr(), true ) );
-	if ( !pRT->Prealloc ( false, nullptr ) )
+	if ( !pRT->Prealloc ( false, nullptr, dWarnings ) )
 	{
 		sError.SetSprintf ( "failed to prealloc: %s", pRT->GetLastError().cstr() );
 		return nullptr;
@@ -862,11 +862,11 @@ static CSphIndex * TryToPreallocRt ( const CSphString & sIndex, const CSphString
 }
 
 
-static CSphIndex * TryToPreallocPq ( const CSphString & sIndex, const CSphString & sNewIndexPath, CSphString & sError )
+static CSphIndex * TryToPreallocPq ( const CSphString & sIndex, const CSphString & sNewIndexPath, StrVec_t & dWarnings, CSphString & sError )
 {
 	CSphSchema tSchemaStub;
 	CSphScopedPtr<PercolateIndex_i> pPQ ( CreateIndexPercolate ( tSchemaStub, sIndex.cstr(), sNewIndexPath.cstr() ) );
-	if ( !pPQ->Prealloc ( false, nullptr ) )
+	if ( !pPQ->Prealloc ( false, nullptr, dWarnings ) )
 	{
 		sError.SetSprintf ( "failed to prealloc: %s", pPQ->GetLastError().cstr() );
 		return nullptr;
@@ -879,15 +879,15 @@ static CSphIndex * TryToPreallocPq ( const CSphString & sIndex, const CSphString
 }
 
 
-static bool CopyExternalFiles ( const CSphString & sIndex, const CSphString & sNewIndexPath, StrVec_t & dCopied, bool & bPQ, CSphString & sError )
+static bool CopyExternalFiles ( const CSphString & sIndex, const CSphString & sNewIndexPath, StrVec_t & dCopied, bool & bPQ, StrVec_t & dWarnings, CSphString & sError )
 {
 	bPQ = false;
 
 	CSphString sRtError, sPqError;
-	CSphScopedPtr<CSphIndex> pIndex ( TryToPreallocRt ( sIndex, sNewIndexPath, sRtError ) );
+	CSphScopedPtr<CSphIndex> pIndex ( TryToPreallocRt ( sIndex, sNewIndexPath, dWarnings, sRtError ) );
 	if ( !pIndex )
 	{
-		pIndex = TryToPreallocPq ( sIndex, sNewIndexPath, sPqError );
+		pIndex = TryToPreallocPq ( sIndex, sNewIndexPath, dWarnings, sPqError );
 		if ( !pIndex )
 		{
 			sError = sRtError;
@@ -935,7 +935,7 @@ private:
 
 
 
-bool CopyIndexFiles ( const CSphString & sIndex, const CSphString & sPathToIndex, bool & bPQ, CSphString & sError )
+bool CopyIndexFiles ( const CSphString & sIndex, const CSphString & sPathToIndex, bool & bPQ, StrVec_t & dWarnings, CSphString & sError )
 {
 	CSphString sPath, sNewIndexPath;
 	if ( !PrepareDirForNewIndex ( sPath, sNewIndexPath, sIndex, sError ) )
@@ -975,7 +975,7 @@ bool CopyIndexFiles ( const CSphString & sIndex, const CSphString & sPathToIndex
 		dCopied.Add(sDest);
 	}
 
-	if ( !CopyExternalFiles ( sIndex, sNewIndexPath, dCopied, bPQ, sError ) )
+	if ( !CopyExternalFiles ( sIndex, sNewIndexPath, dCopied, bPQ, dWarnings, sError ) )
 		return false;
 
 	tCleanup.Ok();

+ 1 - 1
src/searchdconfig.h

@@ -113,6 +113,6 @@ bool		CreateNewIndexInt ( const CSphString & sIndex, const CreateTableSettings_t
 bool		AddExistingIndexInt ( const CSphString & sIndex, IndexType_e eType, StrVec_t & dWarnings, CSphString & sError );
 bool		DropIndexInt ( const CSphString & sIndex, bool bIfExists, CSphString & sError );
 bool		CopyExternalIndexFiles ( const StrVec_t & dFiles, const CSphString & sDestPath, StrVec_t & dCopied, CSphString & sError );
-bool		CopyIndexFiles ( const CSphString & sIndex, const CSphString & sPathToIndex, bool & bPQ, CSphString & sError );
+bool		CopyIndexFiles ( const CSphString & sIndex, const CSphString & sPathToIndex, bool & bPQ, StrVec_t & dWarnings, CSphString & sError );
 
 #endif // _searchdconfig_

+ 4 - 1
src/searchdexpr.cpp

@@ -714,11 +714,14 @@ int ExprHook_c::IsKnownFunc ( const char * sFunc ) const
 }
 
 
-ISphExpr * ExprHook_c::CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError )
+ISphExpr * ExprHook_c::CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, bool * pNeedDocIds, CSphString & sError )
 {
 	if ( pEvalStage )
 		*pEvalStage = SPH_EVAL_POSTLIMIT;
 
+	if ( pNeedDocIds )
+		*pNeedDocIds = true;
+
 	ISphExpr * pRes = nullptr;
 
 	switch ( iID )

+ 1 - 1
src/searchdexpr.h

@@ -25,7 +25,7 @@ class ExprHook_c : public ISphExprHook
 public:
 	int			IsKnownIdent ( const char * ) const final	{ return -1; }
 	int			IsKnownFunc ( const char * sFunc ) const final;
-	ISphExpr *	CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError ) final;
+	ISphExpr *	CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, bool * pNeedDocIds, CSphString & sError ) final;
 	ESphAttr	GetIdentType ( int ) const final;
 	ESphAttr	GetReturnType ( int iID, const CSphVector<ESphAttr> & dArgs, bool, CSphString & sError ) const final;
 	void		CheckEnter ( int ) final {}

+ 3 - 2
src/searchdhttp.cpp

@@ -27,6 +27,7 @@ const char * g_dHttpStatus[] = { "200 OK", "206 Partial Content", "400 Bad Reque
 								 "501 Not Implemented", "503 Service Unavailable", "526 Invalid SSL Certificate" };
 STATIC_ASSERT ( sizeof(g_dHttpStatus)/sizeof(g_dHttpStatus[0])==SPH_HTTP_STATUS_TOTAL, SPH_HTTP_STATUS_SHOULD_BE_SAME_AS_SPH_HTTP_STATUS_TOTAL );
 
+extern CSphString g_sStatusVersion;
 
 static void HttpBuildReply ( CSphVector<BYTE> & dData, ESphHttpStatus eCode, const char * sBody, int iBodyLen, bool bHtml )
 {
@@ -34,7 +35,7 @@ static void HttpBuildReply ( CSphVector<BYTE> & dData, ESphHttpStatus eCode, con
 
 	const char * sContent = ( bHtml ? "text/html" : "application/json" );
 	CSphString sHttp;
-	sHttp.SetSprintf ( "HTTP/1.1 %s\r\nServer: %s\r\nContent-Type: %s; charset=UTF-8\r\nContent-Length:%d\r\n\r\n", g_dHttpStatus[eCode], szMANTICORE_VERSION, sContent, iBodyLen );
+	sHttp.SetSprintf ( "HTTP/1.1 %s\r\nServer: %s\r\nContent-Type: %s; charset=UTF-8\r\nContent-Length:%d\r\n\r\n", g_dHttpStatus[eCode], g_sStatusVersion.cstr(), sContent, iBodyLen );
 
 	int iHeaderLen = sHttp.Length();
 	dData.Resize ( iHeaderLen + iBodyLen );
@@ -287,7 +288,7 @@ R"index(<!DOCTYPE html>
 static void HttpHandlerIndexPage ( CSphVector<BYTE> & dData )
 {
 	StringBuilder_c sIndexPage;
-	sIndexPage.Appendf ( g_sIndexPage, szMANTICORE_VERSION );
+	sIndexPage.Appendf ( g_sIndexPage, g_sStatusVersion.cstr() );
 	HttpBuildReply ( dData, SPH_HTTP_STATUS_200, sIndexPage.cstr(), sIndexPage.GetLength(), true );
 }
 

+ 1 - 1
src/searchdreplication.cpp

@@ -12,7 +12,7 @@
 #include "sphinx.h"
 #include "sphinxstd.h"
 #include "sphinxutils.h"
-#include "sphinxint.h"
+#include "memio.h"
 #include "sphinxpq.h"
 #include "searchdreplication.h"
 #include "accumulator.h"

+ 7 - 5
src/searchdsql.cpp

@@ -596,11 +596,8 @@ bool SqlParser_c::AddOption ( const SqlNode_t & tIdent, const SqlNode_t & tValue
 		break;
 
 	case Option_e::REVERSE_SCAN: //} else if ( sOpt=="reverse_scan" )
-		if ( !CheckInteger ( sOpt, sVal ) )
-			return false;
-
-		m_pQuery->m_bReverseScan = ( tValue.m_iValue!=0 );
-		break;
+		*m_pParseError = "reverse_scan is deprecated";
+		return false;
 
 	case Option_e::IGNORE_NONEXISTENT_COLUMNS: //} else if ( sOpt=="ignore_nonexistent_columns" )
 		if ( !CheckInteger ( sOpt, sVal ) )
@@ -1466,9 +1463,14 @@ bool sphParseSqlQuery ( const char * sQuery, int iLen, CSphVector<SqlStmt_t> & d
 		}
 
 		iFilterCount = tParser.m_dFiltersPerStmt[iStmt];
+
 		// all queries have only plain AND filters - no need for filter tree
 		if ( iFilterCount && tParser.m_bGotFilterOr )
 			CreateFilterTree ( tParser.m_dFilterTree, iFilterStart, iFilterCount, tQuery );
+		else
+			OptimizeFilters ( tQuery.m_dFilters );
+
+
 		iFilterStart = iFilterCount;
 
 		// fixup hints

파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 260 - 558
src/secondaryindex.cpp


+ 14 - 56
src/secondaryindex.h

@@ -16,76 +16,34 @@
 #include <math.h>
 
 
+using RowIdBlock_t = VecTraits_T<RowID_t>;
+
 class RowidIterator_i
 {
 public:
 	virtual			~RowidIterator_i(){}
 
-	virtual RowID_t	GetNextRowID() = 0;
+	virtual bool	HintRowID ( RowID_t tRowID ) = 0;
+	virtual bool	GetNextRowIdBlock ( RowIdBlock_t & dRowIdBlock ) = 0;
 	virtual int64_t	GetNumProcessed() const = 0;
 };
 
-
-class CSphReader;
-class CSphWriter;
-
-enum HistogramType_e
-{
-	HISTOGRAM_NONE,
-	HISTOGRAM_UINT32,
-	HISTOGRAM_INT64,
-	HISTOGRAM_FLOAT	
-};
-
-
-class Histogram_i
-{
-public:
-	virtual			~Histogram_i() {}
-
-	virtual void	Setup ( SphAttr_t tMin, SphAttr_t tMax ) = 0;
-	virtual void	Insert ( SphAttr_t tAttrVal ) = 0;
-	virtual void	Delete ( SphAttr_t tAttrVal ) = 0;
-	virtual bool	EstimateRsetSize ( const CSphFilterSettings & tFilter, int64_t & iEstimate ) const = 0;
-	virtual DWORD	GetNumValues() const = 0;
-	virtual bool	IsOutdated() const = 0;
-
-	virtual HistogramType_e		GetType() const = 0;
-	virtual const CSphString &	GetAttrName() const = 0;
-
-	virtual bool	Save ( CSphWriter & tWriter ) const = 0;
-	virtual bool	Load ( CSphReader & tReader, CSphString & sError ) = 0;
-};
-
-
-class HistogramContainer_c
+struct SecondaryIndexInfo_t
 {
-public:
-					~HistogramContainer_c();
-
-	bool			Save ( const CSphString & sFile, CSphString & sError );
-	bool			Load ( const CSphString & sFile, CSphString & sError );
-	bool			Add ( Histogram_i * pHistogram );
-	void			Remove ( const CSphString & sAttr );
-	Histogram_i *	Get ( const CSphString & sAttr ) const;
-	DWORD			GetNumValues() const;
-
-private:
-	SmallStringHash_T<Histogram_i*>	m_dHistogramHash;
-
-	void			Reset();
+	int		m_iFilterId {-1};
 };
 
+RowidIterator_i * CreateFilteredIterator ( const CSphVector<CSphFilterSettings> & dFilters, CSphVector<CSphFilterSettings> & dModifiedFilters, bool & bFiltersChanged, const CSphVector<FilterTreeItem_t> & dFilterTree,
+	const CSphVector<IndexHint_t> & dHints, const HistogramContainer_c & tHistograms, const BYTE * pDocidLookup );
 
-Histogram_i *	CreateHistogram ( const CSphString & sAttr, ESphAttr eAttrType );
+RowidIterator_i * CreateIteratorIntersect ( CSphVector<RowidIterator_i*> & dIterators );
 
+#if USE_COLUMNAR
+RowidIterator_i * CreateIteratorWrapper ( columnar::BlockIterator_i * pIterator );
+RowidIterator_i * CreateIteratorIntersect ( std::vector<columnar::BlockIterator_i *> & dIterators );
+#endif
 
-struct SecondaryIndexInfo_t
-{
-	int		m_iFilterId {-1};
-};
-
-RowidIterator_i * CreateFilteredIterator ( const CSphVector<CSphFilterSettings> & dFilters, CSphVector<CSphFilterSettings> & dModifiedFilters, const CSphVector<FilterTreeItem_t> & dFilterTree, const CSphVector<IndexHint_t> & dHints, const HistogramContainer_c & tHistograms, const BYTE * pDocidLookup );
+bool ReturnIteratorResult ( RowID_t * pRowID, RowID_t * pRowIdStart, RowIdBlock_t & dRowIdBlock );
 
 //////////////////////////////////////////////////////////////////////////
 

+ 490 - 0
src/sortsetup.cpp

@@ -0,0 +1,490 @@
+//
+// Copyright (c) 2017-2021, Manticore Software LTD (https://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#include "sortsetup.h"
+
+#include "sphinxjson.h"
+#include "sphinxsort.h"
+
+
+CSphMatchComparatorState::CSphMatchComparatorState()
+{
+	for ( int i=0; i<MAX_ATTRS; ++i )
+	{
+		m_eKeypart[i] = SPH_KEYPART_ROWID;
+		m_dAttrs[i] = -1;
+	}
+}
+
+
+bool CSphMatchComparatorState::UsesBitfields() const
+{
+	for ( int i=0; i<MAX_ATTRS; ++i )
+		if ( m_eKeypart[i]==SPH_KEYPART_INT && m_tLocator[i].IsBitfield() )
+			return true;
+
+	return false;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+class SortClauseTokenizer_c
+{
+public:
+	explicit SortClauseTokenizer_c ( const char * sBuffer )
+	{
+		auto iLen = (int) strlen(sBuffer);
+		m_pBuf = new char [ iLen+1 ];
+		m_pMax = m_pBuf+iLen;
+		m_pCur = m_pBuf;
+
+		// make string lowercase but keep case of JSON.field
+		bool bJson = false;
+		for ( int i=0; i<=iLen; i++ )
+		{
+			char cSrc = sBuffer[i];
+			char cDst = ToLower ( cSrc );
+			bJson = ( cSrc=='.' || cSrc=='[' || ( bJson && cDst>0 ) ); // keep case of valid char sequence after '.' and '[' symbols
+			m_pBuf[i] = bJson ? cSrc : cDst;
+		}
+	}
+
+	~SortClauseTokenizer_c()
+	{
+		SafeDeleteArray ( m_pBuf );
+	}
+
+	const char * GetToken ()
+	{
+		// skip spaces
+		while ( m_pCur<m_pMax && !*m_pCur )
+			m_pCur++;
+		if ( m_pCur>=m_pMax )
+			return nullptr;
+
+		// memorize token start, and move pointer forward
+		const char * sRes = m_pCur;
+		while ( *m_pCur )
+			m_pCur++;
+		return sRes;
+	}
+
+	bool IsSparseCount ( const char * sTok )
+	{
+		const char * sSeq = "(*)";
+		for ( ; sTok<m_pMax && *sSeq; sTok++ )
+		{
+			bool bGotSeq = ( *sSeq==*sTok );
+			if ( bGotSeq )
+				sSeq++;
+
+			// stop checking on any non space char outside sequence or sequence end
+			if ( ( !bGotSeq && !sphIsSpace ( *sTok ) && *sTok!='\0' ) || !*sSeq )
+				break;
+		}
+
+		if ( !*sSeq && sTok+1<m_pMax && !sTok[1] )
+		{
+			// advance token iterator after composite count(*) token
+			m_pCur = sTok+1;
+			return true;
+		}
+		
+		return false;
+	}
+
+protected:
+	const char *	m_pCur = nullptr;
+	const char *	m_pMax = nullptr;
+	char *			m_pBuf = nullptr;
+
+	char ToLower ( char c )
+	{
+		// 0..9, A..Z->a..z, _, a..z, @, .
+		if ( ( c>='0' && c<='9' ) || ( c>='a' && c<='z' ) || c=='_' || c=='@' || c=='.' || c=='[' || c==']' || c=='\'' || c=='\"' || c=='(' || c==')' || c=='*' )
+			return c;
+		if ( c>='A' && c<='Z' )
+			return c-'A'+'a';
+		return 0;
+	}
+};
+
+
+static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
+{
+	switch ( eType )
+	{
+		case SPH_ATTR_FLOAT:
+			return SPH_KEYPART_FLOAT;
+
+		case SPH_ATTR_STRING:
+			return SPH_KEYPART_STRING;
+
+		case SPH_ATTR_JSON:
+		case SPH_ATTR_JSON_PTR:
+		case SPH_ATTR_JSON_FIELD:
+		case SPH_ATTR_JSON_FIELD_PTR:
+		case SPH_ATTR_STRINGPTR:
+			return SPH_KEYPART_STRINGPTR;
+
+		default:
+			return SPH_KEYPART_INT;
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+class SortStateSetup_c
+{
+public:
+			SortStateSetup_c ( const char * szTok, SortClauseTokenizer_c & tTok, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dExtraExprs, int iField, const ISphSchema & tSchema, const CSphQuery & tQuery );
+
+	bool	Setup ( CSphString & sError );
+
+private:
+	const char *				m_szTok = nullptr;
+	SortClauseTokenizer_c &		m_tTok;
+	CSphMatchComparatorState &	m_tState;
+	ExtraSortExpr_t &			m_tExtraExpr;
+	const int					m_iField;
+	const ISphSchema &			m_tSchema;
+	const CSphQuery &			m_tQuery;
+
+	int							m_iAttr = -1;
+	ESphAttr					m_eAttrType = SPH_ATTR_NONE;
+
+	bool	SetupSortByRelevance();
+	void	UnifyInternalAttrNames();
+	bool	CheckOrderByMva ( CSphString & sError ) const;
+	int		FindAliasedGroupby() const;
+	bool	IsJsonAttr() const;
+	void	SetupJsonAttr();
+	bool	SetupJsonField ( CSphString & sError );
+	bool	SetupColumnar ( CSphString & sError );
+	bool	SetupJson ( CSphString & sError );
+	void	SetupJsonConversions();
+	void	SetupPrecalculatedJson();
+};
+
+
+SortStateSetup_c::SortStateSetup_c ( const char * szTok, SortClauseTokenizer_c & tTok, CSphMatchComparatorState & tState, CSphVector<ExtraSortExpr_t> & dJsonExprs, int iField, const ISphSchema & tSchema, const CSphQuery & tQuery )
+	: m_szTok ( szTok )
+	, m_tTok ( tTok )
+	, m_tState ( tState )
+	, m_tExtraExpr ( dJsonExprs[iField] )
+	, m_iField ( iField )
+	, m_tSchema ( tSchema )
+	, m_tQuery ( tQuery )
+{}
+
+
+bool SortStateSetup_c::SetupSortByRelevance()
+{
+	if ( !strcasecmp ( m_szTok, "@relevance" )
+		|| !strcasecmp ( m_szTok, "@rank" )
+		|| !strcasecmp ( m_szTok, "@weight" )
+		|| !strcasecmp ( m_szTok, "weight()" ) )
+	{
+		m_tState.m_eKeypart[m_iField] = SPH_KEYPART_WEIGHT;
+		return true;
+	}
+
+	return false;
+}
+
+
+void SortStateSetup_c::UnifyInternalAttrNames()
+{
+	if ( !strcasecmp ( m_szTok, "@group" ) )
+		m_szTok = "@groupby";
+	else if ( !strcasecmp ( m_szTok, "count(*)" ) )
+		m_szTok = "@count";
+	else if ( !strcasecmp ( m_szTok, "facet()" ) )
+		m_szTok = "@groupby"; // facet() is essentially a @groupby alias
+	else if ( strcasecmp ( m_szTok, "count" )>=0 && m_tTok.IsSparseCount ( m_szTok + sizeof ( "count" ) - 1 ) ) // expression count(*) with various spaces
+		m_szTok = "@count";
+}
+
+
+bool SortStateSetup_c::CheckOrderByMva ( CSphString & sError ) const
+{
+	int iAttr = m_tSchema.GetAttrIndex(m_szTok);
+
+	if ( iAttr<0 )
+		return true;
+
+	ESphAttr eAttrType = m_tSchema.GetAttr(iAttr).m_eAttrType;
+	if ( eAttrType==SPH_ATTR_UINT32SET || eAttrType==SPH_ATTR_INT64SET || eAttrType==SPH_ATTR_UINT32SET_PTR || eAttrType==SPH_ATTR_INT64SET_PTR )
+	{
+		sError.SetSprintf ( "order by MVA is undefined" );
+		return false;
+	}
+
+	return true;
+}
+
+
+int SortStateSetup_c::FindAliasedGroupby() const
+{
+	int iAttr = m_tSchema.GetAttrIndex(m_szTok);
+	if ( iAttr>=0 )
+		return iAttr;
+
+	// try to lookup aliased count(*) and aliased groupby() in select items
+	for ( auto & i : m_tQuery.m_dItems )
+	{
+		if ( !i.m_sAlias.cstr() || strcasecmp ( i.m_sAlias.cstr(), m_szTok ) )
+			continue;
+
+		if ( i.m_sExpr.Begins("@") )
+			return m_tSchema.GetAttrIndex ( i.m_sExpr.cstr() );
+
+		if ( i.m_sExpr=="count(*)" )
+			return m_tSchema.GetAttrIndex ( "@count" );
+
+		if ( i.m_sExpr=="groupby()" )
+		{
+			CSphString sGroupJson = SortJsonInternalSet ( m_tQuery.m_sGroupBy );
+			iAttr = m_tSchema.GetAttrIndex ( sGroupJson.cstr() );
+			// try numeric group by
+			if ( iAttr<0 )
+				iAttr = m_tSchema.GetAttrIndex ( "@groupby" );
+
+			return iAttr;
+		}
+	}
+
+	return iAttr;
+}
+
+
+bool SortStateSetup_c::IsJsonAttr() const
+{
+	if ( m_iAttr<0 )
+		return false;
+
+	ESphAttr eAttrType = m_tSchema.GetAttr(m_iAttr).m_eAttrType;
+	if ( eAttrType==SPH_ATTR_JSON_FIELD || eAttrType==SPH_ATTR_JSON_FIELD_PTR || eAttrType==SPH_ATTR_JSON || eAttrType==SPH_ATTR_JSON_PTR )
+		return true;
+
+	return false;
+}
+
+
+void SortStateSetup_c::SetupJsonAttr()
+{
+	const CSphColumnInfo & tAttr = m_tSchema.GetAttr(m_iAttr);
+	SafeAddRef ( tAttr.m_pExpr );	// addref since we're reusing an existing expression
+	m_tExtraExpr.m_pExpr = tAttr.m_pExpr;
+	m_tExtraExpr.m_tKey = JsonKey_t ( m_szTok, (int)strlen(m_szTok) );
+}
+
+
+bool SortStateSetup_c::SetupJsonField ( CSphString & sError )
+{
+	CSphString sJsonCol, sJsonKey;
+	if ( !sphJsonNameSplit ( m_szTok, &sJsonCol, &sJsonKey ) )
+		return true;
+
+	m_iAttr = m_tSchema.GetAttrIndex ( sJsonCol.cstr() );
+	if ( m_iAttr>=0 )
+	{
+		ExprParseArgs_t tExprArgs;
+		ISphExpr * pExpr = sphExprParse ( m_szTok, m_tSchema, sError, tExprArgs );
+		if ( !pExpr )
+			return false;
+
+		m_tExtraExpr.m_pExpr = pExpr;
+		m_tExtraExpr.m_tKey = JsonKey_t ( m_szTok, (int) strlen ( m_szTok ) );
+	}
+
+	return true;
+}
+
+
+bool SortStateSetup_c::SetupColumnar ( CSphString & sError )
+{
+	if ( m_iAttr<0 )
+		return true;
+
+	const CSphColumnInfo & tAttr = m_tSchema.GetAttr(m_iAttr);
+	if ( !tAttr.IsColumnar() )
+		return true;
+
+	ExprParseArgs_t tExprArgs;
+	tExprArgs.m_pAttrType = &m_eAttrType;
+	ISphExpr * pExpr = sphExprParse ( m_szTok, m_tSchema, sError, tExprArgs );
+	if ( !pExpr )
+		return false;
+
+	m_tExtraExpr.m_pExpr = pExpr;
+	m_tExtraExpr.m_eType = m_eAttrType;
+	return true;
+}
+
+
+bool SortStateSetup_c::SetupJson ( CSphString & sError )
+{
+	if ( IsJsonAttr() )
+	{
+		SetupJsonAttr();
+		return true;
+	}
+
+	// try JSON attribute and use JSON attribute instead of JSON field
+	if ( m_iAttr<0 )
+		return SetupJsonField(sError);
+
+	return true;
+}
+
+
+void SortStateSetup_c::SetupJsonConversions()
+{
+	if ( m_iAttr>=0 )
+		return;
+
+	// try json conversion functions (integer()/double()/bigint() in the order by clause)
+	ExprParseArgs_t tExprArgs;
+	tExprArgs.m_pAttrType = &m_eAttrType;
+	CSphString sError; // ignored
+	ISphExpr * pExpr = sphExprParse ( m_szTok, m_tSchema, sError, tExprArgs );
+	if ( !pExpr )
+		return;
+
+	m_tExtraExpr.m_pExpr = pExpr;
+	m_tExtraExpr.m_tKey = JsonKey_t ( m_szTok, (int) strlen(m_szTok) );
+	m_tExtraExpr.m_eType = m_eAttrType;
+	m_tExtraExpr.m_tKey.m_uMask = 0;
+
+	m_iAttr = 0; // will be remapped in SetupSortRemap
+}
+
+
+void SortStateSetup_c::SetupPrecalculatedJson()
+{
+	if ( m_iAttr>=0 )
+		return;
+
+	// try precalculated json fields received from agents (prefixed with @int_*)
+	CSphString sName;
+	sName.SetSprintf ( "%s%s", GetInternalAttrPrefix(), m_szTok );
+	m_iAttr = m_tSchema.GetAttrIndex ( sName.cstr() );
+}
+
+
+bool SortStateSetup_c::Setup ( CSphString & sError )
+{
+	if ( SetupSortByRelevance() )
+		return true;
+
+	UnifyInternalAttrNames();
+
+	if ( !CheckOrderByMva(sError) )
+		return false;
+
+	m_iAttr = FindAliasedGroupby();
+	if ( !SetupColumnar(sError) )
+		return false;
+
+	if ( !SetupJson(sError) )
+		return false;
+
+	SetupJsonConversions();
+	SetupPrecalculatedJson();
+
+	if ( m_iAttr<0 )
+	{
+		sError.SetSprintf ( "sort-by attribute '%s' not found", m_szTok );
+		return false;
+	}
+
+	const CSphColumnInfo & tCol = m_tSchema.GetAttr(m_iAttr);
+	m_tState.m_eKeypart[m_iField] = Attr2Keypart ( m_eAttrType!=SPH_ATTR_NONE ? m_eAttrType : tCol.m_eAttrType );
+	m_tState.m_tLocator[m_iField] = tCol.m_tLocator;
+	m_tState.m_dAttrs[m_iField] = m_iAttr;
+
+	return true;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+ESortClauseParseResult sphParseSortClause ( const CSphQuery & tQuery, const char * szClause, const ISphSchema & tSchema, ESphSortFunc & eFunc, CSphMatchComparatorState & tState,
+	CSphVector<ExtraSortExpr_t> & dExtraExprs, bool bComputeItems, CSphString & sError )
+{
+	for ( auto & tAttr : tState.m_dAttrs )
+		tAttr = -1;
+
+	dExtraExprs.Resize ( tState.MAX_ATTRS );
+
+	// mini parser
+	SortClauseTokenizer_c tTok(szClause);
+
+	bool bField = false; // whether i'm expecting field name or sort order
+	int iField = 0;
+
+	for ( const char * pTok=tTok.GetToken(); pTok; pTok=tTok.GetToken() )
+	{
+		bField = !bField;
+
+		// special case, sort by random
+		if ( iField==0 && bField && strcmp ( pTok, "@random" )==0 )
+			return SORT_CLAUSE_RANDOM;
+
+		// handle sort order
+		if ( !bField )
+		{
+			// check
+			if ( strcmp ( pTok, "desc" ) && strcmp ( pTok, "asc" ) )
+			{
+				sError.SetSprintf ( "invalid sorting order '%s'", pTok );
+				return SORT_CLAUSE_ERROR;
+			}
+
+			// set
+			if ( !strcmp ( pTok, "desc" ) )
+				tState.m_uAttrDesc |= ( 1<<iField );
+
+			iField++;
+			continue;
+		}
+
+		// handle attribute name
+		if ( iField==CSphMatchComparatorState::MAX_ATTRS )
+		{
+			sError.SetSprintf ( "too many sort-by attributes; maximum count is %d", CSphMatchComparatorState::MAX_ATTRS );
+			return SORT_CLAUSE_ERROR;
+		}
+		
+		SortStateSetup_c tSetup ( pTok, tTok, tState, dExtraExprs, iField, tSchema, tQuery );
+		if ( !tSetup.Setup(sError) )
+			return SORT_CLAUSE_ERROR;		
+	}
+
+	if ( iField==0 )
+	{
+		sError.SetSprintf ( "no sort order defined" );
+		return SORT_CLAUSE_ERROR;
+	}
+
+	switch ( iField )
+	{
+		case 1:		eFunc = FUNC_GENERIC1; break;
+		case 2:		eFunc = FUNC_GENERIC2; break;
+		case 3:		eFunc = FUNC_GENERIC3; break;
+		case 4:		eFunc = FUNC_GENERIC4; break;
+		case 5:		eFunc = FUNC_GENERIC5; break;
+		default:	sError.SetSprintf ( "INTERNAL ERROR: %d fields in sphParseSortClause()", iField ); return SORT_CLAUSE_ERROR;
+	}
+
+	return SORT_CLAUSE_OK;
+}

+ 72 - 0
src/sortsetup.h

@@ -0,0 +1,72 @@
+//
+// Copyright (c) 2017-2021, Manticore Software LTD (https://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _sortsetup_
+#define _sortsetup_
+
+#include "sphinx.h"
+
+/// match comparator state
+struct CSphMatchComparatorState
+{
+	static const int	MAX_ATTRS = 5;
+
+	ESphSortKeyPart		m_eKeypart[MAX_ATTRS];		///< sort-by key part type
+	CSphAttrLocator		m_tLocator[MAX_ATTRS];		///< sort-by attr locator
+	int					m_dAttrs[MAX_ATTRS];		///< sort-by attr index
+
+	DWORD				m_uAttrDesc = 0;			///< sort order mask (if i-th bit is set, i-th attr order is DESC)
+	DWORD				m_iNow = 0;					///< timestamp (for timesegments sorting mode)
+	SphStringCmp_fn		m_fnStrCmp = nullptr;		///< string comparator
+
+
+						CSphMatchComparatorState();
+
+	/// check if any of my attrs are bitfields
+	bool				UsesBitfields() const;
+	void				FixupLocators ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema, bool bRemapKeyparts );
+
+	inline int CmpStrings ( const CSphMatch & a, const CSphMatch & b, int iAttr ) const
+	{
+		assert ( iAttr>=0 && iAttr<MAX_ATTRS );
+		assert ( m_eKeypart[iAttr]==SPH_KEYPART_STRING || m_eKeypart[iAttr]==SPH_KEYPART_STRINGPTR );
+		assert ( m_fnStrCmp );
+
+		const BYTE * aa = (const BYTE*) a.GetAttr ( m_tLocator[iAttr] );
+		const BYTE * bb = (const BYTE*) b.GetAttr ( m_tLocator[iAttr] );
+		if ( aa==nullptr || bb==nullptr )
+		{
+			if ( aa==bb )
+				return 0;
+			if ( aa==nullptr )
+				return -1;
+			return 1;
+		}
+
+		return m_fnStrCmp ( {aa, 0}, {bb, 0}, m_eKeypart[iAttr]==SPH_KEYPART_STRINGPTR );
+	}
+};
+
+
+struct ExtraSortExpr_t
+{
+	CSphRefcountedPtr<ISphExpr> m_pExpr;
+	JsonKey_t					m_tKey;
+	ESphAttr					m_eType = SPH_ATTR_NONE;
+};
+
+/// parses sort clause, using a given schema
+/// fills eFunc and tState and optionally sError, returns result code
+ESortClauseParseResult sphParseSortClause ( const CSphQuery & tQuery, const char * sClause, const ISphSchema & tSchema, ESphSortFunc & eFunc, CSphMatchComparatorState & tState,
+	CSphVector<ExtraSortExpr_t> & dExtraExprs, bool bComputeItems, CSphString & sError );
+
+#endif // _sortsetup_

파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 235 - 1006
src/sphinx.cpp


+ 69 - 232
src/sphinx.h

@@ -95,6 +95,7 @@ STATIC_ASSERT ( ( 1 << ROWITEM_SHIFT )==ROWITEM_BITS, INVALID_ROWITEM_SHIFT );
 extern const char * szMANTICORE_VERSION;
 extern const char * szMANTICORE_NAME;
 extern const char * szMANTICORE_BANNER;
+extern const char * szMANTICORE_BANNER_TEXT;
 extern const char * szGIT_COMMIT_ID;
 extern const char * szGIT_BRANCH_ID;
 extern const char * szGDB_SOURCE_DIR;
@@ -961,6 +962,15 @@ struct CSphAttrLocator
 		return m_iBlobAttrId>=0;
 	}
 
+	void Reset()
+	{
+		m_iBitOffset = -1;
+		m_iBitCount = -1;
+		m_iBlobAttrId = -1;
+		m_nBlobAttrs = 0;
+		m_bDynamic = true;
+	}
+
 #ifndef NDEBUG
 	/// get last item touched by this attr (for debugging checks only)
 	int GetMaxRowitem () const
@@ -982,10 +992,12 @@ struct CSphAttrLocator
 /// getter
 inline SphAttr_t sphGetRowAttr ( const CSphRowitem * pRow, const CSphAttrLocator & tLoc )
 {
-	assert ( pRow );
+	assert(pRow);
+	assert ( tLoc.m_iBitCount );
+
 	int iItem = tLoc.m_iBitOffset >> ROWITEM_SHIFT;
 
-	switch (tLoc.m_iBitCount )
+	switch ( tLoc.m_iBitCount )
 	{
 	case ROWITEM_BITS:
 		return SphAttr_t ( pRow[iItem] );
@@ -1006,6 +1018,8 @@ inline SphAttr_t sphGetRowAttr ( const CSphRowitem * pRow, const CSphAttrLocator
 inline void sphSetRowAttr ( CSphRowitem * pRow, const CSphAttrLocator & tLoc, SphAttr_t uValue )
 {
 	assert(pRow);
+	assert ( tLoc.m_iBitCount );
+
 	int iItem = tLoc.m_iBitOffset >> ROWITEM_SHIFT;
 	if ( tLoc.m_iBitCount==2*ROWITEM_BITS )
 	{
@@ -1310,6 +1324,17 @@ struct CSphColumnInfo
 		FIELD_INDEXED	= 1<<1
 	};
 
+	enum
+	{
+		ATTR_NONE				= 0,
+
+#if USE_COLUMNAR
+		ATTR_COLUMNAR			= 1<<0,
+		ATTR_COLUMNAR_HASHES	= 1<<1
+#endif
+	};
+
+
 	CSphString		m_sName;		///< column name
 	ESphAttr		m_eAttrType;	///< attribute type
 	ESphWordpart	m_eWordpart { SPH_WORDPART_WHOLE };	///< wordpart processing type
@@ -1329,6 +1354,7 @@ struct CSphColumnInfo
 	bool			m_bFilename = false;			///< column is a file name
 	bool			m_bWeight = false;				///< is a weight column
 	DWORD			m_uFieldFlags = FIELD_INDEXED;	///< stored/indexed/highlighted etc
+	DWORD			m_uAttrFlags = ATTR_NONE;		///< attribute storage spec
 
 	WORD			m_uNext = 0xFFFF;			///< next in linked list for hash in CSphSchema
 
@@ -1347,6 +1373,10 @@ struct CSphColumnInfo
 
 	/// returns true if this column stores a pointer to data
 	bool IsDataPtr() const;
+
+	bool IsColumnar() const;
+	bool HasStringHashes() const;
+	bool IsColumnarExpr() const;
 };
 
 
@@ -1540,8 +1570,10 @@ public:
 
 	bool					HasBlobAttrs() const;
 	int						GetCachedRowSize() const;
-	void					SetupStoredFields ( const StrVec_t & dStored, const StrVec_t & dStoredOnly );
+	void					SetupFlags ( const CSphSourceSettings & tSettings );
 	bool					HasStoredFields() const;
+	bool					HasColumnarAttrs() const;
+	bool					HasNonColumnarAttrs() const;
 	bool					IsFieldStored ( int iField ) const;
 
 private:
@@ -1568,6 +1600,9 @@ private:
 	/// reset hash and re-add all attributes
 	void					RebuildHash ();
 
+	/// rebuild the attribute value array
+	void					RebuildLocators ( bool bDynamic );
+
 	/// add iAddVal to all indexes strictly greater than iStartIdx in hash structures
 	void					UpdateHash ( int iStartIdx, int iAddVal );
 };
@@ -1733,22 +1768,24 @@ ISphFieldFilter * sphCreateRegexpFilter ( const CSphFieldFilterSettings & tFilte
 /// create an ICU field filter
 ISphFieldFilter * sphCreateFilterICU ( ISphFieldFilter * pParent, const char * szBlendChars, CSphString & sError );
 
-class BlobSource_i
+class AttrSource_i
 {
 public:
-	BlobSource_i () {};
-	virtual ~BlobSource_i () {};
+	virtual							~AttrSource_i () {};
+
+	/// returns value of a given attribute
+	virtual SphAttr_t 				GetAttr ( int iAttr ) = 0;
 
 	/// returns mva values for a given attribute (mva must be stored in a field)
-	virtual CSphVector<int64_t> * GetFieldMVA ( int iAttr ) = 0;
+	virtual CSphVector<int64_t> *	GetFieldMVA ( int iAttr ) = 0;
 
 	/// returns string attributes for a given attribute
-	virtual const CSphString & GetStrAttr ( int iAttr ) = 0;
+	virtual const CSphString &		GetStrAttr ( int iAttr ) = 0;
 };
 
 
 /// generic data source
-class CSphSource : public CSphSourceSettings, public BlobSource_i
+class CSphSource : public CSphSourceSettings, public AttrSource_i
 {
 public:
 	CSphMatch							m_tDocInfo;		///< current document info
@@ -1856,6 +1893,7 @@ public:
 protected:
 	StrVec_t							m_dStrAttrs;	///< current document string attrs
 	CSphVector<CSphVector<int64_t>>		m_dMvas;		///< per-attribute MVA storage
+	CSphVector<SphAttr_t>				m_dAttrs;
 
 	TokenizerRefPtr_c				m_pTokenizer;	///< my tokenizer
 	DictRefPtr_c					m_pDict;		///< my dict
@@ -1910,6 +1948,7 @@ public:
 
 	CSphVector<int64_t> *	GetFieldMVA ( int iAttr ) override;
 	const CSphString &		GetStrAttr ( int iAttr ) override;
+	SphAttr_t				GetAttr ( int iAttr ) override;
 	void					GetDocFields ( CSphVector<VecTraits_T<BYTE>> & dFields ) override;
 
 	void					RowIDAssigned ( DocID_t tDocID, RowID_t tRowID ) override;
@@ -2403,19 +2442,9 @@ enum  ESphMvaFunc
 
 
 /// search query filter
-class CSphFilterSettings
+struct CommonFilterSettings_t
 {
-public:
-	CSphString			m_sAttrName = "";	///< filtered attribute name
-	bool				m_bExclude = false;		///< whether this is "include" or "exclude" filter (default is "include")
-	bool				m_bHasEqualMin = true;	///< has filter "equal" component or pure greater\less (for min)
-	bool				m_bHasEqualMax = true;	///< has filter "equal" component or pure greater\less (for max)
-	bool				m_bOpenLeft = false;
-	bool				m_bOpenRight = false;
-	bool				m_bIsNull = false;		///< for NULL or NOT NULL
-
 	ESphFilter			m_eType = SPH_FILTER_VALUES;		///< filter type
-	ESphMvaFunc			m_eMvaFunc = SPH_MVAFUNC_NONE;		///< MVA and stringlist folding function
 	union
 	{
 		SphAttr_t		m_iMinValue = LLONG_MIN;	///< range min
@@ -2426,6 +2455,21 @@ public:
 		SphAttr_t		m_iMaxValue = LLONG_MAX;	///< range max
 		float			m_fMaxValue;	///< range max
 	};
+};
+
+
+class CSphFilterSettings : public CommonFilterSettings_t
+{
+public:
+	CSphString			m_sAttrName = "";	///< filtered attribute name
+	bool				m_bExclude = false;		///< whether this is "include" or "exclude" filter (default is "include")
+	bool				m_bHasEqualMin = true;	///< has filter "equal" component or pure greater\less (for min)
+	bool				m_bHasEqualMax = true;	///< has filter "equal" component or pure greater\less (for max)
+	bool				m_bOpenLeft = false;
+	bool				m_bOpenRight = false;
+	bool				m_bIsNull = false;		///< for NULL or NOT NULL
+
+	ESphMvaFunc			m_eMvaFunc = SPH_MVAFUNC_NONE;		///< MVA and stringlist folding function
 	CSphVector<SphAttr_t>	m_dValues;	///< integer values set
 	StrVec_t				m_dStrings;	///< string values
 
@@ -2607,7 +2651,6 @@ struct CSphQuery
 	int				m_iOuterLimit = 0;
 	bool			m_bHasOuter = false;
 
-	bool			m_bReverseScan = false;		///< perform scan in reverse order
 	bool			m_bIgnoreNonexistent = false; ///< whether to warning or not about non-existent columns in select list
 	bool			m_bIgnoreNonexistentIndexes = false; ///< whether to error or not about non-existent indexes in index list
 	bool			m_bStrict = false;			///< whether to warning or not about incompatible types
@@ -2747,7 +2790,6 @@ struct CSphIndexProgress
 		PHASE_COLLECT,				///< document collection phase
 		PHASE_SORT,					///< final sorting phase
 		PHASE_LOOKUP,				///< docid lookup construction
-		PHASE_HISTOGRAMS,			///< creating histograms for POD attrs
 		PHASE_MERGE					///< index merging
 	};
 
@@ -2807,8 +2849,6 @@ enum ESortClauseParseResult
 enum ESphSortKeyPart
 {
 	SPH_KEYPART_ROWID,
-	SPH_KEYPART_DOCID_S,
-	SPH_KEYPART_DOCID_D,
 	SPH_KEYPART_WEIGHT,
 	SPH_KEYPART_INT,
 	SPH_KEYPART_FLOAT,
@@ -2828,198 +2868,11 @@ struct JsonKey_t
 	explicit JsonKey_t ( const char * sKey, int iLen );
 };
 
-
-/// match comparator state
-struct CSphMatchComparatorState
-{
-	static const int	MAX_ATTRS = 5;
-
-	ESphSortKeyPart		m_eKeypart[MAX_ATTRS];		///< sort-by key part type
-	CSphAttrLocator		m_tLocator[MAX_ATTRS];		///< sort-by attr locator
-	JsonKey_t			m_tSubKeys[MAX_ATTRS];		///< sort-by attr sub-locator
-	ISphExpr *			m_tSubExpr[MAX_ATTRS];		///< sort-by attr expression
-	ESphAttr			m_tSubType[MAX_ATTRS];		///< sort-by expression type
-	int					m_dAttrs[MAX_ATTRS];		///< sort-by attr index
-
-	DWORD				m_uAttrDesc = 0;			///< sort order mask (if i-th bit is set, i-th attr order is DESC)
-	DWORD				m_iNow = 0;					///< timestamp (for timesegments sorting mode)
-	SphStringCmp_fn		m_fnStrCmp = nullptr;		///< string comparator
-
-
-	/// create default empty state
-	CSphMatchComparatorState ()
-	{
-		for ( int i=0; i<MAX_ATTRS; ++i )
-		{
-			m_eKeypart[i] = SPH_KEYPART_ROWID;
-			m_tSubExpr[i] = nullptr;
-			m_tSubType[i] = SPH_ATTR_NONE;
-			m_dAttrs[i] = -1;
-		}
-	}
-
-	~CSphMatchComparatorState ()
-	{
-		for ( ISphExpr *&pExpr :  m_tSubExpr ) SafeRelease( pExpr );
-	}
-
-	/// check if any of my attrs are bitfields
-	bool UsesBitfields ()
-	{
-		for ( int i=0; i<MAX_ATTRS; ++i )
-			if ( m_eKeypart[i]==SPH_KEYPART_INT && m_tLocator[i].IsBitfield() )
-				return true;
-		return false;
-	}
-
-	inline int CmpStrings ( const CSphMatch & a, const CSphMatch & b, int iAttr ) const
-	{
-		assert ( iAttr>=0 && iAttr<MAX_ATTRS );
-		assert ( m_eKeypart[iAttr]==SPH_KEYPART_STRING || m_eKeypart[iAttr]==SPH_KEYPART_STRINGPTR );
-		assert ( m_fnStrCmp );
-
-		const BYTE * aa = (const BYTE*) a.GetAttr ( m_tLocator[iAttr] );
-		const BYTE * bb = (const BYTE*) b.GetAttr ( m_tLocator[iAttr] );
-		if ( aa==nullptr || bb==nullptr )
-		{
-			if ( aa==bb )
-				return 0;
-			if ( aa==nullptr )
-				return -1;
-			return 1;
-		}
-
-		return m_fnStrCmp ( {aa, 0}, {bb, 0}, m_eKeypart[iAttr]==SPH_KEYPART_STRINGPTR );
-	}
-
-	void FixupLocators ( const ISphSchema * pOldSchema, const ISphSchema * pNewSchema, bool bRemapKeyparts );
-};
-
-
-/// match processor interface
-struct ISphMatchProcessor
-{
-	virtual ~ISphMatchProcessor () {}
-	virtual void Process ( CSphMatch * pMatch ) = 0;
-};
-
-using fnGetBlobPoolFromMatch = std::function< const BYTE* ( const CSphMatch * )>;
-
-/// generic match sorter interface
-class ISphMatchSorter
-{
-public:
-	bool				m_bRandomize = false;
-	int64_t				m_iTotal = 0;
-
-	RowID_t				m_iJustPushed {INVALID_ROWID};
-	int					m_iMatchCapacity = 0;
-	CSphTightVector<RowID_t> m_dJustPopped;
-
-protected:
-	SharedPtr_t<ISphSchema*>	m_pSchema;	///< sorter schema (adds dynamic attributes on top of index schema)
-	CSphMatchComparatorState	m_tState;		///< protected to set m_iNow automatically on SetState() calls
-	StrVec_t					m_dTransformed;
-
-public:
-	/// ctor
-						ISphMatchSorter () {}
-
-	/// virtualizing dtor
-	virtual				~ISphMatchSorter () {}
-
-	/// check if this sorter does groupby
-	virtual bool		IsGroupby () const = 0;
-
-	/// set match comparator state
-	void		SetState ( const CSphMatchComparatorState & tState );
-
-	/// set match comparator state and copy expressions from there (if any)
-	void		CopyState ( const CSphMatchComparatorState & tState );
-
-	/// get match comparator stat
-	const CSphMatchComparatorState &	GetState() const { return m_tState; }
-
-	/// set group comparator state
-	virtual void		SetGroupState ( const CSphMatchComparatorState & ) {}
-
-	/// set blob pool pointer (for string+groupby sorters)
-	virtual void		SetBlobPool ( const BYTE * ) {}
-
-	/// set sorter schema
-	virtual void		SetSchema ( ISphSchema * pSchema, bool bRemapCmp );
-
-	/// get incoming schema
-	const ISphSchema * GetSchema () const { return ( ISphSchema *) m_pSchema; }
-
-	/// base push
-	/// returns false if the entry was rejected as duplicate
-	/// returns true otherwise (even if it was not actually inserted)
-	virtual bool		Push ( const CSphMatch & tEntry ) = 0;
-
-	/// submit pre-grouped match. bNewSet indicates that the match begins the bunch of matches got from one source
-	virtual bool		PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) = 0;
-
-	/// get	rough entries count, due of aggregate filtering phase
-	virtual int			GetLength () const = 0;
-
-	/// get internal buffer length
-	// virtual int			GetDataLength () const = 0; // everybody uses m_iMatchCapacity instead
-
-	/// get total count of non-duplicates Push()ed through this queue
-	int64_t		GetTotalCount () const { return m_iTotal; }
-
-	/// process collected entries up to length count
-	virtual void		Finalize ( ISphMatchProcessor & tProcessor, bool bCallProcessInResultSetOrder ) = 0;
-
-	/// store all entries into specified location and remove them from the queue
-	/// entries are stored in properly sorted order,
-	/// return sorted entries count, might be less than length due of aggregate filtering phase
-	virtual int			Flatten ( CSphMatch * pTo ) = 0;
-
-	/// get a pointer to the worst element, NULL if there is no fixed location
-	virtual const CSphMatch *	GetWorst() const { return nullptr; }
-
-
-	/// returns whether the sorter can be cloned to distribute processing over multi threads
-	/// (delete and update sorters are too complex by side effects and can't be cloned)
-	virtual bool CanBeCloned () const { return true; }
-
-	/// make same sorter (for MT processing)
-	virtual ISphMatchSorter* Clone() const = 0;
-
-	/// move resultset into target
-	virtual void MoveTo ( ISphMatchSorter * pRhs ) = 0;
-
-	/// makes the same sorter
-	void CloneTo ( ISphMatchSorter * pTrg ) const;
-
-	const CSphMatchComparatorState& GetComparatorState() const { return m_tState; }
-
-	/// set attributes list these should copied into result set \ final matches
-	void							SetFilteredAttrs ( const sph::StringSet & hAttrs );
-
-	/// transform collected matches into standalone (copy all pooled attrs to ptrs, drop unused)
-	/// param fnBlobPoolFromMatch provides pool pointer from currently processed match pointer.
-	void TransformPooled2StandalonePtrs ( fnGetBlobPoolFromMatch fnBlobPoolFromMatch );
-};
-
-struct CmpPSortersByRandom_fn
-{
-	inline static bool IsLess ( const ISphMatchSorter * a, const ISphMatchSorter * b )
-	{
-		assert ( a );
-		assert ( b );
-		return a->m_bRandomize<b->m_bRandomize;
-	}
-};
-
-
 /// forward refs to internal searcher classes
 class ISphQword;
 class ISphQwordSetup;
 class CSphQueryContext;
-struct ISphFilter;
+class ISphFilter;
 struct GetKeywordsSettings_t;
 struct SuggestArgs_t;
 struct SuggestResult_t;
@@ -3230,6 +3083,8 @@ Bson_t EmptyBson();
 // returns correct size even if iBuf is 0
 int GetReadBuffer ( int iBuf );
 
+class ISphMatchSorter;
+
 /// generic fulltext index interface
 class CSphIndex : public ISphKeywordsStat, public IndexSegment_c, public DocstoreReader_i
 {
@@ -3275,7 +3130,7 @@ public:
 
 public:
 	/// check all data files, preload schema, and preallocate enough RAM to load memory-cached data
-	virtual bool				Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder ) = 0;
+	virtual bool				Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings ) = 0;
 
 	/// deallocate all previously preallocated shared data
 	virtual void				Dealloc () = 0;
@@ -3449,6 +3304,7 @@ struct SphQueueSettings_t
 	ISphExprHook *				m_pHook = nullptr;
 	const CSphFilterSettings *	m_pAggrFilter = nullptr;
 	int							m_iMaxMatches = DEFAULT_MAX_MATCHES;
+	bool						m_bNeedDocids = false;
 
 	explicit SphQueueSettings_t ( const ISphSchema & tSchema, QueryProfile_c * pProfiler = nullptr )
 		: m_tSchema ( tSchema )
@@ -3477,25 +3333,6 @@ CSphIndex *			sphCreateIndexTemplate ( const char * szIndexName );
 /// bKeynamesToLowercase is whether to convert all key names to lowercase
 void				sphSetJsonOptions ( bool bStrict, bool bAutoconvNumbers, bool bKeynamesToLowercase );
 
-/// parses sort clause, using a given schema
-/// fills eFunc and tState and optionally sError, returns result code
-ESortClauseParseResult	sphParseSortClause ( const CSphQuery * pQuery, const char * sClause, const ISphSchema & tSchema,
-	ESphSortFunc & eFunc, CSphMatchComparatorState & tState, bool bComputeItems, CSphString & sError );
-
-/// creates proper queue for given query
-/// may return NULL on error; in this case, error message is placed in sError
-/// if the pUpdate is given, creates the updater's queue and perform the index update
-/// instead of searching
-ISphMatchSorter *	sphCreateQueue ( const SphQueueSettings_t & tQueue, const CSphQuery & tQuery,
-		CSphString & sError, SphQueueRes_t & tRes, StrVec_t * pExtra = nullptr );
-
-void sphCreateMultiQueue ( const SphQueueSettings_t & tQueue, const VecTraits_T<CSphQuery> & dQueries,
-		VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors, SphQueueRes_t & tRes,
-		StrVec_t * pExtra );
-
-/// check if tColumn is actually stored field (so, can't be used in filters/expressions)
-bool IsNotRealAttribute ( const CSphColumnInfo & tColumn );
-
 /// setup per-keyword read buffer sizes
 void SetUnhintedBuffer ( int iReadUnhinted );
 

+ 3 - 1
src/sphinxexcerpt.cpp

@@ -16,12 +16,14 @@
 #include "sphinxquery.h"
 #include "fileutils.h"
 #include "sphinxstem.h"
+#include "coroutine.h"
+#include "memio.h"
 
 #include "snippetfunctor.h"
 #include "snippetindex.h"
 #include "snippetstream.h"
 #include "snippetpassage.h"
-#include "coroutine.h"
+
 
 #include <math.h>
 

+ 562 - 14
src/sphinxexpr.cpp

@@ -183,6 +183,215 @@ public:
 	void FixupLocator ( const ISphSchema * /*pOldSchema*/, const ISphSchema * /*pNewSchema*/ ) override {}
 };
 
+#if USE_COLUMNAR
+class Expr_GetColumnar_Traits_c : public ISphExpr
+{
+public:
+				Expr_GetColumnar_Traits_c ( const CSphString & sName, int iLocator );
+				Expr_GetColumnar_Traits_c ( const Expr_GetColumnar_Traits_c & rhs );
+
+	void		FixupLocator ( const ISphSchema * /*pOldSchema*/, const ISphSchema * /*pNewSchema*/ ) final {}
+	void		Command ( ESphExprCommand eCmd, void * pArg ) final;
+	bool		IsColumnar() const final { return true; }
+
+protected:
+	CSphString	m_sName;
+	int			m_iLocator = -1;
+	CSphScopedPtr<columnar::Iterator_i> m_pIterator {nullptr};
+
+	uint64_t CalcHash ( const char * szTag, const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
+	{
+		EXPR_CLASS_NAME_NOCHECK(szTag);
+		CALC_STR_HASH(m_sName, m_sName.Length());
+		return CALC_DEP_HASHES();
+	}
+};
+
+
+Expr_GetColumnar_Traits_c::Expr_GetColumnar_Traits_c ( const CSphString & sName, int iLocator )
+	: m_sName ( sName )
+	, m_iLocator ( iLocator )
+{}
+
+
+Expr_GetColumnar_Traits_c::Expr_GetColumnar_Traits_c ( const Expr_GetColumnar_Traits_c & rhs )
+	: m_sName ( rhs.m_sName )
+	, m_iLocator ( rhs.m_iLocator )
+{}
+
+
+void Expr_GetColumnar_Traits_c::Command ( ESphExprCommand eCmd, void * pArg )
+{
+	switch ( eCmd )
+	{
+		case SPH_EXPR_SET_COLUMNAR:
+		{
+			if ( m_pIterator )
+				break;
+
+			auto pColumnar = (const columnar::Columnar_i*)pArg;
+			if ( pColumnar )
+			{
+				std::string sError; // FIXME! report errors
+				m_pIterator = pColumnar->CreateIterator ( m_sName.cstr(), columnar::IteratorHints_t(), sError );
+			}
+		}
+		break;
+
+	case SPH_EXPR_GET_COLUMNAR_COL:
+		*(int*)pArg = m_iLocator;
+		break;
+
+	default:
+		break;
+	}
+}
+
+
+class Expr_GetColumnarInt_c : public Expr_GetColumnar_Traits_c
+{
+	using Expr_GetColumnar_Traits_c::Expr_GetColumnar_Traits_c;
+
+public:
+	float		Eval ( const CSphMatch & tMatch ) const override		{ return (float)FetchValue(tMatch); }
+	int			IntEval ( const CSphMatch & tMatch ) const override		{ return FetchValue(tMatch); }
+	int64_t		Int64Eval ( const CSphMatch & tMatch ) const override	{ return FetchValue(tMatch); }
+	uint64_t	GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final;
+	ISphExpr *	Clone() const override { return new Expr_GetColumnarInt_c ( m_sName, m_iLocator ); }
+
+protected:
+	inline SphAttr_t FetchValue ( const CSphMatch & tMatch ) const;
+};
+
+
+uint64_t Expr_GetColumnarInt_c::GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
+{
+	EXPR_CLASS_NAME("Expr_GetColumnarInt_c");
+	return CALC_PARENT_HASH();
+}
+
+
+SphAttr_t Expr_GetColumnarInt_c::FetchValue ( const CSphMatch & tMatch ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+		return m_pIterator->Get();
+
+	return 0;
+}
+
+
+class Expr_GetColumnarFloat_c : public Expr_GetColumnarInt_c
+{
+	using Expr_GetColumnarInt_c::Expr_GetColumnarInt_c;
+
+public:
+	float		Eval ( const CSphMatch & tMatch ) const final		{ return sphDW2F(FetchValue(tMatch)); }
+	int			IntEval ( const CSphMatch & tMatch ) const final	{ return (int)sphDW2F(FetchValue(tMatch)); }
+	int64_t		Int64Eval ( const CSphMatch & tMatch ) const final	{ return (int64_t)sphDW2F(FetchValue(tMatch)); }
+	ISphExpr *	Clone() const final { return new Expr_GetColumnarFloat_c ( m_sName, m_iLocator ); }
+};
+
+
+class Expr_GetColumnarString_c : public Expr_GetColumnar_Traits_c
+{
+	using Expr_GetColumnar_Traits_c::Expr_GetColumnar_Traits_c;
+
+public:
+	float		Eval ( const CSphMatch & ) const final { assert ( 0 ); return 0; }
+	int			StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const final;
+	const BYTE * StringEvalPacked ( const CSphMatch & tMatch ) const final;
+	int			StringLenEval ( const CSphMatch & tMatch ) const final;
+	uint64_t	GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final;
+	ISphExpr *	Clone() const final { return new Expr_GetColumnarString_c ( m_sName, m_iLocator ); }
+};
+
+
+int Expr_GetColumnarString_c::StringEval ( const CSphMatch & tMatch, const BYTE ** ppStr ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+		return m_pIterator->Get ( *ppStr, false );
+
+	return 0;
+}
+
+
+const BYTE * Expr_GetColumnarString_c::StringEvalPacked ( const CSphMatch & tMatch ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+	{
+		const BYTE * pResult = nullptr;
+		m_pIterator->Get ( pResult, true );
+		return pResult;
+	}
+
+	return nullptr;
+}
+
+
+int Expr_GetColumnarString_c::StringLenEval ( const CSphMatch & tMatch ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+		return m_pIterator->GetLength();
+
+	return -1;
+}
+
+
+uint64_t Expr_GetColumnarString_c::GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
+{
+	EXPR_CLASS_NAME("Expr_GetColumnarString_c");
+	return CALC_PARENT_HASH();
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+class Expr_GetColumnarMva_c : public Expr_GetColumnar_Traits_c
+{
+	using Expr_GetColumnar_Traits_c::Expr_GetColumnar_Traits_c;
+
+public:
+	float		Eval ( const CSphMatch & ) const final { assert ( 0 ); return 0; }
+	int64_t		Int64Eval ( const CSphMatch & tMatch ) const final;
+	ByteBlob_t	MvaEval ( const CSphMatch & tMatch ) const final;
+	uint64_t	GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final;
+	ISphExpr *	Clone() const final { return new Expr_GetColumnarMva_c ( m_sName, m_iLocator ); }
+};
+
+
+int64_t Expr_GetColumnarMva_c::Int64Eval ( const CSphMatch & tMatch ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+	{
+		const BYTE * pResult = nullptr;
+		m_pIterator->Get ( pResult, true );
+		return (int64_t)pResult;
+	}
+
+	return 0;
+}
+
+
+ByteBlob_t Expr_GetColumnarMva_c::MvaEval ( const CSphMatch & tMatch ) const
+{
+	if ( m_pIterator.Ptr() && m_pIterator->AdvanceTo ( tMatch.m_tRowID ) == tMatch.m_tRowID )
+	{
+		const BYTE * pResult = nullptr;
+		int iBytes = m_pIterator->Get ( pResult, false );
+		return {pResult, iBytes};
+	}
+
+	return {nullptr, 0};
+}
+
+
+uint64_t Expr_GetColumnarMva_c::GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable )
+{
+	EXPR_CLASS_NAME("Expr_GetColumnarMva_c");
+	return CALC_PARENT_HASH();
+}
+
+#endif // USE_COLUMNAR
+
+//////////////////////////////////////////////////////////////////////////
 
 class Expr_GetInt_c : public Expr_WithLocator_c
 {
@@ -1461,6 +1670,9 @@ public:
 
 	bool IsDataPtrAttr() const final
 	{
+		if ( m_eArg==SPH_ATTR_STRINGPTR )
+			return m_pFirst->IsDataPtrAttr();
+
 		return true;
 	}
 
@@ -3895,6 +4107,7 @@ protected:
 	int						AddNodeString ( int64_t iValue );
 	int						AddNodeAttr ( int iTokenType, uint64_t uAttrLocator );
 	int						AddNodeField ( int iTokenType, uint64_t uAttrLocator );
+	int						AddNodeColumnar ( int iTokenType, uint64_t uAttrLocator );
 	int						AddNodeWeight ();
 	int						AddNodeOp ( int iOp, int iLeft, int iRight );
 	int						AddNodeFunc0 ( int iFunc );
@@ -3939,6 +4152,7 @@ public:
 	ESphEvalStage			m_eEvalStage { SPH_EVAL_FINAL };
 	ESphCollation			m_eCollation;
 	DWORD					m_uStoredField = CSphColumnInfo::FIELD_NONE;
+	bool					m_bNeedDocIds = false;
 
 private:
 	int						GetToken ( YYSTYPE * lvalp );
@@ -3966,6 +4180,10 @@ private:
 	void					CanonizePass ( int iNode );
 	void					ConstantFoldPass ( int iNode );
 	void					VariousOptimizationsPass ( int iNode );
+	void					MultiNEPass ( int iNode );
+	bool					MultiNEMatch ( const ExprNode_t * pLeft, const ExprNode_t * pRight, ExprNode_t & tRes, CSphVector<int64_t> & dValues );
+	bool					TransformNENE ( ExprNode_t * pRoot, ExprNode_t * pLeft, ExprNode_t * pRight );
+	bool					TransformInNE ( ExprNode_t * pRoot, ExprNode_t * pLeft, ExprNode_t * pRight );
 	void					Dump ( int iNode );
 
 	ISphExpr *				CreateTree ( int iNode );
@@ -4073,6 +4291,25 @@ int ExprParser_t::ErrLex ( const char * sTemplate, ... )
 	return -1;
 }
 
+static int ConvertToColumnarType ( ESphAttr eAttr )
+{
+	switch ( eAttr )
+	{
+	case SPH_ATTR_INTEGER:		return TOK_COLUMNAR_INT;
+	case SPH_ATTR_TIMESTAMP:	return TOK_COLUMNAR_TIMESTAMP;
+	case SPH_ATTR_FLOAT:		return TOK_COLUMNAR_FLOAT;
+	case SPH_ATTR_BIGINT:		return TOK_COLUMNAR_BIGINT;
+	case SPH_ATTR_BOOL:			return TOK_COLUMNAR_BOOL;
+	case SPH_ATTR_STRING:		return TOK_COLUMNAR_STRING;
+	case SPH_ATTR_UINT32SET:	return TOK_COLUMNAR_UINT32SET;
+	case SPH_ATTR_INT64SET:		return TOK_COLUMNAR_INT64SET;
+	default:
+		assert ( 0 && "Unknown columnar type" );
+		return -1;
+	}
+}
+
+
 int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
 {
 	// check attribute type and width
@@ -4090,6 +4327,12 @@ int ExprParser_t::ParseAttr ( int iAttr, const char* sTok, YYSTYPE * lvalp )
 			}
 	}
 
+	if ( tCol.IsColumnar() )
+	{
+		lvalp->iAttrLocator = iAttr;
+		return ConvertToColumnarType ( tCol.m_eAttrType );
+	}
+
 	int iRes = -1;
 	switch ( tCol.m_eAttrType )
 	{
@@ -4613,6 +4856,146 @@ void ExprParser_t::VariousOptimizationsPass ( int iNode )
 	}
 }
 
+
+static bool IsSupportedNEType ( int iType )
+{
+	return iType==TOK_COLUMNAR_INT || iType==TOK_COLUMNAR_BIGINT || iType==TOK_COLUMNAR_BOOL || iType==TOK_ATTR_INT;
+}
+
+
+static bool CheckAndSwap ( ExprNode_t * & pLeft, ExprNode_t * & pRight )
+{
+	if ( IsSupportedNEType ( pRight->m_iToken ) && pLeft->m_iToken==TOK_CONST_INT )
+		Swap ( pLeft, pRight );
+
+	return IsSupportedNEType ( pLeft->m_iToken ) && pRight->m_iToken==TOK_CONST_INT;
+}
+
+
+bool ExprParser_t::MultiNEMatch ( const ExprNode_t * pLeft, const ExprNode_t * pRight, ExprNode_t & tRes, CSphVector<int64_t> & dValues )
+{
+	assert ( pLeft->m_iLeft!=-1 && pLeft->m_iRight!=-1 );
+	assert ( pRight->m_iLeft!=-1 && pRight->m_iRight!=-1 );
+
+	ExprNode_t * pLeft0 = &m_dNodes [ pLeft->m_iLeft ];
+	ExprNode_t * pLeft1 = &m_dNodes [ pLeft->m_iRight ];
+	ExprNode_t * pRight0 = &m_dNodes [ pRight->m_iLeft ];
+	ExprNode_t * pRight1 = &m_dNodes [ pRight->m_iRight ];
+
+	if ( !CheckAndSwap ( pLeft0, pLeft1 ) )
+		return false;
+
+	if ( !CheckAndSwap ( pRight0, pRight1 ) )
+		return false;
+
+	if ( pRight0->m_iLocator!=pLeft0->m_iLocator || pRight0->m_iToken!=pLeft0->m_iToken )
+		return false;
+
+	tRes = *pLeft0;
+
+	dValues.Add ( pLeft1->m_iConst );
+	dValues.Add ( pRight1->m_iConst );
+	
+	return true;
+}
+
+
+bool ExprParser_t::TransformNENE ( ExprNode_t * pRoot, ExprNode_t * pLeft, ExprNode_t * pRight )
+{
+	assert ( pRoot && pLeft && pRight );
+	assert ( pRoot->m_iToken==TOK_AND && pLeft->m_iToken==TOK_NE && pRight->m_iToken==TOK_NE );
+
+	ExprNode_t tRes;
+	CSphVector<int64_t> dValues;
+	if ( MultiNEMatch ( pLeft, pRight, tRes, dValues ) )
+	{
+		pRoot->m_iToken = TOK_NOT;
+		pRoot->m_iRight = -1;
+
+		pLeft->m_iToken = TOK_FUNC;
+		pLeft->m_iFunc = FUNC_IN;
+
+		ExprNode_t * pLeft0 = &m_dNodes [ pLeft->m_iLeft ];
+		ExprNode_t * pLeft1 = &m_dNodes [ pLeft->m_iRight ];
+
+		pLeft0->m_iToken	= tRes.m_iToken;
+		pLeft0->m_iLocator	= tRes.m_iLocator;
+		pLeft0->m_tLocator	= tRes.m_tLocator;
+
+		pLeft1->m_iToken = TOK_CONST_LIST;
+		pLeft1->m_pConsts = new ConstList_c();
+		for ( auto i : dValues )
+			pLeft1->m_pConsts->Add(i);
+
+		return true;
+	}
+
+	return false;
+}
+
+
+bool ExprParser_t::TransformInNE ( ExprNode_t * pRoot, ExprNode_t * pLeft, ExprNode_t * pRight )
+{
+	assert ( pRoot && pLeft && pRight );
+	assert ( pRoot->m_iToken==TOK_AND && ( ( pLeft->m_iToken==TOK_NOT && pRight->m_iToken==TOK_NE ) || ( pLeft->m_iToken==TOK_NE && pRight->m_iToken==TOK_NOT ) ) );
+
+	ExprNode_t * pNotNode = pLeft->m_iToken==TOK_NOT ? pLeft : pRight;
+	ExprNode_t * pNENode = pLeft->m_iToken==TOK_NE ? pLeft : pRight;
+
+	assert ( pNotNode->m_iLeft!=-1 && pNotNode->m_iRight==-1 );
+	ExprNode_t * pInNode = &m_dNodes [ pNotNode->m_iLeft ];
+
+	bool bCond = pInNode->m_iToken==TOK_FUNC && pInNode->m_iFunc==FUNC_IN;
+	assert ( pInNode->m_iLeft!=-1 && pInNode->m_iRight!=-1 );
+	ExprNode_t * pIn0 = &m_dNodes [ pInNode->m_iLeft ];
+	ExprNode_t * pIn1 = &m_dNodes [ pInNode->m_iRight ];
+	bCond &= IsSupportedNEType ( pIn0->m_iToken ) && pIn1->m_iToken==TOK_CONST_LIST;
+	bCond &= pIn1->m_pConsts->m_eRetType==SPH_ATTR_INTEGER || pIn1->m_pConsts->m_eRetType==SPH_ATTR_BIGINT;
+
+	ExprNode_t * pNE0 = &m_dNodes [ pNENode->m_iLeft ];
+	ExprNode_t * pNE1 = &m_dNodes [ pNENode->m_iRight ];
+	bCond &= CheckAndSwap ( pNE0, pNE1 );
+	bCond &= pNE0->m_iToken == pIn0->m_iToken && pNE0->m_iLocator==pIn0->m_iLocator;
+
+	if ( bCond )
+	{
+		pIn1->m_pConsts->Add ( pNE1->m_iConst );
+		*pRoot = *pNotNode;
+		return true;
+	}
+
+	return false;
+}
+
+// transform "var<>1 AND var<>2 AND var<>3" into "not var in (1,2,3)"
+void ExprParser_t::MultiNEPass ( int iNode )
+{
+	if ( iNode<0 )
+		return;
+
+	MultiNEPass ( m_dNodes [ iNode ].m_iLeft );
+	MultiNEPass ( m_dNodes [ iNode ].m_iRight );
+
+	ExprNode_t * pRoot = &m_dNodes[iNode];
+	if ( pRoot->m_iLeft==-1 || pRoot->m_iRight==-1 )
+		return;
+
+	ExprNode_t * pLeft = &m_dNodes [ pRoot->m_iLeft ];
+	ExprNode_t * pRight = &m_dNodes [ pRoot->m_iRight ];
+
+	if ( pRoot->m_iToken==TOK_AND && pLeft->m_iToken==TOK_NE && pRight->m_iToken==TOK_NE )
+	{
+		if ( TransformNENE ( pRoot, pLeft, pRight ) )
+			return;
+	}
+
+	if ( pRoot->m_iToken==TOK_AND && ( ( pLeft->m_iToken==TOK_NOT && pRight->m_iToken==TOK_NE ) || ( pLeft->m_iToken==TOK_NE && pRight->m_iToken==TOK_NOT ) ) )
+	{
+		if ( TransformInNE ( pRoot, pLeft, pRight ) )
+			return;
+	}
+}
+
 /// optimize subtree
 void ExprParser_t::Optimize ( int iNode )
 {
@@ -4620,6 +5003,7 @@ void ExprParser_t::Optimize ( int iNode )
 	CanonizePass ( iNode );
 	ConstantFoldPass ( iNode );
 	VariousOptimizationsPass ( iNode );
+	MultiNEPass ( iNode );
 }
 
 
@@ -5884,6 +6268,8 @@ ISphExpr * ExprParser_t::CreateFieldNode ( int iField )
 {
 	m_eEvalStage = SPH_EVAL_POSTLIMIT;
 	m_uStoredField = CSphColumnInfo::FIELD_STORED;
+	m_bNeedDocIds = true;
+
 	const CSphColumnInfo & tField = m_pSchema->GetField(iField);
 	if ( !(tField.m_uFieldFlags & CSphColumnInfo::FIELD_STORED) )
 	{
@@ -5971,6 +6357,22 @@ ISphExpr * ExprParser_t::CreateTree ( int iNode )
 		case TOK_ATTR_MVA64:
 		case TOK_ATTR_MVA32:	return new Expr_GetMva_c ( tNode.m_tLocator, tNode.m_iLocator );
 		case TOK_ATTR_FACTORS:	return new Expr_GetFactorsAttr_c ( tNode.m_tLocator, tNode.m_iLocator );
+
+#if USE_COLUMNAR
+		case TOK_COLUMNAR_INT:
+		case TOK_COLUMNAR_TIMESTAMP:
+		case TOK_COLUMNAR_BIGINT:
+		case TOK_COLUMNAR_BOOL:
+									return new Expr_GetColumnarInt_c ( m_pSchema->GetAttr(tNode.m_iLocator).m_sName, tNode.m_iLocator );
+
+		case TOK_COLUMNAR_FLOAT:	return new Expr_GetColumnarFloat_c ( m_pSchema->GetAttr(tNode.m_iLocator).m_sName, tNode.m_iLocator );
+		case TOK_COLUMNAR_STRING:	return new Expr_GetColumnarString_c ( m_pSchema->GetAttr(tNode.m_iLocator).m_sName, tNode.m_iLocator );
+
+		case TOK_COLUMNAR_UINT32SET:
+		case TOK_COLUMNAR_INT64SET:
+								return new Expr_GetColumnarMva_c ( m_pSchema->GetAttr(tNode.m_iLocator).m_sName, tNode.m_iLocator );
+#endif
+
 		case TOK_FIELD:			return CreateFieldNode ( tNode.m_iLocator );
 
 		case TOK_CONST_FLOAT:	return new Expr_GetConst_c ( tNode.m_fConst );
@@ -6211,8 +6613,8 @@ ISphExpr * ExprParser_t::CreateTree ( int iNode )
 			}
 
 		case TOK_UDF:			return CreateUdfNode ( tNode.m_iFunc, pLeft );
-		case TOK_HOOK_IDENT:	return m_pHook->CreateNode ( tNode.m_iFunc, NULL, NULL, m_sCreateError );
-		case TOK_HOOK_FUNC:		return m_pHook->CreateNode ( tNode.m_iFunc, pLeft, &m_eEvalStage, m_sCreateError );
+		case TOK_HOOK_IDENT:	return m_pHook->CreateNode ( tNode.m_iFunc, nullptr, nullptr, nullptr, m_sCreateError );
+		case TOK_HOOK_FUNC:		return m_pHook->CreateNode ( tNode.m_iFunc, pLeft, &m_eEvalStage, &m_bNeedDocIds, m_sCreateError );
 
 		case TOK_MAP_ARG:
 			// tricky bit
@@ -6517,7 +6919,7 @@ private:
 //////////////////////////////////////////////////////////////////////////
 
 /// IN() evaluator, arbitrary scalar expression vs. constant values
-template < typename T >
+template < typename T, bool BINARY >
 class Expr_In_c : public Expr_ArgVsConstSet_c<T>
 {
 public:
@@ -6525,14 +6927,24 @@ public:
 	Expr_In_c ( ISphExpr * pArg, ConstList_c * pConsts ) :
 		Expr_ArgVsConstSet_c<T> ( pArg, pConsts, false )
 	{
-		this->m_dValues.Sort();
+		this->m_dValues.Uniq();
 	}
 
 	/// evaluate arg, check if the value is within set
 	int IntEval ( const CSphMatch & tMatch ) const final
 	{
 		T val = this->ExprEval ( this->m_pArg, tMatch ); // 'this' fixes gcc braindamage
-		return this->m_dValues.BinarySearch ( val )!=nullptr;
+
+		if_const ( BINARY )
+			return this->m_dValues.BinarySearch ( val )!=nullptr;
+		else
+		{
+			for ( auto i : this->m_dValues )
+				if ( i==val )
+					return 1;
+
+			return 0;
+		}
 	}
 
 	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final
@@ -6802,6 +7214,72 @@ private:
 	{}
 };
 
+// aggregate functions evaluator for columnar MVAs
+template <typename T>
+class Expr_ColumnarMVAAggr_T : public Expr_NoLocator_c
+{
+public:
+	Expr_ColumnarMVAAggr_T ( ISphExpr * pExpr, ESphAggrFunc eFunc )
+		: m_pExpr ( pExpr )
+		, m_eFunc ( eFunc )
+	{
+		SafeAddRef(m_pExpr);
+	}
+
+	int64_t Int64Eval ( const CSphMatch & tMatch ) const final
+	{
+		if ( !m_pExpr )
+			return 0;
+
+		ByteBlob_t tMva = m_pExpr->MvaEval(tMatch);
+		if ( !tMva.second )
+			return 0;
+
+		int nValues = tMva.second / sizeof(T);
+
+		const T * L = (const T *)tMva.first;
+		const T * R = L+nValues-1;
+
+		switch ( m_eFunc )
+		{
+			case SPH_AGGR_MIN:	return *L;
+			case SPH_AGGR_MAX:	return *R;
+			default:			return 0;
+		}
+	}
+
+	float	Eval ( const CSphMatch & tMatch ) const final { return (float)Int64Eval ( tMatch ); }
+	int		IntEval ( const CSphMatch & tMatch ) const final { return (int)Int64Eval ( tMatch ); }
+
+	void Command ( ESphExprCommand eCmd, void * pArg ) final
+	{
+		if ( m_pExpr )
+			m_pExpr->Command ( eCmd, pArg );
+	}
+
+	uint64_t GetHash ( const ISphSchema & tSorterSchema, uint64_t uPrevHash, bool & bDisable ) final
+	{
+		EXPR_CLASS_NAME("Expr_ColumnarMVAAggr_T");
+		CALC_POD_HASH(m_eFunc);
+		CALC_CHILD_HASH(m_pExpr);
+		return CALC_DEP_HASHES();
+	}
+
+	ISphExpr * Clone() const final
+	{
+		return new Expr_ColumnarMVAAggr_T ( *this );
+	}
+
+protected:
+	CSphRefcountedPtr<ISphExpr> m_pExpr;
+	ESphAggrFunc	m_eFunc {SPH_AGGR_NONE};
+
+private:
+	Expr_ColumnarMVAAggr_T ( const Expr_ColumnarMVAAggr_T & rhs )
+		: m_pExpr ( SafeClone ( rhs.m_pExpr ) )
+		, m_eFunc ( rhs.m_eFunc )
+	{}
+};
 
 /// IN() evaluator, JSON array vs. constant values
 class Expr_JsonFieldIn_c : public Expr_ArgVsConstSet_c<int64_t>
@@ -7787,11 +8265,27 @@ ISphExpr * ExprParser_t::CreateInNode ( int iNode )
 				default:
 				{
 					CSphRefcountedPtr<ISphExpr> pArg ( CreateTree ( m_dNodes[iNode].m_iLeft ) );
+					int iConsts = tRight.m_pConsts->m_eRetType==SPH_ATTR_INTEGER ? tRight.m_pConsts->m_dInts.GetLength() : tRight.m_pConsts->m_dFloats.GetLength();
+					bool bBinary = iConsts>128;
 					switch ( WidestType ( tLeft.m_eRetType, tRight.m_pConsts->m_eRetType ) )
 					{
-						case SPH_ATTR_INTEGER:	return new Expr_In_c<int> ( pArg, tRight.m_pConsts ); break;
-						case SPH_ATTR_BIGINT:	return new Expr_In_c<int64_t> ( pArg, tRight.m_pConsts ); break;
-						default:				return new Expr_In_c<float> ( pArg, tRight.m_pConsts ); break;
+						case SPH_ATTR_INTEGER:
+							if ( bBinary )
+								return new Expr_In_c<int,true> ( pArg, tRight.m_pConsts );
+							else
+								return new Expr_In_c<int,false> ( pArg, tRight.m_pConsts );
+
+						case SPH_ATTR_BIGINT:
+							if ( bBinary )
+								return new Expr_In_c<int64_t,true> ( pArg, tRight.m_pConsts );
+							else
+								return new Expr_In_c<int64_t,false> ( pArg, tRight.m_pConsts );
+
+						default:
+							if ( bBinary )
+								return new Expr_In_c<float,true> ( pArg, tRight.m_pConsts );
+							else
+								return new Expr_In_c<float,false> ( pArg, tRight.m_pConsts );
 					}
 				}
 			}
@@ -8001,9 +8495,11 @@ ISphExpr * ExprParser_t::CreateAggregateNode ( const ExprNode_t & tNode, ESphAgg
 	const ExprNode_t & tLeft = m_dNodes [ tNode.m_iLeft ];
 	switch ( tLeft.m_iToken )
 	{
-		case TOK_ATTR_JSON:		return new Expr_JsonFieldAggr_c ( pLeft, eFunc );
-		case TOK_ATTR_MVA32:	return new Expr_MVAAggr_c<DWORD> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
-		case TOK_ATTR_MVA64:	return new Expr_MVAAggr_c<int64_t> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
+		case TOK_ATTR_JSON:			return new Expr_JsonFieldAggr_c ( pLeft, eFunc );
+		case TOK_ATTR_MVA32:		return new Expr_MVAAggr_c<DWORD> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
+		case TOK_ATTR_MVA64:		return new Expr_MVAAggr_c<int64_t> ( tLeft.m_tLocator, tLeft.m_iLocator, eFunc );
+		case TOK_COLUMNAR_UINT32SET:return new Expr_ColumnarMVAAggr_T<DWORD> ( pLeft, eFunc );
+		case TOK_COLUMNAR_INT64SET:	return new Expr_ColumnarMVAAggr_T<int64_t> ( pLeft, eFunc );
 		default:				return nullptr;
 	}
 }
@@ -8233,6 +8729,34 @@ int ExprParser_t::AddNodeAttr ( int iTokenType, uint64_t uAttrLocator )
 }
 
 
+int ExprParser_t::AddNodeColumnar ( int iTokenType, uint64_t uAttrLocator )
+{
+	ExprNode_t & tNode = m_dNodes.Add();
+	tNode.m_iToken = iTokenType;
+	tNode.m_iLocator = uAttrLocator;
+
+	switch ( iTokenType )
+	{
+	case TOK_COLUMNAR_INT:
+	case TOK_COLUMNAR_TIMESTAMP:
+	case TOK_COLUMNAR_BOOL:
+			tNode.m_eRetType = SPH_ATTR_INTEGER;
+			break;
+
+	case TOK_COLUMNAR_FLOAT:		tNode.m_eRetType = SPH_ATTR_FLOAT; break;
+	case TOK_COLUMNAR_BIGINT:		tNode.m_eRetType = SPH_ATTR_BIGINT; break;
+	case TOK_COLUMNAR_STRING:		tNode.m_eRetType = SPH_ATTR_STRINGPTR; break;
+	case TOK_COLUMNAR_UINT32SET:	tNode.m_eRetType = SPH_ATTR_UINT32SET_PTR; break;
+	case TOK_COLUMNAR_INT64SET:		tNode.m_eRetType = SPH_ATTR_INT64SET_PTR; break;
+	default:
+		assert ( 0 && "Unsupported columnar type" );
+		break;
+	}
+	return m_dNodes.GetLength()-1;
+}
+
+
+
 int ExprParser_t::AddNodeField ( int iTokenType, uint64_t uAttrLocator )
 {
 	assert ( iTokenType==TOK_FIELD );
@@ -9269,8 +9793,8 @@ struct ExprNodeHeight_t
 	ExprNodeHeight_t() = default;
 };
 
-ISphExpr * ExprParser_t::Parse ( const char * sExpr, const ISphSchema & tSchema,
-	ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError )
+
+ISphExpr * ExprParser_t::Parse ( const char * sExpr, const ISphSchema & tSchema, ESphAttr * pAttrType, bool * pUsesWeight, CSphString & sError )
 {
 	const char* szExpr = sExpr;
 
@@ -9282,7 +9806,7 @@ ISphExpr * ExprParser_t::Parse ( const char * sExpr, const ISphSchema & tSchema,
 	m_sCreateError = "";
 
 	// setup lexer
-	m_sExpr = { szExpr, strlen (szExpr) };
+	m_sExpr = { szExpr, (int)strlen (szExpr) };
 	m_pSchema = &tSchema;
 
 	// setup constant functions
@@ -9435,6 +9959,9 @@ ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, CSphSt
 		*tArgs.m_pPackedFactorsFlags = tParser.m_uPackedFactorFlags;
 	if ( tArgs.m_pStoredField )
 		*tArgs.m_pStoredField = tParser.m_uStoredField;
+	if ( tArgs.m_pNeedDocIds )
+		*tArgs.m_pNeedDocIds = tParser.m_bNeedDocIds;
+
 	return pRes;
 }
 
@@ -9443,3 +9970,24 @@ ISphExpr * sphJsonFieldConv ( ISphExpr * pExpr )
 {
 	return new Expr_JsonFieldConv_c ( pExpr );
 }
+
+#if USE_COLUMNAR
+
+ISphExpr * CreateGetColumnarStrExpr ( const CSphString & sName, int iLocator )
+{
+	return new Expr_GetColumnarString_c ( sName, iLocator );
+}
+
+
+ISphExpr * CreateGetColumnarIntExpr ( const CSphString & sName, int iLocator )
+{
+	return new Expr_GetColumnarInt_c ( sName, iLocator );
+}
+
+
+ISphExpr * CreateGetColumnarFloatExpr ( const CSphString & sName, int iLocator )
+{
+	return new Expr_GetColumnarFloat_c ( sName, iLocator );
+}
+
+#endif // USE_COLUMNAR

+ 21 - 3
src/sphinxexpr.h

@@ -80,7 +80,13 @@ enum ESphExprCommand
 	SPH_EXPR_SET_EXTRA_DATA,
 	SPH_EXPR_GET_DEPENDENT_COLS, ///< used to determine proper evaluating stage
 	SPH_EXPR_GET_UDF,
-	SPH_EXPR_SET_ITERATOR,		///< set link between JsonIn expr and iterator
+
+#if USE_COLUMNAR
+	SPH_EXPR_SET_COLUMNAR,
+	SPH_EXPR_GET_COLUMNAR_COL,
+#endif 
+
+	SPH_EXPR_SET_ITERATOR		///< set link between JsonIn expr and iterator
 };
 
 /// expression evaluator
@@ -107,6 +113,9 @@ public:
 	/// Evaluate string as a packed data ptr attr. By default it re-packs StringEval result, but can be overridden
 	virtual const BYTE * StringEvalPacked ( const CSphMatch & tMatch ) const;
 
+	/// return string len without calculating/fetching the string (if supported)
+	virtual int StringLenEval ( const CSphMatch & tMatch ) const { return -1; }
+
 	/// evaluate MVA attr
 	virtual ByteBlob_t MvaEval ( const CSphMatch & ) const { assert( 0 ); return {nullptr, 0}; }
 
@@ -120,8 +129,10 @@ public:
 	/// FIXME? replace with a single GetType() call?
 	virtual bool IsArglist () const { return false; }
 
+	/// was this expression spawned in place of a columnar attr?
+	virtual bool IsColumnar() const { return false; }
+
 	/// check for stringptr subtype
-	/// FIXME? replace with a single GetType() call?
 	virtual bool IsDataPtrAttr () const { return false; }
 
 	/// get Nth arg of an arglist
@@ -214,7 +225,7 @@ struct ISphExprHook
 	/// create node by OID
 	/// pEvalStage is an optional out-parameter
 	/// hook may fill it, but that is *not* required
-	virtual ISphExpr * CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, CSphString & sError ) = 0;
+	virtual ISphExpr * CreateNode ( int iID, ISphExpr * pLeft, ESphEvalStage * pEvalStage, bool * pNeedDocIds, CSphString & sError ) = 0;
 
 	/// get identifier return type by OID
 	virtual ESphAttr GetIdentType ( int iID ) const = 0;
@@ -306,12 +317,19 @@ struct ExprParseArgs_t
 	DWORD *				m_pPackedFactorsFlags = nullptr;
 	ESphEvalStage *		m_pEvalStage = nullptr;
 	DWORD *				m_pStoredField = nullptr;
+	bool *				m_pNeedDocIds = nullptr;
 };
 
 ISphExpr * sphExprParse ( const char * sExpr, const ISphSchema & tSchema, CSphString & sError, ExprParseArgs_t & tArgs );
 
 ISphExpr * sphJsonFieldConv ( ISphExpr * pExpr );
 
+#if USE_COLUMNAR
+ISphExpr * CreateGetColumnarIntExpr ( const CSphString & sName, int iLocator );
+ISphExpr * CreateGetColumnarFloatExpr ( const CSphString & sName, int iLocator );
+ISphExpr * CreateGetColumnarStrExpr ( const CSphString & sName, int iLocator );
+#endif
+
 void SetExprNodeStackItemSize ( int iSize );
 
 //////////////////////////////////////////////////////////////////////////

+ 16 - 0
src/sphinxexpr.y

@@ -38,6 +38,14 @@
 %token <sIdent>			TOK_IDENT
 %token <iAttrLocator>	TOK_ATTR_JSON
 %token <iAttrLocator>	TOK_FIELD
+%token <iAttrLocator>	TOK_COLUMNAR_INT
+%token <iAttrLocator>	TOK_COLUMNAR_TIMESTAMP
+%token <iAttrLocator>	TOK_COLUMNAR_BIGINT
+%token <iAttrLocator>	TOK_COLUMNAR_BOOL
+%token <iAttrLocator>	TOK_COLUMNAR_FLOAT
+%token <iAttrLocator>	TOK_COLUMNAR_STRING
+%token <iAttrLocator>	TOK_COLUMNAR_UINT32SET
+%token <iAttrLocator>	TOK_COLUMNAR_INT64SET
 
 
 %token	TOK_ATWEIGHT
@@ -102,6 +110,14 @@ attr:
 	| TOK_ATTR_JSON					{ $$ = pParser->AddNodeAttr ( TOK_ATTR_JSON, $1 ); }
 	| TOK_ATTR_MVA32				{ $$ = pParser->AddNodeAttr ( TOK_ATTR_MVA32, $1 ); }
 	| TOK_ATTR_MVA64				{ $$ = pParser->AddNodeAttr ( TOK_ATTR_MVA64, $1 ); }
+	| TOK_COLUMNAR_INT 				{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_INT, $1 ); }
+	| TOK_COLUMNAR_TIMESTAMP		{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_TIMESTAMP, $1 ); }
+	| TOK_COLUMNAR_BIGINT 			{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_BIGINT, $1 ); }
+	| TOK_COLUMNAR_BOOL				{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_BOOL, $1 ); }
+	| TOK_COLUMNAR_FLOAT			{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_FLOAT, $1 ); }
+	| TOK_COLUMNAR_STRING 			{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_STRING, $1 ); }
+	| TOK_COLUMNAR_UINT32SET 		{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_UINT32SET, $1 ); }
+	| TOK_COLUMNAR_INT64SET 		{ $$ = pParser->AddNodeColumnar ( TOK_COLUMNAR_INT64SET, $1 ); }
 	| TOK_FIELD						{ $$ = pParser->AddNodeField ( TOK_FIELD, $1 ); }
 	| '`' attr '`'					{ $$ = $2; }
 	;

파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 647 - 194
src/sphinxfilter.cpp


+ 222 - 13
src/sphinxfilter.h

@@ -14,14 +14,33 @@
 #define _sphinxfilter_
 
 #include "sphinx.h"
+#include "columnarlib.h"
 
-struct ISphFilter
+class ISphFilter
+#if USE_COLUMNAR
+	: public columnar::BlockTester_i
+#endif
 {
+public:
 	virtual void SetLocator ( const CSphAttrLocator & ) {}
 	virtual void SetRange ( SphAttr_t, SphAttr_t ) {}
 	virtual void SetRangeFloat ( float, float ) {}
 	virtual void SetValues ( const SphAttr_t *, int ) {}
 	virtual void SetBlobStorage ( const BYTE * ) {}
+
+#if USE_COLUMNAR
+	virtual void SetColumnar ( const columnar::Columnar_i * ) {}
+	virtual void SetColumnarCol ( int ) {}
+
+	/// similar to EvalBlock w/pMinDocinfo and pMaxDocinfo, but for filter expressions
+	/// their results are not stored into matches, so no docinfo here
+	bool Test ( const columnar::MinMaxVec_t & dMinMax ) const override
+	{
+		// if filter does not implement block-level evaluation we assume the block will pass
+		return true;
+	}
+#endif
+
 	virtual void SetRefString ( const CSphString * , int ) {}
 
 	virtual ~ISphFilter () {}
@@ -56,20 +75,25 @@ struct CreateFilterContext_t
 	const CSphVector<CSphFilterSettings> * m_pFilters = nullptr;
 	const CSphVector<FilterTreeItem_t> * m_pFilterTree = nullptr;
 
-	const ISphSchema * m_pSchema = nullptr;
-	const BYTE * m_pBlobPool { nullptr };
-	ESphCollation m_eCollation { SPH_COLLATION_DEFAULT };
-	bool m_bScan = false;
+	const ISphSchema *			m_pSchema = nullptr;
+	const BYTE *				m_pBlobPool = nullptr;
 
-	ISphFilter * m_pFilter = nullptr;
-	ISphFilter * m_pWeightFilter = nullptr;
-	CSphVector<UservarIntSet_c> m_dUserVals;
+#if USE_COLUMNAR
+	const columnar::Columnar_i * m_pColumnar = nullptr;
+#endif
 
-	CreateFilterContext_t ( const ISphSchema * pSchema=nullptr )
-		: m_pSchema ( pSchema ) {};
+	ESphCollation				m_eCollation = SPH_COLLATION_DEFAULT;
+	bool						m_bScan = false;
+
+	ISphFilter *				m_pFilter = nullptr;
+	ISphFilter *				m_pWeightFilter = nullptr;
+	CSphVector<UservarIntSet_c>	m_dUserVals;
 
 	const HistogramContainer_c * m_pHistograms = nullptr;
 
+	CreateFilterContext_t ( const ISphSchema * pSchema=nullptr )
+		: m_pSchema ( pSchema ) {};
+
 	~CreateFilterContext_t();
 };
 
@@ -77,13 +101,15 @@ ISphFilter * sphCreateFilter ( const CSphFilterSettings &tSettings, const Create
 ISphFilter * sphCreateAggrFilter ( const CSphFilterSettings * pSettings, const CSphString & sAttrName, const ISphSchema & tSchema, CSphString & sError );
 ISphFilter * sphJoinFilters ( ISphFilter *, ISphFilter * );
 
-
-
 bool sphCreateFilters ( CreateFilterContext_t & tCtx, CSphString & sError, CSphString & sWarning );
 
 void FormatFilterQL ( const CSphFilterSettings & tFilter, StringBuilder_c & tBuf, int iCompactIN );
 void FormatFiltersQL ( const CSphVector<CSphFilterSettings> & dFilters, const CSphVector<FilterTreeItem_t> & dFilterTree, StringBuilder_c & tBuf, int iCompactIN=5 );
+void FixupFilterSettings ( const CSphFilterSettings & tSettings, ESphAttr eAttrType, CommonFilterSettings_t & tFixedSettings );
+
+void OptimizeFilters ( CSphVector<CSphFilterSettings> & dFilters );
 
+CSphString FilterType2Str ( ESphFilter eFilterType );
 
 // fwd
 class PercolateFilter_i
@@ -98,4 +124,187 @@ public:
 
 PercolateFilter_i * CreatePercolateFilter ( const CSphFilterSettings * pUID );
 
-#endif // _sphinxfilter_
+template<bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT = false, bool OPEN_RIGHT = false, typename T = SphAttr_t>
+inline bool EvalRange ( T tValue, T tMin, T tMax )
+{
+	if_const ( OPEN_LEFT )
+		return HAS_EQUAL_MAX ? ( tValue<=tMax ) : ( tValue<tMax );
+
+	if_const ( OPEN_RIGHT )
+		return  HAS_EQUAL_MIN ? ( tValue>=tMin ) : ( tValue>tMin );
+
+	auto bMinOk = HAS_EQUAL_MIN ? ( tValue>=tMin ) : ( tValue>tMin );
+	auto bMaxOk = HAS_EQUAL_MAX ? ( tValue<=tMax ) : ( tValue<tMax );
+
+	return bMinOk && bMaxOk;
+}
+
+template<bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, bool OPEN_LEFT = false, bool OPEN_RIGHT = false, typename T = SphAttr_t>
+inline bool EvalBlockRangeAny ( T tMin1, T tMax1, T tMin2, T tMax2 )
+{
+	if_const ( OPEN_LEFT )
+		return HAS_EQUAL_MAX ? ( tMin1<=tMax2 ) : ( tMin1<tMax2 );
+
+	if_const ( OPEN_RIGHT )
+		return HAS_EQUAL_MIN ? ( tMax1>=tMin2 ) : ( tMax1>tMin2 );
+
+	auto bMinOk = HAS_EQUAL_MIN ? ( tMin1<=tMax2 ) : ( tMin1<tMax2 );
+	auto bMaxOk = HAS_EQUAL_MAX ? ( tMax1>=tMin2 ) : ( tMax1>tMin2 );
+
+	return bMinOk && bMaxOk;
+}
+
+template < typename T >
+inline bool MvaEval_Any ( const VecTraits_T<T> & dMvas, const VecTraits_T<const SphAttr_t> & dFilters )
+{
+	if ( dMvas.IsEmpty() || dFilters.IsEmpty() )
+		return false;
+
+	const T * L = dMvas.begin();
+
+	for ( const auto & tFilter : dFilters )
+	{
+		const T * R = &dMvas.Last();
+		while ( L<=R )
+		{
+			const T * pVal = L + (R - L) / 2;
+			T iValue = sphUnalignedRead ( *pVal );
+			if ( tFilter > iValue )
+				L = pVal + 1;
+			else if ( tFilter < iValue )
+				R = pVal - 1;
+			else
+				return true;
+		}
+	}
+
+	return false;
+}
+
+template < typename T >
+inline bool MvaEval_All ( const VecTraits_T<T> & dMvas, const VecTraits_T<const SphAttr_t> & dFilters )
+{
+	if ( dMvas.IsEmpty() || dFilters.IsEmpty() )
+		return false;
+
+	for ( const T & tValue : dMvas )
+	{
+		const SphAttr_t iCheck = sphUnalignedRead(tValue);
+		if ( !dFilters.BinarySearch(iCheck) )
+			return false;
+	}
+
+	return true;
+}
+
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX>
+inline bool MvaEval_RangeAny ( const T * pMva, int nValues, SphAttr_t m_iMinValue, SphAttr_t m_iMaxValue )
+{
+	if ( !pMva )
+		return false;
+
+	const T * pEnd = pMva+nValues;
+	const T * L = pMva;
+	const T * R = pEnd - 1;
+
+	while ( L<=R )
+	{
+		const T * pVal = L + (R - L) / 2;
+		T iMva = sphUnalignedRead ( *pVal );
+
+		if ( m_iMinValue>iMva )
+			L = pVal + 1;
+		else if ( m_iMinValue<iMva )
+			R = pVal - 1;
+		else
+			return ( HAS_EQUAL_MIN || pVal+1<pEnd );
+	}
+	if ( L==pEnd )
+		return false;
+
+	T iMvaL = sphUnalignedRead ( *L );
+	if_const ( HAS_EQUAL_MAX )
+		return iMvaL<=m_iMaxValue;
+	else
+		return iMvaL<m_iMaxValue;
+}
+
+template<bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX, typename T = SphAttr_t>
+inline bool EvalBlockRangeAll ( T tMin1, T tMax1, T tMin2, T tMax2 )
+{
+	auto bMinOk = HAS_EQUAL_MIN ? ( tMin1>=tMin2 ) : ( tMin1>tMin2 );
+	auto bMaxOk = HAS_EQUAL_MAX ? ( tMax1<=tMax2 ) : ( tMax1<tMax2 );
+
+	return bMinOk && bMaxOk;
+}
+
+template <typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX>
+inline bool MvaEval_RangeAll ( const T * pMva, int nValues, SphAttr_t m_iMinValue, SphAttr_t m_iMaxValue )
+{
+	if ( !pMva )
+		return false;
+
+	const T * L = pMva;
+	const T * R = pMva+nValues-1;
+	return EvalBlockRangeAll<HAS_EQUAL_MIN,HAS_EQUAL_MAX> ( *L, *R, (T)m_iMinValue, (T)m_iMaxValue );
+}
+
+
+struct MvaEvalAll_c
+{
+	template<typename T>
+	static inline bool Eval ( const VecTraits_T<T> & dMvas, const VecTraits_T<SphAttr_t> & dFilters )
+	{
+		return MvaEval_All ( dMvas, dFilters );
+	}
+
+	template<typename T>
+	static inline bool Eval ( const VecTraits_T<T> & dMvas, SphAttr_t tValue )
+	{
+		for ( T tMVA : dMvas )
+			if ( tMVA!=tValue )
+				return false;
+
+		return true;
+	}
+
+	template<typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX >
+	static inline bool EvalRange ( const VecTraits_T<T> & dMvas, SphAttr_t tMin, SphAttr_t tMax )
+	{
+		return MvaEval_RangeAll<T, HAS_EQUAL_MIN, HAS_EQUAL_MAX> ( dMvas.Begin(), dMvas.GetLength(), tMin, tMax );
+	}
+
+	static inline bool EvalBlock ( const VecTraits_T<SphAttr_t> & dFilters, SphAttr_t tBlockMin, SphAttr_t tBlockMax )
+	{
+		return dFilters.all_of ( [&]( const SphAttr_t & tFilter ) { return tFilter>=tBlockMin && tFilter<=tBlockMax; } );
+	}
+};
+
+
+struct MvaEvalAny_c
+{
+	template<typename T>
+	static inline bool Eval ( const VecTraits_T<T> & dMvas, const VecTraits_T<const SphAttr_t> & dFilters )
+	{
+		return MvaEval_Any ( dMvas, dFilters );
+	}
+
+	template<typename T>
+	static inline bool Eval ( const VecTraits_T<T> & dMvas, SphAttr_t tValue )
+	{
+		return !!dMvas.BinarySearch(tValue);
+	}
+
+	template<typename T, bool HAS_EQUAL_MIN, bool HAS_EQUAL_MAX >
+	static inline bool EvalRange ( const VecTraits_T<T> & dMvas, SphAttr_t tMin, SphAttr_t tMax )
+	{
+		return MvaEval_RangeAny<T, HAS_EQUAL_MIN, HAS_EQUAL_MAX> ( dMvas.Begin(), dMvas.GetLength(), tMin, tMax );
+	}
+
+	static inline bool EvalBlock ( const VecTraits_T<SphAttr_t> & dFilters, SphAttr_t tBlockMin, SphAttr_t tBlockMax )
+	{
+		return dFilters.any_of ( [&]( const SphAttr_t & tFilter ) { return tFilter>=tBlockMin && tFilter<=tBlockMax; } );
+	}
+};
+
+#endif // _sphinxfilter_

+ 39 - 554
src/sphinxint.h

@@ -21,6 +21,7 @@
 #include "sphinxudf.h"
 #include "sphinxjsonquery.h"
 #include "sphinxutils.h"
+#include "fileio.h"
 
 #include <float.h>
 
@@ -42,7 +43,7 @@ inline const char * strerrorm ( int errnum )
 //////////////////////////////////////////////////////////////////////////
 
 const DWORD		INDEX_MAGIC_HEADER			= 0x58485053;		///< my magic 'SPHX' header
-const DWORD		INDEX_FORMAT_VERSION		= 60;				///< my format version
+const DWORD		INDEX_FORMAT_VERSION		= 61;				///< my format version
 
 const char		MAGIC_SYNONYM_WHITESPACE	= 1;				// used internally in tokenizer only
 const char		MAGIC_CODE_SENTENCE			= 2;				// emitted from tokenizer on sentence boundary
@@ -82,112 +83,6 @@ extern bool g_bJsonKeynamesToLowercase;
 // INTERNAL HELPER FUNCTIONS, CLASSES, ETC
 //////////////////////////////////////////////////////////////////////////
 
-#define SPH_QUERY_STATES \
-	SPH_QUERY_STATE ( UNKNOWN,		"unknown" ) \
-	SPH_QUERY_STATE ( NET_READ,		"net_read" ) \
-	SPH_QUERY_STATE ( IO,			"io" ) \
-	SPH_QUERY_STATE ( DIST_CONNECT,	"dist_connect" ) \
-	SPH_QUERY_STATE ( LOCAL_DF,		"local_df" ) \
-	SPH_QUERY_STATE ( LOCAL_SEARCH,	"local_search" ) \
-	SPH_QUERY_STATE ( SQL_PARSE,	"sql_parse" ) \
-	SPH_QUERY_STATE ( FULLSCAN,		"fullscan" ) \
-	SPH_QUERY_STATE ( DICT_SETUP,	"dict_setup" ) \
-	SPH_QUERY_STATE ( PARSE,		"parse" ) \
-	SPH_QUERY_STATE ( TRANSFORMS,	"transforms" ) \
-	SPH_QUERY_STATE ( INIT,			"init" ) \
-	SPH_QUERY_STATE ( INIT_SEGMENT,	"init_segment" ) \
-	SPH_QUERY_STATE ( OPEN,			"open" ) \
-	SPH_QUERY_STATE ( READ_DOCS,	"read_docs" ) \
-	SPH_QUERY_STATE ( READ_HITS,	"read_hits" ) \
-	SPH_QUERY_STATE ( GET_DOCS,		"get_docs" ) \
-	SPH_QUERY_STATE ( GET_HITS,		"get_hits" ) \
-	SPH_QUERY_STATE ( FILTER,		"filter" ) \
-	SPH_QUERY_STATE ( RANK,			"rank" ) \
-	SPH_QUERY_STATE ( QCACHE_UP,	"qcache_update" ) \
-	SPH_QUERY_STATE ( QCACHE_FINAL,	"qcache_final" ) \
-	SPH_QUERY_STATE ( SORT,			"sort" ) \
-	SPH_QUERY_STATE ( FINALIZE,		"finalize" ) \
-	SPH_QUERY_STATE ( DYNAMIC,		"clone_attrs" ) \
-	SPH_QUERY_STATE ( DIST_WAIT,	"dist_wait" ) \
-	SPH_QUERY_STATE ( AGGREGATE,	"aggregate" ) \
-	SPH_QUERY_STATE ( NET_WRITE,	"net_write" ) \
-	SPH_QUERY_STATE ( EVAL_POST,	"eval_post" ) \
-	SPH_QUERY_STATE ( EVAL_GETFIELD,"eval_getfield" ) \
-	SPH_QUERY_STATE ( SNIPPET,		"eval_snippet" ) \
-	SPH_QUERY_STATE ( EVAL_UDF,		"eval_udf" ) \
-	SPH_QUERY_STATE ( TABLE_FUNC,	"table_func" )
-
-
-/// possible query states, used for profiling
-enum ESphQueryState
-{
-	SPH_QSTATE_INFINUM = -1,
-
-#define SPH_QUERY_STATE(_name,_desc) SPH_QSTATE_##_name,
-	SPH_QUERY_STATES
-#undef SPH_QUERY_STATE
-
-	SPH_QSTATE_TOTAL
-};
-STATIC_ASSERT ( SPH_QSTATE_UNKNOWN==0, BAD_QUERY_STATE_ENUM_BASE );
-
-/// search query profile
-class QueryProfile_c
-{
-public:
-	ESphQueryState	m_eState;							///< current state
-	int64_t			m_tmStamp;							///< timestamp when we entered the current state
-
-	int				m_dSwitches [ SPH_QSTATE_TOTAL+1 ];	///< number of switches to given state
-	int64_t			m_tmTotal [ SPH_QSTATE_TOTAL+1 ];	///< total time spent per state
-	CSphVector<BYTE>	m_dPlan; 						///< bson with plan
-
-	/// create empty and stopped profile
-	QueryProfile_c ()
-	{
-		Start ( SPH_QSTATE_TOTAL );
-	}
-
-	/// switch to a new query state, and record a timestamp
-	/// returns previous state, to simplify Push/Pop like scenarios
-	ESphQueryState Switch ( ESphQueryState eNew )
-	{
-		int64_t tmNow = sphMicroTimer();
-		ESphQueryState eOld = m_eState;
-		++m_dSwitches [ eOld ];
-		m_tmTotal [ eOld ] += tmNow - m_tmStamp;
-		m_eState = eNew;
-		m_tmStamp = tmNow;
-		return eOld;
-	}
-
-	/// reset everything and start profiling from a given state
-	void Start ( ESphQueryState eNew )
-	{
-		memset ( m_dSwitches, 0, sizeof(m_dSwitches) );
-		memset ( m_tmTotal, 0, sizeof(m_tmTotal) );
-		m_eState = eNew;
-		m_tmStamp = sphMicroTimer();
-	}
-
-	void AddMetric ( const QueryProfile_c& tData )
-	{
-		// fixme! m.b. invent a way to display data from different profilers with kind of multiplier?
-		for ( int i = 0; i<SPH_QSTATE_TOTAL; ++i )
-		{
-			m_dSwitches[i] += tData.m_dSwitches[i];
-			m_tmTotal[i] += tData.m_tmTotal[i];
-		}
-	}
-
-	/// stop profiling
-	void Stop()
-	{
-		Switch ( SPH_QSTATE_TOTAL );
-	}
-	void			BuildResult ( XQNode_t * pRoot, const CSphSchema & tSchema, const StrVec_t & dZones );
-};
-
 // shorter names for more compact bson
 #define SZ_TYPE				"a"
 #define SZ_VIRTUALLY_PLAIN	"b"
@@ -206,403 +101,6 @@ public:
 #define SZ_ZONES            "o"
 #define SZ_ZONESPANS        "p"
 
-// acquire common pattern 'check, then switch if not null'
-inline void SwitchProfile ( QueryProfile_c* pProfile, ESphQueryState eState )
-{
-	if ( pProfile )
-		pProfile->Switch ( eState );
-}
-
-
-class CSphScopedProfile
-{
-private:
-	QueryProfile_c *	m_pProfile;
-	ESphQueryState		m_eOldState;
-
-public:
-	explicit CSphScopedProfile ( QueryProfile_c * pProfile, ESphQueryState eNewState )
-	{
-		m_pProfile = pProfile;
-		m_eOldState = SPH_QSTATE_UNKNOWN;
-		if ( m_pProfile )
-			m_eOldState = m_pProfile->Switch ( eNewState );
-	}
-
-	~CSphScopedProfile()
-	{
-		if ( m_pProfile )
-			m_pProfile->Switch ( m_eOldState );
-	}
-};
-
-
-/// file writer with write buffering and int encoder
-class CSphWriter : ISphNoncopyable
-{
-public:
-	virtual			~CSphWriter ();
-
-	void			SetBufferSize ( int iBufferSize );	///< tune write cache size; must be called before OpenFile() or SetFile()
-
-	bool			OpenFile ( const CSphString & sName, CSphString & sError );
-	void			SetFile ( CSphAutofile & tAuto, SphOffset_t * pSharedOffset, CSphString & sError );
-	void			CloseFile ( bool bTruncate = false );	///< note: calls Flush(), ie. IsError() might get true after this call
-	void			UnlinkFile (); /// some shit happened (outside) and the file is no more actual.
-
-	void			PutByte ( BYTE uValue );
-	void			PutBytes ( const void * pData, int64_t iSize );
-	void			PutWord ( WORD uValue ) { PutBytes ( &uValue, sizeof(WORD) ); }
-	void			PutDword ( DWORD uValue ) { PutBytes ( &uValue, sizeof(DWORD) ); }
-	void			PutOffset ( SphOffset_t uValue ) { PutBytes ( &uValue, sizeof(SphOffset_t) ); }
-	void			PutString ( const char * szString );
-	void			PutString ( const CSphString & sString );
-	void			Tag ( const char * sTag );
-
-	void			SeekTo ( SphOffset_t iPos, bool bTruncate = false );
-
-	void			ZipInt ( DWORD uValue );
-	void			ZipOffset ( uint64_t uValue );
-
-	bool			IsError () const	{ return m_bError; }
-	SphOffset_t		GetPos () const		{ return m_iPos; }
-
-	virtual void	Flush ();
-
-protected:
-	CSphString		m_sName;
-	SphOffset_t		m_iPos = -1;
-	SphOffset_t		m_iDiskPos = 0;
-
-	int				m_iFD = -1;
-	int				m_iPoolUsed = 0;
-	BYTE *			m_pBuffer = nullptr;
-	BYTE *			m_pPool = nullptr;
-	bool			m_bOwnFile = false;
-	SphOffset_t	*	m_pSharedOffset = nullptr;
-	int				m_iBufferSize = 262144;
-
-	bool			m_bError = false;
-	CSphString *	m_pError = nullptr;
-
-private:
-	void			UpdatePoolUsed();
-};
-
-
-/// file which closes automatically when going out of scope
-class CSphAutofile : ISphNoncopyable
-{
-protected:
-	int			m_iFD = -1;					///< my file descriptor
-	CSphString	m_sFilename;				///< my file name
-	bool		m_bTemporary = false;		///< whether to unlink this file on Close()
-	bool		m_bWouldTemporary = false;	///< backup of the m_bTemporary
-
-	CSphIndexProgress *	m_pStat = nullptr;
-
-public:
-					CSphAutofile () = default;
-					CSphAutofile ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp=false );
-					~CSphAutofile ();
-
-	int				Open ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp=false );
-	void			Close ();
-	void			SetTemporary(); ///< would be set if a shit happened and the file is not actual.
-	int				GetFD () const { return m_iFD; }
-	const char *	GetFilename () const;
-	SphOffset_t		GetSize ( SphOffset_t iMinSize, bool bCheckSizeT, CSphString & sError );
-	SphOffset_t		GetSize ();
-
-	bool			Read ( void * pBuf, int64_t iCount, CSphString & sError );
-	void			SetProgressCallback ( CSphIndexProgress * pStat );
-};
-
-
-/// file reader with read buffering and int decoder
-class CSphReader
-{
-public:
-	QueryProfile_c *	m_pProfile = nullptr;
-	ESphQueryState		m_eProfileState { SPH_QSTATE_IO };
-
-public:
-	CSphReader ( BYTE * pBuf=NULL, int iSize=0 );
-	virtual		~CSphReader ();
-
-	void		SetBuffers ( int iReadBuffer, int iReadUnhinted );
-	void		SetFile ( int iFD, const char * sFilename );
-	void		SetFile ( const CSphAutofile & tFile );
-	void		Reset ();
-	void		SeekTo ( SphOffset_t iPos, int iSizeHint );
-
-	void		SkipBytes ( int iCount );
-	SphOffset_t	GetPos () const { return m_iPos+m_iBuffPos; }
-
-	void		GetBytes ( void * pData, int iSize );
-	int			GetBytesZerocopy ( const BYTE ** ppData, int64_t iMax ); ///< zerocopy method; returns actual length present in buffer (upto iMax)
-
-	int			GetByte ();
-	DWORD		GetDword ();
-	SphOffset_t	GetOffset ();
-	CSphString	GetString ();
-	int			GetLine ( char * sBuffer, int iMaxLen );
-	bool		Tag ( const char * sTag );
-
-	DWORD		UnzipInt ();
-	uint64_t	UnzipOffset ();
-
-	bool					GetErrorFlag () const		{ return m_bError; }
-	const CSphString &		GetErrorMessage () const	{ return m_sError; }
-	const CSphString &		GetFilename() const			{ return m_sFilename; }
-	int						GetBufferSize() const		{ return m_iBufSize; }
-	void					ResetError();
-
-	RowID_t		UnzipRowid ()	{ return UnzipInt(); }
-	SphWordID_t	UnzipWordid ()	{ return UnzipOffset(); }
-
-	CSphReader &	operator = ( const CSphReader & rhs );
-
-protected:
-	int			m_iFD = -1;
-	CSphString m_sFilename;
-	int			m_iBuffUsed = 0;	///< how many bytes in buffer are valid
-
-	SphOffset_t	m_iPos = 0;			///< position in the file from witch m_pBuff starts
-	BYTE *		m_pBuff;            ///< the buffer
-	int			m_iBuffPos = 0;		///< position in the buffer. (so pos in file is m_iPos + m_iBuffPos)
-
-
-private:
-	int			m_iSizeHint = 0;	///< how much do we expect to read (>=m_iReadUnhinted)
-
-	int			m_iBufSize;
-	bool		m_bBufOwned = false;
-	int			m_iReadUnhinted;	///< how much to read if no hint provided.
-
-	bool		m_bError = false;
-	CSphString	m_sError;
-
-
-protected:
-	virtual void		UpdateCache ();
-};
-
-/// file reader
-class FileReader_c: public CSphReader
-{
-public:
-	explicit FileReader_c ( BYTE* pBuf = nullptr, int iSize = 0 )
-		: CSphReader ( pBuf, iSize )
-	{}
-
-	SphOffset_t GetFilesize () const;
-
-	// added for DebugCheck()
-	int GetFD () const { return m_iFD; }
-};
-
-/// scoped file reader
-class CSphAutoreader : public FileReader_c
-{
-public:
-	CSphAutoreader ( BYTE * pBuf=nullptr, int iSize=0 ) : FileReader_c ( pBuf, iSize ) {}
-	~CSphAutoreader () override { Close(); }
-
-	bool		Open ( const CSphString & sFilename, CSphString & sError );
-	void		Close ();
-};
-
-class MemoryReader_c
-{
-public:
-	MemoryReader_c ( const BYTE * pData, int iLen )
-		: m_pData ( pData )
-		, m_iLen ( iLen )
-		, m_pCur ( pData )
-	{}
-
-	explicit MemoryReader_c ( ByteBlob_t dData )
-		: m_pData ( dData.first )
-		, m_iLen ( dData.second )
-		, m_pCur ( dData.first ) {}
-
-	int GetPos()
-	{
-		return ( m_pCur - m_pData );
-	}
-
-	void SetPos ( int iOff )
-	{
-		assert ( iOff>=0 && iOff<=m_iLen );
-		m_pCur = m_pData + iOff;
-	}
-
-	uint64_t UnzipOffset();
-	DWORD UnzipInt();
-
-	CSphString GetString()
-	{
-		CSphString sRes;
-		DWORD iLen = GetDword();
-		if ( iLen )
-		{
-			sRes.Reserve ( iLen );
-			GetBytes ( (BYTE *)sRes.cstr(), iLen );
-		}
-
-		return sRes;
-	}
-
-	DWORD GetDword()
-	{
-		DWORD uRes = 0;
-		GetBytes ( &uRes, sizeof(uRes) );
-		return uRes;
-	}
-
-	WORD GetWord()
-	{
-		WORD uRes = 0;
-		GetBytes ( &uRes, sizeof(uRes) );
-		return uRes;
-	}
-
-	void GetBytes ( void * pData, int iLen )
-	{
-		if ( !iLen )
-			return;
-
-		assert ( m_pCur );
-		assert ( m_pCur<m_pData+m_iLen );
-		assert ( m_pCur+iLen<=m_pData+m_iLen );
-		memcpy ( pData, m_pCur, iLen );
-		m_pCur += iLen;
-	}
-
-	BYTE GetByte()
-	{
-		BYTE uVal = 0;
-		GetBytes ( &uVal, sizeof(uVal) );
-		return uVal;
-	}
-
-	uint64_t GetUint64()
-	{
-		uint64_t uVal;
-		GetBytes ( &uVal, sizeof(uVal) );
-		return uVal;
-	}
-
-	const BYTE * Begin() const
-	{
-		return m_pData;
-	}
-
-	int GetLength() const
-	{
-		return m_iLen;
-	}
-
-protected:
-	const BYTE *	m_pData = nullptr;
-	const int		m_iLen = 0;
-	const BYTE *	m_pCur = nullptr;
-};
-
-class MemoryWriter_c
-{
-public:
-	MemoryWriter_c ( CSphVector<BYTE> & dBuf )
-		: m_dBuf ( dBuf )
-	{}
-
-	int GetPos()
-	{
-		return m_dBuf.GetLength();
-	}
-
-	void ZipOffset ( uint64_t uVal );
-	void ZipInt ( DWORD uVal );
-
-	void PutString ( const CSphString & sVal )
-	{
-		int iLen = sVal.Length();
-		PutDword ( iLen );
-		if ( iLen )
-			PutBytes ( (const BYTE *)sVal.cstr(), iLen );
-	}
-
-	void PutString ( const char * sVal )
-	{
-		int iLen = 0;
-		if ( sVal )
-			iLen = (int) strlen ( sVal );
-		PutDword ( iLen );
-		if ( iLen )
-			PutBytes ( (const BYTE *)sVal, iLen );
-	}
-
-	void PutDword ( DWORD uVal )
-	{
-		PutBytes ( (BYTE *)&uVal, sizeof(uVal) );
-	}
-
-	void PutWord ( WORD uVal )
-	{
-		PutBytes ( (BYTE *)&uVal, sizeof(uVal) );
-	}
-
-	void PutBytes ( const void * pData, int iLen )
-	{
-		if ( !iLen )
-			return;
-
-		BYTE * pCur = m_dBuf.AddN ( iLen );
-		memcpy ( pCur, pData, iLen );
-	}
-
-	void PutByte ( BYTE uVal )
-	{
-		m_dBuf.Add ( uVal );
-	}
-
-	void PutUint64 ( uint64_t uVal )
-	{
-		PutBytes ( (BYTE *)&uVal, sizeof(uVal) );
-	}
-
-protected:
-	CSphVector<BYTE> & m_dBuf;
-};
-
-// fixme: get rid of this
-class MemoryReader2_c : public MemoryReader_c
-{
-public:
-	MemoryReader2_c ( const BYTE * pData, int iLen )
-		: MemoryReader_c ( pData, iLen )
-	{}
-
-	uint64_t UnzipInt() { return sphUnzipInt(m_pCur); }
-	uint64_t UnzipOffset() { return sphUnzipOffset(m_pCur); }
-};
-
-// fixme: get rid of this
-class MemoryWriter2_c : public MemoryWriter_c
-{
-public:
-	MemoryWriter2_c ( CSphVector<BYTE> & dBuf )
-		: MemoryWriter_c ( dBuf )
-	{}
-
-	void ZipOffset ( uint64_t uVal ) { sphZipValue ( [this] ( BYTE b ) { PutByte ( b ); }, uVal ); }
-	void ZipInt ( DWORD uVal ) { sphZipValue ( [this] ( BYTE b ) { PutByte ( b ); }, uVal ); }
-};
-
-
-//////////////////////////////////////////////////////////////////////////
-
 /// generic COM-like uids
 enum ExtraData_e
 {
@@ -681,6 +179,9 @@ public:
 	CSphVector<CalcItem_t>		m_dCalcSort;			///< items to compute for sorting/grouping
 	CSphVector<CalcItem_t>		m_dCalcFinal;			///< items to compute when finalizing result set
 
+	IntVec_t					m_dCalcFilterPtrAttrs;	///< items to free after computing filter stage
+	IntVec_t					m_dCalcSortPtrAttrs;	///< items to free after computing sort stage
+
 	const void *							m_pIndexData = nullptr;	///< backend specific data
 	QueryProfile_c *						m_pProfile = nullptr;
 	const SmallStringHash_T<int64_t> *		m_pLocalDocs = nullptr;
@@ -690,15 +191,23 @@ public:
 
 public:
 	explicit CSphQueryContext ( const CSphQuery & q );
-	~CSphQueryContext ();
+			~CSphQueryContext ();
 
 	void	BindWeights ( const CSphQuery & tQuery, const CSphSchema & tSchema, CSphString & sWarning );
+
+#if USE_COLUMNAR
+	bool	SetupCalc ( CSphQueryResultMeta & tMeta, const ISphSchema & tInSchema, const CSphSchema & tSchema, const BYTE * pBlobPool, const columnar::Columnar_i * pColumnar,
+				const CSphVector<const ISphSchema *> & dInSchemas );
+#else
 	bool	SetupCalc ( CSphQueryResultMeta & tMeta, const ISphSchema & tInSchema, const CSphSchema & tSchema, const BYTE * pBlobPool, const CSphVector<const ISphSchema *> & dInSchemas );
+#endif
+
 	bool	CreateFilters ( CreateFilterContext_t &tCtx, CSphString &sError, CSphString &sWarning );
 
 	void	CalcFilter ( CSphMatch & tMatch ) const;
 	void	CalcSort ( CSphMatch & tMatch ) const;
 	void	CalcFinal ( CSphMatch & tMatch ) const;
+	void	CalcItem ( CSphMatch & tMatch, const CalcItem_t & tCalc ) const;
 
 	void	FreeDataFilter ( CSphMatch & tMatch ) const;
 	void	FreeDataSort ( CSphMatch & tMatch ) const;
@@ -706,11 +215,19 @@ public:
 	// note that RT index bind pools at segment searching, not at time it setups context
 	void	ExprCommand ( ESphExprCommand eCmd, void * pArg );
 	void	SetBlobPool ( const BYTE * pBlobPool );
+
+#if USE_COLUMNAR
+	void	SetColumnar ( const columnar::Columnar_i * pColumnar );
+#endif
+
 	void	SetupExtraData ( ISphRanker * pRanker, ISphMatchSorter * pSorter );
 	void	ResetFilters();
 
 private:
 	CSphVector<UservarIntSet_c>		m_dUserVals;
+
+	void	AddToFilterCalc ( const CalcItem_t & tCalc );
+	void	AddToSortCalc ( const CalcItem_t & tCalc );
 };
 
 
@@ -813,51 +330,6 @@ inline std::pair<DWORD,DWORD> MVA_BE ( const DWORD * pMva )
 #endif
 }
 
-template < typename T >
-bool MvaEval_Any ( const VecTraits_T<const T> & dMvas, const VecTraits_T<const SphAttr_t>& dFilters )
-{
-	if ( dMvas.IsEmpty () || dFilters.IsEmpty ())
-		return false;
-
-	const T * L = dMvas.begin();
-
-	for ( const auto& dFilter : dFilters )
-	{
-		const T * R = &dMvas.Last ();
-		while ( L<=R )
-		{
-			const T * pVal = L + (R - L) / 2;
-			T iValue = sphUnalignedRead ( *pVal );
-			if ( dFilter > iValue )
-				L = pVal + 1;
-			else if ( dFilter < iValue )
-				R = pVal - 1;
-			else
-				return true;
-		}
-	}
-
-	return false;
-}
-
-
-template < typename T >
-bool MvaEval_All ( const VecTraits_T<const T>& dMvas, const VecTraits_T<const SphAttr_t>& dFilters )
-{
-	if ( dMvas.IsEmpty() || dFilters.IsEmpty() )
-		return false;
-
-	for ( const T& dMva : dMvas )
-	{
-		const SphAttr_t iCheck = sphUnalignedRead ( dMva );
-		if ( !dFilters.BinarySearch ( iCheck ) )
-			return false;
-	}
-
-	return true;
-}
-
-
 // FIXME!!! for over INT_MAX attributes
 /// attr min-max builder
 class AttrIndexBuilder_c : ISphNoncopyable
@@ -1662,12 +1134,8 @@ public:
 
 const BYTE *	SkipQuoted ( const BYTE * p );
 
-int 			GetStringRemapCount ( const ISphSchema & tDstSchema, const ISphSchema & tSrcSchema );
-bool			IsSortStringInternal ( const CSphString& sColumnName );
 /// make string lowercase but keep case of JSON.field
 void			sphColumnToLowercase ( char * sVal );
-bool			IsSortJsonInternal ( const CSphString& sColumnName );
-CSphString		SortJsonInternalSet ( const CSphString& sColumnName );
 
 // returns 0: query can't be run at all (even hardlimit stack will be exceeded), sError is set.
 // returns -1: query might be run on current frame
@@ -1719,6 +1187,11 @@ enum ESphExt
 	SPH_EXT_SPH,
 	SPH_EXT_SPA,
 	SPH_EXT_SPB,
+
+#if USE_COLUMNAR
+	SPH_EXT_SPC,
+#endif
+
 	SPH_EXT_SPI,
 	SPH_EXT_SPD,
 	SPH_EXT_SPP,
@@ -2398,6 +1871,18 @@ struct SchemaItemVariant_t
 	CSphAttrLocator m_tLoc;
 };
 
+template <typename T>
+inline T ConvertType ( SphAttr_t tValue )
+{
+	return (T)tValue;
+}
+
+template <>
+inline float ConvertType<float>( SphAttr_t tValue )
+{
+	return sphDW2F(tValue);
+}
+
 using SchemaItemHash_c = OpenHash_T<SchemaItemVariant_t, uint64_t, HashFunc_Int64_t>;
 
 template <typename T>

+ 1 - 0
src/sphinxjsonquery.h

@@ -16,6 +16,7 @@
 
 class QueryParser_i;
 class StmtErrorReporter_i;
+class QueryProfile_c;
 struct cJSON;
 struct XQNode_t;
 struct SqlStmt_t;

+ 1 - 49
src/sphinxplugin.cpp

@@ -14,20 +14,7 @@
 #include "sphinxint.h"
 #include "sphinxutils.h"
 #include "sphinxplugin.h"
-
-#if !USE_WINDOWS
-#include <unistd.h>
-#include <sys/time.h>
-#if HAVE_DLOPEN
-#include <dlfcn.h>
-#endif // HAVE_DLOPEN
-#endif // !USE_WINDOWS
-
-#if !USE_WINDOWS
-#ifndef HAVE_DLERROR
-#define dlerror() ""
-#endif // HAVE_DLERROR
-#endif // !USE_WINDOWS
+#include "libutils.h"
 
 //////////////////////////////////////////////////////////////////////////
 // TYPES
@@ -97,41 +84,6 @@ static CSphOrderedHash<PluginDesc_c*, PluginKey_t, PluginKey_t, 256>	g_hPlugins;
 // PLUGIN MANAGER
 //////////////////////////////////////////////////////////////////////////
 
-#if USE_WINDOWS
-#undef HAVE_DLOPEN
-#define HAVE_DLOPEN		1
-#define RTLD_LAZY		0
-#define RTLD_LOCAL		0
-
-void * dlsym ( void * lib, const char * name )
-{
-	return GetProcAddress ( (HMODULE)lib, name );
-}
-
-void * dlopen ( const char * libname, int )
-{
-	return LoadLibraryEx ( libname, NULL, 0 );
-}
-
-int dlclose ( void * lib )
-{
-	return FreeLibrary ( (HMODULE)lib )
-		? 0
-		: GetLastError();
-}
-
-const char * dlerror()
-{
-	static char sError[256];
-	DWORD uError = GetLastError();
-	FormatMessage ( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL,
-		uError, LANG_SYSTEM_DEFAULT, (LPTSTR)sError, sizeof(sError), NULL );
-	return sError;
-}
-#endif // USE_WINDOWS
-
-//////////////////////////////////////////////////////////////////////////
-
 PluginLib_c::PluginLib_c ( void * pHandle, const char * sName )
 {
 	assert ( pHandle );

+ 29 - 18
src/sphinxpq.cpp

@@ -11,6 +11,7 @@
 //
 
 #include "sphinxpq.h"
+#include "sphinxsort.h"
 #include "fileutils.h"
 #include "icu.h"
 #include "accumulator.h"
@@ -82,7 +83,7 @@ public:
 	StoredQuery_i * CreateQuery ( PercolateQueryArgs_t & tArgs, CSphString & sError ) final
 		EXCLUDES ( m_tLockHash, m_tLock );
 
-	bool Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder ) override;
+	bool Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings ) override;
 	void Dealloc () override {}
 	void Preread () override {}
 	void PostSetup() override EXCLUDES ( m_tLockHash, m_tLock );
@@ -100,8 +101,8 @@ public:
 	void ForceRamFlush ( const char* szReason ) EXCLUDES ( m_tLock ) final;
 	bool IsFlushNeed() const override;
 	bool ForceDiskChunk () override;
-	bool AttachDiskIndex ( CSphIndex * , bool, bool &, CSphString & ) override { return true; }
-	void Optimize (int,int,int,const char*) override {}
+	bool AttachDiskIndex ( CSphIndex * , bool, bool &, StrVec_t &, CSphString & ) override { return true; }
+	void Optimize ( int, int, int, const char * ) override {}
 	bool IsSameSettings ( CSphReconfigureSettings & tSettings, CSphReconfigureSetup & tSetup, StrVec_t & dWarnings, CSphString & sError ) const override;
 
 	bool Reconfigure ( CSphReconfigureSetup & tSetup ) override EXCLUDES ( m_tLockHash, m_tLock );
@@ -1932,17 +1933,24 @@ bool PercolateIndex_c::Commit ( int * pDeleted, RtAccum_t * pAccExt )
 	return true;
 }
 
-struct PqMatchProcessor_t : ISphMatchProcessor, ISphNoncopyable
-{
-	const CSphQueryContext &m_tCtx;
-	int m_iTag;
 
-	PqMatchProcessor_t ( int iTag, const CSphQueryContext &tCtx )
-		: m_tCtx ( tCtx )
-		, m_iTag ( iTag )
+class PqMatchProcessor_c : public MatchProcessor_i, ISphNoncopyable
+{
+public:
+	PqMatchProcessor_c ( int iTag, const CSphQueryContext & tCtx )
+		: m_iTag ( iTag )
+		, m_tCtx ( tCtx )
 	{}
 
-	void Process ( CSphMatch * pMatch ) final
+	bool ProcessInRowIdOrder() const final				{ return false; }
+	void Process ( CSphMatch * pMatch ) final			{ ProcessMatch(pMatch); }
+	void Process ( VecTraits_T<CSphMatch *> & dMatches ){ dMatches.for_each ( [this]( CSphMatch * pMatch ){ ProcessMatch(pMatch); } ); }
+
+private:
+	int							m_iTag;
+	const CSphQueryContext &	m_tCtx;
+
+	inline void ProcessMatch ( CSphMatch * pMatch )
 	{
 		// fixme! tag is signed int,
 		// for distr. tags from remotes set with | 0x80000000,
@@ -1958,8 +1966,8 @@ struct PqMatchProcessor_t : ISphMatchProcessor, ISphNoncopyable
 	}
 };
 
-bool PercolateIndex_c::MultiScan ( CSphQueryResult & tResult, const CSphQuery & tQuery,
-		const VecTraits_T<ISphMatchSorter *> & dSorters, const CSphMultiQueryArgs &tArgs ) const
+
+bool PercolateIndex_c::MultiScan ( CSphQueryResult & tResult, const CSphQuery & tQuery, const VecTraits_T<ISphMatchSorter *> & dSorters, const CSphMultiQueryArgs & tArgs ) const
 {
 	assert ( tArgs.m_iTag>=0 );
 	auto & tMeta = *tResult.m_pMeta;
@@ -1988,8 +1996,12 @@ bool PercolateIndex_c::MultiScan ( CSphQueryResult & tResult, const CSphQuery &
 
 	// setup calculations and result schema
 	CSphQueryContext tCtx ( tQuery );
-	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tMatchSchema, nullptr, dSorterSchemas ) )
-		return false;
+
+#if USE_COLUMNAR
+	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tMatchSchema, nullptr, nullptr, dSorterSchemas ) ) return false;
+#else
+	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tMatchSchema, nullptr, dSorterSchemas ) ) return false;
+#endif
 
 	// setup filters
 	CreateFilterContext_t tFlx;
@@ -2103,7 +2115,7 @@ bool PercolateIndex_c::MultiScan ( CSphQueryResult & tResult, const CSphQuery &
 	// do final expression calculations
 	if ( tCtx.m_dCalcFinal.GetLength () )
 	{
-		PqMatchProcessor_t tFinal ( tArgs.m_iTag, tCtx );
+		PqMatchProcessor_c tFinal ( tArgs.m_iTag, tCtx );
 		dSorters.Apply ( [&tFinal] ( ISphMatchSorter * p ) { p->Finalize ( tFinal, false ); } );
 	}
 
@@ -2249,7 +2261,7 @@ void PercolateIndex_c::PostSetup ()
 	PostSetupUnl();
 }
 
-bool PercolateIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder )
+bool PercolateIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings )
 {
 	CSphString sLock;
 	sLock.SetSprintf ( "%s.lock", m_sFilename.cstr() );
@@ -2332,7 +2344,6 @@ bool PercolateIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilename
 	}
 
 	// recreate tokenizer
-	StrVec_t dWarnings;
 	m_pTokenizer = ISphTokenizer::Create ( tTokenizerSettings, &tEmbeddedFiles, pFilenameBuilder, dWarnings, m_sLastError );
 	if ( !m_pTokenizer )
 		return false;

+ 130 - 173
src/sphinxrt.cpp

@@ -15,6 +15,7 @@
 #include "sphinxrt.h"
 #include "sphinxpq.h"
 #include "sphinxsearch.h"
+#include "sphinxsort.h"
 #include "sphinxutils.h"
 #include "fileutils.h"
 #include "sphinxplugin.h"
@@ -22,13 +23,15 @@
 #include "sphinxqcache.h"
 #include "attribute.h"
 #include "killlist.h"
-#include "secondaryindex.h"
+#include "histogram.h"
 #include "accumulator.h"
 #include "indexcheck.h"
 #include "indexsettings.h"
 #include "indexformat.h"
 #include "coroutine.h"
 #include "mini_timer.h"
+#include "memio.h"
+#include "secondaryindex.h"
 
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -929,22 +932,21 @@ class RtIndex_c final : public RtIndex_i, public ISphNoncopyable, public ISphWor
 		public IndexUpdateHelper_c, public IndexAlterHelper_c, public DebugCheckHelper_c
 {
 public:
-	explicit			RtIndex_c ( const CSphSchema & tSchema, const char * sIndexName, int64_t iRamSize, const char * sPath, bool bKeywordDict );
+						RtIndex_c ( const CSphSchema & tSchema, const char * sIndexName, int64_t iRamSize, const char * sPath, bool bKeywordDict );
 						~RtIndex_c () final;
 
-	bool				AddDocument ( const VecTraits_T<VecTraits_T<const char >> &dFields,
-		CSphMatch & tDoc, bool bReplace, const CSphString & sTokenFilterOptions, const char ** ppStr,
+	bool				AddDocument ( const VecTraits_T<VecTraits_T<const char >> &dFields, CSphMatch & tDoc, bool bReplace, const CSphString & sTokenFilterOptions, const char ** ppStr,
 		const VecTraits_T<int64_t> & dMvas, CSphString & sError, CSphString & sWarning, RtAccum_t * pAccExt ) override;
-	virtual bool		AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, bool bReplace, const char ** ppStr, const VecTraits_T<int64_t> & dMvas,
-		const DocstoreBuilder_i::Doc_t * pStoredDoc, CSphString & sError, CSphString & sWarning, RtAccum_t * pAccExt );
+	virtual bool		AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, bool bReplace, const char ** ppStr, const VecTraits_T<int64_t> & dMvas, const DocstoreBuilder_i::Doc_t * pStoredDoc,
+		CSphString & sError, CSphString & sWarning, RtAccum_t * pAccExt );
 	bool				DeleteDocument ( const VecTraits_T<DocID_t> & dDocs, CSphString & sError, RtAccum_t * pAccExt ) final;
 	bool				Commit ( int * pDeleted, RtAccum_t * pAccExt ) final;
 	void				RollBack ( RtAccum_t * pAccExt ) final;
-	bool				CommitReplayable ( RtSegment_t * pNewSeg, const CSphVector<DocID_t> & dAccKlist, int * pTotalKilled, bool bForceDump ) EXCLUDES (m_tChunkLock ); // FIXME? protect?
-	void				ForceRamFlush ( const char* szReason ) EXCLUDES ( m_tFlushLock ) final;
+	bool				CommitReplayable ( RtSegment_t * pNewSeg, const CSphVector<DocID_t> & dAccKlist, int * pTotalKilled, bool bForceDump ) EXCLUDES (m_tChunkLock); // FIXME? protect?
+	void				ForceRamFlush ( const char * szReason ) EXCLUDES ( m_tFlushLock ) final;
 	bool				IsFlushNeed() const final;
 	bool				ForceDiskChunk() final;
-	bool				AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, CSphString & sError ) 			final  EXCLUDES (m_tReading );
+	bool				AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, StrVec_t & dWarnings, CSphString & sError ) final EXCLUDES (m_tReading);
 	bool				Truncate ( CSphString & sError ) final;
 	void				Optimize ( int iCutoff, int iFromID, int iToID, const char * szUvarFilter ) final;
 	void				CommonMerge ( Selector_t&& fnSelector, const char* szUvarFilter=nullptr );
@@ -975,7 +977,7 @@ public:
 	int					Build ( const CSphVector<CSphSource*> & , int , int ) final { return 0; }
 	bool				Merge ( CSphIndex * , const VecTraits_T<CSphFilterSettings> &, bool ) final { return false; }
 
-	bool				Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder ) final;
+	bool				Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings ) final;
 	void				Dealloc () final {}
 	void				Preread () final;
 	void				SetBase ( const char * ) final {}
@@ -1112,16 +1114,16 @@ private:
 	RtSegment_t *				MergeSegments ( const RtSegment_t * pSeg1, const RtSegment_t * pSeg2, bool bHasMorphology ) const;
 	static void					CopyWord ( RtSegment_t & tDst, const RtSegment_t & tSrc, RtDocWriter_t & tOutDoc, RtDocReader_t & tInDoc, RtWord_t & tWord, const CSphVector<RowID_t> & tRowMap );
 
-	bool						LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath, DWORD & uVersion, bool & bRebuildInfixes );
-	bool						PreallocDiskChunks ( FilenameBuilder_i * pFilenameBuilder );
+	bool						LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath, DWORD & uVersion, bool & bRebuildInfixes, StrVec_t & dWarnings );
+	bool						PreallocDiskChunks ( FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings );
 	void						SaveMeta ( int64_t iTID, bool bSaveChunks=true );
 	void						SaveMetaSpecial ( int64_t iTID, const VecTraits_T<int> & dChunkNames );
 	void						SaveDiskHeader ( SaveDiskDataContext_t & tCtx, const ChunkStats_t & tStats ) const;
-	void						SaveDiskData ( const char * sFilename, const SphChunkGuard_t & tGuard, const ChunkStats_t & tStats ) const;
-	bool						SaveDiskChunk ( int64_t iTID, const SphChunkGuard_t & tGuard,
-			const ChunkStats_t & tStats, bool bForced, int * pSavedChunkId )
-			EXCLUDES ( m_tWriting ) EXCLUDES ( m_tChunkLock );
-	CSphIndex *					PreallocDiskChunk ( const char * sChunk, int iChunk, FilenameBuilder_i * pFilenameBuilder, CSphString & sError, const char * sName=nullptr ) const;
+	void						SaveDiskData ( const char * szFilename, const SphChunkGuard_t & tGuard, const ChunkStats_t & tStats ) const;
+	bool						SaveDiskChunk ( int64_t iTID, const SphChunkGuard_t & tGuard, const ChunkStats_t & tStats, bool bForced, int * pSavedChunkId, StrVec_t & dWarnings )
+									EXCLUDES ( m_tWriting ) EXCLUDES ( m_tChunkLock );
+
+	CSphIndex *					PreallocDiskChunk ( const char * szChunk, int iChunk, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings, CSphString & sError, const char * sName=nullptr ) const;
 	bool						LoadRamChunk ( DWORD uVersion, bool bRebuildInfixes );
 	bool						SaveRamChunk ( const VecTraits_T<const RtSegmentRefPtf_t>& dSegments );
 
@@ -2446,8 +2448,8 @@ RtSegment_t * RtIndex_c::MergeSegments ( const RtSegment_t * pSeg1, const RtSegm
 
 	int nBlobAttrs = 0;
 	for ( int i = 0; i < m_tSchema.GetAttrsCount(); i++ )
-		if ( sphIsBlobAttr ( m_tSchema.GetAttr(i).m_eAttrType ) )
-			++nBlobAttrs;
+		if ( sphIsBlobAttr ( m_tSchema.GetAttr(i) ) )
+			nBlobAttrs++;
 
 	RowID_t tNextRowID = 0;
 
@@ -2832,7 +2834,9 @@ bool RtIndex_c::CommitReplayable ( RtSegment_t * pNewSeg, const CSphVector<DocID
 		m_tSaveFinished.Lock();
 
 		int iSavedChunkId = -1;
-		if ( !SaveDiskChunk ( iTID, tGuard, tStat2Dump, bForceDump, &iSavedChunkId ) )
+		// fixme! report warnings
+		StrVec_t dWarnings;
+		if ( !SaveDiskChunk ( iTID, tGuard, tStat2Dump, bForceDump, &iSavedChunkId, dWarnings ) )
 		{
 			bSavedOk = false;
 			break;
@@ -3056,8 +3060,6 @@ bool RtIndex_c::WriteAttributes ( SaveDiskDataContext_t & tCtx, CSphString & sEr
 
 	tMinMaxBuilder.FinishCollect();
 	const CSphTightVector<CSphRowitem> & dMinMaxRows = tMinMaxBuilder.GetCollected();
-	const CSphRowitem * pMinRow = dMinMaxRows.Begin()+dMinMaxRows.GetLength()-iStride*2;
-	const CSphRowitem * pMaxRow = pMinRow+iStride;
 
 	// create the histograms
 	HistogramContainer_c tHistogramContainer;
@@ -3072,7 +3074,6 @@ bool RtIndex_c::WriteAttributes ( SaveDiskDataContext_t & tCtx, CSphString & sEr
 			Verify ( tHistogramContainer.Add ( pHistogram ) );
 			dHistograms.Add ( pHistogram );
 			dPOD.Add ( tAttr );
-			pHistogram->Setup ( sphGetRowAttr ( pMinRow, tAttr.m_tLocator ), sphGetRowAttr ( pMaxRow, tAttr.m_tLocator ) );
 		}
 	}
 
@@ -3604,8 +3605,7 @@ int RtIndex_c::GetNextChunkName ()
 	return m_iMaxChunkName;
 }
 
-
-bool RtIndex_c::SaveDiskChunk ( int64_t iTID, const SphChunkGuard_t & tGuard, const ChunkStats_t & tStats, bool bForce, int * pSavedChunkId )
+bool RtIndex_c::SaveDiskChunk ( int64_t iTID, const SphChunkGuard_t & tGuard, const ChunkStats_t & tStats, bool bForce, int * pSavedChunkId, StrVec_t & dWarnings )
 {
 	if ( tGuard.m_dRamChunks.IsEmpty() || m_bSaveDisabled )
 	{
@@ -3627,7 +3627,7 @@ bool RtIndex_c::SaveDiskChunk ( int64_t iTID, const SphChunkGuard_t & tGuard, co
 	// bring new disk chunk online
 	CreateFilenameBuilder_fn fnCreateFilenameBuilder = GetIndexFilenameBuilder();
 	CSphScopedPtr<FilenameBuilder_i> pFilenameBuilder ( fnCreateFilenameBuilder ? fnCreateFilenameBuilder ( m_sIndexName.cstr() ) : nullptr );
-	CSphIndex * pDiskChunk = PreallocDiskChunk ( sNewChunk.cstr(), iChunk, pFilenameBuilder.Ptr(), m_sLastError );
+	CSphIndex * pDiskChunk = PreallocDiskChunk ( sNewChunk.cstr(), iChunk, pFilenameBuilder.Ptr(), dWarnings, m_sLastError );
 	if ( !pDiskChunk )
 	{
 		sphWarning ( "rt: index %s failed to load disk chunk after RAM save: %s", m_sIndexName.cstr(), m_sLastError.cstr() );
@@ -3699,7 +3699,7 @@ bool RtIndex_c::SaveDiskChunk ( int64_t iTID, const SphChunkGuard_t & tGuard, co
 }
 
 
-CSphIndex * RtIndex_c::PreallocDiskChunk ( const char * sChunk, int iChunk, FilenameBuilder_i * pFilenameBuilder, CSphString & sError, const char * sName ) const
+CSphIndex * RtIndex_c::PreallocDiskChunk ( const char * sChunk, int iChunk, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings, CSphString & sError, const char * sName ) const
 {
 	MEMORY ( MEM_INDEX_DISK );
 
@@ -3719,7 +3719,7 @@ CSphIndex * RtIndex_c::PreallocDiskChunk ( const char * sChunk, int iChunk, File
 	if ( m_bDebugCheck )
 		pDiskChunk->SetDebugCheck ( m_bCheckIdDups, -1 );
 
-	if ( !pDiskChunk->Prealloc ( m_bPathStripped, pFilenameBuilder ) )
+	if ( !pDiskChunk->Prealloc ( m_bPathStripped, pFilenameBuilder, dWarnings ) )
 	{
 		sError.SetSprintf ( "disk chunk %s: prealloc failed: %s", sChunk, pDiskChunk->GetLastError().cstr() );
 		pDiskChunk = nullptr;
@@ -3729,7 +3729,7 @@ CSphIndex * RtIndex_c::PreallocDiskChunk ( const char * sChunk, int iChunk, File
 }
 
 
-bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath, DWORD & uVersion, bool & bRebuildInfixes )
+bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath, DWORD & uVersion, bool & bRebuildInfixes, StrVec_t & dWarnings )
 {
 	// check if we have a meta file (kinda-header)
 	CSphString sMeta;
@@ -3776,7 +3776,6 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 	CSphTokenizerSettings tTokenizerSettings;
 	CSphDictSettings tDictSettings;
 	CSphEmbeddedFiles tEmbeddedFiles;
-	CSphString sWarning;
 
 	// load them settings
 	DWORD uSettingsVer = rdMeta.GetDword();
@@ -3787,7 +3786,12 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 	if ( !tTokenizerSettings.Load ( pFilenameBuilder, rdMeta, tEmbeddedFiles, m_sLastError ) )
 		return false;
 
-	tDictSettings.Load ( rdMeta, tEmbeddedFiles, sWarning );
+	{
+		CSphString sWarning;
+		tDictSettings.Load ( rdMeta, tEmbeddedFiles, sWarning );
+		if ( !sWarning.IsEmpty() )
+			dWarnings.Add(sWarning);
+	}
 
 	m_bKeywordDict = tDictSettings.m_bWordDict;
 
@@ -3795,7 +3799,11 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 	DWORD uPrevAot = m_tSettings.m_uAotFilterMask;
 	m_tSettings.m_uAotFilterMask = sphParseMorphAot ( tDictSettings.m_sMorphology.cstr() );
 	if ( m_tSettings.m_uAotFilterMask!=uPrevAot )
-		sphWarning ( "index '%s': morphology option changed from config has no effect, ignoring", m_sIndexName.cstr() );
+	{
+		CSphString sWarning;
+		sWarning.SetSprintf ( "index '%s': morphology option changed from config has no effect, ignoring", m_sIndexName.cstr() );
+		dWarnings.Add(sWarning);
+	}
 
 	if ( bStripPath )
 	{
@@ -3805,7 +3813,6 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 	}
 
 	// recreate tokenizer
-	StrVec_t dWarnings;
 	m_pTokenizer = ISphTokenizer::Create ( tTokenizerSettings, &tEmbeddedFiles, pFilenameBuilder, dWarnings, m_sLastError );
 	if ( !m_pTokenizer )
 		return false;
@@ -3819,7 +3826,7 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 		return false;
 
 	if ( !m_sLastError.IsEmpty() )
-		sphWarning ( "%s", m_sLastError.cstr() );
+		dWarnings.Add(m_sLastError);
 
 	m_pTokenizer = ISphTokenizer::CreateMultiformFilter ( m_pTokenizer, m_pDict->GetMultiWordforms () );
 
@@ -3832,8 +3839,12 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 	bRebuildInfixes = ( iBloomKeyLen!=BLOOM_PER_ENTRY_VALS_COUNT || iBloomHashesCount!=BLOOM_HASHES_COUNT );
 
 	if ( bRebuildInfixes )
-		sphWarning ( "infix definition changed (from len=%d, hashes=%d to len=%d, hashes=%d) - rebuilding...",
+	{
+		CSphString sWarning;
+		sWarning.SetSprintf ( "infix definition changed (from len=%d, hashes=%d to len=%d, hashes=%d) - rebuilding...",
 			(int)BLOOM_PER_ENTRY_VALS_COUNT, (int)BLOOM_HASHES_COUNT, iBloomKeyLen, iBloomHashesCount );
+		dWarnings.Add(sWarning);
+	}
 
 	FieldFilterRefPtr_c pFieldFilter;
 	CSphFieldFilterSettings tFieldFilterSettings;
@@ -3857,7 +3868,7 @@ bool RtIndex_c::LoadMeta ( FilenameBuilder_i * pFilenameBuilder, bool bStripPath
 }
 
 
-bool RtIndex_c::PreallocDiskChunks ( FilenameBuilder_i * pFilenameBuilder )
+bool RtIndex_c::PreallocDiskChunks ( FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings )
 {
 	// load disk chunks, if any
 	ARRAY_FOREACH ( iName, m_dChunkNames )
@@ -3865,7 +3876,7 @@ bool RtIndex_c::PreallocDiskChunks ( FilenameBuilder_i * pFilenameBuilder )
 		int iChunkIndex = m_dChunkNames[iName];
 		CSphString sChunk;
 		sChunk.SetSprintf ( "%s.%d", m_sPath.cstr(), iChunkIndex );
-		CSphIndex * pIndex = PreallocDiskChunk ( sChunk.cstr(), iChunkIndex, pFilenameBuilder, m_sLastError );
+		CSphIndex * pIndex = PreallocDiskChunk ( sChunk.cstr(), iChunkIndex, pFilenameBuilder, dWarnings, m_sLastError );
 		if ( !pIndex )
 			sphDie ( "%s", m_sLastError.cstr() );
 
@@ -3891,7 +3902,7 @@ bool RtIndex_c::PreallocDiskChunks ( FilenameBuilder_i * pFilenameBuilder )
 }
 
 
-bool RtIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder )
+bool RtIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder, StrVec_t & dWarnings )
 {
 	MEMORY ( MEM_INDEX_RT );
 
@@ -3924,7 +3935,7 @@ bool RtIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder
 
 	DWORD uVersion = 0;
 	bool bRebuildInfixes = false;
-	if ( !LoadMeta ( pFilenameBuilder, bStripPath, uVersion, bRebuildInfixes ) )
+	if ( !LoadMeta ( pFilenameBuilder, bStripPath, uVersion, bRebuildInfixes, dWarnings ) )
 		return false;
 
 	if ( m_bDebugCheck )
@@ -3938,7 +3949,7 @@ bool RtIndex_c::Prealloc ( bool bStripPath, FilenameBuilder_i * pFilenameBuilder
 
 	m_bPathStripped = bStripPath;
 
-	if ( !PreallocDiskChunks ( pFilenameBuilder ) )
+	if ( !PreallocDiskChunks ( pFilenameBuilder, dWarnings ) )
 		return false;
 
 	// load ram chunk
@@ -4181,7 +4192,7 @@ bool RtIndex_c::LoadRamChunk ( DWORD uVersion, bool bRebuildInfixes )
 			return false;
 
 		if ( bRebuildInfixes )
-				BuildSegmentInfixes ( pSeg, bHasMorphology, m_bKeywordDict, m_tSettings.m_iMinInfixLen, m_iWordsCheckpoint, ( m_iMaxCodepointLength>1 ), m_tSettings.m_eHitless );
+			BuildSegmentInfixes ( pSeg, bHasMorphology, m_bKeywordDict, m_tSettings.m_iMinInfixLen, m_iWordsCheckpoint, ( m_iMaxCodepointLength>1 ), m_tSettings.m_eHitless );
 
 		pSeg->BuildDocID2RowIDMap();
 		dRamChunk = AdoptSegment ( pSeg );
@@ -4836,6 +4847,7 @@ int RtIndex_c::DebugCheckDisk ( DebugCheckError_c & tReporter, FILE * fp )
 	}
 
 	int iFailsPlain = 0;
+	StrVec_t dWarnings;
 	ARRAY_FOREACH ( i, dChunks )
 	{
 		int iChunk = dChunks[i];
@@ -4843,7 +4855,7 @@ int RtIndex_c::DebugCheckDisk ( DebugCheckError_c & tReporter, FILE * fp )
 		sChunk.SetSprintf ( "%s.%d", m_sPath.cstr(), iChunk );
 		tReporter.Msg ( "checking disk chunk, extension %d, %d(%d)...", dChunks[i], i, m_dChunkNames.GetLength() );
 
-		CSphScopedPtr<CSphIndex> pIndex ( PreallocDiskChunk ( sChunk.cstr(), iChunk, pFilenameBuilder.Ptr(), m_sLastError ) );
+		CSphScopedPtr<CSphIndex> pIndex ( PreallocDiskChunk ( sChunk.cstr(), iChunk, pFilenameBuilder.Ptr(), dWarnings, m_sLastError ) );
 		if ( pIndex.Ptr() )
 		{
 			iFailsPlain += pIndex->DebugCheck ( fp );
@@ -4855,9 +4867,13 @@ int RtIndex_c::DebugCheckDisk ( DebugCheckError_c & tReporter, FILE * fp )
 		}
 	}
 
+	for ( const auto & i : dWarnings )
+		tReporter.Msg ( "warning: %s", i.cstr() );
+
 	return iFailsPlain;
 }
 
+
 void RtIndex_c::SetDebugCheck ( bool bCheckIdDups, int iCheckChunk )
 {
 	m_bDebugCheck = true;
@@ -5782,16 +5798,8 @@ void SetupStarDict ( DictRefPtr_c& pDict, ISphTokenizer * pTokenizer )
 	pDict = new CSphDictStarV8 ( pDict, true );
 }
 
-class SphRtFinalMatchCalc_c : public ISphMatchProcessor, ISphNoncopyable
+class SphRtFinalMatchCalc_c : public MatchProcessor_i, ISphNoncopyable
 {
-private:
-	const CSphQueryContext &	m_tCtx;
-	int							m_iSeg = 0;
-	int							m_iSegments;
-	// count per segments matches
-	// to skip iteration of matches at sorter and pool setup for segment without matches at sorter
-	CSphBitvec					m_dSegments;
-
 public:
 	SphRtFinalMatchCalc_c ( int iSegments, const CSphQueryContext & tCtx )
 		: m_tCtx ( tCtx )
@@ -5816,12 +5824,20 @@ public:
 		return ( iSeg==0 || bSegmentGotRows );
 	}
 
-	bool HasSegments () const
-	{
-		return ( m_iSeg==0 || m_dSegments.BitCount()>0 );
-	}
+	bool HasSegments () const							{ return ( m_iSeg==0 || m_dSegments.BitCount()>0 );	}
+	void Process ( CSphMatch * pMatch ) final			{ ProcessMatch ( pMatch ); }
+	void Process ( VecTraits_T<CSphMatch *> & dMatches ){ dMatches.for_each ( [this]( CSphMatch * pMatch ){ ProcessMatch(pMatch); } ); }
+	bool ProcessInRowIdOrder() const final				{ return m_tCtx.m_dCalcFinal.any_of ( []( const CSphQueryContext::CalcItem_t & i ){ return i.m_pExpr && i.m_pExpr->IsColumnar(); } );	}
 
-	void Process ( CSphMatch * pMatch ) final
+private:
+	const CSphQueryContext &	m_tCtx;
+	int							m_iSeg = 0;
+	int							m_iSegments;
+	// count per segments matches
+	// to skip iteration of matches at sorter and pool setup for segment without matches at sorter
+	CSphBitvec					m_dSegments;
+
+	inline void ProcessMatch ( CSphMatch * pMatch )
 	{
 		int iMatchSegment = pMatch->m_iTag-1;
 		if ( iMatchSegment==m_iSeg && pMatch->m_pStatic )
@@ -5998,19 +6014,10 @@ struct DiskChunkSearcherCloneCtx_t
 	}
 };
 
-void QueryDiskChunks ( const CSphQuery & tQuery,
-		CSphQueryResultMeta& tResult,
-		const CSphMultiQueryArgs & tArgs,
-		SphChunkGuard_t& tGuard,
-		VecTraits_T<ISphMatchSorter *>& dSorters,
-		QueryProfile_c * pProfiler,
-		bool bGotLocalDF,
-		const SmallStringHash_T<int64_t> * pLocalDocs,
-		int64_t iTotalDocs,
-		const char * szIndexName,
-		VecTraits_T<const BYTE*>& dDiskBlobPools,
-		int64_t tmMaxTimer
-		)
+
+void QueryDiskChunks ( const CSphQuery & tQuery, CSphQueryResultMeta & tResult, const CSphMultiQueryArgs & tArgs, SphChunkGuard_t & tGuard, VecTraits_T<ISphMatchSorter *> & dSorters,
+		QueryProfile_c * pProfiler, bool bGotLocalDF, const SmallStringHash_T<int64_t> * pLocalDocs, int64_t iTotalDocs, const char * szIndexName, VecTraits_T<const BYTE*> & dDiskBlobPools,
+		int64_t tmMaxTimer )
 {
 	if ( tGuard.m_dDiskChunks.IsEmpty() )
 		return;
@@ -6100,11 +6107,10 @@ void QueryDiskChunks ( const CSphQuery & tQuery,
 	dCtx.Finalize();
 }
 
-void FinalExpressionCalculation( CSphQueryContext& tCtx,
-		const VecTraits_T<RtSegmentRefPtf_t>& dRamChunks,
-		VecTraits_T<ISphMatchSorter *>& dSorters )
+
+void FinalExpressionCalculation ( CSphQueryContext & tCtx, const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks, VecTraits_T<ISphMatchSorter *> & dSorters )
 {
-	if ( tCtx.m_dCalcFinal.IsEmpty () )
+	if ( tCtx.m_dCalcFinal.IsEmpty() )
 		return;
 
 	const int iSegmentsTotal = dRamChunks.GetLength ();
@@ -6125,31 +6131,15 @@ void FinalExpressionCalculation( CSphQueryContext& tCtx,
 }
 
 // perform initial query transformations and expansion.
-int PrepareFTSearch ( const RtIndex_c * pThis,
-		bool bIsStarDict,
-		bool bKeywordDict,
-		int iExpandKeywords,
-		int iExpansionLimit,
-		const char * sModifiedQuery,
-		const CSphIndexSettings& tSettings,
-		const QueryParser_i * pQueryParser,
-		const CSphQuery& tQuery,
-		const CSphSchema & tSchema,
-		void * pIndexData,
-		ISphTokenizer * pTokenizer,
-		ISphTokenizer * pQueryTokenizer,
-		CSphDict* pDict,
-		CSphQueryResultMeta& tMeta,
-		QueryProfile_c* pProfiler,
-		CSphScopedPayload* pPayloads,
-		XQQuery_t & tParsed )
+static int PrepareFTSearch ( const RtIndex_c * pThis, bool bIsStarDict, bool bKeywordDict, int iExpandKeywords, int iExpansionLimit, const char * szModifiedQuery, const CSphIndexSettings & tSettings,
+		const QueryParser_i * pQueryParser, const CSphQuery & tQuery, const CSphSchema & tSchema, void * pIndexData, ISphTokenizer * pTokenizer, ISphTokenizer * pQueryTokenizer, CSphDict * pDict,
+		CSphQueryResultMeta & tMeta, QueryProfile_c * pProfiler, CSphScopedPayload * pPayloads, XQQuery_t & tParsed )
 {
 	// OPTIMIZE! make a lightweight clone here? and/or remove double clone?
 	TokenizerRefPtr_c pQueryTokenizerJson { pTokenizer };
 	sphSetupQueryTokenizer ( pQueryTokenizerJson, bIsStarDict, tSettings.m_bIndexExactWords, true );
 
-	if ( !pQueryParser->ParseQuery ( tParsed, sModifiedQuery, &tQuery, pQueryTokenizer
-									 , pQueryTokenizerJson, &tSchema, pDict, tSettings ) )
+	if ( !pQueryParser->ParseQuery ( tParsed, szModifiedQuery, &tQuery, pQueryTokenizer, pQueryTokenizerJson, &tSchema, pDict, tSettings ) )
 	{
 		tMeta.m_sError = tParsed.m_sParseError;
 		return 0;
@@ -6192,16 +6182,12 @@ int PrepareFTSearch ( const RtIndex_c * pThis,
 
 		tParsed.m_pRoot = sphExpandXQNode ( tParsed.m_pRoot, tExpCtx ); // here magics happens
 	}
+
 	return ConsiderStack ( tParsed.m_pRoot, tMeta.m_sError );
 }
 
-// setup filters
-bool SetupFilters ( const CSphQuery & tQuery,
-		const ISphSchema* pSchema,
-		bool bFullscan,
-		CSphQueryContext & tCtx,
-		CSphString& sError,
-		CSphString& sWarning )
+
+static bool SetupFilters ( const CSphQuery & tQuery, const ISphSchema * pSchema, bool bFullscan, CSphQueryContext & tCtx, CSphString & sError, CSphString & sWarning )
 {
 	CreateFilterContext_t tFlx;
 	tFlx.m_pFilters = &tQuery.m_dFilters;
@@ -6209,19 +6195,13 @@ bool SetupFilters ( const CSphQuery & tQuery,
 	tFlx.m_pSchema = pSchema;
 	tFlx.m_eCollation = tQuery.m_eCollation;
 	tFlx.m_bScan = bFullscan;
+
 	return tCtx.CreateFilters ( tFlx, sError, sWarning );
 }
 
-void PerformFullScan ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
-		int iMaxDynamicSize,
-		int iIndexWeight,
-		int iStride,
-		int iCutoff,
-		int64_t tmMaxTimer,
-		QueryProfile_c* pProfiler,
-		CSphQueryContext& tCtx,
-		VecTraits_T<ISphMatchSorter*>& dSorters,
-		CSphString& sWarning )
+
+void PerformFullScan ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks, int iMaxDynamicSize, int iIndexWeight, int iStride, int iCutoff, int64_t tmMaxTimer, QueryProfile_c* pProfiler,
+		CSphQueryContext & tCtx, VecTraits_T<ISphMatchSorter*> & dSorters, CSphString & sWarning )
 {
 	bool bRandomize = dSorters[0]->m_bRandomize;
 
@@ -6292,23 +6272,16 @@ void PerformFullScan ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
 	}
 }
 
-bool DoFullScanQuery ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
-		const ISphSchema& tMaxSorterSchema,
-		const CSphQuery& tQuery,
-		int iIndexWeight,
-		int iStride,
-		int64_t tmMaxTimer,
-		QueryProfile_c* pProfiler,
-		CSphQueryContext& tCtx,
-		VecTraits_T<ISphMatchSorter*>& dSorters,
-		CSphQueryResultMeta& tMeta )
+
+static bool DoFullScanQuery ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks, const ISphSchema & tMaxSorterSchema, const CSphQuery & tQuery, int iIndexWeight, int iStride, int64_t tmMaxTimer,
+		QueryProfile_c * pProfiler, CSphQueryContext & tCtx, VecTraits_T<ISphMatchSorter*> & dSorters, CSphQueryResultMeta & tMeta )
 {
 	// probably redundant, but just in case
 	SwitchProfile ( pProfiler, SPH_QSTATE_INIT );
 
 	// search segments no looking to max_query_time
 	// FIXME!!! move searching at segments before disk chunks as result set is safe with kill-lists
-	if ( !dRamChunks.IsEmpty () )
+	if ( !dRamChunks.IsEmpty() )
 	{
 		if ( !SetupFilters ( tQuery, &tMaxSorterSchema, true, tCtx, tMeta.m_sError, tMeta.m_sWarning ) )
 			return false;
@@ -6318,22 +6291,17 @@ bool DoFullScanQuery ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
 		int iCutoff = tQuery.m_iCutoff;
 		if ( iCutoff<=0 )
 			iCutoff = -1;
-		PerformFullScan ( dRamChunks, tMaxSorterSchema.GetDynamicSize (), iIndexWeight, iStride
-						  , iCutoff, tmMaxTimer, pProfiler, tCtx, dSorters, tMeta.m_sWarning );
+
+		PerformFullScan ( dRamChunks, tMaxSorterSchema.GetDynamicSize(), iIndexWeight, iStride, iCutoff, tmMaxTimer, pProfiler, tCtx, dSorters, tMeta.m_sWarning );
 	}
 
 	FinalExpressionCalculation ( tCtx, dRamChunks, dSorters );
 	return true;
 }
 
-void PerformFullTextSearch ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
-		RtQwordSetup_t& tTermSetup,
-		ISphRanker* pRanker,
-		int iIndexWeight,
-		int iCutoff,
-		QueryProfile_c* pProfiler,
-		CSphQueryContext& tCtx,
-		VecTraits_T<ISphMatchSorter*>& dSorters )
+
+void PerformFullTextSearch ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks, RtQwordSetup_t & tTermSetup, ISphRanker * pRanker, int iIndexWeight, int iCutoff, QueryProfile_c * pProfiler,
+		CSphQueryContext & tCtx, VecTraits_T<ISphMatchSorter*> & dSorters )
 {
 	bool bRandomize = dSorters[0]->m_bRandomize;
 	// query matching
@@ -6416,20 +6384,10 @@ void PerformFullTextSearch ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
 	}
 }
 
-bool DoFullTextSearch ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
-		const ISphSchema& tMaxSorterSchema,
-		const CSphQuery& tQuery,
-		const char* szIndexName,
-		int iIndexWeight,
-		int iMatchPoolSize,
-		int iStackNeed,
-		RtQwordSetup_t& tTermSetup,
-		QueryProfile_c* pProfiler,
-		CSphQueryContext& tCtx,
-		VecTraits_T<ISphMatchSorter*>& dSorters,
-		XQQuery_t& tParsed,
-		CSphQueryResultMeta& tMeta,
-		ISphMatchSorter* pSorter )
+
+static bool DoFullTextSearch ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks, const ISphSchema & tMaxSorterSchema, const CSphQuery & tQuery, const char * szIndexName, int iIndexWeight,
+	int iMatchPoolSize, int iStackNeed, RtQwordSetup_t & tTermSetup, QueryProfile_c * pProfiler, CSphQueryContext & tCtx, VecTraits_T<ISphMatchSorter*> & dSorters, XQQuery_t & tParsed,
+	CSphQueryResultMeta & tMeta, ISphMatchSorter * pSorter )
 {
 	// set zonespanlist settings
 	tParsed.m_bNeedSZlist = tQuery.m_bZSlist;
@@ -6491,14 +6449,13 @@ bool DoFullTextSearch ( const VecTraits_T<RtSegmentRefPtf_t> & dRamChunks,
 // FIXME! missing MVA, index_exact_words support
 // FIXME? any chance to factor out common backend agnostic code?
 // FIXME? do we need to support pExtraFilters?
-bool RtIndex_c::MultiQuery ( CSphQueryResult & tResult, const CSphQuery & tQuery,
-		const VecTraits_T<ISphMatchSorter *> & dAllSorters, const CSphMultiQueryArgs & tArgs ) const
+bool RtIndex_c::MultiQuery ( CSphQueryResult & tResult, const CSphQuery & tQuery, const VecTraits_T<ISphMatchSorter *> & dAllSorters, const CSphMultiQueryArgs & tArgs ) const
 {
 	// to avoid the checking of a ppSorters's element for NULL on every next step,
 	// just filter out all nulls right here
 	CSphVector<ISphMatchSorter*> dSorters;
 	dSorters.Reserve ( dAllSorters.GetLength() );
-	dAllSorters.Apply ([&dSorters] ( ISphMatchSorter* p) { if ( p ) dSorters.Add(p); });
+	dAllSorters.Apply ([&dSorters] ( ISphMatchSorter* p ) { if ( p ) dSorters.Add(p); });
 	auto& tMeta = *tResult.m_pMeta;
 
 	// if we have anything to work with
@@ -6581,9 +6538,8 @@ bool RtIndex_c::MultiQuery ( CSphQueryResult & tResult, const CSphQuery & tQuery
 
 	CSphVector<const BYTE *> dDiskBlobPools ( tGuard.m_dDiskChunks.GetLength() );
 
-	if ( !tGuard.m_dDiskChunks.IsEmpty () )
-		QueryDiskChunks ( tQuery,tMeta,tArgs,tGuard,dSorters,pProfiler,bGotLocalDF,pLocalDocs,iTotalDocs,
-				m_sIndexName.cstr(), dDiskBlobPools, tmMaxTimer);
+	if ( !tGuard.m_dDiskChunks.IsEmpty() )
+		QueryDiskChunks ( tQuery, tMeta, tArgs, tGuard, dSorters, pProfiler, bGotLocalDF, pLocalDocs, iTotalDocs, m_sIndexName.cstr(), dDiskBlobPools, tmMaxTimer );
 
 	////////////////////
 	// search RAM chunk
@@ -6609,8 +6565,11 @@ bool RtIndex_c::MultiQuery ( CSphQueryResult & tResult, const CSphQuery & tQuery
 	tCtx.m_iTotalDocs = iTotalDocs;
 	tCtx.m_uPackedFactorFlags = tArgs.m_uPackedFactorFlags;
 
-	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tSchema, nullptr, dSorterSchemas ) )
-		return false;
+#if USE_COLUMNAR
+	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tSchema, nullptr, nullptr, dSorterSchemas ) ) return false;
+#else
+	if ( !tCtx.SetupCalc ( tMeta, tMaxSorterSchema, m_tSchema, nullptr, dSorterSchemas ) ) return false;
+#endif
 
 	// setup search terms
 	RtQwordSetup_t tTermSetup ( tGuard );
@@ -6685,11 +6644,9 @@ bool RtIndex_c::MultiQuery ( CSphQueryResult & tResult, const CSphQuery & tQuery
 
 	bool bResult;
 	if ( bFullscan || pQueryParser->IsFullscan ( tParsed ) )
-		bResult = DoFullScanQuery ( tGuard.m_dRamChunks, tMaxSorterSchema, tQuery, tArgs.m_iIndexWeight, m_iStride,
-				tmMaxTimer, pProfiler, tCtx, dSorters, tMeta );
+		bResult = DoFullScanQuery ( tGuard.m_dRamChunks, tMaxSorterSchema, tQuery, tArgs.m_iIndexWeight, m_iStride, tmMaxTimer, pProfiler, tCtx, dSorters, tMeta );
 	else
-		bResult = DoFullTextSearch ( tGuard.m_dRamChunks, tMaxSorterSchema, tQuery, m_sIndexName.cstr (),
-				tArgs.m_iIndexWeight, iMatchPoolSize, iStackNeed, tTermSetup, pProfiler, tCtx, dSorters,
+		bResult = DoFullTextSearch ( tGuard.m_dRamChunks, tMaxSorterSchema, tQuery, m_sIndexName.cstr (), tArgs.m_iIndexWeight, iMatchPoolSize, iStackNeed, tTermSetup, pProfiler, tCtx, dSorters,
 				tParsed, tMeta, dSorters.GetLength()==1 ? dSorters[0] : nullptr );
 
 	if (!bResult)
@@ -6776,7 +6733,7 @@ struct CSphRtQueryFilter : public ISphQueryFilter, public ISphNoncopyable
 	}
 };
 
-static void HashKeywords ( CSphVector<CSphKeywordInfo> & dKeywords, SmallStringHash_T<CSphKeywordInfo> & hKeywords )
+void HashKeywords ( CSphVector<CSphKeywordInfo> & dKeywords, SmallStringHash_T<CSphKeywordInfo> & hKeywords )
 {
 	for ( CSphKeywordInfo & tSrc : dKeywords )
 	{
@@ -7195,11 +7152,11 @@ bool RtIndex_c::AddRemoveAttribute ( bool bAdd, const CSphString & sAttrName, ES
 
 	bool bHadBlobs = false;
 	for ( int i = 0; i < m_tSchema.GetAttrsCount(); i++ )
-		bHadBlobs |= sphIsBlobAttr ( m_tSchema.GetAttr(i).m_eAttrType );
+		bHadBlobs |= sphIsBlobAttr ( m_tSchema.GetAttr(i));
 
 	bool bHaveBlobs = false;
 	for ( int i = 0; i < tNewSchema.GetAttrsCount(); i++ )
-		bHaveBlobs |= sphIsBlobAttr ( tNewSchema.GetAttr(i).m_eAttrType );
+		bHaveBlobs |= sphIsBlobAttr ( tNewSchema.GetAttr(i) );
 
 	bool bBlob = sphIsBlobAttr ( eAttrType );
 	bool bBlobsModified = bBlob && ( bAdd || bHaveBlobs==bHadBlobs );
@@ -7246,7 +7203,7 @@ bool RtIndex_c::AddRemoveAttribute ( bool bAdd, const CSphString & sAttrName, ES
 // MAGIC CONVERSIONS
 //////////////////////////////////////////////////////////////////////////
 
-bool RtIndex_c::AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, CSphString & sError )
+bool RtIndex_c::AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, StrVec_t & dWarnings, CSphString & sError )
 {
 	bFatal = false;
 	bool bEmptyRT = ( ( !m_dRamChunks.GetLength() && !m_dDiskChunks.GetLength() ) || bTruncate );
@@ -7288,7 +7245,7 @@ bool RtIndex_c::AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFa
 		{	// scope for SphChunkGuard_t. Fixme! Check if it is necessary in current context
 			RlChunkGuard_t tGuard ( m_tReading );
 			GetReaderChunks ( tGuard );
-			if ( !SaveDiskChunk ( m_iTID, tGuard, tStats, true, nullptr ) )
+			if ( !SaveDiskChunk ( m_iTID, tGuard, tStats, true, nullptr, dWarnings ) )
 			{
 				bFatal = true;
 				return false;
@@ -7516,7 +7473,8 @@ CSphIndex * RtIndex_c::CompressDiskChunk ( const CSphIndex * pChunk )
 	auto fnFnameBuilder = GetIndexFilenameBuilder ();
 	CSphScopedPtr<FilenameBuilder_i> pFilenameBuilder { fnFnameBuilder ? fnFnameBuilder ( m_sIndexName.cstr () ) : nullptr };
 
-	return PreallocDiskChunk ( sCompressed.cstr(), pChunk->m_iChunk, pFilenameBuilder.Ptr(), sError, pChunk->GetName() );
+	StrVec_t dWarnings; // FIXME! report warnings
+	return PreallocDiskChunk ( sCompressed.cstr(), pChunk->m_iChunk, pFilenameBuilder.Ptr(), dWarnings, sError, pChunk->GetName() );
 }
 
 int64_t RtIndex_c::NumAliveDocs ( const CSphIndex * pChunk )
@@ -7551,7 +7509,6 @@ bool RtIndex_c::SkipOrDrop ( int iChunk, const CSphIndex * pChunk, int64_t * pAl
 	return false;
 }
 
-
 bool RtIndex_c::CompressOneChunk ( int iChunk )
 {
 	const CSphIndex * pChunk = nullptr;
@@ -7707,8 +7664,8 @@ bool RtIndex_c::SplitOneChunk ( int iChunk, const char* szUvarFilter )
 	CSphScopedPtr<FilenameBuilder_i> pFilenameBuilder {
 			fnFnameBuilder ? fnFnameBuilder ( m_sIndexName.cstr () ) : nullptr };
 
-	CSphScopedPtr<CSphIndex> pChunkE { PreallocDiskChunk ( sMerged.cstr (), pOldChunk->m_iChunk,
-		pFilenameBuilder.Ptr(), sError, pOldChunk->GetName () ) };
+	StrVec_t dWarnings;	// FIXME! report warnings
+	CSphScopedPtr<CSphIndex> pChunkE { PreallocDiskChunk ( sMerged.cstr(), pOldChunk->m_iChunk, pFilenameBuilder.Ptr(), dWarnings, sError, pOldChunk->GetName() ) };
 
 	// if everything or nothing is alive after filter applied - fast break, nothing to do.
 	auto iNewAlives = NumAliveDocs ( pChunkE.Ptr() );
@@ -7751,8 +7708,7 @@ bool RtIndex_c::SplitOneChunk ( int iChunk, const char* szUvarFilter )
 	if ( m_bOptimizeStop || m_bSaveDisabled || sphInterrupted () )
 		return false;
 
-	CSphScopedPtr<CSphIndex> pChunkI { PreallocDiskChunk ( sMerged.cstr (), pOldChunk->m_iChunk,
-		pFilenameBuilder.Ptr (), sError, pOldChunk->GetName () ) };
+	CSphScopedPtr<CSphIndex> pChunkI { PreallocDiskChunk ( sMerged.cstr(), pOldChunk->m_iChunk, pFilenameBuilder.Ptr (), dWarnings, sError, pOldChunk->GetName() ) };
 
 	CSphString sChunkI;
 	int iChunkI;
@@ -7872,11 +7828,12 @@ bool RtIndex_c::MergeTwoChunks ( int iA, int iB )
 	}
 	sMergedID.SetSprintf ( "%s.%d", m_sPath.cstr (), iMergedID );
 
-	CSphScopedPtr<CSphIndex> pMerged ( PreallocDiskChunk ( sMerged.cstr(), iMergedID, pFilenameBuilder.Ptr(), sError, pOlder->GetName() ) );
+	StrVec_t dWarnings;	// FIXME! report warnings
+	CSphScopedPtr<CSphIndex> pMerged ( PreallocDiskChunk ( sMerged.cstr(), iMergedID, pFilenameBuilder.Ptr(), dWarnings, sError, pOlder->GetName() ) );
+
 	if ( !pMerged )
 	{
-		sphWarning ( "rt optimize: index %s: failed to load merged chunk (error %s)",
-			m_sIndexName.cstr(), sError.cstr() );
+		sphWarning ( "rt optimize: index %s: failed to load merged chunk (error %s)", m_sIndexName.cstr(), sError.cstr() );
 		return false;
 	}
 	// check forced exit after long operation
@@ -7884,11 +7841,11 @@ bool RtIndex_c::MergeTwoChunks ( int iA, int iB )
 		return false;
 
 	// lets rotate indexes
+
 	// rename merged disk chunk to 0
 	if ( !pMerged->Rename ( sMergedID.cstr() ) )
 	{
-		sphWarning ( "rt optimize: index %s: merged to cur rename failed (error %s)",
-			m_sIndexName.cstr(), pMerged->GetLastError().cstr() );
+		sphWarning ( "rt optimize: index %s: merged to cur rename failed (error %s)", m_sIndexName.cstr(), pMerged->GetLastError().cstr() );
 		return false;
 	}
 

+ 3 - 4
src/sphinxrt.h

@@ -29,7 +29,7 @@ class RtAccum_t;
 class RtIndex_i : public CSphIndex
 {
 public:
-	explicit RtIndex_i ( const char * sIndexName, const char * sFileName ) : CSphIndex ( sIndexName, sFileName ) {}
+	RtIndex_i ( const char * sIndexName, const char * sFileName ) : CSphIndex ( sIndexName, sFileName ) {}
 
 	/// get internal schema (to use for Add calls)
 	virtual const CSphSchema & GetInternalSchema () const { return m_tSchema; }
@@ -37,8 +37,7 @@ public:
 
 	/// insert/update document in current txn
 	/// fails in case of two open txns to different indexes
-	virtual bool AddDocument ( const VecTraits_T<VecTraits_T<const char >> &dFields, CSphMatch & tDoc,
-		bool bReplace, const CSphString & sTokenFilterOptions, const char ** ppStr, const VecTraits_T<int64_t> & dMvas,
+	virtual bool AddDocument ( const VecTraits_T<VecTraits_T<const char>> & dFields, CSphMatch & tDoc, bool bReplace, const CSphString & sTokenFilterOptions, const char ** ppStr, const VecTraits_T<int64_t> & dMvas,
 		CSphString & sError, CSphString & sWarning, RtAccum_t * pAccExt ) = 0;
 
 	/// delete document in current txn
@@ -63,7 +62,7 @@ public:
 	virtual bool ForceDiskChunk () = 0;
 
 	/// attach a disk chunk to current index
-	virtual bool AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, CSphString & sError ) = 0;
+	virtual bool AttachDiskIndex ( CSphIndex * pIndex, bool bTruncate, bool & bFatal, StrVec_t & dWarnings, CSphString & sError ) = 0;
 
 	/// truncate index (that is, kill all data)
 	virtual bool Truncate ( CSphString & sError ) = 0;

+ 2 - 3
src/sphinxsearch.cpp

@@ -2925,7 +2925,7 @@ public:
 		return -1;
 	}
 
-	ISphExpr * CreateNode ( int iID, ISphExpr * _pLeft, ESphEvalStage *, CSphString & ) final
+	ISphExpr * CreateNode ( int iID, ISphExpr * _pLeft, ESphEvalStage *, bool *, CSphString & ) final
 	{
 		SafeAddRef ( _pLeft );
 		CSphRefcountedPtr<ISphExpr> pLeft ( _pLeft );
@@ -3179,8 +3179,7 @@ public:
 
 /// initialize ranker state
 template < bool NEED_PACKEDFACTORS, bool HANDLE_DUPES >
-bool RankerState_Expr_fn<NEED_PACKEDFACTORS, HANDLE_DUPES>::Init ( int iFields, const int * pWeights, ExtRanker_T<true> * pRanker, CSphString & sError,
-																	DWORD uFactorFlags )
+bool RankerState_Expr_fn<NEED_PACKEDFACTORS, HANDLE_DUPES>::Init ( int iFields, const int * pWeights, ExtRanker_T<true> * pRanker, CSphString & sError,	DWORD uFactorFlags )
 {
 	m_iFields = iFields;
 	m_pWeights = pWeights;

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 373 - 232
src/sphinxsort.cpp


+ 194 - 0
src/sphinxsort.h

@@ -0,0 +1,194 @@
+//
+// Copyright (c) 2017-2020, Manticore Software LTD (http://manticoresearch.com)
+// Copyright (c) 2001-2016, Andrew Aksyonoff
+// Copyright (c) 2008-2016, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+#ifndef _sphinxsort_
+#define _sphinxsort_
+
+#include "sphinx.h"
+#include "sortsetup.h"
+
+#if USE_COLUMNAR
+namespace columnar
+{
+	class Columnar_i;
+}
+#endif
+
+
+class MatchProcessor_i
+{
+public:
+	virtual			~MatchProcessor_i () {}
+	virtual void	Process ( CSphMatch * pMatch ) = 0;
+	virtual void	Process ( VecTraits_T<CSphMatch *> & dMatches ) = 0;
+	virtual bool	ProcessInRowIdOrder() const = 0;
+};
+
+using fnGetBlobPoolFromMatch = std::function< const BYTE* ( const CSphMatch * )>;
+
+/// generic match sorter interface
+class ISphMatchSorter
+{
+public:
+	bool				m_bRandomize = false;
+	int64_t				m_iTotal = 0;
+
+	RowID_t				m_iJustPushed {INVALID_ROWID};
+	int					m_iMatchCapacity = 0;
+	CSphTightVector<RowID_t> m_dJustPopped;
+
+	virtual				~ISphMatchSorter() {}
+
+	/// check if this sorter does groupby
+	virtual bool		IsGroupby() const = 0;
+
+	/// set match comparator state
+	void				SetState ( const CSphMatchComparatorState & tState );
+
+	/// get match comparator stat
+	const CSphMatchComparatorState & GetState() const { return m_tState; }
+
+	/// set group comparator state
+	virtual void		SetGroupState ( const CSphMatchComparatorState & ) {}
+
+	/// set blob pool pointer (for string+groupby sorters)
+	virtual void		SetBlobPool ( const BYTE * ) {}
+
+#if USE_COLUMNAR
+	/// set columnar (to work with columnar attributes)
+	virtual void		SetColumnar ( columnar::Columnar_i * pColumnar );
+#endif
+
+	/// set sorter schema
+	virtual void		SetSchema ( ISphSchema * pSchema, bool bRemapCmp );
+
+	/// get incoming schema
+	const ISphSchema *	GetSchema() const { return ( ISphSchema *) m_pSchema; }
+
+	/// base push
+	/// returns false if the entry was rejected as duplicate
+	/// returns true otherwise (even if it was not actually inserted)
+	virtual bool		Push ( const CSphMatch & tEntry ) = 0;
+
+	/// submit pre-grouped match. bNewSet indicates that the match begins the bunch of matches got from one source
+	virtual bool		PushGrouped ( const CSphMatch & tEntry, bool bNewSet ) = 0;
+
+	/// get	rough entries count, due of aggregate filtering phase
+	virtual int			GetLength() const = 0;
+
+	/// get total count of non-duplicates Push()ed through this queue
+	int64_t				GetTotalCount() const { return m_iTotal; }
+
+	/// process collected entries up to length count
+	virtual void		Finalize ( MatchProcessor_i & tProcessor, bool bCallProcessInResultSetOrder ) = 0;
+
+	/// store all entries into specified location and remove them from the queue
+	/// entries are stored in properly sorted order
+	/// return sorted entries count, might be less than length due of aggregate filtering phase
+	virtual int			Flatten ( CSphMatch * pTo ) = 0;
+
+	/// get a pointer to the worst element, NULL if there is no fixed location
+	virtual const CSphMatch * GetWorst() const { return nullptr; }
+
+
+	/// returns whether the sorter can be cloned to distribute processing over multi threads
+	/// (delete and update sorters are too complex by side effects and can't be cloned)
+	virtual bool		CanBeCloned() const { return true; }
+
+	/// make same sorter (for MT processing)
+	virtual ISphMatchSorter * Clone() const = 0;
+
+	/// move resultset into target
+	virtual void		MoveTo ( ISphMatchSorter * pRhs ) = 0;
+
+	/// makes the same sorter
+	void				CloneTo ( ISphMatchSorter * pTrg ) const;
+
+	const CSphMatchComparatorState & GetComparatorState() const { return m_tState; }
+
+	/// set attributes list these should copied into result set \ final matches
+	void				SetFilteredAttrs ( const sph::StringSet & hAttrs, bool bAddDocid );
+
+	/// transform collected matches into standalone (copy all pooled attrs to ptrs, drop unused)
+	/// param fnBlobPoolFromMatch provides pool pointer from currently processed match pointer.
+	void				TransformPooled2StandalonePtrs ( fnGetBlobPoolFromMatch fnBlobPoolFromMatch );
+
+protected:
+	SharedPtr_t<ISphSchema*>	m_pSchema;	///< sorter schema (adds dynamic attributes on top of index schema)
+	CSphMatchComparatorState	m_tState;		///< protected to set m_iNow automatically on SetState() calls
+	StrVec_t					m_dTransformed;
+
+#if USE_COLUMNAR
+	columnar::Columnar_i *		m_pColumnar;
+#endif
+};
+
+
+struct CmpPSortersByRandom_fn
+{
+	inline static bool IsLess ( const ISphMatchSorter * a, const ISphMatchSorter * b )
+	{
+		assert ( a );
+		assert ( b );
+		return a->m_bRandomize<b->m_bRandomize;
+	}
+};
+
+
+class BlobPool_c
+{
+public:
+	virtual void	SetBlobPool ( const BYTE * pBlobPool ) { m_pBlobPool = pBlobPool; }
+	const BYTE *	GetBlobPool () const { return m_pBlobPool; }
+
+protected:
+	const BYTE *	m_pBlobPool {nullptr};
+};
+
+/// groupby key type
+typedef int64_t SphGroupKey_t;
+
+/// base grouper (class that computes groupby key)
+class CSphGrouper : public BlobPool_c, public ISphRefcountedMT
+{
+public:
+	virtual SphGroupKey_t	KeyFromValue ( SphAttr_t uValue ) const = 0;
+	virtual SphGroupKey_t	KeyFromMatch ( const CSphMatch & tMatch ) const = 0;
+	virtual void			GetLocator ( CSphAttrLocator & tOut ) const = 0;
+	virtual ESphAttr		GetResultType () const = 0;
+	virtual CSphGrouper *	Clone() const = 0;
+
+#if USE_COLUMNAR
+	virtual void			SetColumnar ( const columnar::Columnar_i * pColumnar ) {}
+#endif
+
+protected:
+	virtual					~CSphGrouper () {}; // =default causes bunch of errors building on wheezy
+};
+
+const char *	GetInternalAttrPrefix();
+int 			GetStringRemapCount ( const ISphSchema & tDstSchema, const ISphSchema & tSrcSchema );
+bool			IsSortStringInternal ( const CSphString & sColumnName );
+bool			IsSortJsonInternal ( const CSphString & sColumnName );
+CSphString		SortJsonInternalSet ( const CSphString & sColumnName );
+
+/// creates proper queue for given query
+/// may return NULL on error; in this case, error message is placed in sError
+/// if the pUpdate is given, creates the updater's queue and perform the index update
+/// instead of searching
+ISphMatchSorter * sphCreateQueue ( const SphQueueSettings_t & tQueue, const CSphQuery & tQuery, CSphString & sError, SphQueueRes_t & tRes, StrVec_t * pExtra = nullptr );
+
+void sphCreateMultiQueue ( const SphQueueSettings_t & tQueue, const VecTraits_T<CSphQuery> & dQueries, VecTraits_T<ISphMatchSorter *> & dSorters, VecTraits_T<CSphString> & dErrors,
+	SphQueueRes_t & tRes, StrVec_t * pExtra );
+
+
+#endif // _sphinxsort_

+ 36 - 5
src/sphinxstd.h

@@ -549,6 +549,22 @@ inline double sqr ( double v ) { return v*v;}
 /// float argument squared
 inline float fsqr ( float v ) { return v*v; }
 
+#ifndef FORCE_INLINE
+#  ifdef _MSC_VER
+#    define FORCE_INLINE __forceinline
+#  else
+#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#      ifdef __GNUC__
+#        define FORCE_INLINE inline __attribute__((always_inline))
+#      else
+#        define FORCE_INLINE inline
+#      endif
+#    else
+#      define FORCE_INLINE
+#    endif
+#  endif
+#endif
+
 //////////////////////////////////////////////////////////////////////////
 // RANDOM NUMBERS GENERATOR
 //////////////////////////////////////////////////////////////////////////
@@ -1908,12 +1924,24 @@ public:
 	/// copy + move
 	// if provided lvalue, it will be copied into rhs via copy ctr, then swapped to *this
 	// if provided rvalue, it will just pass to SwapData immediately.
-	Vector_T &operator= ( Vector_T<T> rhs ) noexcept
+	Vector_T & operator = ( Vector_T<T> rhs ) noexcept
 	{
 		SwapData ( rhs );
 		return *this;
 	}
 
+	bool operator == ( const Vector_T<T> & rhs ) noexcept
+	{
+		if ( m_iCount!=rhs.m_iCount )
+			return false;
+
+		for ( int i = 0; i < m_iCount; i++ )
+			if ( m_pData[i]!=rhs.m_pData[i] )
+				return false;
+
+		return true;
+	}
+
 	/// memmove N elements from raw pointer to the end
 	/// works ONLY if T is POD type (i.e. may be simple memmoved)
 	/// otherwize compile error will appear (if so, use typed version below).
@@ -2926,8 +2954,9 @@ inline void Swap ( CSphString & v1, CSphString & v2 )
 	v1.Swap ( v2 );
 }
 
-// commonly used vector of strings
+// commonly used vectors
 using StrVec_t = CSphVector<CSphString>;
+using IntVec_t = CSphVector<int>;
 
 // vector of byte vectors
 using BlobVec_t = CSphVector<CSphVector<BYTE> >;
@@ -5681,11 +5710,13 @@ public:
  * Use m_dLogger.Print() either as direct call, either as 'evaluate expression' in debugger.
  */
 
-#define LOC_ADD LocMessages_c    m_dLogger
-#define LOC_SWAP( RHS ) m_dLogger.Swap(RHS.m_dLogger)
-#define LOC_MSG m_dLogger.GetLoc()
+#define LOC_ADD LocMessages_c    m_tLogger
+#define LOC_SWAP( RHS ) m_tLogger.Swap(RHS.m_tLogger)
+#define LOC_MSG m_tLogger.GetLoc()
 #define LOC( Level, Component ) \
     if_const (LOG_LEVEL_##Level) \
         LOC_MSG << LOG_COMPONENT_##Component
 
+using ByteBlob_t = std::pair<const BYTE *, int>;
+
 #endif // _sphinxstd_

+ 11 - 0
src/sphinxutils.cpp

@@ -786,6 +786,7 @@ static KeyDesc_t g_dKeysIndex[] =
 	{ "killlist_target",		0, nullptr },
 	{ "read_buffer_docs",		0, nullptr },
 	{ "read_buffer_hits",		0, nullptr },
+	{ "read_buffer_columnar",	0, nullptr },
 	{ "read_unhinted",			0, nullptr },
 	{ "access_plain_attrs",		0, nullptr },
 	{ "access_blob_attrs",		0, nullptr },
@@ -796,6 +797,15 @@ static KeyDesc_t g_dKeysIndex[] =
 	{ "docstore_block_size",	0, nullptr },
 	{ "docstore_compression",	0, nullptr },
 	{ "docstore_compression_level",	0, nullptr },
+#if USE_COLUMNAR
+	{ "columnar_attrs",			0, nullptr },
+	{ "columnar_strings_no_hash", 0, nullptr },
+	{ "columnar_compression_uint32", 0, nullptr },
+	{ "columnar_compression_int64", 0, nullptr },
+	{ "columnar_subblock",		0, nullptr },
+	{ "columnar_subblock_mva",	0, nullptr },
+	{ "columnar_minmax_leaf",	0, nullptr },
+#endif
 	{ nullptr,					0, nullptr }
 };
 
@@ -847,6 +857,7 @@ static KeyDesc_t g_dKeysSearchd[] =
 	{ "read_buffer",			KEY_DEPRECATED, "read_buffer_docs or read_buffer_hits" },
 	{ "read_buffer_docs",		0, NULL },
 	{ "read_buffer_hits",		0, NULL },
+	{ "read_buffer_columnar",	0, NULL },
 	{ "read_unhinted",			0, NULL },
 	{ "max_batch_queries",		0, NULL },
 	{ "subtree_docs_cache",		0, NULL },

+ 3 - 1
src/sphinxversion.cpp

@@ -38,14 +38,16 @@
 
 #define PRODUCT_VERSION          VERNUMBERS " " SPH_GIT_COMMIT_ID "@" GIT_TIMESTAMP_ID " " BANNER_TAG
 #define PRODUCT_NAME			"Manticore " PRODUCT_VERSION
-#define PRODUCT_BANNER            PRODUCT_NAME "\nCopyright (c) 2001-2016, Andrew Aksyonoff\n" \
+#define PRODUCT_BANNER_TEXT		"\nCopyright (c) 2001-2016, Andrew Aksyonoff\n" \
 	"Copyright (c) 2008-2016, Sphinx Technologies Inc (http://sphinxsearch.com)\n" \
 	"Copyright (c) 2017-2021, Manticore Software LTD (https://manticoresearch.com)\n\n"
 
+#define PRODUCT_BANNER            PRODUCT_NAME PRODUCT_BANNER_TEXT
 
 const char * szMANTICORE_VERSION = PRODUCT_VERSION;
 const char * szMANTICORE_NAME = PRODUCT_NAME;
 const char * szMANTICORE_BANNER = PRODUCT_BANNER;
+const char * szMANTICORE_BANNER_TEXT = PRODUCT_BANNER_TEXT;
 const char * szGIT_COMMIT_ID = SPH_GIT_COMMIT_ID;
 const char * szGIT_BRANCH_ID = GIT_BRANCH_ID;
 const char * szGDB_SOURCE_DIR = GDB_SOURCE_DIR;

+ 3 - 1
src/testrt.cpp

@@ -13,6 +13,7 @@
 #include "sphinx.h"
 #include "sphinxrt.h"
 #include "sphinxutils.h"
+#include "sphinxsort.h"
 #include "searchdaemon.h"
 
 #if HAVE_RTESTCONFIG_H
@@ -626,7 +627,8 @@ int main ( int argc, char ** argv )
 	RtIndex_i * pIndex = sphCreateIndexRT ( tSchema, "testrt", 32*1024*1024, DATAFLD "dump", false );
 	pIndex->SetTokenizer ( pTok ); // index will own this pair from now on
 	pIndex->SetDictionary ( pDict );
-	if ( !pIndex->Prealloc ( false, nullptr ) )
+	StrVec_t dWarnings;
+	if ( !pIndex->Prealloc ( false, nullptr, dWarnings ) )
 		sphDie ( "prealloc failed: %s", pIndex->GetLastError().cstr() );
 	pIndex->PostSetup();
 	g_pIndex = pIndex;

+ 7 - 1
test/helpers.inc

@@ -3853,7 +3853,7 @@ function CopyDirContents ( $from, $to )
 
 function CheckConfig ( $config, $path )
 {
-	global $g_re2, $g_icu, $g_odbc, $windows, $g_locals, $mysql_simulated, $g_repli, $g_ssl;
+	global $g_re2, $g_icu, $g_odbc, $windows, $g_locals, $mysql_simulated, $g_repli, $g_ssl, $g_columnar;
 	
 	if ( $config->Requires("non-windows") && $windows )
 	{
@@ -3899,6 +3899,12 @@ function CheckConfig ( $config, $path )
         }
     }
 
+	if ( $config->Requires("columnar") && !$g_columnar )
+	{
+		printf ( "SKIPPING %s, %s - compile with columnar support to run this test\n", $path, $config->Name () );
+		return false;
+	}
+
 	if ( $config->Requires("php_mysql") && $mysql_simulated )
 	{
 		printf ( "SKIPPING %s, %s - need php_mysql (not simulated by mysqli) to run this test\n", $path, $config->Name () );

+ 7 - 0
test/settings.inc

@@ -38,6 +38,7 @@ $g_model	= false;
 $g_re2		= false;
 $g_icu		= false;
 $g_odbc     = false;
+$g_columnar	= false;
 $g_strict	= false;
 $g_skipdemo = false;
 $g_usemarks = true; // that we mark the output of every test in the searchd.log and query.log
@@ -182,6 +183,12 @@ function GuessODBC ()
     $g_odbc = GuessFeature ( 'test_odbc.conf','failed to configure some of the sources');
 }
 
+function GuessColumnar()
+{
+    global $g_columnar;
+    $g_columnar = GuessFeature ( 'test_columnar.conf','no columnar support compiled');
+}
+
 function GetVersion ()
 {
 	$banner = GetBanner();

+ 1 - 1
test/test_020/test.xml

@@ -123,7 +123,7 @@ index mva_mixed
 
 <query mode="extended2" index="test" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
 <query mode="extended2" index="test2" groupattr="time" groupfunc="year" groupdistinct="tAG"></query>
-<query mode="extended2" index="test,test2" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
+<query mode="extended2" index="test,test2" groupattr="time" groupfunc="year" groupdistinct="tag" sortmode="extended" sortby="id asc"></query>
 
 <query mode="extended2" index="mini1,mini2" groupattr="time" groupfunc="year" groupdistinct="tag"></query>
 

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_039/model.bin


+ 6 - 6
test/test_039/test.xml

@@ -117,9 +117,9 @@ index rt
 <num_agents>2</num_agents>
 
 <sphqueries>
-<sphinxql>SELECT * FROM dist</sphinxql>
+<sphinxql>SELECT * FROM dist order by id asc</sphinxql>
 <sphinxql>UPDATE dist SET group_id=123 WHERE id=11</sphinxql>
-<sphinxql>SELECT * FROM dist</sphinxql>
+<sphinxql>SELECT * FROM dist order by id asc</sphinxql>
 
 <!-- regression crash on update of RT index that matches documents from different disk chunks-->
 <sphinxql>INSERT INTO rt (id, gid, title) VALUES (1001, 11, 'x10010 x10011 x10012 x10013 x10014 x10015 x10016 x10017 x10018 x10019 x100110 x100111 x100112 x100113 x100114 x100115 x100116 x100117 x100118 x100119 x100120 x100121 x100122 x100123 x100124 x100125 x100126 x100127 x100128 x100129 ')</sphinxql>
@@ -136,7 +136,7 @@ index rt
 
 <sphinxql>UPDATE i1, i2 SET gid=333 WHERE id IN ( 2, 102 )</sphinxql>
 <sphinxql>UPDATE i1, rt SET gid=333 WHERE id IN ( 3, 1003, 1333 )</sphinxql>
-<sphinxql>SELECT * FROM i1, i2, rt WHERE id IN ( 1,2,3,4, 101,102,103,104, 1001,1002,1003,1004, 1331,1332,1333,1334, 2001, 2002, 2003, 2004 )</sphinxql>
+<sphinxql>SELECT * FROM i1, i2, rt WHERE id IN ( 1,2,3,4, 101,102,103,104, 1001,1002,1003,1004, 1331,1332,1333,1334, 2001, 2002, 2003, 2004 ) order by id asc</sphinxql>
 
 <sphinxql>INSERT INTO rt (id, gid, title) VALUES (2001, 11, 'x20010 x20011 x20012 x20013 x20014 x20015 x20016 x20017 x20018 x20019 x200110 x200111 x200112 x200113 x200114 x200115 x200116 x200117 x200118 x200119 x200120 x200121 x200122 x200123 x200124 x200125 x200126 x200127 x200128 x200129 ')</sphinxql>
 <sphinxql>INSERT INTO rt (id, gid, title) VALUES (2002, 11, 'x20020 x20021 x20022 x20023 x20024 x20025 x20026 x20027 x20028 x20029 x200210 x200211 x200212 x200213 x200214 x200215 x200216 x200217 x200218 x200219 x200220 x200221 x200222 x200223 x200224 x200225 x200226 x200227 x200228 x200229 ')</sphinxql>
@@ -144,11 +144,11 @@ index rt
 <sphinxql>INSERT INTO rt (id, gid, title) VALUES (2004, 11, 'x20040 x20041 x20042 x20043 x20044 x20045 x20046 x20047 x20048 x20049 x200410 x200411 x200412 x200413 x200414 x200415 x200416 x200417 x200418 x200419 x200420 x200421 x200422 x200423 x200424 x200425 x200426 x200427 x200428 x200429 ')</sphinxql>
 
 <sphinxql>UPDATE rt SET gid=444 WHERE id IN ( 3, 1003, 1333, 2003 )</sphinxql>
-<sphinxql>SELECT * FROM i1, i2, rt WHERE id IN ( 1,2,3,4, 101,102,103,104, 1001,1002,1003,1004, 1331,1332,1333,1334, 2001, 2002, 2003, 2004 )</sphinxql>
+<sphinxql>SELECT * FROM i1, i2, rt WHERE id IN ( 1,2,3,4, 101,102,103,104, 1001,1002,1003,1004, 1331,1332,1333,1334, 2001, 2002, 2003, 2004 ) order by id asc</sphinxql>
 
 <!-- regression @uservars at agents only -->
 <sphinxql>SET GLOBAL @var1 = (3,4,12,13)</sphinxql>
-<sphinxql>SELECT * FROM dist</sphinxql>
+<sphinxql>SELECT * FROM dist order by id asc</sphinxql>
 <sphinxql>SELECT * FROM dist WHERE id in @var1</sphinxql>
 
 <sphinxql>RECONNECT</sphinxql>
@@ -169,7 +169,7 @@ index rt
 <sphinxql>SELECT * FROM dist2 WHERE id in @var2</sphinxql>
 <sphinxql>SELECT * FROM block1 WHERE id in @var3</sphinxql>
 <sphinxql>SET INDEX dist GLOBAL @var3 = (1,4,11,12)</sphinxql>
-<sphinxql>SELECT * FROM dist WHERE id in @var3</sphinxql>
+<sphinxql>SELECT * FROM dist WHERE id in @var3 order by id asc</sphinxql>
 <sphinxql>SELECT * FROM block1 WHERE id in @var3</sphinxql>
 </sphqueries>
 

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_041/model.bin


+ 1 - 1
test/test_048/model.bin

@@ -1 +1 @@
-a:1:{i:0;a:14:{i:0;a:2:{s:8:"sphinxql";s:59:"create table rt (t1 text indexed stored, gid int) type='rt'";s:14:"total_affected";i:0;}i:1;a:2:{s:8:"sphinxql";s:61:"insert into rt (id,t1,gid) values (1,'test',10),(2,'test',10)";s:14:"total_affected";i:2;}i:2;a:2:{s:8:"sphinxql";s:17:"flush ramchunk rt";s:14:"total_affected";i:0;}i:3;a:2:{s:8:"sphinxql";s:47:"insert into rt (id,t1,gid) values (3,'test',10)";s:14:"total_affected";i:1;}i:4;a:3:{s:8:"sphinxql";s:16:"select * from rt";s:10:"total_rows";i:3;s:4:"rows";a:3:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"gid";s:2:"10";s:2:"t1";s:4:"test";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"gid";s:2:"10";s:2:"t1";s:4:"test";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"gid";s:2:"10";s:2:"t1";s:4:"test";}}}i:5;a:3:{s:8:"sphinxql";s:55:"SELECT gid g, COUNT(distinct id) b FROM rt group by gid";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:6;a:3:{s:8:"sphinxql";s:53:"SELECT gid g, COUNT(distinct id) b FROM rt group by g";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:7;a:2:{s:8:"sphinxql";s:17:"flush ramchunk rt";s:14:"total_affected";i:0;}i:8;a:3:{s:8:"sphinxql";s:55:"SELECT gid g, COUNT(distinct id) b FROM rt group by gid";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:9;a:3:{s:8:"sphinxql";s:53:"SELECT gid g, COUNT(distinct id) b FROM rt group by g";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:10;a:2:{s:8:"sphinxql";s:17:"optimize index rt";s:14:"total_affected";i:0;}i:11;a:3:{s:8:"sphinxql";s:55:"SELECT gid g, COUNT(distinct id) b FROM rt group by gid";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:12;a:3:{s:8:"sphinxql";s:53:"SELECT gid g, COUNT(distinct id) b FROM rt group by g";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:13;a:2:{s:8:"sphinxql";s:13:"drop table rt";s:14:"total_affected";i:0;}}}
+a:1:{i:0;a:14:{i:0;a:2:{s:8:"sphinxql";s:59:"create table rt (t1 text indexed stored, gid int) type='rt'";s:14:"total_affected";i:0;}i:1;a:2:{s:8:"sphinxql";s:61:"insert into rt (id,t1,gid) values (1,'test',10),(2,'test',10)";s:14:"total_affected";i:2;}i:2;a:2:{s:8:"sphinxql";s:17:"flush ramchunk rt";s:14:"total_affected";i:0;}i:3;a:2:{s:8:"sphinxql";s:47:"insert into rt (id,t1,gid) values (3,'test',10)";s:14:"total_affected";i:1;}i:4;a:3:{s:8:"sphinxql";s:32:"select * from rt order by id asc";s:10:"total_rows";i:3;s:4:"rows";a:3:{i:0;a:3:{s:2:"id";s:1:"1";s:3:"gid";s:2:"10";s:2:"t1";s:4:"test";}i:1;a:3:{s:2:"id";s:1:"2";s:3:"gid";s:2:"10";s:2:"t1";s:4:"test";}i:2;a:3:{s:2:"id";s:1:"3";s:3:"gid";s:2:"10";s:2:"t1";s:4:"test";}}}i:5;a:3:{s:8:"sphinxql";s:55:"SELECT gid g, COUNT(distinct id) b FROM rt group by gid";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:6;a:3:{s:8:"sphinxql";s:53:"SELECT gid g, COUNT(distinct id) b FROM rt group by g";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:7;a:2:{s:8:"sphinxql";s:17:"flush ramchunk rt";s:14:"total_affected";i:0;}i:8;a:3:{s:8:"sphinxql";s:55:"SELECT gid g, COUNT(distinct id) b FROM rt group by gid";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:9;a:3:{s:8:"sphinxql";s:53:"SELECT gid g, COUNT(distinct id) b FROM rt group by g";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:10;a:2:{s:8:"sphinxql";s:17:"optimize index rt";s:14:"total_affected";i:0;}i:11;a:3:{s:8:"sphinxql";s:55:"SELECT gid g, COUNT(distinct id) b FROM rt group by gid";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:12;a:3:{s:8:"sphinxql";s:53:"SELECT gid g, COUNT(distinct id) b FROM rt group by g";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:1:"g";s:2:"10";s:1:"b";s:1:"3";}}}i:13;a:2:{s:8:"sphinxql";s:13:"drop table rt";s:14:"total_affected";i:0;}}}

+ 1 - 1
test/test_048/test.xml

@@ -17,7 +17,7 @@ searchd
     insert into rt (id,t1,gid) values (1,'test',10),(2,'test',10);
     flush ramchunk rt;
     insert into rt (id,t1,gid) values (3,'test',10);
-    select * from rt;
+    select * from rt order by id asc;
     SELECT gid g, COUNT(distinct id) b FROM rt group by gid;
     SELECT gid g, COUNT(distinct id) b FROM rt group by g;
     flush ramchunk rt;

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_057/model.bin


+ 12 - 12
test/test_057/test.xml

@@ -51,20 +51,20 @@ index rt
 <sphinxql>
 	REPLACE INTO rt ( id, text ) VALUES ( 5, 'test me' ),( 15, 'test framework' );
 	flush ramchunk rt;
-	select id, weight() as w from rt where match ('test');
-	select id, weight() as w from rt where match ('best');
+	select id, weight() as w from rt where match ('test') order by weight() desc, id asc;
+	select id, weight() as w from rt where match ('best') order by weight() desc, id asc;
 </sphinxql>
 
 <!-- remove 3rd disk chunk by values -->
 <sphinxql>
 	DELETE FROM rt WHERE id=5;
-	select id, weight() as w from rt where match ('test');
-	select id, weight() as w from rt where match ('best');
+	select id, weight() as w from rt where match ('test') order by weight() desc, id asc;
+	select id, weight() as w from rt where match ('best') order by weight() desc, id asc;
 	optimize index rt option sync=1, cutoff=3;
 	select count(*) chunks, sum(disk_bytes) bytes from rt.status;
 	DELETE FROM rt WHERE id=15;
-	select id, weight() as w from rt where match ('test');
-	select id, weight() as w from rt where match ('best');
+	select id, weight() as w from rt where match ('test') order by weight() desc, id asc;
+	select id, weight() as w from rt where match ('best') order by weight() desc, id asc;
 	optimize index rt option sync=1, cutoff=3;
 	select count(*) chunks, sum(disk_bytes) bytes from rt.status;
 </sphinxql>
@@ -72,11 +72,11 @@ index rt
 <!-- remove couple of previous values and compare result after optimize -->
 <sphinxql>
 	DELETE FROM rt WHERE id in (3,13);
-	select id, weight() as w from rt where match ('test');
-	select id, weight() as w from rt where match ('best');
+	select id, weight() as w from rt where match ('test') order by weight() desc, id asc;
+	select id, weight() as w from rt where match ('best') order by weight() desc, id asc;
 	optimize index rt option sync=1, cutoff=2;
-	select id, weight() as w from rt where match ('test');
-	select id, weight() as w from rt where match ('best');
+	select id, weight() as w from rt where match ('test') order by weight() desc, id asc;
+	select id, weight() as w from rt where match ('best') order by weight() desc, id asc;
 </sphinxql>
 
 <!-- only disk chunk -->
@@ -84,8 +84,8 @@ index rt
 	select count(*) chunks, sum(disk_bytes) bytes from rt.status;
 	optimize index rt option sync=1, cutoff=1;
 	select count(*) chunks, sum(disk_bytes) bytes from rt.status;
-	select id, weight() as w from rt where match ('test');
-	select id, weight() as w from rt where match ('best');
+	select id, weight() as w from rt where match ('test') order by weight() desc, id asc;
+	select id, weight() as w from rt where match ('best') order by weight() desc, id asc;
 </sphinxql>
 </sphqueries>
 </test>

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_066/model.bin


+ 1 - 1
test/test_066/test.xml

@@ -74,7 +74,7 @@ index test
 	select * from test.2;
 	select * from test.3;
 
-	select * from test
+	select * from test order by id asc;
 
 </SPHinxQL>
 </SPHQueries>

+ 1 - 1
test/test_067/test.xml

@@ -143,7 +143,7 @@ index testr
 <query index="dist3" select="gid,price,views,sum(views) as sumviews, avg(price) as avgprice" groupattr="gid"/>
 <query index="test" select="gid, sum(views) as sumviews, avg(price) as avgprice" groupattr="gid"/>
 <query index="test3" select="gid, sum(views) as sumviews, avg(price) as avgprice" groupattr="gid"/>
-<query index="test,test3" select="gid, sum(views) as sumviews, avg(price) as avgprice" groupattr="gid"/>
+<query index="test,test3" select="gid, sum(views) as sumviews, avg(price) as avgprice" groupattr="gid" sortmode="extended" sortby="id asc"/>
 </queries>
 
 <sphqueries>

+ 1 - 1
test/test_068/test.xml

@@ -228,7 +228,7 @@ index idx_large
 <query index="csv2">another</query>
 <query index="csv2">my</query>
 <!-- regression tail hits from ext-conditional node -->
-<query index="idx51" mode="extended2">^go | go$</query>
+<query index="idx51" mode="extended2" sortmode="extended" sortby="@weight desc, id asc">^go | go$</query>
 <!-- document end (^Z) at data source  -->
 <query index="tsv_spec" mode="extended2">"test one"</query>
 <query index="csv_spec" mode="extended2">"test one"</query>

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_087/model.bin


+ 3 - 3
test/test_087/test.xml

@@ -39,7 +39,7 @@ index rt_zs
 	<SPHinxQL>insert into test (id,content) values (2,'query defined')</SPHinxQL>
 	<SPHinxQL>select * from test where match('content')</SPHinxQL>
 	<SPHinxQL>select * from test where match('query')</SPHinxQL>
-	<SPHinxQL>select * from test</SPHinxQL>
+	<SPHinxQL>select * from test order by id asc</SPHinxQL>
 	<SPHinxQL>insert into test values (5,6,7,8)</SPHinxQL> <!-- must fail; must not crash -->
 	<SPHinxQL>insert into test values (5,6,7,8,9,10,11,12)</SPHinxQL> <!-- must fail -->
 	<SPHinxQL>insert into test ( id, gid, gid) values ( 123, 456, 789 )</SPHinxQL><!-- must fail -->
@@ -76,8 +76,8 @@ index rt_zs
 	<SphinxQL>insert into rt_zs (id, gid, title) values (28,1,'test')</SphinxQL>
 	<SphinxQL>insert into rt_zs (id, gid, title) values (29,1,'test')</SphinxQL>
 	<SphinxQL>insert into rt_zs (id, gid, title) values (30,1,'test')</SphinxQL>
-	<SphinxQL>select * from rt_zs</SphinxQL>
-	<SphinxQL>select * from rt_zs where match('test')</SphinxQL>
+	<SphinxQL>select * from rt_zs order by id asc</SphinxQL>
+	<SphinxQL>select * from rt_zs where match('test') order by weight() desc, id asc</SphinxQL>
 	<SphinxQL>replace into rt_zs (id, gid, title) values (1,1,'test'),(2,1,'test'),(3,1,'test'),(4,1,'test'),(5,1,'test'),(6,1,'test'),(7,1,'test'),(8,1,'test'),(9,1,'test'),(10,1,'test'),(11,1,'test'),(12,1,'test'),(13,1,'test'),(14,1,'test'),(15,1,'test'),(16,1,'test'),(17,1,'test'),(18,1,'test'),(19,1,'test'),(20,1,'test'),(21,1,'test'),(22,1,'test'),(23,1,'test'),(24,1,'test'),(25,1,'test'),(26,1,'test'),(27,1,'test'),(28,1,'test'),(29,1,'test'),(30,1,'test')</SphinxQL>
 	<SphinxQL>select * from rt_zs</SphinxQL>
 	<SphinxQL>select * from rt_zs where match('test')</SphinxQL>

+ 1 - 1
test/test_088/model.bin

@@ -1 +1 @@
-a:1:{i:0;a:5:{i:0;a:2:{s:8:"sphinxql";s:116:"insert into test (id, title, content) values (1,'title1','content1'),(2,'title2','content2'),(3,'title3','content3')";s:14:"total_affected";i:3;}i:1;a:2:{s:8:"sphinxql";s:73:"insert into test (id,title) values (4,'title1'),(5,'title5'),(6,'title6')";s:14:"total_affected";i:3;}i:2;a:2:{s:8:"sphinxql";s:81:"insert into test (id,content) values (7,'content7'),(8,'content8'),(9,'content9')";s:14:"total_affected";i:3;}i:3;a:2:{s:8:"sphinxql";s:80:"insert into test (title,content) values ('title','failure'),('title1','failure')";s:14:"total_affected";i:2;}i:4;a:3:{s:8:"sphinxql";s:18:"select * from test";s:10:"total_rows";i:11;s:4:"rows";a:11:{i:0;a:2:{s:2:"id";s:1:"1";s:5:"dummy";s:1:"0";}i:1;a:2:{s:2:"id";s:1:"2";s:5:"dummy";s:1:"0";}i:2;a:2:{s:2:"id";s:1:"3";s:5:"dummy";s:1:"0";}i:3;a:2:{s:2:"id";s:1:"4";s:5:"dummy";s:1:"0";}i:4;a:2:{s:2:"id";s:1:"5";s:5:"dummy";s:1:"0";}i:5;a:2:{s:2:"id";s:1:"6";s:5:"dummy";s:1:"0";}i:6;a:2:{s:2:"id";s:1:"7";s:5:"dummy";s:1:"0";}i:7;a:2:{s:2:"id";s:1:"8";s:5:"dummy";s:1:"0";}i:8;a:2:{s:2:"id";s:1:"9";s:5:"dummy";s:1:"0";}i:9;a:2:{s:2:"id";s:13:"1677721600001";s:5:"dummy";s:1:"0";}i:10;a:2:{s:2:"id";s:13:"1677721600002";s:5:"dummy";s:1:"0";}}}}}
+a:1:{i:0;a:5:{i:0;a:2:{s:8:"sphinxql";s:116:"insert into test (id, title, content) values (1,'title1','content1'),(2,'title2','content2'),(3,'title3','content3')";s:14:"total_affected";i:3;}i:1;a:2:{s:8:"sphinxql";s:73:"insert into test (id,title) values (4,'title1'),(5,'title5'),(6,'title6')";s:14:"total_affected";i:3;}i:2;a:2:{s:8:"sphinxql";s:81:"insert into test (id,content) values (7,'content7'),(8,'content8'),(9,'content9')";s:14:"total_affected";i:3;}i:3;a:2:{s:8:"sphinxql";s:80:"insert into test (title,content) values ('title','failure'),('title1','failure')";s:14:"total_affected";i:2;}i:4;a:3:{s:8:"sphinxql";s:34:"select * from test order by id asc";s:10:"total_rows";i:11;s:4:"rows";a:11:{i:0;a:2:{s:2:"id";s:1:"1";s:5:"dummy";s:1:"0";}i:1;a:2:{s:2:"id";s:1:"2";s:5:"dummy";s:1:"0";}i:2;a:2:{s:2:"id";s:1:"3";s:5:"dummy";s:1:"0";}i:3;a:2:{s:2:"id";s:1:"4";s:5:"dummy";s:1:"0";}i:4;a:2:{s:2:"id";s:1:"5";s:5:"dummy";s:1:"0";}i:5;a:2:{s:2:"id";s:1:"6";s:5:"dummy";s:1:"0";}i:6;a:2:{s:2:"id";s:1:"7";s:5:"dummy";s:1:"0";}i:7;a:2:{s:2:"id";s:1:"8";s:5:"dummy";s:1:"0";}i:8;a:2:{s:2:"id";s:1:"9";s:5:"dummy";s:1:"0";}i:9;a:2:{s:2:"id";s:13:"1677721600001";s:5:"dummy";s:1:"0";}i:10;a:2:{s:2:"id";s:13:"1677721600002";s:5:"dummy";s:1:"0";}}}}}

+ 1 - 1
test/test_088/test.xml

@@ -31,7 +31,7 @@ index test
 	<SPHinxQL>insert into test (id,title) values (4,'title1'),(5,'title5'),(6,'title6')</SPHinxQL>
 	<SPHinxQL>insert into test (id,content) values (7,'content7'),(8,'content8'),(9,'content9')</SPHinxQL>
 	<SPHinxQL>insert into test (title,content) values ('title','failure'),('title1','failure')</SPHinxQL>
-	<SPHinxQL>select * from test</SPHinxQL>
+	<SPHinxQL>select * from test order by id asc</SPHinxQL>
 </SPHQueries>
 
 </Test>

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_092/model.bin


+ 1 - 1
test/test_092/test.xml

@@ -64,7 +64,7 @@ index test105
 	<SPHinxQL>select * from test102 where match('newcontent')</SPHinxQL>
 	<SPHinxQL>insert into test104 (id,content) values (1,'word1'),(2,'word2')</SPHinxQL>
 	<SPHinxQL>insert into test104 (id,content) values (3,'word3')</SPHinxQL>
-	<SPHinxQL>select * from test104 where match('word1|word3')</SPHinxQL>
+	<SPHinxQL>select * from test104 where match('word1|word3') order by weight() desc, id asc</SPHinxQL>
 	<SPHinxQL>insert into test105 (id,content) values (1,'content'),(2,'newcontent')</SPHinxQL>
 	<SPHinxQL>replace into test105 (id,content) values (1,'contend')</SPHinxQL>
 	<SPHinxQL>select * from test105 where match('contend|newcontent')</SPHinxQL>

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_104/model.bin


+ 1 - 1
test/test_117/model.bin

@@ -1 +1 @@
-a:1:{i:0;a:1:{i:0;a:11:{i:0;s:66:"query 0: total_rows=2 q=select * from rt0 where match('submarine')";i:1;a:3:{s:2:"id";s:1:"1";s:3:"id1";s:2:"11";s:3:"id2";s:2:"11";}i:2;a:3:{s:2:"id";s:3:"301";s:3:"id1";s:2:"77";s:3:"id2";s:2:"77";}i:3;s:52:"query 1: total_rows=1 q=select * from rt0 where id=1";i:4;a:3:{s:2:"id";s:1:"1";s:3:"id1";s:2:"11";s:3:"id2";s:2:"11";}i:5;s:27:"query 0: total_affected=101";i:6;s:57:"query 0: total_rows=3 q=SELECT * FROM rt_mva WHERE id<203";i:7;a:3:{s:2:"id";s:3:"200";s:3:"idd";s:3:"200";s:4:"mva1";s:14:"1000,1001,1004";}i:8;a:3:{s:2:"id";s:3:"201";s:3:"idd";s:3:"201";s:4:"mva1";s:14:"1000,1001,1004";}i:9;a:3:{s:2:"id";s:3:"202";s:3:"idd";s:3:"202";s:4:"mva1";s:14:"1000,1001,1004";}i:10;i:1;}}}
+a:1:{i:0;a:1:{i:0;a:11:{i:0;s:97:"query 0: total_rows=2 q=select * from rt0 where match('submarine') order by weight() desc, id asc";i:1;a:3:{s:2:"id";s:1:"1";s:3:"id1";s:2:"11";s:3:"id2";s:2:"11";}i:2;a:3:{s:2:"id";s:3:"301";s:3:"id1";s:2:"77";s:3:"id2";s:2:"77";}i:3;s:83:"query 1: total_rows=1 q=select * from rt0 where id=1 order by weight() desc, id asc";i:4;a:3:{s:2:"id";s:1:"1";s:3:"id1";s:2:"11";s:3:"id2";s:2:"11";}i:5;s:27:"query 0: total_affected=101";i:6;s:57:"query 0: total_rows=3 q=SELECT * FROM rt_mva WHERE id<203";i:7;a:3:{s:2:"id";s:3:"200";s:3:"idd";s:3:"200";s:4:"mva1";s:14:"1000,1001,1004";}i:8;a:3:{s:2:"id";s:3:"201";s:3:"idd";s:3:"201";s:4:"mva1";s:14:"1000,1001,1004";}i:9;a:3:{s:2:"id";s:3:"202";s:3:"idd";s:3:"202";s:4:"mva1";s:14:"1000,1001,1004";}i:10;i:1;}}}

+ 1 - 1
test/test_117/test.xml

@@ -141,7 +141,7 @@ mysql_query ( "REPLACE INTO rt0 VALUES ( 1, 'submarine', 'submarine', 11, 11 )",
 $queries = array ( "match('submarine')", "id=1" );
 for ( $i=0; $i<count ( $queries ); $i++ )
 {
-	$results = array_merge ( $results, $query ( "select * from rt0 where $queries[$i]", $sock, $i ) );
+	$results = array_merge ( $results, $query ( "select * from rt0 where $queries[$i] order by weight() desc, id asc", $sock, $i ) );
 }
 
 // regression

+ 1 - 1
test/test_118/model.bin

@@ -1 +1 @@
-a:1:{i:0;a:6:{i:0;a:2:{s:8:"sphinxql";s:68:"insert into test (id,idd,content) values (1,1,'content'),(2,2,'you')";s:14:"total_affected";i:2;}i:1;a:2:{s:8:"sphinxql";s:63:"insert into test (id,idd,content) values (10,10,'dog cat fish')";s:14:"total_affected";i:1;}i:2;a:2:{s:8:"sphinxql";s:58:"insert into test (id,idd,content) values (11,11,'dog cat')";s:14:"total_affected";i:1;}i:3;a:3:{s:8:"sphinxql";s:18:"select * from test";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:2:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";}i:1;a:2:{s:2:"id";s:1:"2";s:3:"idd";s:1:"2";}i:2;a:2:{s:2:"id";s:2:"10";s:3:"idd";s:2:"10";}i:3;a:2:{s:2:"id";s:2:"11";s:3:"idd";s:2:"11";}}}i:4;a:3:{s:8:"sphinxql";s:46:"select * from test where match( 'dog << cat' )";s:10:"total_rows";i:2;s:4:"rows";a:2:{i:0;a:2:{s:2:"id";s:2:"10";s:3:"idd";s:2:"10";}i:1;a:2:{s:2:"id";s:2:"11";s:3:"idd";s:2:"11";}}}i:5;a:3:{s:8:"sphinxql";s:53:"select * from test where match( '(dog | cat) -fish' )";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:2:"id";s:2:"11";s:3:"idd";s:2:"11";}}}}}
+a:1:{i:0;a:6:{i:0;a:2:{s:8:"sphinxql";s:68:"insert into test (id,idd,content) values (1,1,'content'),(2,2,'you')";s:14:"total_affected";i:2;}i:1;a:2:{s:8:"sphinxql";s:63:"insert into test (id,idd,content) values (10,10,'dog cat fish')";s:14:"total_affected";i:1;}i:2;a:2:{s:8:"sphinxql";s:58:"insert into test (id,idd,content) values (11,11,'dog cat')";s:14:"total_affected";i:1;}i:3;a:3:{s:8:"sphinxql";s:34:"select * from test order by id asc";s:10:"total_rows";i:4;s:4:"rows";a:4:{i:0;a:2:{s:2:"id";s:1:"1";s:3:"idd";s:1:"1";}i:1;a:2:{s:2:"id";s:1:"2";s:3:"idd";s:1:"2";}i:2;a:2:{s:2:"id";s:2:"10";s:3:"idd";s:2:"10";}i:3;a:2:{s:2:"id";s:2:"11";s:3:"idd";s:2:"11";}}}i:4;a:3:{s:8:"sphinxql";s:46:"select * from test where match( 'dog << cat' )";s:10:"total_rows";i:2;s:4:"rows";a:2:{i:0;a:2:{s:2:"id";s:2:"10";s:3:"idd";s:2:"10";}i:1;a:2:{s:2:"id";s:2:"11";s:3:"idd";s:2:"11";}}}i:5;a:3:{s:8:"sphinxql";s:53:"select * from test where match( '(dog | cat) -fish' )";s:10:"total_rows";i:1;s:4:"rows";a:1:{i:0;a:2:{s:2:"id";s:2:"11";s:3:"idd";s:2:"11";}}}}}

+ 1 - 1
test/test_118/test.xml

@@ -30,7 +30,7 @@ index test
 	<SPHinxQL>insert into test (id,idd,content) values (10,10,'dog cat fish')</SPHinxQL>	
 	<SPHinxQL>insert into test (id,idd,content) values (11,11,'dog cat')</SPHinxQL>
 
-	<SPHinxQL>select * from test</SPHinxQL>
+	<SPHinxQL>select * from test order by id asc</SPHinxQL>
 	<SPHinxQL><![CDATA[select * from test where match( 'dog << cat' )]]></SPHinxQL>
 	<SPHinxQL>select * from test where match( '(dog | cat) -fish' )</SPHinxQL>
 </SPHQueries>

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
test/test_119/model.bin


이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.