sortsupport.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. /*-------------------------------------------------------------------------
  2. *
  3. * sortsupport.h
  4. * Framework for accelerated sorting.
  5. *
  6. * Traditionally, PostgreSQL has implemented sorting by repeatedly invoking
  7. * an SQL-callable comparison function "cmp(x, y) returns int" on pairs of
  8. * values to be compared, where the comparison function is the BTORDER_PROC
  9. * pg_amproc support function of the appropriate btree index opclass.
  10. *
  11. * This file defines alternative APIs that allow sorting to be performed with
  12. * reduced overhead. To support lower-overhead sorting, a btree opclass may
  13. * provide a BTSORTSUPPORT_PROC pg_amproc entry, which must take a single
  14. * argument of type internal and return void. The argument is actually a
  15. * pointer to a SortSupportData struct, which is defined below.
  16. *
  17. * If provided, the BTSORTSUPPORT function will be called during sort setup,
  18. * and it must initialize the provided struct with pointers to function(s)
  19. * that can be called to perform sorting. This API is defined to allow
  20. * multiple acceleration mechanisms to be supported, but no opclass is
  21. * required to provide all of them. The BTSORTSUPPORT function should
  22. * simply not set any function pointers for mechanisms it doesn't support.
  23. * Opclasses that provide BTSORTSUPPORT and don't provide a comparator
  24. * function will have a shim set up by sort support automatically. However,
  25. * opclasses that support the optional additional abbreviated key capability
  26. * must always provide an authoritative comparator used to tie-break
  27. * inconclusive abbreviated comparisons and also used when aborting
  28. * abbreviation. Furthermore, a converter and abort/costing function must be
  29. * provided.
  30. *
  31. * All sort support functions will be passed the address of the
  32. * SortSupportData struct when called, so they can use it to store
  33. * additional private data as needed. In particular, for collation-aware
  34. * datatypes, the ssup_collation field is set before calling BTSORTSUPPORT
  35. * and is available to all support functions. Additional opclass-dependent
  36. * data can be stored using the ssup_extra field. Any such data
  37. * should be allocated in the ssup_cxt memory context.
  38. *
  39. * Note: since pg_amproc functions are indexed by (lefttype, righttype)
  40. * it is possible to associate a BTSORTSUPPORT function with a cross-type
  41. * comparison. This could sensibly be used to provide a fast comparator
  42. * function for such cases, but probably not any other acceleration method.
  43. *
  44. *
  45. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  46. * Portions Copyright (c) 1994, Regents of the University of California
  47. *
  48. * src/include/utils/sortsupport.h
  49. *
  50. *-------------------------------------------------------------------------
  51. */
  52. #ifndef SORTSUPPORT_H
  53. #define SORTSUPPORT_H
  54. #include "access/attnum.h"
  55. #include "utils/relcache.h"
  56. typedef struct SortSupportData *SortSupport;
  57. typedef struct SortSupportData
  58. {
  59. /*
  60. * These fields are initialized before calling the BTSORTSUPPORT function
  61. * and should not be changed later.
  62. */
  63. MemoryContext ssup_cxt; /* Context containing sort info */
  64. Oid ssup_collation; /* Collation to use, or InvalidOid */
  65. /*
  66. * Additional sorting parameters; but unlike ssup_collation, these can be
  67. * changed after BTSORTSUPPORT is called, so don't use them in selecting
  68. * sort support functions.
  69. */
  70. bool ssup_reverse; /* descending-order sort? */
  71. bool ssup_nulls_first; /* sort nulls first? */
  72. /*
  73. * These fields are workspace for callers, and should not be touched by
  74. * opclass-specific functions.
  75. */
  76. AttrNumber ssup_attno; /* column number to sort */
  77. /*
  78. * ssup_extra is zeroed before calling the BTSORTSUPPORT function, and is
  79. * not touched subsequently by callers.
  80. */
  81. void *ssup_extra; /* Workspace for opclass functions */
  82. /*
  83. * Function pointers are zeroed before calling the BTSORTSUPPORT function,
  84. * and must be set by it for any acceleration methods it wants to supply.
  85. * The comparator pointer must be set, others are optional.
  86. */
  87. /*
  88. * Comparator function has the same API as the traditional btree
  89. * comparison function, ie, return <0, 0, or >0 according as x is less
  90. * than, equal to, or greater than y. Note that x and y are guaranteed
  91. * not null, and there is no way to return null either.
  92. *
  93. * This may be either the authoritative comparator, or the abbreviated
  94. * comparator. Core code may switch this over the initial preference of
  95. * an opclass support function despite originally indicating abbreviation
  96. * was applicable, by assigning the authoritative comparator back.
  97. */
  98. int (*comparator) (Datum x, Datum y, SortSupport ssup);
  99. /*
  100. * "Abbreviated key" infrastructure follows.
  101. *
  102. * All callbacks must be set by sortsupport opclasses that make use of
  103. * this optional additional infrastructure (unless for whatever reasons
  104. * the opclass doesn't proceed with abbreviation, in which case
  105. * abbrev_converter must not be set).
  106. *
  107. * This allows opclass authors to supply a conversion routine, used to
  108. * create an alternative representation of the underlying type (an
  109. * "abbreviated key"). This representation must be pass-by-value and
  110. * typically will use some ad-hoc format that only the opclass has
  111. * knowledge of. An alternative comparator, used only with this
  112. * alternative representation must also be provided (which is assigned to
  113. * "comparator"). This representation is a simple approximation of the
  114. * original Datum. It must be possible to compare datums of this
  115. * representation with each other using the supplied alternative
  116. * comparator, and have any non-zero return value be a reliable proxy for
  117. * what a proper comparison would indicate. Returning zero from the
  118. * alternative comparator does not indicate equality, as with a
  119. * conventional support routine 1, though -- it indicates that it wasn't
  120. * possible to determine how the two abbreviated values compared. A
  121. * proper comparison, using "abbrev_full_comparator"/
  122. * ApplySortAbbrevFullComparator() is therefore required. In many cases
  123. * this results in most or all comparisons only using the cheap
  124. * alternative comparison func, which is typically implemented as code
  125. * that compiles to just a few CPU instructions. CPU cache miss penalties
  126. * are expensive; to get good overall performance, sort infrastructure
  127. * must heavily weigh cache performance.
  128. *
  129. * Opclass authors must consider the final cardinality of abbreviated keys
  130. * when devising an encoding scheme. It's possible for a strategy to work
  131. * better than an alternative strategy with one usage pattern, while the
  132. * reverse might be true for another usage pattern. All of these factors
  133. * must be considered.
  134. */
  135. /*
  136. * "abbreviate" concerns whether or not the abbreviated key optimization
  137. * is applicable in principle (that is, the sortsupport routine needs to
  138. * know if its dealing with a key where an abbreviated representation can
  139. * usefully be packed together. Conventionally, this is the leading
  140. * attribute key). Note, however, that in order to determine that
  141. * abbreviation is not in play, the core code always checks whether or not
  142. * the opclass has set abbrev_converter. This is a one way, one time
  143. * message to the opclass.
  144. */
  145. bool abbreviate;
  146. /*
  147. * Converter to abbreviated format, from original representation. Core
  148. * code uses this callback to convert from a pass-by-reference "original"
  149. * Datum to a pass-by-value abbreviated key Datum. Note that original is
  150. * guaranteed NOT NULL, because it doesn't make sense to factor NULLness
  151. * into ad-hoc cost model.
  152. *
  153. * abbrev_converter is tested to see if abbreviation is in play. Core
  154. * code may set it to NULL to indicate abbreviation should not be used
  155. * (which is something sortsupport routines need not concern themselves
  156. * with). However, sortsupport routines must not set it when it is
  157. * immediately established that abbreviation should not proceed (e.g., for
  158. * !abbreviate calls, or due to platform-specific impediments to using
  159. * abbreviation).
  160. */
  161. Datum (*abbrev_converter) (Datum original, SortSupport ssup);
  162. /*
  163. * abbrev_abort callback allows clients to verify that the current
  164. * strategy is working out, using a sortsupport routine defined ad-hoc
  165. * cost model. If there is a lot of duplicate abbreviated keys in
  166. * practice, it's useful to be able to abandon the strategy before paying
  167. * too high a cost in conversion (perhaps certain opclass-specific
  168. * adaptations are useful too).
  169. */
  170. bool (*abbrev_abort) (int memtupcount, SortSupport ssup);
  171. /*
  172. * Full, authoritative comparator for key that an abbreviated
  173. * representation was generated for, used when an abbreviated comparison
  174. * was inconclusive (by calling ApplySortAbbrevFullComparator()), or used
  175. * to replace "comparator" when core system ultimately decides against
  176. * abbreviation.
  177. */
  178. int (*abbrev_full_comparator) (Datum x, Datum y, SortSupport ssup);
  179. } SortSupportData;
  180. /*
  181. * Apply a sort comparator function and return a 3-way comparison result.
  182. * This takes care of handling reverse-sort and NULLs-ordering properly.
  183. */
  184. static inline int
  185. ApplySortComparator(Datum datum1, bool isNull1,
  186. Datum datum2, bool isNull2,
  187. SortSupport ssup)
  188. {
  189. int compare;
  190. if (isNull1)
  191. {
  192. if (isNull2)
  193. compare = 0; /* NULL "=" NULL */
  194. else if (ssup->ssup_nulls_first)
  195. compare = -1; /* NULL "<" NOT_NULL */
  196. else
  197. compare = 1; /* NULL ">" NOT_NULL */
  198. }
  199. else if (isNull2)
  200. {
  201. if (ssup->ssup_nulls_first)
  202. compare = 1; /* NOT_NULL ">" NULL */
  203. else
  204. compare = -1; /* NOT_NULL "<" NULL */
  205. }
  206. else
  207. {
  208. compare = ssup->comparator(datum1, datum2, ssup);
  209. if (ssup->ssup_reverse)
  210. INVERT_COMPARE_RESULT(compare);
  211. }
  212. return compare;
  213. }
  214. static inline int
  215. ApplyUnsignedSortComparator(Datum datum1, bool isNull1,
  216. Datum datum2, bool isNull2,
  217. SortSupport ssup)
  218. {
  219. int compare;
  220. if (isNull1)
  221. {
  222. if (isNull2)
  223. compare = 0; /* NULL "=" NULL */
  224. else if (ssup->ssup_nulls_first)
  225. compare = -1; /* NULL "<" NOT_NULL */
  226. else
  227. compare = 1; /* NULL ">" NOT_NULL */
  228. }
  229. else if (isNull2)
  230. {
  231. if (ssup->ssup_nulls_first)
  232. compare = 1; /* NOT_NULL ">" NULL */
  233. else
  234. compare = -1; /* NOT_NULL "<" NULL */
  235. }
  236. else
  237. {
  238. compare = datum1 < datum2 ? -1 : datum1 > datum2 ? 1 : 0;
  239. if (ssup->ssup_reverse)
  240. INVERT_COMPARE_RESULT(compare);
  241. }
  242. return compare;
  243. }
  #if SIZEOF_DATUM >= 8
  /*
   * Variant of ApplySortComparator() that does not call ssup->comparator:
   * two non-NULL values are ordered by comparing DatumGetInt64() of each
   * Datum.  Only compiled when SIZEOF_DATUM >= 8.
   *
   * NULL handling and reverse-sort handling are the same as in
   * ApplySortComparator(): NULL ordering depends only on
   * ssup->ssup_nulls_first, and ssup->ssup_reverse inverts only the result
   * for two non-NULL values.
   */
  static inline int
  ApplySignedSortComparator(Datum datum1, bool isNull1,
                            Datum datum2, bool isNull2,
                            SortSupport ssup)
  {
      int         compare;

      if (isNull1)
      {
          if (isNull2)
              compare = 0;        /* NULL "=" NULL */
          else if (ssup->ssup_nulls_first)
              compare = -1;       /* NULL "<" NOT_NULL */
          else
              compare = 1;        /* NULL ">" NOT_NULL */
      }
      else if (isNull2)
      {
          if (ssup->ssup_nulls_first)
              compare = 1;        /* NOT_NULL ">" NULL */
          else
              compare = -1;       /* NOT_NULL "<" NULL */
      }
      else
      {
          /* Compare the values as 64-bit signed integers. */
          compare = DatumGetInt64(datum1) < DatumGetInt64(datum2) ? -1 :
              DatumGetInt64(datum1) > DatumGetInt64(datum2) ? 1 : 0;
          if (ssup->ssup_reverse)
              INVERT_COMPARE_RESULT(compare);
      }

      return compare;
  }
  #endif
  277. static inline int
  278. ApplyInt32SortComparator(Datum datum1, bool isNull1,
  279. Datum datum2, bool isNull2,
  280. SortSupport ssup)
  281. {
  282. int compare;
  283. if (isNull1)
  284. {
  285. if (isNull2)
  286. compare = 0; /* NULL "=" NULL */
  287. else if (ssup->ssup_nulls_first)
  288. compare = -1; /* NULL "<" NOT_NULL */
  289. else
  290. compare = 1; /* NULL ">" NOT_NULL */
  291. }
  292. else if (isNull2)
  293. {
  294. if (ssup->ssup_nulls_first)
  295. compare = 1; /* NOT_NULL ">" NULL */
  296. else
  297. compare = -1; /* NOT_NULL "<" NULL */
  298. }
  299. else
  300. {
  301. compare = DatumGetInt32(datum1) < DatumGetInt32(datum2) ? -1 :
  302. DatumGetInt32(datum1) > DatumGetInt32(datum2) ? 1 : 0;
  303. if (ssup->ssup_reverse)
  304. INVERT_COMPARE_RESULT(compare);
  305. }
  306. return compare;
  307. }
  308. /*
  309. * Apply a sort comparator function and return a 3-way comparison using full,
  310. * authoritative comparator. This takes care of handling reverse-sort and
  311. * NULLs-ordering properly.
  312. */
  313. static inline int
  314. ApplySortAbbrevFullComparator(Datum datum1, bool isNull1,
  315. Datum datum2, bool isNull2,
  316. SortSupport ssup)
  317. {
  318. int compare;
  319. if (isNull1)
  320. {
  321. if (isNull2)
  322. compare = 0; /* NULL "=" NULL */
  323. else if (ssup->ssup_nulls_first)
  324. compare = -1; /* NULL "<" NOT_NULL */
  325. else
  326. compare = 1; /* NULL ">" NOT_NULL */
  327. }
  328. else if (isNull2)
  329. {
  330. if (ssup->ssup_nulls_first)
  331. compare = 1; /* NOT_NULL ">" NULL */
  332. else
  333. compare = -1; /* NOT_NULL "<" NULL */
  334. }
  335. else
  336. {
  337. compare = ssup->abbrev_full_comparator(datum1, datum2, ssup);
  338. if (ssup->ssup_reverse)
  339. INVERT_COMPARE_RESULT(compare);
  340. }
  341. return compare;
  342. }
  343. /*
  344. * Datum comparison functions that we have specialized sort routines for.
  345. * Datatypes that install these as their comparator or abbrevated comparator
  346. * are eligible for faster sorting.
  347. */
  348. extern int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup);
  349. #if SIZEOF_DATUM >= 8
  350. extern int ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup);
  351. #endif
  352. extern int ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup);
  353. /* Other functions in utils/sort/sortsupport.c */
  354. extern void PrepareSortSupportComparisonShim(Oid cmpFunc, SortSupport ssup);
  355. extern void PrepareSortSupportFromOrderingOp(Oid orderingOp, SortSupport ssup);
  356. extern void PrepareSortSupportFromIndexRel(Relation indexRel, int16 strategy,
  357. SortSupport ssup);
  358. extern void PrepareSortSupportFromGistIndexRel(Relation indexRel, SortSupport ssup);
  359. #endif /* SORTSUPPORT_H */