2
0

tuplesort.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /*-------------------------------------------------------------------------
  2. *
  3. * tuplesort.h
  4. * Generalized tuple sorting routines.
  5. *
  6. * This module handles sorting of heap tuples, index tuples, or single
  7. * Datums (and could easily support other kinds of sortable objects,
  8. * if necessary). It works efficiently for both small and large amounts
  9. * of data. Small amounts are sorted in-memory using qsort(). Large
  10. * amounts are sorted using temporary files and a standard external sort
  11. * algorithm. Parallel sorts use a variant of this external sort
  12. * algorithm, and are typically only used for large amounts of data.
  13. *
  14. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  15. * Portions Copyright (c) 1994, Regents of the University of California
  16. *
  17. * src/include/utils/tuplesort.h
  18. *
  19. *-------------------------------------------------------------------------
  20. */
  21. #ifndef TUPLESORT_H
  22. #define TUPLESORT_H
  23. #include "access/itup.h"
  24. #include "executor/tuptable.h"
  25. #include "storage/dsm.h"
  26. #include "utils/relcache.h"
  /*
   * Tuplesortstate and Sharedsort are opaque types: only their names are
   * declared here, and their details are not known outside tuplesort.c.
   * Callers handle them strictly through pointers.
   */
  typedef struct Tuplesortstate Tuplesortstate;
  typedef struct Sharedsort Sharedsort;

  /*
   * SortCoordinateData -- tuplesort parallel coordination state.
   *
   * Each participant allocates one of these in local memory, and the
   * participant's caller initializes every field.  See the parallel-sort
   * usage notes further down in this header.
   */
  typedef struct SortCoordinateData
  {
      /* true in a worker process; if false, this must be the leader */
      bool        isWorker;

      /*
       * Number of participants known to have been launched, as passed in by
       * the leader process (workers set this to -1).  The count includes the
       * state within the leader needed for it to participate as a worker,
       * if any.
       */
      int         nParticipants;

      /* Private opaque state (points to shared memory) */
      Sharedsort *sharedsort;
  } SortCoordinateData;

  /* Pointer form of the coordination state, as taken by tuplesort_begin_*() */
  typedef struct SortCoordinateData *SortCoordinate;
  /*
   * Data structures for reporting sort statistics.  Note that
   * TuplesortInstrumentation can't contain any pointers because we
   * sometimes put it in shared memory.
   *
   * TuplesortMethod identifies the algorithm a sort used.  The
   * parallel-sort infrastructure relies on a zero TuplesortMethod meaning
   * "this worker never did anything", which is why
   * SORT_TYPE_STILL_IN_PROGRESS is assigned zero.  Every other value
   * occupies its own bit, so that values can be OR'ed together to describe
   * a sort in which different workers used different methods.
   *
   * When adding a method, give it the next free bit and update
   * NUM_TUPLESORTMETHODS to match the number of bits in use.
   */
  typedef enum
  {
      SORT_TYPE_STILL_IN_PROGRESS = 0,
      SORT_TYPE_TOP_N_HEAPSORT = 1 << 0,
      SORT_TYPE_QUICKSORT = 1 << 1,
      SORT_TYPE_EXTERNAL_SORT = 1 << 2,
      SORT_TYPE_EXTERNAL_MERGE = 1 << 3
  } TuplesortMethod;

  /* Number of method bits defined above; the zero value is not counted */
  #define NUM_TUPLESORTMETHODS 4
  /*
   * TuplesortSpaceType -- the kind of space that a reported space figure
   * represents (see TuplesortInstrumentation.spaceType).
   */
  typedef enum
  {
      SORT_SPACE_TYPE_DISK,
      SORT_SPACE_TYPE_MEMORY
  } TuplesortSpaceType;
  /*
   * Bitwise option flags for tuple sorts.  Each option has its own bit, so
   * options can be combined with bitwise OR; TUPLESORT_NONE selects none.
   */
  #define TUPLESORT_NONE                  0

  /* specifies whether non-sequential access to the sort result is required */
  #define TUPLESORT_RANDOMACCESS          (1 << 0)

  /* specifies if the tuplesort is able to support bounded sorts */
  #define TUPLESORT_ALLOWBOUNDED          (1 << 1)
  84. typedef struct TuplesortInstrumentation
  85. {
  86. TuplesortMethod sortMethod; /* sort algorithm used */
  87. TuplesortSpaceType spaceType; /* type of space spaceUsed represents */
  88. int64 spaceUsed; /* space consumption, in kB */
  89. } TuplesortInstrumentation;
  90. /*
  91. * We provide multiple interfaces to what is essentially the same code,
  92. * since different callers have different data to be sorted and want to
  93. * specify the sort key information differently. There are two APIs for
  94. * sorting HeapTuples and three more for sorting IndexTuples. Yet another
  95. * API supports sorting bare Datums.
  96. *
  97. * Serial sort callers should pass NULL for their coordinate argument.
  98. *
  99. * The "heap" API actually stores/sorts MinimalTuples, which means it doesn't
  100. * preserve the system columns (tuple identity and transaction visibility
  101. * info). The sort keys are specified by column numbers within the tuples
  102. * and sort operator OIDs. We save some cycles by passing and returning the
  103. * tuples in TupleTableSlots, rather than forming actual HeapTuples (which'd
  104. * have to be converted to MinimalTuples). This API works well for sorts
  105. * executed as parts of plan trees.
  106. *
  107. * The "cluster" API stores/sorts full HeapTuples including all visibility
  108. * info. The sort keys are specified by reference to a btree index that is
  109. * defined on the relation to be sorted. Note that putheaptuple/getheaptuple
  110. * go with this API, not the "begin_heap" one!
  111. *
  112. * The "index_btree" API stores/sorts IndexTuples (preserving all their
  113. * header fields). The sort keys are specified by a btree index definition.
  114. *
  115. * The "index_hash" API is similar to index_btree, but the tuples are
  116. * actually sorted by their hash codes not the raw data.
  117. *
  118. * Parallel sort callers are required to coordinate multiple tuplesort states
  119. * in a leader process and one or more worker processes. The leader process
  120. * must launch workers, and have each perform an independent "partial"
  121. * tuplesort, typically fed by the parallel heap interface. The leader later
  122. * produces the final output (internally, it merges runs output by workers).
  123. *
  124. * Callers must do the following to perform a sort in parallel using multiple
  125. * worker processes:
  126. *
  127. * 1. Request tuplesort-private shared memory for n workers. Use
  128. * tuplesort_estimate_shared() to get the required size.
  129. * 2. Have leader process initialize allocated shared memory using
  130. * tuplesort_initialize_shared(). Launch workers.
  131. * 3. Initialize a coordinate argument within both the leader process, and
  132. * for each worker process. This has a pointer to the shared
  133. * tuplesort-private structure, as well as some caller-initialized fields.
  134. * Leader's coordinate argument reliably indicates number of workers
  135. * launched (this is unused by workers).
  136. * 4. Begin a tuplesort using some appropriate tuplesort_begin* routine
  137. * (passing the coordinate argument) within each worker. The workMem
  138. * arguments need not be identical. All other arguments should match
  139. * exactly, though.
  140. * 5. tuplesort_attach_shared() should be called by all workers. Feed tuples
  141. * to each worker, and call tuplesort_performsort() within each when input
  142. * is exhausted.
  143. * 6. Call tuplesort_end() in each worker process. Worker processes can shut
  144. * down once tuplesort_end() returns.
  145. * 7. Begin a tuplesort in the leader using the same tuplesort_begin*
  146. * routine, passing a leader-appropriate coordinate argument (this can
  147. * happen as early as during step 3, actually, since we only need to know
  148. * the number of workers successfully launched). The leader must now wait
  149. * for workers to finish. Caller must use own mechanism for ensuring that
  150. * next step isn't reached until all workers have called and returned from
  151. * tuplesort_performsort(). (Note that it's okay if workers have already
  152. * also called tuplesort_end() by then.)
  153. * 8. Call tuplesort_performsort() in leader. Consume output using the
  154. * appropriate tuplesort_get* routine. Leader can skip this step if
  155. * tuplesort turns out to be unnecessary.
  156. * 9. Call tuplesort_end() in leader.
  157. *
  158. * This division of labor assumes nothing about how input tuples are produced,
  159. * but does require that caller combine the state of multiple tuplesorts for
  160. * any purpose other than producing the final output. For example, callers
  161. * must consider that tuplesort_get_stats() reports on only one worker's role
  162. * in a sort (or the leader's role), and not statistics for the sort as a
  163. * whole.
  164. *
  165. * Note that callers may use the leader process to sort runs as if it was an
  166. * independent worker process (prior to the process performing a leader sort
  167. * to produce the final sorted output). Doing so only requires a second
  168. * "partial" tuplesort within the leader process, initialized like that of a
  169. * worker process. The steps above don't touch on this directly. The only
  170. * difference is that the tuplesort_attach_shared() call is never needed within
  171. * leader process, because the backend as a whole holds the shared fileset
  172. * reference. A worker Tuplesortstate in leader is expected to do exactly the
  173. * same amount of total initial processing work as a worker process
  174. * Tuplesortstate, since the leader process has nothing else to do before
  175. * workers finish.
  176. *
  177. * Note that only a very small amount of memory will be allocated prior to
  178. * the leader state first consuming input, and that workers will free the
  179. * vast majority of their memory upon returning from tuplesort_performsort().
  180. * Callers can rely on this to arrange for memory to be used in a way that
  181. * respects a workMem-style budget across an entire parallel sort operation.
  182. *
  183. * Callers are responsible for parallel safety in general. However, they
  184. * can at least rely on there being no parallel safety hazards within
  185. * tuplesort, because tuplesort thinks of the sort as several independent
  186. * sorts whose results are combined. Since, in general, the behavior of
  187. * sort operators is immutable, caller need only worry about the parallel
  188. * safety of whatever the process is through which input tuples are
  189. * generated (typically, caller uses a parallel heap scan).
  190. */
  191. extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
  192. int nkeys, AttrNumber *attNums,
  193. Oid *sortOperators, Oid *sortCollations,
  194. bool *nullsFirstFlags,
  195. int workMem, SortCoordinate coordinate,
  196. int sortopt);
  197. extern Tuplesortstate *tuplesort_begin_cluster(TupleDesc tupDesc,
  198. Relation indexRel, int workMem,
  199. SortCoordinate coordinate,
  200. int sortopt);
  201. extern Tuplesortstate *tuplesort_begin_index_btree(Relation heapRel,
  202. Relation indexRel,
  203. bool enforceUnique,
  204. bool uniqueNullsNotDistinct,
  205. int workMem, SortCoordinate coordinate,
  206. int sortopt);
  207. extern Tuplesortstate *tuplesort_begin_index_hash(Relation heapRel,
  208. Relation indexRel,
  209. uint32 high_mask,
  210. uint32 low_mask,
  211. uint32 max_buckets,
  212. int workMem, SortCoordinate coordinate,
  213. int sortopt);
  214. extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel,
  215. Relation indexRel,
  216. int workMem, SortCoordinate coordinate,
  217. int sortopt);
  218. extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
  219. Oid sortOperator, Oid sortCollation,
  220. bool nullsFirstFlag,
  221. int workMem, SortCoordinate coordinate,
  222. int sortopt);
  223. extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
  224. extern bool tuplesort_used_bound(Tuplesortstate *state);
  225. extern void tuplesort_puttupleslot(Tuplesortstate *state,
  226. TupleTableSlot *slot);
  227. extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
  228. extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
  229. Relation rel, ItemPointer self,
  230. Datum *values, bool *isnull);
  231. extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
  232. bool isNull);
  233. extern void tuplesort_performsort(Tuplesortstate *state);
  234. extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
  235. bool copy, TupleTableSlot *slot, Datum *abbrev);
  236. extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
  237. extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
  238. extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
  239. Datum *val, bool *isNull, Datum *abbrev);
  240. extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
  241. bool forward);
  242. extern void tuplesort_end(Tuplesortstate *state);
  243. extern void tuplesort_reset(Tuplesortstate *state);
  244. extern void tuplesort_get_stats(Tuplesortstate *state,
  245. TuplesortInstrumentation *stats);
  246. extern const char *tuplesort_method_name(TuplesortMethod m);
  247. extern const char *tuplesort_space_type_name(TuplesortSpaceType t);
  248. extern int tuplesort_merge_order(int64 allowedMem);
  249. extern Size tuplesort_estimate_shared(int nworkers);
  250. extern void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers,
  251. dsm_segment *seg);
  252. extern void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg);
  253. /*
  254. * These routines may only be called if TUPLESORT_RANDOMACCESS was specified
  255. * during tuplesort_begin_*. Additionally backwards scan in gettuple/getdatum
  256. * also require TUPLESORT_RANDOMACCESS. Note that parallel sorts do not
  257. * support random access.
  258. */
  259. extern void tuplesort_rescan(Tuplesortstate *state);
  260. extern void tuplesort_markpos(Tuplesortstate *state);
  261. extern void tuplesort_restorepos(Tuplesortstate *state);
  262. #endif /* TUPLESORT_H */