/*-------------------------------------------------------------------------
 *
 * nodeAgg.h
 *	  prototypes for nodeAgg.c
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/executor/nodeAgg.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef NODEAGG_H
#define NODEAGG_H

#include "access/parallel.h"
#include "nodes/execnodes.h"

  18. /*
  19. * AggStatePerTransData - per aggregate state value information
  20. *
  21. * Working state for updating the aggregate's state value, by calling the
  22. * transition function with an input row. This struct does not store the
  23. * information needed to produce the final aggregate result from the transition
  24. * state, that's stored in AggStatePerAggData instead. This separation allows
  25. * multiple aggregate results to be produced from a single state value.
  26. */
  27. typedef struct AggStatePerTransData
  28. {
  29. /*
  30. * These values are set up during ExecInitAgg() and do not change
  31. * thereafter:
  32. */
  33. /*
  34. * Link to an Aggref expr this state value is for.
  35. *
  36. * There can be multiple Aggref's sharing the same state value, so long as
  37. * the inputs and transition functions are identical and the final
  38. * functions are not read-write. This points to the first one of them.
  39. */
  40. Aggref *aggref;
  41. /*
  42. * Is this state value actually being shared by more than one Aggref?
  43. */
  44. bool aggshared;
  45. /*
  46. * Number of aggregated input columns. This includes ORDER BY expressions
  47. * in both the plain-agg and ordered-set cases. Ordered-set direct args
  48. * are not counted, though.
  49. */
  50. int numInputs;
  51. /*
  52. * Number of aggregated input columns to pass to the transfn. This
  53. * includes the ORDER BY columns for ordered-set aggs, but not for plain
  54. * aggs. (This doesn't count the transition state value!)
  55. */
  56. int numTransInputs;
  57. /* Oid of the state transition or combine function */
  58. Oid transfn_oid;
  59. /* Oid of the serialization function or InvalidOid */
  60. Oid serialfn_oid;
  61. /* Oid of the deserialization function or InvalidOid */
  62. Oid deserialfn_oid;
  63. /* Oid of state value's datatype */
  64. Oid aggtranstype;
  65. /*
  66. * fmgr lookup data for transition function or combine function. Note in
  67. * particular that the fn_strict flag is kept here.
  68. */
  69. FmgrInfo transfn;
  70. /* fmgr lookup data for serialization function */
  71. FmgrInfo serialfn;
  72. /* fmgr lookup data for deserialization function */
  73. FmgrInfo deserialfn;
  74. /* Input collation derived for aggregate */
  75. Oid aggCollation;
  76. /* number of sorting columns */
  77. int numSortCols;
  78. /* number of sorting columns to consider in DISTINCT comparisons */
  79. /* (this is either zero or the same as numSortCols) */
  80. int numDistinctCols;
  81. /* deconstructed sorting information (arrays of length numSortCols) */
  82. AttrNumber *sortColIdx;
  83. Oid *sortOperators;
  84. Oid *sortCollations;
  85. bool *sortNullsFirst;
  86. /*
  87. * Comparators for input columns --- only set/used when aggregate has
  88. * DISTINCT flag. equalfnOne version is used for single-column
  89. * comparisons, equalfnMulti for the case of multiple columns.
  90. */
  91. FmgrInfo equalfnOne;
  92. ExprState *equalfnMulti;
  93. /*
  94. * initial value from pg_aggregate entry
  95. */
  96. Datum initValue;
  97. bool initValueIsNull;
  98. /*
  99. * We need the len and byval info for the agg's input and transition data
  100. * types in order to know how to copy/delete values.
  101. *
  102. * Note that the info for the input type is used only when handling
  103. * DISTINCT aggs with just one argument, so there is only one input type.
  104. */
  105. int16 inputtypeLen,
  106. transtypeLen;
  107. bool inputtypeByVal,
  108. transtypeByVal;
  109. /*
  110. * Slots for holding the evaluated input arguments. These are set up
  111. * during ExecInitAgg() and then used for each input row requiring either
  112. * FILTER or ORDER BY/DISTINCT processing.
  113. */
  114. TupleTableSlot *sortslot; /* current input tuple */
  115. TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */
  116. TupleDesc sortdesc; /* descriptor of input tuples */
  117. /*
  118. * These values are working state that is initialized at the start of an
  119. * input tuple group and updated for each input tuple.
  120. *
  121. * For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input
  122. * values straight to the transition function. If it's DISTINCT or
  123. * requires ORDER BY, we pass the input values into a Tuplesort object;
  124. * then at completion of the input tuple group, we scan the sorted values,
  125. * eliminate duplicates if needed, and run the transition function on the
  126. * rest.
  127. *
  128. * We need a separate tuplesort for each grouping set.
  129. */
  130. Tuplesortstate **sortstates; /* sort objects, if DISTINCT or ORDER BY */
  131. /*
  132. * This field is a pre-initialized FunctionCallInfo struct used for
  133. * calling this aggregate's transfn. We save a few cycles per row by not
  134. * re-initializing the unchanging fields; which isn't much, but it seems
  135. * worth the extra space consumption.
  136. */
  137. FunctionCallInfo transfn_fcinfo;
  138. /* Likewise for serialization and deserialization functions */
  139. FunctionCallInfo serialfn_fcinfo;
  140. FunctionCallInfo deserialfn_fcinfo;
  141. } AggStatePerTransData;
  142. /*
  143. * AggStatePerAggData - per-aggregate information
  144. *
  145. * This contains the information needed to call the final function, to produce
  146. * a final aggregate result from the state value. If there are multiple
  147. * identical Aggrefs in the query, they can all share the same per-agg data.
  148. *
  149. * These values are set up during ExecInitAgg() and do not change thereafter.
  150. */
  151. typedef struct AggStatePerAggData
  152. {
  153. /*
  154. * Link to an Aggref expr this state value is for.
  155. *
  156. * There can be multiple identical Aggref's sharing the same per-agg. This
  157. * points to the first one of them.
  158. */
  159. Aggref *aggref;
  160. /* index to the state value which this agg should use */
  161. int transno;
  162. /* Optional Oid of final function (may be InvalidOid) */
  163. Oid finalfn_oid;
  164. /*
  165. * fmgr lookup data for final function --- only valid when finalfn_oid is
  166. * not InvalidOid.
  167. */
  168. FmgrInfo finalfn;
  169. /*
  170. * Number of arguments to pass to the finalfn. This is always at least 1
  171. * (the transition state value) plus any ordered-set direct args. If the
  172. * finalfn wants extra args then we pass nulls corresponding to the
  173. * aggregated input columns.
  174. */
  175. int numFinalArgs;
  176. /* ExprStates for any direct-argument expressions */
  177. List *aggdirectargs;
  178. /*
  179. * We need the len and byval info for the agg's result data type in order
  180. * to know how to copy/delete values.
  181. */
  182. int16 resulttypeLen;
  183. bool resulttypeByVal;
  184. /*
  185. * "shareable" is false if this agg cannot share state values with other
  186. * aggregates because the final function is read-write.
  187. */
  188. bool shareable;
  189. } AggStatePerAggData;
  190. /*
  191. * AggStatePerGroupData - per-aggregate-per-group working state
  192. *
  193. * These values are working state that is initialized at the start of
  194. * an input tuple group and updated for each input tuple.
  195. *
  196. * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
  197. * structs (pointed to by aggstate->pergroup); we re-use the array for
  198. * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the
  199. * hash table contains an array of these structs for each tuple group.
  200. *
  201. * Logically, the sortstate field belongs in this struct, but we do not
  202. * keep it here for space reasons: we don't support DISTINCT aggregates
  203. * in AGG_HASHED mode, so there's no reason to use up a pointer field
  204. * in every entry of the hashtable.
  205. */
  206. typedef struct AggStatePerGroupData
  207. {
  208. #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0
  209. Datum transValue; /* current transition value */
  210. #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1
  211. bool transValueIsNull;
  212. #define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2
  213. bool noTransValue; /* true if transValue not set yet */
  214. /*
  215. * Note: noTransValue initially has the same value as transValueIsNull,
  216. * and if true both are cleared to false at the same time. They are not
  217. * the same though: if transfn later returns a NULL, we want to keep that
  218. * NULL and not auto-replace it with a later input value. Only the first
  219. * non-NULL input will be auto-substituted.
  220. */
  221. } AggStatePerGroupData;
  222. /*
  223. * AggStatePerPhaseData - per-grouping-set-phase state
  224. *
  225. * Grouping sets are divided into "phases", where a single phase can be
  226. * processed in one pass over the input. If there is more than one phase, then
  227. * at the end of input from the current phase, state is reset and another pass
  228. * taken over the data which has been re-sorted in the mean time.
  229. *
  230. * Accordingly, each phase specifies a list of grouping sets and group clause
  231. * information, plus each phase after the first also has a sort order.
  232. */
  233. typedef struct AggStatePerPhaseData
  234. {
  235. AggStrategy aggstrategy; /* strategy for this phase */
  236. int numsets; /* number of grouping sets (or 0) */
  237. int *gset_lengths; /* lengths of grouping sets */
  238. Bitmapset **grouped_cols; /* column groupings for rollup */
  239. ExprState **eqfunctions; /* expression returning equality, indexed by
  240. * nr of cols to compare */
  241. Agg *aggnode; /* Agg node for phase data */
  242. Sort *sortnode; /* Sort node for input ordering for phase */
  243. ExprState *evaltrans; /* evaluation of transition functions */
  244. /*----------
  245. * Cached variants of the compiled expression.
  246. * first subscript: 0: outerops; 1: TTSOpsMinimalTuple
  247. * second subscript: 0: no NULL check; 1: with NULL check
  248. *----------
  249. */
  250. ExprState *evaltrans_cache[2][2];
  251. } AggStatePerPhaseData;
  252. /*
  253. * AggStatePerHashData - per-hashtable state
  254. *
  255. * When doing grouping sets with hashing, we have one of these for each
  256. * grouping set. (When doing hashing without grouping sets, we have just one of
  257. * them.)
  258. */
  259. typedef struct AggStatePerHashData
  260. {
  261. TupleHashTable hashtable; /* hash table with one entry per group */
  262. TupleHashIterator hashiter; /* for iterating through hash table */
  263. TupleTableSlot *hashslot; /* slot for loading hash table */
  264. FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
  265. Oid *eqfuncoids; /* per-grouping-field equality fns */
  266. int numCols; /* number of hash key columns */
  267. int numhashGrpCols; /* number of columns in hash table */
  268. int largestGrpColIdx; /* largest col required for hashing */
  269. AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
  270. AttrNumber *hashGrpColIdxHash; /* indices in hash table tuples */
  271. Agg *aggnode; /* original Agg node, for numGroups etc. */
  272. } AggStatePerHashData;
  273. extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
  274. extern void ExecEndAgg(AggState *node);
  275. extern void ExecReScanAgg(AggState *node);
  276. extern Size hash_agg_entry_size(int numTrans, Size tupleWidth,
  277. Size transitionSpace);
  278. extern void hash_agg_set_limits(double hashentrysize, double input_groups,
  279. int used_bits, Size *mem_limit,
  280. uint64 *ngroups_limit, int *num_partitions);
  281. /* parallel instrumentation support */
  282. extern void ExecAggEstimate(AggState *node, ParallelContext *pcxt);
  283. extern void ExecAggInitializeDSM(AggState *node, ParallelContext *pcxt);
  284. extern void ExecAggInitializeWorker(AggState *node, ParallelWorkerContext *pwcxt);
  285. extern void ExecAggRetrieveInstrumentation(AggState *node);
  286. #endif /* NODEAGG_H */