array.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /*-------------------------------------------------------------------------
  2. *
  3. * array.h
  4. * Declarations for Postgres arrays.
  5. *
  6. * A standard varlena array has the following internal structure:
  7. * <vl_len_> - standard varlena header word
  8. * <ndim> - number of dimensions of the array
  9. * <dataoffset> - offset to stored data, or 0 if no nulls bitmap
  10. * <elemtype> - element type OID
  11. * <dimensions> - length of each array axis (C array of int)
  12. * <lower bnds> - lower boundary of each dimension (C array of int)
  13. * <null bitmap> - bitmap showing locations of nulls (OPTIONAL)
  14. * <actual data> - whatever is the stored data
  15. *
  16. * The <dimensions> and <lower bnds> arrays each have ndim elements.
  17. *
  18. * The <null bitmap> may be omitted if the array contains no NULL elements.
  19. * If it is absent, the <dataoffset> field is zero and the offset to the
  20. * stored data must be computed on-the-fly. If the bitmap is present,
  21. * <dataoffset> is nonzero and is equal to the offset from the array start
  22. * to the first data element (including any alignment padding). The bitmap
  23. * follows the same conventions as tuple null bitmaps, i.e., a 1 indicates
  24. * a non-null entry and the LSB of each bitmap byte is used first.
  25. *
  26. * The actual data starts on a MAXALIGN boundary. Individual items in the
  27. * array are aligned as specified by the array element type. They are
  28. * stored in row-major order (last subscript varies most rapidly).
  29. *
  30. * NOTE: it is important that array elements of toastable datatypes NOT be
  31. * toasted, since the tupletoaster won't know they are there. (We could
  32. * support compressed toasted items; only out-of-line items are dangerous.
  33. * However, it seems preferable to store such items uncompressed and allow
  34. * the toaster to compress the whole array as one input.)
  35. *
  36. *
  37. * The OIDVECTOR and INT2VECTOR datatypes are storage-compatible with
  38. * generic arrays, but they support only one-dimensional arrays with no
  39. * nulls (and no null bitmap). They don't support being toasted, either.
  40. *
  41. * There are also some "fixed-length array" datatypes, such as NAME and
  42. * POINT. These are simply a sequence of a fixed number of items each
  43. * of a fixed-length datatype, with no overhead; the item size must be
  44. * a multiple of its alignment requirement, because we do no padding.
  45. * We support subscripting on these types, but array_in() and array_out()
  46. * only work with varlena arrays.
  47. *
  48. * In addition, arrays are a major user of the "expanded object" TOAST
  49. * infrastructure. This allows a varlena array to be converted to a
  50. * separate representation that may include "deconstructed" Datum/isnull
  51. * arrays holding the elements.
  52. *
  53. *
  54. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  55. * Portions Copyright (c) 1994, Regents of the University of California
  56. *
  57. * src/include/utils/array.h
  58. *
  59. *-------------------------------------------------------------------------
  60. */
  61. #ifndef ARRAY_H
  62. #define ARRAY_H
  63. #include "fmgr.h"
  64. #include "utils/expandeddatum.h"
  65. /* avoid including execnodes.h here */
  /* (incomplete types suffice: this header only names them as pointer parameters of array_map) */
  66. struct ExprState;
  67. struct ExprContext;
  68. /*
  69. * Maximum number of array subscripts (arbitrary limit)
  70. */
  71. #define MAXDIM 6 /* also the length of dims[] and lbs[] in ArrayBuildStateArr */
  72. /*
  73. * Arrays are varlena objects, so must meet the varlena convention that
  74. * the first int32 of the object contains the total object size in bytes.
  75. * Be sure to use VARSIZE() and SET_VARSIZE() to access it, though!
  76. *
  77. * CAUTION: if you change the header for ordinary arrays you will also
  78. * need to change the headers for oidvector and int2vector!
  79. */
  /* Fixed-size leading part of a flat (varlena) array. */
  80. typedef struct ArrayType
  81. {
  82. int32 vl_len_; /* varlena header (do not touch directly! use VARSIZE/SET_VARSIZE) */
  83. int ndim; /* # of dimensions; read via ARR_NDIM() */
  84. int32 dataoffset; /* offset to data, or 0 if no bitmap; tested by ARR_HASNULL() */
  85. Oid elemtype; /* element type OID; read via ARR_ELEMTYPE() */
  /*
  * Not declared as members, but located after this fixed header in the same
  * object (see ARR_DIMS, ARR_LBOUND, ARR_NULLBITMAP and ARR_DATA_PTR):
  * ndim dimension lengths, ndim lower bounds, the optional null bitmap,
  * and then the element data.
  */
  86. } ArrayType;
  87. /*
  88. * An expanded array is contained within a private memory context (as
  89. * all expanded objects must be) and has a control structure as below.
  90. *
  91. * The expanded array might contain a regular "flat" array if that was the
  92. * original input and we've not modified it significantly. Otherwise, the
  93. * contents are represented by Datum/isnull arrays plus dimensionality and
  94. * type information. We could also have both forms, if we've deconstructed
  95. * the original array for access purposes but not yet changed it. For pass-
  96. * by-reference element types, the Datums would point into the flat array in
  97. * this situation. Once we start modifying array elements, new pass-by-ref
  98. * elements are separately palloc'd within the memory context.
  99. */
  100. #define EA_MAGIC 689375833 /* ID for debugging crosschecks */
  /* Control structure for the expanded (deconstructed) form of an array. */
  101. typedef struct ExpandedArrayHeader
  102. {
  103. /* Standard header for expanded objects */
  104. ExpandedObjectHeader hdr; /* its address is what PG_RETURN_EXPANDED_ARRAY hands to EOHPGetRWDatum() */
  105. /* Magic value identifying an expanded array (for debugging only) */
  106. int ea_magic; /* NOTE(review): presumably EA_MAGIC in a valid object -- confirm in array_expanded.c */
  107. /* Dimensionality info (always valid) */
  108. int ndims; /* # of dimensions */
  109. int *dims; /* array dimensions */
  110. int *lbound; /* index lower bounds for each dimension */
  111. /* Element type info (always valid) */
  112. Oid element_type; /* element type OID */
  113. int16 typlen; /* needed info about element datatype */
  114. bool typbyval; /* (more element datatype info) */
  115. char typalign; /* (more element datatype info) */
  116. /*
  117. * If we have a Datum-array representation of the array, it's kept here;
  118. * else dvalues/dnulls are NULL. The dvalues and dnulls arrays are always
  119. * palloc'd within the object private context, but may change size from
  120. * time to time. For pass-by-ref element types, dvalues entries might
  121. * point either into the fstartptr..fendptr area, or to separately
  122. * palloc'd chunks. Elements should always be fully detoasted, as they
  123. * are in the standard flat representation.
  124. *
  125. * Even when dvalues is valid, dnulls can be NULL if there are no null
  126. * elements.
  127. */
  128. Datum *dvalues; /* array of Datums */
  129. bool *dnulls; /* array of is-null flags for Datums */
  130. int dvalueslen; /* allocated length of above arrays */
  131. int nelems; /* number of valid entries in above arrays */
  132. /*
  133. * flat_size is the current space requirement for the flat equivalent of
  134. * the expanded array, if known; otherwise it's 0. We store this to make
  135. * consecutive calls of get_flat_size cheap.
  136. */
  137. Size flat_size;
  138. /*
  139. * fvalue points to the flat representation if it is valid, else it is
  140. * NULL. If we have or ever had a flat representation then
  141. * fstartptr/fendptr point to the start and end+1 of its data area; this
  142. * is so that we can tell which Datum pointers point into the flat
  143. * representation rather than being pointers to separately palloc'd data.
  144. */
  145. ArrayType *fvalue; /* must be a fully detoasted array */
  146. char *fstartptr; /* start of its data area */
  147. char *fendptr; /* end+1 of its data area */
  148. } ExpandedArrayHeader;
  149. /*
  150. * Functions that can handle either a "flat" varlena array or an expanded
  151. * array use this union to work with their input. Don't refer to "flt";
  152. * instead, cast the pointer to ArrayType *. This union nominally requires 8-byte
  153. * alignment on 64-bit, but it's often used for an ArrayType having 4-byte
  154. * alignment. UBSan complains about referencing "flt" in such cases.
  155. */
  /* Overlay used to accept either a flat or an expanded array through one pointer. */
  156. typedef union AnyArrayType
  157. {
  158. ArrayType flt; /* flat form; do not reference this member, cast the pointer to ArrayType * as the AARR_* macros do */
  159. ExpandedArrayHeader xpn; /* expanded form; the AARR_* macros use it when VARATT_IS_EXPANDED_HEADER() is true */
  160. } AnyArrayType;
  161. /*
  162. * working state for accumArrayResult() and friends
  163. * note that the input must be scalars (legal array elements)
  164. */
  165. typedef struct ArrayBuildState
  166. {
  /*
  * Accumulator handed between initArrayResult(), accumArrayResult(),
  * makeArrayResult() and makeMdArrayResult() (prototypes later in this file).
  */
  167. MemoryContext mcontext; /* where all the temp stuff is kept */
  168. Datum *dvalues; /* array of accumulated Datums */
  169. bool *dnulls; /* array of is-null flags for Datums */
  170. int alen; /* allocated length of above arrays */
  171. int nelems; /* number of valid entries in above arrays */
  172. Oid element_type; /* data type of the Datums */
  173. int16 typlen; /* needed info about datatype */
  174. bool typbyval; /* (more datatype info) */
  175. char typalign; /* (more datatype info) */
  176. bool private_cxt; /* use private memory context */
  /* NOTE(review): private_cxt presumably records initArrayResult()'s "subcontext" argument -- confirm */
  177. } ArrayBuildState;
  178. /*
  179. * working state for accumArrayResultArr() and friends
  180. * note that the input must be arrays, and the same array type is returned
  181. */
  182. typedef struct ArrayBuildStateArr
  183. {
  /*
  * Accumulator handed between initArrayResultArr(), accumArrayResultArr()
  * and makeArrayResultArr() (prototypes later in this file).
  */
  184. MemoryContext mcontext; /* where all the temp stuff is kept */
  185. char *data; /* accumulated data */
  186. bits8 *nullbitmap; /* bitmap of is-null flags, or NULL if none */
  187. int abytes; /* allocated length of "data" */
  188. int nbytes; /* number of bytes used so far */
  189. int aitems; /* allocated length of bitmap (in elements) */
  190. int nitems; /* total number of elements in result */
  191. int ndims; /* current dimensions of result */
  192. int dims[MAXDIM]; /* NOTE(review): presumably per-dimension lengths, first ndims entries used -- confirm */
  193. int lbs[MAXDIM]; /* NOTE(review): presumably per-dimension lower bounds, first ndims entries used -- confirm */
  194. Oid array_type; /* data type of the arrays */
  195. Oid element_type; /* data type of the array elements */
  196. bool private_cxt; /* use private memory context */
  197. } ArrayBuildStateArr;
  198. /*
  199. * working state for accumArrayResultAny() and friends
  200. * these functions handle both cases
  201. */
  /* Wrapper that holds whichever of the two accumulator kinds is in use. */
  202. typedef struct ArrayBuildStateAny
  203. {
  204. /* Exactly one of these is not NULL: */
  205. ArrayBuildState *scalarstate; /* accumulator for scalar inputs (see ArrayBuildState) */
  206. ArrayBuildStateArr *arraystate; /* accumulator for array inputs (see ArrayBuildStateArr) */
  207. } ArrayBuildStateAny;
  208. /*
  209. * structure to cache type metadata needed for array manipulation
  210. */
  211. typedef struct ArrayMetaState
  212. {
  /*
  * Passed by pointer to array_create_iterator(), expand_array(),
  * construct_empty_expanded_array() and DatumGetExpandedArrayX(), and
  * embedded twice in ArrayMapState.
  * NOTE(review): the field meanings below are inferred from their names;
  * confirm against the code that fills this struct.
  */
  213. Oid element_type; /* element type the cached data describes */
  214. int16 typlen; /* presumably element type length -- confirm */
  215. bool typbyval; /* presumably element pass-by-value flag -- confirm */
  216. char typalign; /* presumably element alignment code -- confirm */
  217. char typdelim; /* presumably element delimiter character -- confirm */
  218. Oid typioparam; /* presumably I/O parameter type OID -- confirm */
  219. Oid typiofunc; /* presumably OID of the I/O function in use -- confirm */
  220. FmgrInfo proc; /* presumably fmgr lookup data for typiofunc -- confirm */
  221. } ArrayMetaState;
  222. /*
  223. * private state needed by array_map (here because caller must provide it)
  224. */
  /* Caller-allocated cache passed to array_map() as its amstate argument. */
  225. typedef struct ArrayMapState
  226. {
  227. ArrayMetaState inp_extra; /* NOTE(review): presumably metadata for the input element type -- confirm */
  228. ArrayMetaState ret_extra; /* NOTE(review): presumably metadata for the result element type -- confirm */
  229. } ArrayMapState;
  230. /* ArrayIteratorData is private in arrayfuncs.c */
  /*
  * Callers therefore hold only this opaque pointer, which is returned by
  * array_create_iterator() and passed to array_iterate() and
  * array_free_iterator().
  */
  231. typedef struct ArrayIteratorData *ArrayIterator;
  232. /* fmgr macros for regular varlena array objects */
  /* Both DatumGet macros cast the detoast result to ArrayType *; the Copy form uses PG_DETOAST_DATUM_COPY. */
  233. #define DatumGetArrayTypeP(X) ((ArrayType *) PG_DETOAST_DATUM(X))
  234. #define DatumGetArrayTypePCopy(X) ((ArrayType *) PG_DETOAST_DATUM_COPY(X))
  /* Argument fetchers: apply the matching DatumGet macro to PG_GETARG_DATUM(n). */
  235. #define PG_GETARG_ARRAYTYPE_P(n) DatumGetArrayTypeP(PG_GETARG_DATUM(n))
  236. #define PG_GETARG_ARRAYTYPE_P_COPY(n) DatumGetArrayTypePCopy(PG_GETARG_DATUM(n))
  237. #define PG_RETURN_ARRAYTYPE_P(x) PG_RETURN_POINTER(x)
  238. /* fmgr macros for expanded array objects */
  /* These call DatumGetExpandedArray() / DatumGetExpandedArrayX(), declared at the end of this header. */
  239. #define PG_GETARG_EXPANDED_ARRAY(n) DatumGetExpandedArray(PG_GETARG_DATUM(n))
  240. #define PG_GETARG_EXPANDED_ARRAYX(n, metacache) \
  241. DatumGetExpandedArrayX(PG_GETARG_DATUM(n), metacache)
  /* x must be a pointer to ExpandedArrayHeader: the macro takes the address of its hdr member. */
  242. #define PG_RETURN_EXPANDED_ARRAY(x) PG_RETURN_DATUM(EOHPGetRWDatum(&(x)->hdr))
  243. /* fmgr macros for AnyArrayType (i.e., get either varlena or expanded form) */
  244. #define PG_GETARG_ANY_ARRAY_P(n) DatumGetAnyArrayP(PG_GETARG_DATUM(n))
  245. /*
  246. * Access macros for varlena array header fields.
  247. *
  248. * ARR_DIMS returns a pointer to an array of array dimensions (number of
  249. * elements along the various array axes).
  250. *
  251. * ARR_LBOUND returns a pointer to an array of array lower bounds.
  252. *
  253. * That is: if the third axis of an array has elements 5 through 8, then
  254. * ARR_DIMS(a)[2] == 4 and ARR_LBOUND(a)[2] == 5.
  255. *
  256. * Unlike C, the default lower bound is 1.
  257. */
  /* All of these take a pointer to ArrayType; the scalar accessors read header fields directly. */
  258. #define ARR_SIZE(a) VARSIZE(a)
  259. #define ARR_NDIM(a) ((a)->ndim)
  /* A nonzero dataoffset is what marks the presence of a null bitmap. */
  260. #define ARR_HASNULL(a) ((a)->dataoffset != 0)
  261. #define ARR_ELEMTYPE(a) ((a)->elemtype)
  /* The dimension lengths start immediately after the fixed ArrayType header. */
  262. #define ARR_DIMS(a) \
  263. ((int *) (((char *) (a)) + sizeof(ArrayType)))
  /* The lower bounds follow the ARR_NDIM(a) dimension lengths. Note that "a" is expanded twice. */
  264. #define ARR_LBOUND(a) \
  265. ((int *) (((char *) (a)) + sizeof(ArrayType) + \
  266. sizeof(int) * ARR_NDIM(a)))
  /*
  * The null bitmap, when present, follows the dims and lower bounds
  * (2 * ndim ints); the result is NULL when ARR_HASNULL(a) is false.
  * Note that "a" is expanded three times.
  */
  267. #define ARR_NULLBITMAP(a) \
  268. (ARR_HASNULL(a) ? \
  269. (bits8 *) (((char *) (a)) + sizeof(ArrayType) + \
  270. 2 * sizeof(int) * ARR_NDIM(a)) \
  271. : (bits8 *) NULL)
  272. /*
  273. * The total array header size (in bytes) for an array with the specified
  274. * number of dimensions and total number of items.
  275. */
  /* Header + dims + lower bounds (2 * ndims ints), rounded up by MAXALIGN. */
  276. #define ARR_OVERHEAD_NONULLS(ndims) \
  277. MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims))
  /* As above plus the null bitmap: ((nitems) + 7) / 8 bytes, i.e. one bit per item rounded up to whole bytes. */
  278. #define ARR_OVERHEAD_WITHNULLS(ndims, nitems) \
  279. MAXALIGN(sizeof(ArrayType) + 2 * sizeof(int) * (ndims) + \
  280. ((nitems) + 7) / 8)
  /*
  * Offset from the array start to the element data: the stored dataoffset
  * when a null bitmap is present, otherwise computed from the number of
  * dimensions. Note that "a" is expanded three times.
  */
  281. #define ARR_DATA_OFFSET(a) \
  282. (ARR_HASNULL(a) ? (a)->dataoffset : ARR_OVERHEAD_NONULLS(ARR_NDIM(a)))
  283. /*
  284. * Returns a pointer to the actual array data.
  285. */
  /* Yields a char * computed as the array's start address plus ARR_DATA_OFFSET(a). */
  286. #define ARR_DATA_PTR(a) \
  287. (((char *) (a)) + ARR_DATA_OFFSET(a))
  288. /*
  289. * Macros for working with AnyArrayType inputs. Beware multiple references!
  290. */
  /*
  * Each macro takes an AnyArrayType pointer and tests
  * VARATT_IS_EXPANDED_HEADER(a): if true it reads the field from the
  * expanded header (a->xpn), otherwise it casts to ArrayType * and uses
  * the corresponding ARR_* macro.
  */
  291. #define AARR_NDIM(a) \
  292. (VARATT_IS_EXPANDED_HEADER(a) ? \
  293. (a)->xpn.ndims : ARR_NDIM((ArrayType *) (a)))
  /*
  * For an expanded array: if the Datum arrays exist (dvalues != NULL) the
  * answer is whether a dnulls array is allocated; otherwise the embedded
  * flat array xpn.fvalue is consulted.
  */
  294. #define AARR_HASNULL(a) \
  295. (VARATT_IS_EXPANDED_HEADER(a) ? \
  296. ((a)->xpn.dvalues != NULL ? (a)->xpn.dnulls != NULL : ARR_HASNULL((a)->xpn.fvalue)) : \
  297. ARR_HASNULL((ArrayType *) (a)))
  298. #define AARR_ELEMTYPE(a) \
  299. (VARATT_IS_EXPANDED_HEADER(a) ? \
  300. (a)->xpn.element_type : ARR_ELEMTYPE((ArrayType *) (a)))
  /* AARR_DIMS and AARR_LBOUND both yield int * in either branch. */
  301. #define AARR_DIMS(a) \
  302. (VARATT_IS_EXPANDED_HEADER(a) ? \
  303. (a)->xpn.dims : ARR_DIMS((ArrayType *) (a)))
  304. #define AARR_LBOUND(a) \
  305. (VARATT_IS_EXPANDED_HEADER(a) ? \
  306. (a)->xpn.lbound : ARR_LBOUND((ArrayType *) (a)))
  307. /*
  308. * GUC parameter
  309. */
  310. extern PGDLLIMPORT bool Array_nulls; /* declaration only; NOTE(review): which setting it backs is not visible in this header -- confirm */
  311. /*
  312. * prototypes for functions defined in arrayfuncs.c
  313. */
  /*
  * NOTE(review): behavior is defined in arrayfuncs.c and not visible here.
  * The parameter names suggest it stores nitems entries of values[]/nulls[]
  * into "array" using the given element type properties -- confirm,
  * including what "freedata" releases.
  */
  314. extern void CopyArrayEls(ArrayType *array,
  315. Datum *values,
  316. bool *nulls,
  317. int nitems,
  318. int typlen,
  319. bool typbyval,
  320. char typalign,
  321. bool freedata);
  /*
  * Element and slice fetch/store entry points. The first four take and
  * return the array as a Datum; array_ref() and array_set() take an
  * ArrayType pointer instead.
  * NOTE(review): arraytyplen presumably is the typlen of the array type
  * itself, and elmlen/elmbyval/elmalign the element type's
  * typlen/typbyval/typalign -- confirm in arrayfuncs.c.
  */
  322. extern Datum array_get_element(Datum arraydatum, int nSubscripts, int *indx,
  323. int arraytyplen, int elmlen, bool elmbyval, char elmalign,
  324. bool *isNull);
  325. extern Datum array_set_element(Datum arraydatum, int nSubscripts, int *indx,
  326. Datum dataValue, bool isNull,
  327. int arraytyplen, int elmlen, bool elmbyval, char elmalign);
  328. extern Datum array_get_slice(Datum arraydatum, int nSubscripts,
  329. int *upperIndx, int *lowerIndx,
  330. bool *upperProvided, bool *lowerProvided,
  331. int arraytyplen, int elmlen, bool elmbyval, char elmalign);
  332. extern Datum array_set_slice(Datum arraydatum, int nSubscripts,
  333. int *upperIndx, int *lowerIndx,
  334. bool *upperProvided, bool *lowerProvided,
  335. Datum srcArrayDatum, bool isNull,
  336. int arraytyplen, int elmlen, bool elmbyval, char elmalign);
  337. extern Datum array_ref(ArrayType *array, int nSubscripts, int *indx,
  338. int arraytyplen, int elmlen, bool elmbyval, char elmalign,
  339. bool *isNull);
  340. extern ArrayType *array_set(ArrayType *array, int nSubscripts, int *indx,
  341. Datum dataValue, bool isNull,
  342. int arraytyplen, int elmlen, bool elmbyval, char elmalign);
  /*
  * array_map: amstate is the private ArrayMapState that the caller must
  * provide; exprstate/econtext use the struct tags forward-declared near
  * the top of this header.
  */
  343. extern Datum array_map(Datum arrayd,
  344. struct ExprState *exprstate, struct ExprContext *econtext,
  345. Oid retType, ArrayMapState *amstate);
  /* array_bitmap_copy: the source bitmap is const-qualified, so it is read-only to the callee. */
  346. extern void array_bitmap_copy(bits8 *destbitmap, int destoffset,
  347. const bits8 *srcbitmap, int srcoffset,
  348. int nitems);
  /*
  * Builders that return a flat ArrayType (or, for
  * construct_empty_expanded_array, an ExpandedArrayHeader), plus
  * deconstruct_array, which reports its results through the
  * elemsp/nullsp/nelemsp output pointers.
  * NOTE(review): elmlen/elmbyval/elmalign presumably describe the element
  * type elmtype -- confirm in arrayfuncs.c.
  */
  349. extern ArrayType *construct_array(Datum *elems, int nelems,
  350. Oid elmtype,
  351. int elmlen, bool elmbyval, char elmalign);
  352. extern ArrayType *construct_md_array(Datum *elems,
  353. bool *nulls,
  354. int ndims,
  355. int *dims,
  356. int *lbs,
  357. Oid elmtype, int elmlen, bool elmbyval, char elmalign);
  358. extern ArrayType *construct_empty_array(Oid elmtype);
  359. extern ExpandedArrayHeader *construct_empty_expanded_array(Oid element_type,
  360. MemoryContext parentcontext,
  361. ArrayMetaState *metacache);
  362. extern void deconstruct_array(ArrayType *array,
  363. Oid elmtype,
  364. int elmlen, bool elmbyval, char elmalign,
  365. Datum **elemsp, bool **nullsp, int *nelemsp);
  366. extern bool array_contains_nulls(ArrayType *array);
  /* Scalar-input accumulation; working state is ArrayBuildState (inputs must be legal array elements). */
  367. extern ArrayBuildState *initArrayResult(Oid element_type,
  368. MemoryContext rcontext, bool subcontext);
  369. extern ArrayBuildState *accumArrayResult(ArrayBuildState *astate,
  370. Datum dvalue, bool disnull,
  371. Oid element_type,
  372. MemoryContext rcontext);
  373. extern Datum makeArrayResult(ArrayBuildState *astate,
  374. MemoryContext rcontext);
  375. extern Datum makeMdArrayResult(ArrayBuildState *astate, int ndims,
  376. int *dims, int *lbs, MemoryContext rcontext, bool release);
  /* Array-input accumulation; working state is ArrayBuildStateArr (inputs must be arrays). */
  377. extern ArrayBuildStateArr *initArrayResultArr(Oid array_type, Oid element_type,
  378. MemoryContext rcontext, bool subcontext);
  379. extern ArrayBuildStateArr *accumArrayResultArr(ArrayBuildStateArr *astate,
  380. Datum dvalue, bool disnull,
  381. Oid array_type,
  382. MemoryContext rcontext);
  383. extern Datum makeArrayResultArr(ArrayBuildStateArr *astate,
  384. MemoryContext rcontext, bool release);
  /* Variants that handle both cases; working state is ArrayBuildStateAny. */
  385. extern ArrayBuildStateAny *initArrayResultAny(Oid input_type,
  386. MemoryContext rcontext, bool subcontext);
  387. extern ArrayBuildStateAny *accumArrayResultAny(ArrayBuildStateAny *astate,
  388. Datum dvalue, bool disnull,
  389. Oid input_type,
  390. MemoryContext rcontext);
  391. extern Datum makeArrayResultAny(ArrayBuildStateAny *astate,
  392. MemoryContext rcontext, bool release);
  /*
  * Iteration API built around the opaque ArrayIterator handle.
  * NOTE(review): presumably array_iterate() returns false once the array
  * is exhausted, and slice_ndim selects element-wise versus slice-wise
  * iteration -- confirm in arrayfuncs.c.
  */
  393. extern ArrayIterator array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate);
  394. extern bool array_iterate(ArrayIterator iterator, Datum *value, bool *isnull);
  395. extern void array_free_iterator(ArrayIterator iterator);
  396. /*
  397. * prototypes for functions defined in arrayutils.c
  398. */
  /*
  * Subscript and dimension arithmetic helpers. Pointer parameters declared
  * const are not written through by the callee; the non-const int pointers
  * (span, prod, dist, curr, n of ArrayGetIntegerTypmods) are presumably
  * outputs or updated in place -- NOTE(review): confirm in arrayutils.c.
  */
  399. extern int ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx);
  400. extern int ArrayGetOffset0(int n, const int *tup, const int *scale);
  401. extern int ArrayGetNItems(int ndim, const int *dims);
  402. extern void ArrayCheckBounds(int ndim, const int *dims, const int *lb);
  403. extern void mda_get_range(int n, int *span, const int *st, const int *endp);
  404. extern void mda_get_prod(int n, const int *range, int *prod);
  405. extern void mda_get_offset_values(int n, int *dist, const int *prod, const int *span);
  406. extern int mda_next_tuple(int n, int *curr, const int *span);
  407. extern int32 *ArrayGetIntegerTypmods(ArrayType *arr, int *n);
  408. /*
  409. * prototypes for functions defined in array_expanded.c
  410. */
  /*
  * Expanded-array entry points. The three DatumGet* functions are the
  * targets of the PG_GETARG_EXPANDED_ARRAY, PG_GETARG_EXPANDED_ARRAYX and
  * PG_GETARG_ANY_ARRAY_P macros defined earlier in this header; metacache
  * is an ArrayMetaState supplied by the caller.
  */
  411. extern Datum expand_array(Datum arraydatum, MemoryContext parentcontext,
  412. ArrayMetaState *metacache);
  413. extern ExpandedArrayHeader *DatumGetExpandedArray(Datum d);
  414. extern ExpandedArrayHeader *DatumGetExpandedArrayX(Datum d,
  415. ArrayMetaState *metacache);
  416. extern AnyArrayType *DatumGetAnyArrayP(Datum d);
  /* NOTE(review): presumably fills eah->dvalues/dnulls from the flat form -- confirm in array_expanded.c */
  417. extern void deconstruct_expanded_array(ExpandedArrayHeader *eah);
  418. #endif /* ARRAY_H */