2
0

jsonb.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. /*-------------------------------------------------------------------------
  2. *
  3. * jsonb.h
  4. * Declarations for jsonb data type support.
  5. *
  6. * Copyright (c) 1996-2022, PostgreSQL Global Development Group
  7. *
  8. * src/include/utils/jsonb.h
  9. *
  10. *-------------------------------------------------------------------------
  11. */
  12. #ifndef __JSONB_H__
  13. #define __JSONB_H__
  14. #include "lib/stringinfo.h"
  15. #include "utils/array.h"
  16. #include "utils/numeric.h"
  17. /* Tokens used when sequentially processing a jsonb value */
  18. typedef enum
  19. {
  20. WJB_DONE,
  21. WJB_KEY,
  22. WJB_VALUE,
  23. WJB_ELEM,
  24. WJB_BEGIN_ARRAY,
  25. WJB_END_ARRAY,
  26. WJB_BEGIN_OBJECT,
  27. WJB_END_OBJECT
  28. } JsonbIteratorToken;
/*
 * Strategy numbers for GIN index opclasses.
 *
 * One constant per kind of search, as named: containment, key existence,
 * existence of any / all of a set of keys, jsonpath existence and jsonpath
 * predicate.  The numbers are deliberately not contiguous.
 * NOTE(review): presumably these must stay in step with the opclass
 * definitions in the system catalogs -- confirm before changing any value.
 */
#define JsonbContainsStrategyNumber 7
#define JsonbExistsStrategyNumber 9
#define JsonbExistsAnyStrategyNumber 10
#define JsonbExistsAllStrategyNumber 11
#define JsonbJsonpathExistsStrategyNumber 15
#define JsonbJsonpathPredicateStrategyNumber 16
  36. /*
  37. * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
  38. * keys and values. The storage format is text. The first byte of the text
  39. * string distinguishes whether this is a key (always a string), null value,
  40. * boolean value, numeric value, or string value. However, array elements
  41. * that are strings are marked as though they were keys; this imprecision
  42. * supports the definition of the "exists" operator, which treats array
  43. * elements like keys. The remainder of the text string is empty for a null
  44. * value, "t" or "f" for a boolean value, a normalized print representation of
  45. * a numeric value, or the text of a string value. However, if the length of
  46. * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
  47. * the text representation and store an 8-hex-digit representation of the
  48. * uint32 hash value, marking the prefix byte with an additional bit to
  49. * distinguish that this has happened. Hashing long strings saves space and
  50. * ensures that we won't overrun the maximum entry length for a GIN index.
  51. * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
  52. * to ensure that the on-disk text datum will have a short varlena header.)
  53. * Note that when any hashed item appears in a query, we must recheck index
  54. * matches against the heap tuple; currently, this costs nothing because we
  55. * must always recheck for other reasons.
  56. */
/*
 * Values for the prefix byte of a jsonb_ops GIN entry.  The kind codes use
 * the values 0x01 .. 0x05; JGINFLAG_HASHED is a separate bit (0x10), so it
 * can be OR'd onto any kind code without colliding with another code.
 */
#define JGINFLAG_KEY 0x01 /* key (or string array element) */
#define JGINFLAG_NULL 0x02 /* null value */
#define JGINFLAG_BOOL 0x03 /* boolean value */
#define JGINFLAG_NUM 0x04 /* numeric value */
#define JGINFLAG_STR 0x05 /* string value (if not an array element) */
#define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */
#define JGIN_MAXLENGTH 125 /* max length of text part before hashing */
/*
 * Convenience macros for moving jsonb values to and from Datums.
 *
 * DatumGetJsonbP and DatumGetJsonbPCopy expand to PG_DETOAST_DATUM and
 * PG_DETOAST_DATUM_COPY respectively, casting the result to Jsonb *.
 * JsonbPGetDatum is PointerGetDatum.  The PG_GETARG_ variants apply the
 * DatumGet macros to PG_GETARG_DATUM(x), and PG_RETURN_JSONB_P is
 * PG_RETURN_POINTER.
 */
#define DatumGetJsonbP(d) ((Jsonb *) PG_DETOAST_DATUM(d))
#define DatumGetJsonbPCopy(d) ((Jsonb *) PG_DETOAST_DATUM_COPY(d))
#define JsonbPGetDatum(p) PointerGetDatum(p)
#define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x))
#define PG_GETARG_JSONB_P_COPY(x) DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
#define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x)
/*
 * Forward declarations.  The two structs refer to each other: a JsonbValue
 * of object kind holds a JsonbPair pointer, and a JsonbPair embeds two
 * JsonbValues, so both names must exist before either body is defined.
 */
typedef struct JsonbPair JsonbPair;
typedef struct JsonbValue JsonbValue;
  73. /*
  74. * Jsonbs are varlena objects, so must meet the varlena convention that the
  75. * first int32 of the object contains the total object size in bytes. Be sure
  76. * to use VARSIZE() and SET_VARSIZE() to access it, though!
  77. *
  78. * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
  79. * representation. Often, JsonbValues are just shims through which a Jsonb
  80. * buffer is accessed, but they can also be deep copied and passed around.
  81. *
  82. * Jsonb is a tree structure. Each node in the tree consists of a JEntry
  83. * header and a variable-length content (possibly of zero size). The JEntry
  84. * header indicates what kind of a node it is, e.g. a string or an array,
  85. * and provides the length of its variable-length portion.
  86. *
  87. * The JEntry and the content of a node are not stored physically together.
  88. * Instead, the container array or object has an array that holds the JEntrys
  89. * of all the child nodes, followed by their variable-length portions.
  90. *
  91. * The root node is an exception; it has no parent array or object that could
  92. * hold its JEntry. Hence, no JEntry header is stored for the root node. It
  93. * is implicitly known that the root node must be an array or an object,
  94. * so we can get away without the type indicator as long as we can distinguish
  95. * the two. For that purpose, both an array and an object begin with a uint32
  96. * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
  97. * scalar value needs to be stored as a Jsonb value, what we actually store is
  98. * an array with one element, with the flags in the array's header field set
  99. * to JB_FSCALAR | JB_FARRAY.
  100. *
  101. * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
  102. * the variable-length portion of some node types is aligned to a 4-byte
  103. * boundary, while others are not. When alignment is needed, the padding is
  104. * in the beginning of the node that requires it. For example, if a numeric
  105. * node is stored after a string node, so that the numeric node begins at
  106. * offset 3, the variable-length portion of the numeric node will begin with
  107. * one padding byte so that the actual numeric data is 4-byte aligned.
  108. */
  109. /*
  110. * JEntry format.
  111. *
  112. * The least significant 28 bits store either the data length of the entry,
  113. * or its end+1 offset from the start of the variable-length portion of the
  114. * containing object. The next three bits store the type of the entry, and
  115. * the high-order bit tells whether the least significant bits store a length
  116. * or an offset.
  117. *
  118. * The reason for the offset-or-length complication is to compromise between
  119. * access speed and data compressibility. In the initial design each JEntry
  120. * always stored an offset, but this resulted in JEntry arrays with horrible
  121. * compressibility properties, so that TOAST compression of a JSONB did not
  122. * work well. Storing only lengths would greatly improve compressibility,
  123. * but it makes random access into large arrays expensive (O(N) not O(1)).
  124. * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
  125. * a length in the rest. This results in reasonably compressible data (as
  126. * long as the stride isn't too small). We may have to examine as many as
  127. * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
  128. * given item, but that's still O(1) no matter how large the container is.
  129. *
  130. * We could avoid eating a flag bit for this purpose if we were to store
  131. * the stride in the container header, or if we were willing to treat the
  132. * stride as an unchangeable constant. Neither of those options is very
  133. * attractive though.
  134. */
/* One 32-bit word per child node, decoded with the JENTRY_* masks. */
typedef uint32 JEntry;
  136. #define JENTRY_OFFLENMASK 0x0FFFFFFF
  137. #define JENTRY_TYPEMASK 0x70000000
  138. #define JENTRY_HAS_OFF 0x80000000
  139. /* values stored in the type bits */
  140. #define JENTRY_ISSTRING 0x00000000
  141. #define JENTRY_ISNUMERIC 0x10000000
  142. #define JENTRY_ISBOOL_FALSE 0x20000000
  143. #define JENTRY_ISBOOL_TRUE 0x30000000
  144. #define JENTRY_ISNULL 0x40000000
  145. #define JENTRY_ISCONTAINER 0x50000000 /* array or object */
  146. /* Access macros. Note possible multiple evaluations */
  147. #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
  148. #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
  149. #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
  150. #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
  151. #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
  152. #define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
  153. #define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
  154. #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
  155. #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
  156. /* Macro for advancing an offset variable to the next JEntry */
  157. #define JBE_ADVANCE_OFFSET(offset, je) \
  158. do { \
  159. JEntry je_ = (je); \
  160. if (JBE_HAS_OFF(je_)) \
  161. (offset) = JBE_OFFLENFLD(je_); \
  162. else \
  163. (offset) += JBE_OFFLENFLD(je_); \
  164. } while(0)
/*
 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
 *
 * Caution: this macro should only be referenced when creating a JSONB
 * value.  When examining an existing value, pay attention to the HAS_OFF
 * bits instead.  This allows changes in the offset-placement heuristic
 * without breaking on-disk compatibility.
 */
#define JB_OFFSET_STRIDE 32 /* for writers only; readers use HAS_OFF */
  173. /*
  174. * A jsonb array or object node, within a Jsonb Datum.
  175. *
  176. * An array has one child for each element, stored in array order.
  177. *
  178. * An object has two children for each key/value pair. The keys all appear
  179. * first, in key sort order; then the values appear, in an order matching the
  180. * key order. This arrangement keeps the keys compact in memory, making a
  181. * search for a particular key more cache-friendly.
  182. */
typedef struct JsonbContainer
{
    uint32 header; /* number of elements or key/value pairs, and
                    * flags; split apart with JB_CMASK and the
                    * JB_F* flag bits */
    JEntry children[FLEXIBLE_ARRAY_MEMBER]; /* JEntrys of the child nodes */
    /* the data for each child node follows. */
} JsonbContainer;
  190. /* flags for the header-field in JsonbContainer */
  191. #define JB_CMASK 0x0FFFFFFF /* mask for count field */
  192. #define JB_FSCALAR 0x10000000 /* flag bits */
  193. #define JB_FOBJECT 0x20000000
  194. #define JB_FARRAY 0x40000000
  195. /* convenience macros for accessing a JsonbContainer struct */
  196. #define JsonContainerSize(jc) ((jc)->header & JB_CMASK)
  197. #define JsonContainerIsScalar(jc) (((jc)->header & JB_FSCALAR) != 0)
  198. #define JsonContainerIsObject(jc) (((jc)->header & JB_FOBJECT) != 0)
  199. #define JsonContainerIsArray(jc) (((jc)->header & JB_FARRAY) != 0)
/*
 * The top-level on-disk format for a jsonb datum: a varlena length word
 * followed directly by the root container.
 */
typedef struct
{
    int32 vl_len_; /* varlena header (do not touch directly!) */
    JsonbContainer root; /* root array or object; it has no JEntry */
} Jsonb;
/*
 * Convenience macros for accessing the root container in a Jsonb datum.
 *
 * Each macro reads one uint32 at VARDATA(jbp_) and applies the same
 * JB_CMASK / JB_F* masks that the JsonContainer* macros apply to a
 * container's header field.  The argument is expanded once per macro.
 * NOTE(review): this presumes VARDATA() yields the address of the root
 * container's header word -- confirm against the varlena macros.
 */
#define JB_ROOT_COUNT(jbp_) (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
#define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
#define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
#define JB_ROOT_IS_ARRAY(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
  211. enum jbvType
  212. {
  213. /* Scalar types */
  214. jbvNull = 0x0,
  215. jbvString,
  216. jbvNumeric,
  217. jbvBool,
  218. /* Composite types */
  219. jbvArray = 0x10,
  220. jbvObject,
  221. /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
  222. jbvBinary,
  223. /*
  224. * Virtual types.
  225. *
  226. * These types are used only for in-memory JSON processing and serialized
  227. * into JSON strings when outputted to json/jsonb.
  228. */
  229. jbvDatetime = 0x20,
  230. };
  231. /*
  232. * JsonbValue: In-memory representation of Jsonb. This is a convenient
  233. * deserialized representation, that can easily support using the "val"
  234. * union across underlying types during manipulation. The Jsonb on-disk
  235. * representation has various alignment considerations.
  236. */
struct JsonbValue
{
    enum jbvType type; /* Influences sort order */

    /*
     * Payload.  NOTE(review): which member is meaningful is presumably
     * selected by "type" (the member names mirror the jbvType tags) --
     * confirm against the code that fills these in.
     */
    union
    {
        Numeric numeric;
        bool boolean;
        struct
        {
            int len; /* length of val, which carries no terminator
                      * of its own */
            char *val; /* Not necessarily null-terminated */
        } string; /* String primitive type */
        struct
        {
            int nElems;
            JsonbValue *elems;
            bool rawScalar; /* Top-level "raw scalar" array? */
        } array; /* Array container type */
        struct
        {
            int nPairs; /* 1 pair, 2 elements */
            JsonbPair *pairs;
        } object; /* Associative container type */
        struct
        {
            int len;
            JsonbContainer *data; /* container in on-disk layout */
        } binary; /* Array or object, in on-disk format */
        struct
        {
            Datum value;
            Oid typid;
            int32 typmod;
            int tz; /* Numeric time zone, in seconds, for
                     * TimestampTz data type */
        } datetime; /* NOTE(review): presumably for jbvDatetime */
    } val;
};
  275. #define IsAJsonbScalar(jsonbval) (((jsonbval)->type >= jbvNull && \
  276. (jsonbval)->type <= jbvBool) || \
  277. (jsonbval)->type == jbvDatetime)
  278. /*
  279. * Key/value pair within an Object.
  280. *
  281. * This struct type is only used briefly while constructing a Jsonb; it is
  282. * *not* the on-disk representation.
  283. *
  284. * Pairs with duplicate keys are de-duplicated. We store the originally
  285. * observed pair ordering for the purpose of removing duplicates in a
  286. * well-defined way (which is "last observed wins").
  287. */
struct JsonbPair
{
    JsonbValue key; /* Must be a jbvString */
    JsonbValue value; /* May be of any type */
    uint32 order; /* Pair's index in original sequence; lets
                   * duplicate keys be resolved so that the last
                   * one observed wins */
};
/*
 * Conversion state used when parsing Jsonb from text, or for type coercion.
 *
 * States are chained through "next"; pushJsonbValue() takes the address of
 * a JsonbParseState pointer.
 */
typedef struct JsonbParseState
{
    JsonbValue contVal; /* NOTE(review): presumably the container
                         * currently being built -- confirm */
    Size size; /* NOTE(review): meaning not visible in this
                * header -- see the users of this struct */
    struct JsonbParseState *next; /* link to another parse state */
} JsonbParseState;
  301. /*
  302. * JsonbIterator holds details of the type for each iteration. It also stores a
  303. * Jsonb varlena buffer, which can be directly accessed in some contexts.
  304. */
  305. typedef enum
  306. {
  307. JBI_ARRAY_START,
  308. JBI_ARRAY_ELEM,
  309. JBI_OBJECT_START,
  310. JBI_OBJECT_KEY,
  311. JBI_OBJECT_VALUE
  312. } JsonbIterState;
/*
 * Iteration state for one container.  Iterators are linked through
 * "parent".
 */
typedef struct JsonbIterator
{
    /* Container being iterated */
    JsonbContainer *container;
    uint32 nElems; /* Number of elements in children array (will
                    * be nPairs for objects) */
    bool isScalar; /* Pseudo-array scalar value? */
    JEntry *children; /* JEntrys for child nodes */
    /* Data proper. This points to the beginning of the variable-length data */
    char *dataProper;
    /* Current item in buffer (up to nElems) */
    int curIndex;
    /* Data offset corresponding to current item */
    uint32 curDataOffset;
    /*
     * If the container is an object, we want to return keys and values
     * alternately; so curDataOffset points to the current key, and
     * curValueOffset points to the current value.
     */
    uint32 curValueOffset;
    /* Private state */
    JsonbIterState state; /* one of the JBI_* values */
    struct JsonbIterator *parent; /* NOTE(review): presumably the iterator
                                   * of the enclosing container -- confirm */
} JsonbIterator;
/*
 * Support functions.
 *
 * Only the declarations appear in this header, so the notes below are
 * limited to what the signatures show; see the definitions for the full
 * contracts.
 */

/* Per-child lookups: take a container and a child index, return a uint32. */
extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
extern uint32 getJsonbLength(const JsonbContainer *jc, int index);

/*
 * Compares two containers and reports the outcome as an int.
 * NOTE(review): presumably negative / zero / positive -- confirm.
 */
extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);

/*
 * Lookups inside a container, each yielding a JsonbValue pointer: by a key
 * JsonbValue plus flags, by key text and length (with a caller-supplied
 * result slot "res"), and by position "i".
 * NOTE(review): what is returned on a miss is not visible here.
 */
extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
                                               uint32 flags,
                                               JsonbValue *key);
extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container,
                                                const char *keyVal, int keyLen,
                                                JsonbValue *res);
extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
                                                 uint32 i);

/* Building: takes a parse-state pointer by reference, a token and a value. */
extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
                                  JsonbIteratorToken seq, JsonbValue *jbval);

/*
 * Iteration: create an iterator for a container, then step it.  Stepping
 * takes the iterator pointer by reference and returns a JsonbIteratorToken;
 * "val" is an output slot and "skipNested" a caller-chosen flag.
 */
extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
                                            bool skipNested);

/* Conversions between the on-disk Jsonb and the in-memory JsonbValue. */
extern void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val);
extern Jsonb *JsonbValueToJsonb(JsonbValue *val);

/* Containment test driven by two iterators, both passed by reference. */
extern bool JsonbDeepContains(JsonbIterator **val,
                              JsonbIterator **mContained);

/*
 * Hashing of a scalar JsonbValue into a caller-provided hash variable:
 * a uint32, or a uint64 together with a seed.
 */
extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
                                         uint64 *hash, uint64 seed);
/*
 * jsonb.c support functions.
 *
 * Only the declarations appear here; the notes describe what the
 * signatures show.
 */

/*
 * Text output of a container: both take a StringInfo, the container and a
 * length estimate, and return a char pointer.
 * NOTE(review): whether "out" may be NULL is not visible here.
 */
extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
                            int estimated_len);
extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
                                  int estimated_len);

/* Extracts a scalar from a container into "res"; the bool reports the outcome. */
extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);

/* Returns a constant string naming the type of a JsonbValue. */
extern const char *JsonbTypeName(JsonbValue *jb);

/*
 * Path-based element access on a Jsonb: "path" is an array of Datums whose
 * length is passed alongside it.  The setter takes the new value; the
 * getter reports SQL NULL through "isnull" and takes an "as_text" flag.
 */
extern Datum jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
                               JsonbValue *newval);
extern Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath,
                               bool *isnull, bool as_text);
  372. #endif /* __JSONB_H__ */