bufpage.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. /*-------------------------------------------------------------------------
  2. *
  3. * bufpage.h
  4. * Standard POSTGRES buffer page definitions.
  5. *
  6. *
  7. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  8. * Portions Copyright (c) 1994, Regents of the University of California
  9. *
  10. * src/include/storage/bufpage.h
  11. *
  12. *-------------------------------------------------------------------------
  13. */
  14. #ifndef BUFPAGE_H
  15. #define BUFPAGE_H
  16. #include "access/xlogdefs.h"
  17. #include "storage/block.h"
  18. #include "storage/item.h"
  19. #include "storage/off.h"
  20. /*
  21. * A postgres disk page is an abstraction layered on top of a postgres
  22. * disk block (which is simply a unit of i/o, see block.h).
  23. *
  24. * specifically, while a disk block can be unformatted, a postgres
  25. * disk page is always a slotted page of the form:
  26. *
   * +----------------+---------------------------------+
   * | PageHeaderData | linp1 linp2 linp3 ...           |
   * +-----------+----+---------------------------------+
   * | ... linpN |                                      |
   * +-----------+--------------------------------------+
   * |           ^ pd_lower                             |
   * |                                                  |
   * |             v pd_upper                           |
   * +-------------+------------------------------------+
   * |             | tupleN ...                         |
   * +-------------+------------------+-----------------+
   * |       ... tuple3 tuple2 tuple1 | "special space" |
   * +--------------------------------+-----------------+
   *                                  ^ pd_special
  41. *
  42. * a page is full when nothing can be added between pd_lower and
  43. * pd_upper.
  44. *
  45. * all blocks written out by an access method must be disk pages.
  46. *
  47. * EXCEPTIONS:
  48. *
  49. * obviously, a page is not formatted before it is initialized by
  50. * a call to PageInit.
  51. *
  52. * NOTES:
  53. *
  54. * linp1..N form an ItemId (line pointer) array. ItemPointers point
  55. * to a physical block number and a logical offset (line pointer
  56. * number) within that block/page. Note that OffsetNumbers
  57. * conventionally start at 1, not 0.
  58. *
  59. * tuple1..N are added "backwards" on the page. Since an ItemPointer
  60. * offset is used to access an ItemId entry rather than an actual
  61. * byte-offset position, tuples can be physically shuffled on a page
  62. * whenever the need arises. This indirection also keeps crash recovery
  63. * relatively simple, because the low-level details of page space
  64. * management can be controlled by standard buffer page code during
  65. * logging, and during recovery.
  66. *
  67. * AM-generic per-page information is kept in PageHeaderData.
  68. *
  69. * AM-specific per-page data (if any) is kept in the area marked "special
  70. * space"; each AM has an "opaque" structure defined somewhere that is
  71. * stored as the page trailer. an access method should always
  72. * initialize its pages with PageInit and then set its own opaque
  73. * fields.
  74. */
  75. typedef Pointer Page;
  76. /*
  77. * location (byte offset) within a page.
  78. *
  79. * note that this is actually limited to 2^15 because we have limited
  80. * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
  81. */
  82. typedef uint16 LocationIndex;
  83. /*
  84. * For historical reasons, the 64-bit LSN value is stored as two 32-bit
  85. * values.
  86. */
  87. typedef struct
  88. {
  89. uint32 xlogid; /* high bits */
  90. uint32 xrecoff; /* low bits */
  91. } PageXLogRecPtr;
  /*
   * PageXLogRecPtrGet
   *      Combine the two 32-bit halves of a PageXLogRecPtr into one 64-bit
   *      value, with xlogid supplying the high bits.
   */
  #define PageXLogRecPtrGet(val) \
      ((((uint64) (val).xlogid) << 32) | (val).xrecoff)

  /*
   * PageXLogRecPtrSet
   *      Store a 64-bit LSN into the two halves of a PageXLogRecPtr.
   *
   * "ptr" must be an lvalue.  Both arguments are evaluated twice.
   */
  #define PageXLogRecPtrSet(ptr, lsn) \
      ( \
          (ptr).xlogid = (uint32) ((lsn) >> 32), \
          (ptr).xrecoff = (uint32) (lsn) \
      )
  96. /*
  97. * disk page organization
  98. *
  99. * space management information generic to any page
  100. *
  101. * pd_lsn - identifies xlog record for last change to this page.
  102. * pd_checksum - page checksum, if set.
  103. * pd_flags - flag bits.
  104. * pd_lower - offset to start of free space.
  105. * pd_upper - offset to end of free space.
  106. * pd_special - offset to start of special space.
  107. * pd_pagesize_version - size in bytes and page layout version number.
  108. * pd_prune_xid - oldest XID among potentially prunable tuples on page.
  109. *
  110. * The LSN is used by the buffer manager to enforce the basic rule of WAL:
  111. * "thou shalt write xlog before data". A dirty buffer cannot be dumped
  112. * to disk until xlog has been flushed at least as far as the page's LSN.
  113. *
  114. * pd_checksum stores the page checksum, if it has been set for this page;
  115. * zero is a valid value for a checksum. If a checksum is not in use then
  116. * we leave the field unset. This will typically mean the field is zero
  117. * though non-zero values may also be present if databases have been
  118. * pg_upgraded from releases prior to 9.3, when the same byte offset was
  119. * used to store the current timelineid when the page was last updated.
  120. * Note that there is no indication on a page as to whether the checksum
  121. * is valid or not, a deliberate design choice which avoids the problem
  122. * of relying on the page contents to decide whether to verify it. Hence
  123. * there are no flag bits relating to checksums.
  124. *
  125. * pd_prune_xid is a hint field that helps determine whether pruning will be
  126. * useful. It is currently unused in index pages.
  127. *
  128. * The page version number and page size are packed together into a single
  129. * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
  130. * there was no concept of a page version number, and doing it this way
  131. * lets us pretend that pre-7.3 databases have page version number zero.
  132. * We constrain page sizes to be multiples of 256, leaving the low eight
  133. * bits available for a version number.
  134. *
  135. * Minimum possible page size is perhaps 64B to fit page header, opaque space
  136. * and a minimal tuple; of course, in reality you want it much bigger, so
  137. * the constraint on pagesize mod 256 is not an important restriction.
  138. * On the high end, we can only support pages up to 32KB because lp_off/lp_len
  139. * are 15 bits.
  140. */
  141. typedef struct PageHeaderData
  142. {
  143. /* XXX LSN is member of *any* block, not only page-organized ones */
  144. PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
  145. * record for last change to this page */
  146. uint16 pd_checksum; /* checksum */
  147. uint16 pd_flags; /* flag bits, see below */
  148. LocationIndex pd_lower; /* offset to start of free space */
  149. LocationIndex pd_upper; /* offset to end of free space */
  150. LocationIndex pd_special; /* offset to start of special space */
  151. uint16 pd_pagesize_version;
  152. TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
  153. ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
  154. } PageHeaderData;
  155. typedef PageHeaderData *PageHeader;
  /*
   * Flag bits kept in pd_flags.  Bits that are not defined here are
   * initialized to zero and may be given a meaning in the future.
   *
   * PD_HAS_FREE_LINES: there are LP_UNUSED line pointers before pd_lower.
   * This should be considered a hint rather than the truth, since changes
   * to it are not WAL-logged.
   *
   * PD_PAGE_FULL: an UPDATE didn't find enough free space in the page for
   * its new tuple version, which suggests that a prune is needed.  Again,
   * this is just a hint.
   *
   * PD_ALL_VISIBLE: all tuples on the page are visible to everyone.
   */
  #define PD_HAS_FREE_LINES   (1 << 0)
  #define PD_PAGE_FULL        (1 << 1)
  #define PD_ALL_VISIBLE      (1 << 2)

  /* OR of all valid pd_flags bits */
  #define PD_VALID_FLAG_BITS \
      (PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
  /*
   * History of the page layout version number:
   *
   *  0 - pre-7.3 Postgres releases.
   *  1 - releases 7.3 and 7.4; denotes a new HeapTupleHeader layout.
   *  2 - release 8.0; changed the HeapTupleHeader layout again.
   *  3 - release 8.1; redefined HeapTupleHeader infomask bits.
   *  4 - release 8.3; changed the HeapTupleHeader layout again, added the
   *      pd_flags field (by stealing some bits from pd_tli), and added the
   *      pd_prune_xid field (which enlarges the header).
   *
   * As of release 9.3, the checksum version must also be considered when
   * handling pages.
   */
  #define PG_PAGE_LAYOUT_VERSION      4
  #define PG_DATA_CHECKSUM_VERSION    1
  187. /* ----------------------------------------------------------------
  188. * page support macros
  189. * ----------------------------------------------------------------
  190. */
  /*
   * PageIsValid
   *      True iff "page" is a valid pointer, as judged by PointerIsValid().
   */
  #define PageIsValid(page) \
      PointerIsValid(page)
  /*
   * SizeOfPageHeaderData
   *      Size of the fixed part of the page header.  It is measured up to
   *      pd_linp, so line pointer(s) do not count as part of the header.
   */
  #define SizeOfPageHeaderData \
      (offsetof(PageHeaderData, pd_linp))
  /*
   * PageIsEmpty
   *      True iff no itemid has been allocated on the page, i.e. pd_lower
   *      does not extend past the fixed page header.
   */
  #define PageIsEmpty(page) \
      (SizeOfPageHeaderData >= ((PageHeader) (page))->pd_lower)
  /*
   * PageIsNew
   *      True iff the page has not been initialized (by PageInit); this is
   *      detected by pd_upper still being zero.
   */
  #define PageIsNew(page) \
      (((PageHeader) (page))->pd_upper == 0)
  /*
   * PageGetItemId
   *      Returns the item identifier (line pointer) of a page for the given
   *      offset number.  Offset numbers are 1-based, hence the "- 1" when
   *      indexing pd_linp.
   */
  #define PageGetItemId(page, offsetNumber) \
      ((ItemId) (((PageHeader) (page))->pd_linp + ((offsetNumber) - 1)))
  /*
   * PageGetContents
   *      Returns a pointer to the page contents; for use in cases where the
   *      page does not contain line pointers.
   *
   * Note: the result is MAXALIGN'd.  Prior to 8.3 that was not guaranteed,
   * so beware of old code that assumes the contents start at
   * SizeOfPageHeaderData rather than at MAXALIGN(SizeOfPageHeaderData).
   */
  #define PageGetContents(page) \
      (((char *) (page)) + MAXALIGN(SizeOfPageHeaderData))
  227. /* ----------------
  228. * macros to access page size info
  229. * ----------------
  230. */
  /*
   * PageSizeIsValid
   *      True iff the page size is valid, which means equal to BLCKSZ.
   */
  #define PageSizeIsValid(pageSize) \
      (BLCKSZ == (pageSize))
  /*
   * PageGetPageSize
   *      Returns the page size of a page, taken from the high byte of
   *      pd_pagesize_version.
   *
   * This can only be called on a formatted page (unlike BufferGetPageSize,
   * which can be called on an unformatted page).  However, it can be called
   * on a page that is not stored in a buffer.
   */
  #define PageGetPageSize(page) \
      ((Size) \
       (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))
  /*
   * PageGetPageLayoutVersion
   *      Returns the page layout version of a page, taken from the low byte
   *      of pd_pagesize_version.
   */
  #define PageGetPageLayoutVersion(page) \
      ((((PageHeader) (page))->pd_pagesize_version) & 0x00FF)
  /*
   * PageSetPageSizeAndVersion
   *      Sets the page size and page layout version number of a page.
   *
   * The size must fit entirely in the high byte and the version entirely in
   * the low byte of the 16-bit field; both conditions are asserted.
   *
   * We could support setting these two values separately, but there's no
   * real need for it at the moment.
   */
  #define PageSetPageSizeAndVersion(page, size, version) \
      ( \
          AssertMacro(((size) & 0xFF00) == (size)), \
          AssertMacro(((version) & 0x00FF) == (version)), \
          ((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
      )
  265. /* ----------------
  266. * page special data macros
  267. * ----------------
  268. */
  /*
   * PageGetSpecialSize
   *      Returns size of special space on a page: the distance from
   *      pd_special to the end of the page.
   */
  #define PageGetSpecialSize(page) \
      ((uint16) \
       (PageGetPageSize(page) - ((PageHeader) (page))->pd_special))
  275. /*
  276. * Using assertions, validate that the page special pointer is OK.
  277. *
  278. * This is intended to catch use of the pointer before page initialization.
  279. * It is implemented as a function due to the limitations of the MSVC
  280. * compiler, which choked on doing all these tests within another macro. We
  281. * return true so that AssertMacro() can be used while still getting the
  282. * specifics from the macro failure within this function.
  283. */
  284. static inline bool
  285. PageValidateSpecialPointer(Page page)
  286. {
  287. Assert(PageIsValid(page));
  288. Assert(((PageHeader) (page))->pd_special <= BLCKSZ);
  289. Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData);
  290. return true;
  291. }
  /*
   * PageGetSpecialPointer
   *      Returns pointer to special space on a page, i.e. the page address
   *      advanced by pd_special bytes.  The special pointer is validated
   *      with assertions first.
   */
  #define PageGetSpecialPointer(page) \
      ( \
          AssertMacro(PageValidateSpecialPointer(page)), \
          (((char *) (page)) + ((PageHeader) (page))->pd_special) \
      )
  /*
   * PageGetItem
   *      Retrieves an item on the given page: the page address advanced by
   *      the offset recorded in "itemId".
   *
   * Asserts that the page is valid and that the item identifier has
   * storage.
   *
   * Note:
   *      This does not change the status of any of the resources passed.
   *      The semantics may change in the future.
   */
  #define PageGetItem(page, itemId) \
      ( \
          AssertMacro(PageIsValid(page)), \
          AssertMacro(ItemIdHasStorage(itemId)), \
          (Item) (((char *) (page)) + ItemIdGetOffset(itemId)) \
      )
  /*
   * PageGetMaxOffsetNumber
   *      Returns the maximum offset number used by the given page.  Since
   *      offset numbers are 1-based, this is also the number of items on
   *      the page.
   *
   * NOTE: if the page is not initialized (pd_lower == 0), we must return
   * zero to ensure sane behavior.  Guaranteeing that requires testing
   * pd_lower before subtracting, so double evaluation of the argument is
   * accepted.
   */
  #define PageGetMaxOffsetNumber(page) \
      (((PageHeader) (page))->pd_lower > SizeOfPageHeaderData ? \
       ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
        / sizeof(ItemIdData)) : \
       0)
  329. /*
  330. * Additional macros for access to page headers. (Beware multiple evaluation
  331. * of the arguments!)
  332. */
  /* Read the page's pd_lsn as a single 64-bit value. */
  #define PageGetLSN(page) \
      PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)

  /* Store "lsn" into the page's pd_lsn. */
  #define PageSetLSN(page, lsn) \
      PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)
  /* Test, set and clear the PD_HAS_FREE_LINES bit in pd_flags. */
  #define PageHasFreeLinePointers(page) \
      ((((PageHeader) (page))->pd_flags) & PD_HAS_FREE_LINES)
  #define PageSetHasFreeLinePointers(page) \
      ((((PageHeader) (page))->pd_flags) |= PD_HAS_FREE_LINES)
  #define PageClearHasFreeLinePointers(page) \
      ((((PageHeader) (page))->pd_flags) &= ~PD_HAS_FREE_LINES)
  /* Test, set and clear the PD_PAGE_FULL bit in pd_flags. */
  #define PageIsFull(page) \
      ((((PageHeader) (page))->pd_flags) & PD_PAGE_FULL)
  #define PageSetFull(page) \
      ((((PageHeader) (page))->pd_flags) |= PD_PAGE_FULL)
  #define PageClearFull(page) \
      ((((PageHeader) (page))->pd_flags) &= ~PD_PAGE_FULL)
  /* Test, set and clear the PD_ALL_VISIBLE bit in pd_flags. */
  #define PageIsAllVisible(page) \
      ((((PageHeader) (page))->pd_flags) & PD_ALL_VISIBLE)
  #define PageSetAllVisible(page) \
      ((((PageHeader) (page))->pd_flags) |= PD_ALL_VISIBLE)
  #define PageClearAllVisible(page) \
      ((((PageHeader) (page))->pd_flags) &= ~PD_ALL_VISIBLE)
  /*
   * PageSetPrunable
   *      Record "xid" as the page's prune hint if it is older than the one
   *      already stored.
   *
   * pd_prune_xid is replaced when it is currently invalid or when "xid"
   * precedes it (per TransactionIdPrecedes); otherwise the page is left
   * untouched.  "xid" must be a normal transaction ID (asserted).
   *
   * Both arguments are captured in locals so that each is evaluated exactly
   * once; passing an expression with side effects is therefore safe.
   */
  #define PageSetPrunable(page, xid) \
  do { \
      PageHeader      prunable_hdr_ = (PageHeader) (page); \
      TransactionId   prunable_xid_ = (xid); \
      Assert(TransactionIdIsNormal(prunable_xid_)); \
      if (!TransactionIdIsValid(prunable_hdr_->pd_prune_xid) || \
          TransactionIdPrecedes(prunable_xid_, prunable_hdr_->pd_prune_xid)) \
          prunable_hdr_->pd_prune_xid = prunable_xid_; \
  } while (0)
  /* Reset the page's prune hint by storing InvalidTransactionId. */
  #define PageClearPrunable(page) \
      ((((PageHeader) (page))->pd_prune_xid) = InvalidTransactionId)
  364. /* ----------------------------------------------------------------
  365. * extern declarations
  366. * ----------------------------------------------------------------
  367. */
  /*
   * Bit flags accepted by PageAddItemExtended()
   */
  #define PAI_OVERWRITE       (1 << 0)
  #define PAI_IS_HEAP         (1 << 1)

  /*
   * Bit flags accepted by PageIsVerifiedExtended()
   */
  #define PIV_LOG_WARNING     (1 << 0)
  #define PIV_REPORT_STAT     (1 << 1)
  /*
   * PageAddItem
   *      Wrapper around PageAddItemExtended() that takes two booleans
   *      instead of a flags word: "overwrite" maps to PAI_OVERWRITE and
   *      "is_heap" maps to PAI_IS_HEAP.
   */
  #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
      PageAddItemExtended(page, item, size, offsetNumber, \
                          ((overwrite) ? PAI_OVERWRITE : 0) | \
                          ((is_heap) ? PAI_IS_HEAP : 0))

  /*
   * PageIsVerified
   *      Wrapper around PageIsVerifiedExtended() that passes both
   *      PIV_LOG_WARNING and PIV_REPORT_STAT.
   */
  #define PageIsVerified(page, blkno) \
      PageIsVerifiedExtended(page, blkno, \
                             PIV_LOG_WARNING | PIV_REPORT_STAT)
  381. /*
  382. * Check that BLCKSZ is a multiple of sizeof(size_t). In
  383. * PageIsVerifiedExtended(), it is much faster to check if a page is
  384. * full of zeroes using the native word size. Note that this assertion
  385. * is kept within a header to make sure that StaticAssertDecl() works
  386. * across various combinations of platforms and compilers.
  387. */
  388. StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
  389. "BLCKSZ has to be a multiple of sizeof(size_t)");
  390. extern void PageInit(Page page, Size pageSize, Size specialSize);
  391. extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
  392. extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
  393. OffsetNumber offsetNumber, int flags);
  394. extern Page PageGetTempPage(Page page);
  395. extern Page PageGetTempPageCopy(Page page);
  396. extern Page PageGetTempPageCopySpecial(Page page);
  397. extern void PageRestoreTempPage(Page tempPage, Page oldPage);
  398. extern void PageRepairFragmentation(Page page);
  399. extern void PageTruncateLinePointerArray(Page page);
  400. extern Size PageGetFreeSpace(Page page);
  401. extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
  402. extern Size PageGetExactFreeSpace(Page page);
  403. extern Size PageGetHeapFreeSpace(Page page);
  404. extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
  405. extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
  406. extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset);
  407. extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
  408. Item newtup, Size newsize);
  409. extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
  410. extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
  411. #endif /* BUFPAGE_H */