xlogrecord.h 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. /*
  2. * xlogrecord.h
  3. *
  4. * Definitions for the WAL record format.
  5. *
  6. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  7. * Portions Copyright (c) 1994, Regents of the University of California
  8. *
  9. * src/include/access/xlogrecord.h
  10. */
  11. #ifndef XLOGRECORD_H
  12. #define XLOGRECORD_H
  13. #include "access/rmgr.h"
  14. #include "access/xlogdefs.h"
  15. #include "port/pg_crc32c.h"
  16. #include "storage/block.h"
  17. #include "storage/relfilenode.h"
  18. /*
  19. * The overall layout of an XLOG record is:
  20. * Fixed-size header (XLogRecord struct)
  21. * XLogRecordBlockHeader struct
  22. * XLogRecordBlockHeader struct
  23. * ...
  24. * XLogRecordDataHeader[Short|Long] struct
  25. * block data
  26. * block data
  27. * ...
  28. * main data
  29. *
  30. * There can be zero or more XLogRecordBlockHeaders, and 0 or more bytes of
  31. * rmgr-specific data not associated with a block. XLogRecord structs
  32. * always start on MAXALIGN boundaries in the WAL files, but the rest of
  33. * the fields are not aligned.
  34. *
  35. * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
  36. * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
  37. * used to distinguish between block references, and the main data structs.
  38. */
  39. typedef struct XLogRecord
  40. {
  41. uint32 xl_tot_len; /* total len of entire record */
  42. TransactionId xl_xid; /* xact id */
  43. XLogRecPtr xl_prev; /* ptr to previous record in log */
  44. uint8 xl_info; /* flag bits, see below */
  45. RmgrId xl_rmid; /* resource manager for this record */
  46. /* 2 bytes of padding here, initialize to zero */
  47. pg_crc32c xl_crc; /* CRC for this record */
  48. /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
  49. } XLogRecord;
  50. #define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
  /*
   * Bit assignments within XLogRecord.xl_info
   *
   * The high 4 bits may be used freely by the rmgr.  Among the remaining
   * bits, XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY can be passed in
   * by the caller of XLogInsert; the rest are set internally by XLogInsert.
   */
  #define XLR_RMGR_INFO_MASK      0xF0
  #define XLR_INFO_MASK           0x0F

  /*
   * XLR_SPECIAL_REL_UPDATE
   *      Set when a WAL record modifies relation files in ways that the usual
   *      block references do not cover.  PostgreSQL itself does not use the
   *      flag for anything; it exists so that external tools that read WAL
   *      and keep track of modified blocks can recognize such special record
   *      types.
   */
  #define XLR_SPECIAL_REL_UPDATE  0x01

  /*
   * XLR_CHECK_CONSISTENCY
   *      Enforces consistency checks of replayed WAL at recovery.  When
   *      enabled, each record logs a full-page write for every block it
   *      modifies and reuses it afterwards for consistency checks.  The
   *      caller of XLogInsert can use this value if necessary, but it is set
   *      unconditionally when wal_consistency_checking is enabled for a rmgr.
   */
  #define XLR_CHECK_CONSISTENCY   0x02
  73. /*
  74. * Header info for block data appended to an XLOG record.
  75. *
  76. * 'data_length' is the length of the rmgr-specific payload data associated
  77. * with this block. It does not include the possible full page image, nor
  78. * XLogRecordBlockHeader struct itself.
  79. *
  80. * Note that we don't attempt to align the XLogRecordBlockHeader struct!
  81. * So, the struct must be copied to aligned local storage before use.
  82. */
  83. typedef struct XLogRecordBlockHeader
  84. {
  85. uint8 id; /* block reference ID */
  86. uint8 fork_flags; /* fork within the relation, and flags */
  87. uint16 data_length; /* number of payload bytes (not including page
  88. * image) */
  89. /* If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows */
  90. /* If BKPBLOCK_SAME_REL is not set, a RelFileNode follows */
  91. /* BlockNumber follows */
  92. } XLogRecordBlockHeader;
  93. #define SizeOfXLogRecordBlockHeader (offsetof(XLogRecordBlockHeader, data_length) + sizeof(uint16))
  94. /*
  95. * Additional header information when a full-page image is included
  96. * (i.e. when BKPBLOCK_HAS_IMAGE is set).
  97. *
  98. * The XLOG code is aware that PG data pages usually contain an unused "hole"
  99. * in the middle, which contains only zero bytes. Since we know that the
  100. * "hole" is all zeros, we remove it from the stored data (and it's not counted
  101. * in the XLOG record's CRC, either). Hence, the amount of block data actually
  102. * present is (BLCKSZ - <length of "hole" bytes>).
  103. *
  104. * Additionally, when wal_compression is enabled, we will try to compress full
  105. * page images using one of the supported algorithms, after removing the
  106. * "hole". This can reduce the WAL volume, but at some extra cost of CPU spent
  107. * on the compression during WAL logging. In this case, since the "hole"
  108. * length cannot be calculated by subtracting the number of page image bytes
  109. * from BLCKSZ, basically it needs to be stored as an extra information.
  110. * But when no "hole" exists, we can assume that the "hole" length is zero
  111. * and no such an extra information needs to be stored. Note that
  112. * the original version of page image is stored in WAL instead of the
  113. * compressed one if the number of bytes saved by compression is less than
  114. * the length of extra information. Hence, when a page image is successfully
  115. * compressed, the amount of block data actually present is less than
  116. * BLCKSZ - the length of "hole" bytes - the length of extra information.
  117. */
  118. typedef struct XLogRecordBlockImageHeader
  119. {
  120. uint16 length; /* number of page image bytes */
  121. uint16 hole_offset; /* number of bytes before "hole" */
  122. uint8 bimg_info; /* flag bits, see below */
  123. /*
  124. * If BKPIMAGE_HAS_HOLE and BKPIMAGE_COMPRESSED(), an
  125. * XLogRecordBlockCompressHeader struct follows.
  126. */
  127. } XLogRecordBlockImageHeader;
  128. #define SizeOfXLogRecordBlockImageHeader \
  129. (offsetof(XLogRecordBlockImageHeader, bimg_info) + sizeof(uint8))
  /*
   * Information stored in bimg_info
   */
  #define BKPIMAGE_HAS_HOLE       0x01    /* page image has "hole" */
  #define BKPIMAGE_APPLY          0x02    /* page image should be restored
                                           * during replay */

  /* compression methods supported */
  #define BKPIMAGE_COMPRESS_PGLZ  0x04
  #define BKPIMAGE_COMPRESS_LZ4   0x08
  #define BKPIMAGE_COMPRESS_ZSTD  0x10

  /*
   * BKPIMAGE_COMPRESSED
   *      Evaluates to 1 if any of the compression-method bits above is set
   *      in 'info', and to 0 otherwise.
   *
   * The argument is parenthesized so that an expression such as "a | b" is
   * masked as a whole.  '&' binds tighter than '|', so without the
   * parentheses only the right-hand operand would be masked and the result
   * would be true whenever the left-hand operand is non-zero.
   */
  #define BKPIMAGE_COMPRESSED(info) \
      (((info) & (BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | \
                  BKPIMAGE_COMPRESS_ZSTD)) != 0)
  141. /*
  142. * Extra header information used when page image has "hole" and
  143. * is compressed.
  144. */
  145. typedef struct XLogRecordBlockCompressHeader
  146. {
  147. uint16 hole_length; /* number of bytes in "hole" */
  148. } XLogRecordBlockCompressHeader;
  149. #define SizeOfXLogRecordBlockCompressHeader \
  150. sizeof(XLogRecordBlockCompressHeader)
  /*
   * MaxSizeOfXLogRecordBlockHeader
   *      Maximum size of the header for a block reference: the block header,
   *      the image header, the compress header, a RelFileNode and a
   *      BlockNumber.  Used to size a temporary buffer for constructing the
   *      header.
   */
  #define MaxSizeOfXLogRecordBlockHeader \
      (SizeOfXLogRecordBlockHeader + SizeOfXLogRecordBlockImageHeader + \
       SizeOfXLogRecordBlockCompressHeader + \
       sizeof(RelFileNode) + sizeof(BlockNumber))
  /*
   * Layout of XLogRecordBlockHeader.fork_flags
   *
   * The lower 4 bits hold the fork number; the upper bits are used for flags.
   */
  #define BKPBLOCK_FLAG_MASK  0xF0
  #define BKPBLOCK_FORK_MASK  0x0F

  /* block data is an XLogRecordBlockImage */
  #define BKPBLOCK_HAS_IMAGE  0x10
  #define BKPBLOCK_HAS_DATA   0x20
  /* redo will re-init the page */
  #define BKPBLOCK_WILL_INIT  0x40
  /* RelFileNode omitted, same as previous */
  #define BKPBLOCK_SAME_REL   0x80
  171. /*
  172. * XLogRecordDataHeaderShort/Long are used for the "main data" portion of
  173. * the record. If the length of the data is less than 256 bytes, the short
  174. * form is used, with a single byte to hold the length. Otherwise the long
  175. * form is used.
  176. *
  177. * (These structs are currently not used in the code, they are here just for
  178. * documentation purposes).
  179. */
  180. typedef struct XLogRecordDataHeaderShort
  181. {
  182. uint8 id; /* XLR_BLOCK_ID_DATA_SHORT */
  183. uint8 data_length; /* number of payload bytes */
  184. } XLogRecordDataHeaderShort;
  185. #define SizeOfXLogRecordDataHeaderShort (sizeof(uint8) * 2)
  186. typedef struct XLogRecordDataHeaderLong
  187. {
  188. uint8 id; /* XLR_BLOCK_ID_DATA_LONG */
  189. /* followed by uint32 data_length, unaligned */
  190. } XLogRecordDataHeaderLong;
  191. #define SizeOfXLogRecordDataHeaderLong (sizeof(uint8) + sizeof(uint32))
  /*
   * Block IDs used to distinguish different kinds of record fragments
   *
   * Block references are numbered from 0 to XLR_MAX_BLOCK_ID.  A rmgr is free
   * to use any ID number in that range, although you should stick to small
   * numbers because the WAL machinery is optimized for that case.  A few ID
   * numbers are reserved to denote the "main" data portion of the record, as
   * well as replication-supporting transaction metadata.
   *
   * The maximum is currently set at 32, quite arbitrarily.  Most records only
   * need a handful of block references, but there are a few exceptions that
   * need more.
   */
  #define XLR_MAX_BLOCK_ID            32

  /* Reserved IDs */
  #define XLR_BLOCK_ID_DATA_SHORT     255
  #define XLR_BLOCK_ID_DATA_LONG      254
  #define XLR_BLOCK_ID_ORIGIN         253
  #define XLR_BLOCK_ID_TOPLEVEL_XID   252
  209. #endif /* XLOGRECORD_H */