xlogreader.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
/*-------------------------------------------------------------------------
 *
 * xlogreader.h
 *		Definitions for the generic XLog reading facility
 *
 * Portions Copyright (c) 2013-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *		src/include/access/xlogreader.h
 *
 * NOTES
 *		See the definition of the XLogReaderState struct for instructions on
 *		how to use the XLogReader infrastructure.
 *
 *		The basic idea is to allocate an XLogReaderState via
 *		XLogReaderAllocate(), position the reader to the first record with
 *		XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
 *		until it returns NULL.
 *
 *		Callers supply a page_read callback if they want to call
 *		XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
 *		otherwise.  The WALRead function can be used as a helper to write
 *		page_read callbacks, but it is not mandatory; callers that use it
 *		must supply segment_open callbacks.  The segment_close callback
 *		must always be supplied.
 *
 *		After reading a record with XLogReadRecord(), it's decomposed into
 *		the per-block and main data parts, and the parts can be accessed
 *		with the XLogRec* macros and functions.  You can also decode a
 *		record that's already constructed in memory, without reading from
 *		disk, by calling the DecodeXLogRecord() function.
 *-------------------------------------------------------------------------
 */
  34. #ifndef XLOGREADER_H
  35. #define XLOGREADER_H
  36. #ifndef FRONTEND
  37. #include "access/transam.h"
  38. #endif
  39. #include "access/xlogrecord.h"
  40. #include "storage/buf.h"
/*
 * WALOpenSegment represents a WAL segment being read.
 *
 * An instance is embedded in XLogReaderState as "seg"; the reader callbacks
 * are responsible for keeping it up to date (segment_open sets ws_file,
 * segment_close sets ws_file to a negative number, page_read sets ws_tli).
 */
typedef struct WALOpenSegment
{
	int			ws_file;		/* segment file descriptor */
	XLogSegNo	ws_segno;		/* segment number */
	TimeLineID	ws_tli;			/* timeline ID of the currently open file */
} WALOpenSegment;
/*
 * WALSegmentContext carries context information about WAL segments to read.
 *
 * An instance is embedded in XLogReaderState as "segcxt".
 */
typedef struct WALSegmentContext
{
	/* NOTE(review): presumably the directory holding the segments -- confirm */
	char		ws_dir[MAXPGPATH];
	/* NOTE(review): presumably the WAL segment size in bytes -- confirm */
	int			ws_segsize;
} WALSegmentContext;
/*
 * Forward declaration: the callback types below take a pointer to the reader
 * state, whose full definition appears later in this file.
 */
typedef struct XLogReaderState XLogReaderState;

/*
 * Function type definitions for various xlogreader interactions.
 *
 * The contract of each callback is described next to the corresponding
 * member of XLogReaderRoutine.
 */

/* Reads WAL page data into readBuf; returns an int result. */
typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
							   XLogRecPtr targetPagePtr,
							   int reqLen,
							   XLogRecPtr targetRecPtr,
							   char *readBuf);

/* Opens segment nextSegNo; *tli_p is an input/output timeline argument. */
typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
								  XLogSegNo nextSegNo,
								  TimeLineID *tli_p);

/* Closes the reader's currently open segment. */
typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
/*
 * XLogReaderRoutine bundles the callbacks an XLogReader operates with.  A
 * copy is stored in XLogReaderState as "routine".
 *
 * As described in this file's header comment: page_read may be NULL unless
 * the caller uses XLogReadRecord() or XLogFindNextRecord(); segment_open is
 * required by callers that use WALRead(); segment_close must always be
 * supplied.
 */
typedef struct XLogReaderRoutine
{
	/*
	 * Data input callback
	 *
	 * This callback shall read at least reqLen valid bytes of the xlog page
	 * starting at targetPagePtr, and store them in readBuf.  The callback
	 * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
	 * -1 on failure.  The callback shall sleep, if necessary, to wait for the
	 * requested bytes to become available.  The callback will not be invoked
	 * again for the same page unless more than the returned number of bytes
	 * are needed.
	 *
	 * targetRecPtr is the position of the WAL record we're reading.  Usually
	 * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
	 * to read and verify the page or segment header, before it reads the
	 * actual WAL record it's interested in.  In that case, targetRecPtr can
	 * be used to determine which timeline to read the page from.
	 *
	 * The callback shall set ->seg.ws_tli to the TLI of the file the page was
	 * read from.
	 */
	XLogPageReadCB page_read;

	/*
	 * Callback to open the specified WAL segment for reading.  ->seg.ws_file
	 * shall be set to the file descriptor of the opened segment.  In case of
	 * failure, an error shall be raised by the callback and it shall not
	 * return.
	 *
	 * "nextSegNo" is the number of the segment to be opened.
	 *
	 * "tli_p" is an input/output argument. WALRead() uses it to pass the
	 * timeline in which the new segment should be found, but the callback can
	 * use it to return the TLI that it actually opened.
	 */
	WALSegmentOpenCB segment_open;

	/*
	 * WAL segment close callback.  ->seg.ws_file shall be set to a negative
	 * number.
	 */
	WALSegmentCloseCB segment_close;
} XLogReaderRoutine;
  107. #define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__}
/*
 * Decoded form of one block reference of a WAL record.  DecodedXLogRecord
 * holds an array of these, indexed by block id.
 */
typedef struct
{
	/* Is this block ref in use? */
	bool		in_use;

	/* Identify the block this refers to */
	RelFileNode rnode;
	ForkNumber	forknum;
	BlockNumber blkno;

	/* Prefetching workspace. */
	Buffer		prefetch_buffer;

	/* copy of the fork_flags field from the XLogRecordBlockHeader */
	uint8		flags;

	/* Information on full-page image, if any */
	bool		has_image;		/* has image, even for consistency checking */
	bool		apply_image;	/* has image that should be restored */
	char	   *bkp_image;
	uint16		hole_offset;
	uint16		hole_length;
	uint16		bimg_len;
	uint8		bimg_info;

	/* Buffer holding the rmgr-specific data associated with this block */
	bool		has_data;
	char	   *data;
	uint16		data_len;
	/* NOTE(review): presumably the allocated size of "data" -- confirm */
	uint16		data_bufsz;
} DecodedBkpBlock;
/*
 * The decoded contents of a record.  This occupies a contiguous region of
 * memory, with main_data and blocks[n].data pointing to memory after the
 * members declared here.
 *
 * blocks[] is a flexible array member, so sizeof(DecodedXLogRecord) does not
 * include any block entries.
 */
typedef struct DecodedXLogRecord
{
	/* Private members used for resource management. */
	size_t		size;			/* total size of decoded record */
	bool		oversized;		/* outside the regular decode buffer? */
	struct DecodedXLogRecord *next; /* decoded record queue link */

	/* Public members. */
	XLogRecPtr	lsn;			/* location */
	XLogRecPtr	next_lsn;		/* location of next record */
	XLogRecord	header;			/* header */
	RepOriginId record_origin;
	TransactionId toplevel_xid; /* XID of top-level transaction */
	char	   *main_data;		/* record's main data portion */
	uint32		main_data_len;	/* main data portion's length */
	int			max_block_id;	/* highest block_id in use (-1 if none) */
	DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER];
} DecodedXLogRecord;
/*
 * Complete state of one XLog reader.  Instances are obtained from
 * XLogReaderAllocate() and released with XLogReaderFree().
 *
 * The struct is divided into public parameters, the decoded representation
 * of the current record (to be inspected through the XLogRecGet* accessors),
 * and private/internal state.
 */
struct XLogReaderState
{
	/*
	 * Operational callbacks
	 */
	XLogReaderRoutine routine;

	/* ----------------------------------------
	 * Public parameters
	 * ----------------------------------------
	 */

	/*
	 * System identifier of the xlog files we're about to read.  Set to zero
	 * (the default value) if unknown or unimportant.
	 */
	uint64		system_identifier;

	/*
	 * Opaque data for callbacks to use.  Not used by XLogReader.
	 */
	void	   *private_data;

	/*
	 * Start and end point of the last record returned by XLogReadRecord().
	 * These are also available as record->lsn and record->next_lsn.
	 *
	 * EndRecPtr is also used as the position to read next.  Calling
	 * XLogBeginRead() sets EndRecPtr to the starting position and ReadRecPtr
	 * to invalid.
	 */
	XLogRecPtr	ReadRecPtr;		/* start of last record read */
	XLogRecPtr	EndRecPtr;		/* end+1 of last record read */

	/*
	 * Set at the end of recovery: the start point of a partial record at the
	 * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
	 * location of its first contrecord that went missing.
	 */
	XLogRecPtr	abortedRecPtr;
	XLogRecPtr	missingContrecPtr;
	/* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
	XLogRecPtr	overwrittenRecPtr;

	/* ----------------------------------------
	 * Decoded representation of current record
	 *
	 * Use XLogRecGet* functions to investigate the record; these fields
	 * should not be accessed directly.
	 * ----------------------------------------
	 */

	/*
	 * Start and end point of the last record read and decoded by
	 * XLogReadRecordInternal().  NextRecPtr is also used as the position to
	 * decode next.  Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to
	 * the requested starting position.
	 */
	XLogRecPtr	DecodeRecPtr;	/* start of last record decoded */
	XLogRecPtr	NextRecPtr;		/* end+1 of last record decoded */
	XLogRecPtr	PrevRecPtr;		/* start of previous record decoded */

	/* Last record returned by XLogReadRecord(). */
	DecodedXLogRecord *record;

	/* ----------------------------------------
	 * private/internal state
	 * ----------------------------------------
	 */

	/*
	 * Buffer for decoded records.  This is a circular buffer, though
	 * individual records can't be split in the middle, so some space is often
	 * wasted at the end.  Oversized records that don't fit in this space are
	 * allocated separately.
	 */
	char	   *decode_buffer;
	size_t		decode_buffer_size;
	bool		free_decode_buffer; /* need to free? */
	char	   *decode_buffer_head; /* data is read from the head */
	char	   *decode_buffer_tail; /* new data is written at the tail */

	/*
	 * Queue of records that have been decoded.  This is a linked list that
	 * usually consists of consecutive records in decode_buffer, but may also
	 * contain oversized records allocated with palloc().
	 */
	DecodedXLogRecord *decode_queue_head;	/* oldest decoded record */
	DecodedXLogRecord *decode_queue_tail;	/* newest decoded record */

	/*
	 * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
	 * readLen bytes)
	 */
	char	   *readBuf;
	uint32		readLen;

	/* last read XLOG position for data currently in readBuf */
	WALSegmentContext segcxt;
	WALOpenSegment seg;
	uint32		segoff;

	/*
	 * beginning of prior page read, and its TLI.  Doesn't necessarily
	 * correspond to what's in readBuf; used for timeline sanity checks.
	 */
	XLogRecPtr	latestPagePtr;
	TimeLineID	latestPageTLI;

	/* beginning of the WAL record being read. */
	XLogRecPtr	currRecPtr;
	/* timeline to read it from, 0 if a lookup is required */
	TimeLineID	currTLI;

	/*
	 * Safe point to read to in currTLI if current TLI is historical
	 * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
	 *
	 * Actually set to the start of the segment containing the timeline switch
	 * that ends currTLI's validity, not the LSN of the switch itself, since
	 * we can't assume the old segment will be present.
	 */
	XLogRecPtr	currTLIValidUntil;

	/*
	 * If currTLI is not the most recent known timeline, the next timeline to
	 * read from when currTLIValidUntil is reached.
	 */
	TimeLineID	nextTLI;

	/*
	 * Buffer for current ReadRecord result (expandable), used when a record
	 * crosses a page boundary.
	 */
	char	   *readRecordBuf;
	uint32		readRecordBufSize;

	/* Buffer to hold error message */
	char	   *errormsg_buf;
	/* true while an error is waiting to be returned; see XLogNextRecord() */
	bool		errormsg_deferred;

	/*
	 * Flag to indicate to XLogPageReadCB that it should not block waiting for
	 * data.
	 */
	bool		nonblocking;
};
  281. /*
  282. * Check if XLogNextRecord() has any more queued records or an error to return.
  283. */
  284. static inline bool
  285. XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
  286. {
  287. return (state->decode_queue_head != NULL) || state->errormsg_deferred;
  288. }
/*
 * Get a new XLogReader.
 *
 * "routine" supplies the reader callbacks (see XLogReaderRoutine) and
 * "private_data" is opaque data for those callbacks to use.
 * NOTE(review): wal_segment_size and waldir presumably initialize the
 * reader's WALSegmentContext -- confirm against the implementation.
 */
extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
										   const char *waldir,
										   XLogReaderRoutine *routine,
										   void *private_data);

/*
 * NOTE(review): presumably returns a ready-made set of callbacks for reading
 * local WAL files -- confirm against the implementation.
 */
extern XLogReaderRoutine *LocalXLogReaderRoutine(void);

/* Free an XLogReader */
extern void XLogReaderFree(XLogReaderState *state);

/* Optionally provide a circular decoding buffer to allow readahead. */
extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
									  void *buffer,
									  size_t size);

/*
 * Position the XLogReader to given record.
 *
 * XLogBeginRead() sets the reader's NextRecPtr and EndRecPtr to RecPtr and
 * its ReadRecPtr to invalid (see the field comments in XLogReaderState).
 * XLogFindNextRecord() needs a page_read callback to be set.
 */
extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
/*
 * Result codes for reading a WAL record.
 *
 * NOTE(review): this enum used to be labelled "Return values from
 * XLogPageReadCB", but the XLogPageReadCB contract documented in
 * XLogReaderRoutine is "number of bytes read, or -1 on failure", and the
 * member comments below talk about reading a record.  Confirm which
 * functions actually return this type.
 */
typedef enum XLogPageReadResult
{
	XLREAD_SUCCESS = 0,			/* record is successfully read */
	XLREAD_FAIL = -1,			/* failed during reading a record */
	XLREAD_WOULDBLOCK = -2		/* nonblocking mode only, no data */
} XLogPageReadResult;
/*
 * Read the next XLog record.  Returns NULL on end-of-WAL or failure.
 *
 * Requires a page_read callback.
 * NOTE(review): *errormsg presumably receives an error description on
 * failure -- confirm against the implementation.
 */
extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
										 char **errormsg);

/*
 * Consume the next record or error.  XLogReaderHasQueuedRecordOrError()
 * tells whether there is anything to consume.
 */
extern DecodedXLogRecord *XLogNextRecord(XLogReaderState *state,
										 char **errormsg);

/*
 * Release the previously returned record, if necessary.
 * NOTE(review): the meaning of the returned XLogRecPtr is not visible in
 * this header -- confirm against the implementation.
 */
extern XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state);

/* Try to read ahead, if there is data and space. */
extern DecodedXLogRecord *XLogReadAhead(XLogReaderState *state,
										bool nonblocking);

/* Validate a page */
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
										 XLogRecPtr recptr, char *phdr);

/* Forget error produced by XLogReaderValidatePageHeader(). */
extern void XLogReaderResetError(XLogReaderState *state);
/*
 * Error information from WALRead that both backend and frontend caller can
 * process.  Currently only errors from pg_pread can be reported.
 */
typedef struct WALReadError
{
	int			wre_errno;		/* errno set by the last pg_pread() */
	int			wre_off;		/* Offset we tried to read from. */
	int			wre_req;		/* Bytes requested to be read. */
	int			wre_read;		/* Bytes read by the last pg_pread(). */
	WALOpenSegment wre_seg;		/* Segment we tried to read from. */
} WALReadError;
/*
 * Helper for writing page_read callbacks; callers that use it must supply a
 * segment_open callback.
 * NOTE(review): presumably reads "count" bytes starting at "startptr" on
 * timeline "tli" into "buf" and fills *errinfo when it returns false --
 * confirm against the implementation.
 */
extern bool WALRead(XLogReaderState *state,
					char *buf, XLogRecPtr startptr, Size count,
					TimeLineID tli, WALReadError *errinfo);

/*
 * Functions for decoding an XLogRecord
 *
 * DecodeXLogRecord() can decode a record that's already constructed in
 * memory, without reading from disk.
 * NOTE(review): DecodeXLogRecordRequiredSpace() presumably returns the space
 * a caller must provide for "decoded" given the record's xl_tot_len --
 * confirm against the implementation.
 */
extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
extern bool DecodeXLogRecord(XLogReaderState *state,
							 DecodedXLogRecord *decoded,
							 XLogRecord *record,
							 XLogRecPtr lsn,
							 char **errmsg);
  349. /*
  350. * Macros that provide access to parts of the record most recently returned by
  351. * XLogReadRecord() or XLogNextRecord().
  352. */
  353. #define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
  354. #define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
  355. #define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
  356. #define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
  357. #define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
  358. #define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
  359. #define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
  360. #define XLogRecGetData(decoder) ((decoder)->record->main_data)
  361. #define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
  362. #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
  363. #define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
  364. #define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
  365. #define XLogRecHasBlockRef(decoder, block_id) \
  366. (((decoder)->record->max_block_id >= (block_id)) && \
  367. ((decoder)->record->blocks[block_id].in_use))
  368. #define XLogRecHasBlockImage(decoder, block_id) \
  369. ((decoder)->record->blocks[block_id].has_image)
  370. #define XLogRecBlockImageApply(decoder, block_id) \
  371. ((decoder)->record->blocks[block_id].apply_image)
/*
 * Function counterparts of the XLogRec* macros.  Despite the parameter name
 * "record", each takes the reader state.
 * NOTE(review): presumably they operate on the reader's current record
 * (state->record), like the macros -- confirm against the implementation.
 */

/* Only declared for non-FRONTEND builds. */
#ifndef FRONTEND
extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
#endif

/*
 * NOTE(review): presumably restores the full-page image of block "block_id"
 * into "page" and returns false on failure -- confirm against the
 * implementation.
 */
extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);

/*
 * NOTE(review): presumably returns the rmgr-specific data attached to block
 * "block_id" and stores its length in *len -- confirm against the
 * implementation.
 */
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);

/*
 * Retrieve the identity (relation, fork, block number) of block reference
 * "block_id" through the output pointers.  The Extended variant returns a
 * bool and has an additional prefetch_buffer output.
 * NOTE(review): whether the output pointers may be NULL, and what the bool
 * result means, is not visible in this header -- confirm against the
 * implementation.
 */
extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
							   RelFileNode *rnode, ForkNumber *forknum,
							   BlockNumber *blknum);
extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
									   RelFileNode *rnode, ForkNumber *forknum,
									   BlockNumber *blknum,
									   Buffer *prefetch_buffer);
  384. #endif /* XLOGREADER_H */