2
0

proc.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. /*-------------------------------------------------------------------------
  2. *
  3. * proc.h
  4. * per-process shared memory data structures
  5. *
  6. *
  7. * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
  8. * Portions Copyright (c) 1994, Regents of the University of California
  9. *
  10. * src/include/storage/proc.h
  11. *
  12. *-------------------------------------------------------------------------
  13. */
  14. #ifndef _PROC_H_
  15. #define _PROC_H_
  16. #include "access/clog.h"
  17. #include "access/xlogdefs.h"
  18. #include "lib/ilist.h"
  19. #include "storage/latch.h"
  20. #include "storage/lock.h"
  21. #include "storage/pg_sema.h"
  22. #include "storage/proclist_types.h"
  23. /*
  24. * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
  25. * for non-aborted subtransactions of its current top transaction. These
  26. * have to be treated as running XIDs by other backends.
  27. *
  28. * We also keep track of whether the cache overflowed (ie, the transaction has
  29. * generated at least one subtransaction that didn't fit in the cache).
  30. * If none of the caches have overflowed, we can assume that an XID that's not
  31. * listed anywhere in the PGPROC array is not a running transaction. Else we
  32. * have to look at pg_subtrans.
  33. */
  34. #define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */
  35. typedef struct XidCacheStatus /* summary state of one backend's subxid cache */
  36. {
  37. /* number of cached subxids, never more than PGPROC_MAX_CACHED_SUBXIDS
      * (currently 64, so a uint8 is wide enough) */
  38. uint8 count;
  39. /* true once PGPROC->subxids has overflowed, ie the transaction has
      * generated at least one subtransaction that didn't fit in the cache */
  40. bool overflowed;
  41. } XidCacheStatus;
  42. struct XidCache /* storage for a backend's cached subtransaction XIDs; see PGPROC.subxids */
  43. {
  44. TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; /* the number of cached entries
      * is kept separately, in XidCacheStatus.count */
  45. };
  46. /*
  47. * Flags for PGPROC->statusFlags and PROC_HDR->statusFlags[]
      *
      * Both of those are declared as uint8, so every flag defined here must
      * fit in eight bits.
  48. */
  49. #define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
  50. #define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
  51. #define PROC_IN_SAFE_IC 0x04 /* currently running CREATE INDEX
  52. * CONCURRENTLY or REINDEX
  53. * CONCURRENTLY on non-expressional,
  54. * non-partial index */
  55. #define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
  56. #define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical
  57. * decoding outside xact */
  58. #define PROC_AFFECTS_ALL_HORIZONS 0x20 /* this proc's xmin must be
  59. * included in vacuum horizons
  60. * in all databases */
  61. /* flags reset at EOXact (end of transaction) */
  62. #define PROC_VACUUM_STATE_MASK \
  63. (PROC_IN_VACUUM | PROC_IN_SAFE_IC | PROC_VACUUM_FOR_WRAPAROUND)
  64. /*
  65. * Xmin-related flags. Make sure any flags that affect how the process' Xmin
  66. * value is interpreted by VACUUM are included here.
  67. */
  68. #define PROC_XMIN_FLAGS (PROC_IN_VACUUM | PROC_IN_SAFE_IC)
  69. /*
  70. * We allow a small number of "weak" relation locks (AccessShareLock,
  71. * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
  72. * rather than the main lock table. This eases contention on the lock
  73. * manager LWLocks. See storage/lmgr/README for additional details.
  74. */
  75. #define FP_LOCK_SLOTS_PER_BACKEND 16
  76. /*
  77. * An invalid pgprocno. Must be larger than the maximum number of PGPROC
  78. * structures we could possibly have. See comments for MAX_BACKENDS.
  79. */
  80. #define INVALID_PGPROCNO PG_INT32_MAX
  81. /*
  82. * Flags for PGPROC.delayChkptFlags
  83. *
  84. * These flags can be used to delay the start or completion of a checkpoint
  85. * for short periods. A flag is in effect if the corresponding bit is set in
  86. * the PGPROC of any backend.
  87. *
  88. * For our purposes here, a checkpoint has three phases: (1) determine the
  89. * location to which the redo pointer will be moved, (2) write all the
  90. * data durably to disk, and (3) WAL-log the checkpoint.
  91. *
  92. * Setting DELAY_CHKPT_START prevents the system from moving from phase 1
  93. * to phase 2. This is useful when we are performing a WAL-logged modification
  94. * of data that will be flushed to disk in phase 2. By setting this flag
  95. * before writing WAL and clearing it after we've both written WAL and
  96. * performed the corresponding modification, we ensure that if the WAL record
  97. * is inserted prior to the new redo point, the corresponding data changes will
  98. * also be flushed to disk before the checkpoint can complete. (In the
  99. * extremely common case where the data being modified is in shared buffers
  100. * and we acquire an exclusive content lock on the relevant buffers before
  101. * writing WAL, this mechanism is not needed, because phase 2 will block
  102. * until we release the content lock and then flush the modified data to
  103. * disk.)
  104. *
  105. * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
  106. * to phase 3. This is useful if we are performing a WAL-logged operation that
  107. * might invalidate buffers, such as relation truncation. In this case, we need
  108. * to ensure that any buffers which were invalidated and thus not flushed by
  109. * the checkpoint are actually destroyed on disk. Replay can cope with a file
  110. * or block that doesn't exist, but not with a block that has the wrong
  111. * contents.
  112. */
  113. #define DELAY_CHKPT_START (1<<0)
  114. #define DELAY_CHKPT_COMPLETE (1<<1)
  115. typedef enum /* type of PGPROC.waitStatus; also returned by ProcSleep() and passed to ProcWakeup() */
  116. {
  117. PROC_WAIT_STATUS_OK,
  118. PROC_WAIT_STATUS_WAITING,
  119. PROC_WAIT_STATUS_ERROR,
  120. } ProcWaitStatus;
  121. /*
  122. * Each backend has a PGPROC struct in shared memory. There is also a list of
  123. * currently-unused PGPROC structs that will be reallocated to new backends.
  124. *
  125. * links: list link for any list the PGPROC is in. When waiting for a lock,
  126. * the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
  127. * is linked into ProcGlobal's freeProcs list.
  128. *
  129. * Note: twophase.c also sets up a dummy PGPROC struct for each currently
  130. * prepared transaction. These PGPROCs appear in the ProcArray data structure
  131. * so that the prepared transactions appear to be still running and are
  132. * correctly shown as holding locks. A prepared transaction PGPROC can be
  133. * distinguished from a real one at need by the fact that it has pid == 0.
  134. * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
  135. * but its myProcLocks[] lists are valid.
  136. *
  137. * We allow many fields of this struct to be accessed without locks, such as
  138. * delayChkptFlags and isBackgroundWorker. However, keep in mind that writing
  139. * mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in
  140. * at least shared mode, so that pgxactoff does not change concurrently.
  141. *
  142. * Mirrored fields:
  143. *
  144. * Some fields in PGPROC (see "mirrored in ..." comment) are mirrored into an
  145. * element of more densely packed ProcGlobal arrays. These arrays are indexed
  146. * by PGPROC->pgxactoff. Both copies need to be maintained coherently.
  147. *
  148. * NB: The pgxactoff indexed value can *never* be accessed without holding
  149. * locks.
  150. *
  151. * See PROC_HDR for details.
  152. */
  153. struct PGPROC
  154. {
  155. /* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
  156. SHM_QUEUE links; /* list link if process is in a list */
  157. PGPROC **procgloballist; /* procglobal list that owns this PGPROC */
  158. PGSemaphore sem; /* ONE semaphore to sleep on */
  159. ProcWaitStatus waitStatus; /* a PROC_WAIT_STATUS_* value, see ProcWaitStatus */
  160. Latch procLatch; /* generic latch for process */
  161. TransactionId xid; /* id of top-level transaction currently being
  162. * executed by this proc, if running and XID
  163. * is assigned; else InvalidTransactionId.
  164. * mirrored in ProcGlobal->xids[pgxactoff] */
  165. TransactionId xmin; /* minimal running XID as it was when we were
  166. * starting our xact, excluding LAZY VACUUM:
  167. * vacuum must not remove tuples deleted by
  168. * xid >= xmin ! */
  169. LocalTransactionId lxid; /* local id of top-level transaction currently
  170. * being executed by this proc, if running;
  171. * else InvalidLocalTransactionId */
  172. int pid; /* Backend's process ID; 0 if prepared xact */
  173. int pgxactoff; /* offset into various ProcGlobal->arrays with
  174. * data mirrored from this PGPROC */
  175. int pgprocno; /* a pgprocno identifies a PGPROC through
       * GetPGProcByNumber(), ie as an index into
       * ProcGlobal->allProcs; presumably this is the
       * PGPROC's own index -- TODO confirm.
       * INVALID_PGPROCNO is the invalid value. */
  176. /* These fields are zero while a backend is still starting up: */
  177. BackendId backendId; /* This backend's backend ID (if assigned) */
  178. Oid databaseId; /* OID of database this backend is using */
  179. Oid roleId; /* OID of role using this backend */
  180. Oid tempNamespaceId; /* OID of temp schema this backend is
  181. * using */
  182. bool isBackgroundWorker; /* true if background worker. */
  183. /*
  184. * While in hot standby mode, shows that a conflict signal has been sent
  185. * for the current transaction. Set/cleared while holding ProcArrayLock,
  186. * though not required. Accessed without lock, if needed.
  187. */
  188. bool recoveryConflictPending;
  189. /* Info about LWLock the process is currently waiting for, if any. */
  190. bool lwWaiting; /* true if waiting for an LW lock */
  191. uint8 lwWaitMode; /* lwlock mode being waited for */
  192. proclist_node lwWaitLink; /* position in LW lock wait list */
  193. /* Support for condition variables. */
  194. proclist_node cvWaitLink; /* position in CV wait list */
  195. /* Info about lock the process is currently waiting for, if any. */
  196. /* waitLock and waitProcLock are NULL if not currently waiting. */
  197. LOCK *waitLock; /* Lock object we're sleeping on ... */
  198. PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */
  199. LOCKMODE waitLockMode; /* type of lock we're waiting for */
  200. LOCKMASK heldLocks; /* bitmask for lock types already held on this
  201. * lock object by this backend */
  202. pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition
  203. * started */
  204. int delayChkptFlags; /* for DELAY_CHKPT_* flags */
  205. uint8 statusFlags; /* this backend's status flags, see PROC_*
  206. * above. mirrored in
  207. * ProcGlobal->statusFlags[pgxactoff] */
  208. /*
  209. * Info to allow us to wait for synchronous replication, if needed.
  210. * waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
  211. * syncRepState must not be touched except by owning process or WALSender.
  212. * syncRepLinks used only while holding SyncRepLock.
  213. */
  214. XLogRecPtr waitLSN; /* waiting for this LSN or higher */
  215. int syncRepState; /* wait state for sync rep */
  216. SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
  217. /*
  218. * All PROCLOCK objects for locks held or awaited by this backend are
  219. * linked into one of these lists, according to the partition number of
  220. * their lock.
  221. */
  222. SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
  223. XidCacheStatus subxidStatus; /* mirrored in
  224. * ProcGlobal->subxidStates[pgxactoff] */
  225. struct XidCache subxids; /* cache for subtransaction XIDs */
  226. /* Support for group XID clearing. */
  227. /* true, if member of ProcArray group waiting for XID clear */
  228. bool procArrayGroupMember;
  229. /* next ProcArray group member waiting for XID clear */
  230. pg_atomic_uint32 procArrayGroupNext;
  231. /*
  232. * latest transaction id among the transaction's main XID and
  233. * subtransactions
  234. */
  235. TransactionId procArrayGroupMemberXid;
  236. uint32 wait_event_info; /* proc's wait information */
  237. /* Support for group transaction status update. */
  238. bool clogGroupMember; /* true, if member of clog group */
  239. pg_atomic_uint32 clogGroupNext; /* next clog group member */
  240. TransactionId clogGroupMemberXid; /* transaction id of clog group member */
  241. XidStatus clogGroupMemberXidStatus; /* transaction status of clog
  242. * group member */
  243. int clogGroupMemberPage; /* clog page corresponding to
  244. * transaction id of clog group member */
  245. XLogRecPtr clogGroupMemberLsn; /* WAL location of commit record for clog
  246. * group member */
  247. /* Lock manager data, recording fast-path locks taken by this backend. */
  248. LWLock fpInfoLock; /* protects per-backend fast-path state */
  249. uint64 fpLockBits; /* lock modes held for each fast-path slot */
  250. Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
  251. bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
  252. LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID
  253. * lock */
  254. /*
  255. * Support for lock groups. Use LockHashPartitionLockByProc on the group
  256. * leader to get the LWLock protecting these fields.
  257. */
  258. PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */
  259. dlist_head lockGroupMembers; /* list of members, if I'm a leader */
  260. dlist_node lockGroupLink; /* my member link, if I'm a member */
  261. };
  262. /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
  263. extern PGDLLIMPORT PGPROC *MyProc;
  264. /*
  265. * There is one ProcGlobal struct for the whole database cluster.
  266. *
  267. * Adding/Removing an entry into the procarray requires holding *both*
  268. * ProcArrayLock and XidGenLock in exclusive mode (in that order). Both are
  269. * needed because the dense arrays (see below) are accessed from
  270. * GetNewTransactionId() and GetSnapshotData(), and we don't want to add
  271. * further contention by both using the same lock. Adding/Removing a procarray
  272. * entry is much less frequent.
  273. *
  274. * Some fields in PGPROC are mirrored into more densely packed arrays (e.g.
  275. * xids), with one entry for each backend. These arrays only contain entries
  276. * for PGPROCs that have been added to the shared array with ProcArrayAdd()
  277. * (in contrast to PGPROC array which has unused PGPROCs interspersed).
  278. *
  279. * The dense arrays are indexed by PGPROC->pgxactoff. Any concurrent
  280. * ProcArrayAdd() / ProcArrayRemove() can lead to pgxactoff of a procarray
  281. * member to change. Therefore it is only safe to use PGPROC->pgxactoff to
  282. * access the dense array while holding either ProcArrayLock or XidGenLock.
  283. *
  284. * As long as a PGPROC is in the procarray, the mirrored values need to be
  285. * maintained in both places in a coherent manner.
  286. *
  287. * The denser separate arrays are beneficial for three main reasons: First, to
  288. * allow for as tight loops accessing the data as possible. Second, to prevent
  289. * updates of frequently changing data (e.g. xmin) from invalidating
  290. * cachelines also containing less frequently changing data (e.g. xid,
  291. * statusFlags). Third, to condense frequently accessed data into as few
  292. * cachelines as possible.
  293. *
  294. * There are two main reasons to have the data mirrored between these dense
  295. * arrays and PGPROC. First, as explained above, a PGPROC's array entries can
  296. * only be accessed with either ProcArrayLock or XidGenLock held, whereas the
  297. * PGPROC entries do not require that (obviously there may still be locking
  298. * requirements around the individual field, separate from the concerns
  299. * here). That is particularly important for a backend to efficiently check
  300. * its own values, which it often can safely do without locking. Second, the
  301. * PGPROC fields allow to avoid unnecessary accesses and modification to the
  302. * dense arrays. A backend's own PGPROC is more likely to be in a local cache,
  303. * whereas the cachelines for the dense array will be modified by other
  304. * backends (often removing it from the cache for other cores/sockets). At
  305. * commit/abort time a check of the PGPROC value can avoid accessing/dirtying
  306. * the corresponding array value.
  307. *
  308. * Basically it makes sense to access the PGPROC variable when checking a
  309. * single backend's data, especially when already looking at the PGPROC for
  310. * other reasons. It makes sense to look at the "dense" arrays if we
  311. * need to look at many / most entries, because we then benefit from the
  312. * reduced indirection and better cross-process cache-ability.
  313. *
  314. * When entering a PGPROC for 2PC transactions with ProcArrayAdd(), the data
  315. * in the dense arrays is initialized from the PGPROC while it already holds
  316. * ProcArrayLock.
  317. */
  318. typedef struct PROC_HDR
  319. {
  320. /* Array of PGPROC structures (not including dummies for prepared txns) */
  321. PGPROC *allProcs;
  322. /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray;
       * indexed by PGPROC->pgxactoff */
  323. TransactionId *xids;
  324. /*
  325. * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
  326. * procarray; indexed by PGPROC->pgxactoff.
  327. */
  328. XidCacheStatus *subxidStates;
  329. /*
  330. * Array mirroring PGPROC.statusFlags for each PGPROC currently in the
  331. * procarray; indexed by PGPROC->pgxactoff.
  332. */
  333. uint8 *statusFlags;
  334. /* Length of allProcs array */
  335. uint32 allProcCount;
  336. /* Head of list of free PGPROC structures */
  337. PGPROC *freeProcs;
  338. /* Head of list of autovacuum's free PGPROC structures */
  339. PGPROC *autovacFreeProcs;
  340. /* Head of list of bgworker free PGPROC structures */
  341. PGPROC *bgworkerFreeProcs;
  342. /* Head of list of walsender free PGPROC structures */
  343. PGPROC *walsenderFreeProcs;
  344. /* First pgproc waiting for group XID clear (presumably the list continues
       * through PGPROC.procArrayGroupNext -- TODO confirm) */
  345. pg_atomic_uint32 procArrayGroupFirst;
  346. /* First pgproc waiting for group transaction status update (presumably the
       * list continues through PGPROC.clogGroupNext -- TODO confirm) */
  347. pg_atomic_uint32 clogGroupFirst;
  348. /* WALWriter process's latch */
  349. Latch *walwriterLatch;
  350. /* Checkpointer process's latch */
  351. Latch *checkpointerLatch;
  352. /* Current shared estimate of appropriate spins_per_delay value */
  353. int spins_per_delay;
  354. /* Buffer id of the buffer that Startup process waits for pin on, or -1 */
  355. int startupBufferPinWaitBufId;
  356. } PROC_HDR;
  357. extern PGDLLIMPORT PROC_HDR *ProcGlobal;
  358. extern PGDLLIMPORT PGPROC *PreparedXactProcs;
  359. /* Accessor for PGPROC given a pgprocno: yields a pointer to element n of
       * ProcGlobal->allProcs. The macro performs no range check, so n must be a
       * valid index into that array (in particular, not INVALID_PGPROCNO). */
  360. #define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)])
  361. /*
  362. * We set aside some extra PGPROC structures for auxiliary processes,
  363. * ie things that aren't full-fledged backends but need shmem access.
  364. *
  365. * Background writer, checkpointer, WAL writer and archiver run during normal
  366. * operation. Startup process and WAL receiver also consume 2 slots, but WAL
  367. * writer is launched only after startup has exited, so we only need 5 slots.
  368. */
  369. #define NUM_AUXILIARY_PROCS 5
  370. /* configurable options */
  371. extern PGDLLIMPORT int DeadlockTimeout;
  372. extern PGDLLIMPORT int StatementTimeout;
  373. extern PGDLLIMPORT int LockTimeout;
  374. extern PGDLLIMPORT int IdleInTransactionSessionTimeout;
  375. extern PGDLLIMPORT int IdleSessionTimeout;
  376. extern PGDLLIMPORT bool log_lock_waits;
  377. /*
  378. * Function Prototypes
       *
       * Declarations only; the definitions are not part of this header.
       *
       * NOTE(review): ProcSendSignal() identifies its target by pgprocno (cf.
       * GetPGProcByNumber), whereas AuxiliaryPidGetProc() and
       * BecomeLockGroupMember() take a pid, presumably a process ID as stored
       * in PGPROC.pid -- confirm against the definitions.
  379. */
  380. extern int ProcGlobalSemas(void);
  381. extern Size ProcGlobalShmemSize(void);
  382. extern void InitProcGlobal(void);
  383. extern void InitProcess(void);
  384. extern void InitProcessPhase2(void);
  385. extern void InitAuxiliaryProcess(void);
  386. extern void SetStartupBufferPinWaitBufId(int bufid);
  387. extern int GetStartupBufferPinWaitBufId(void);
  388. extern bool HaveNFreeProcs(int n);
  389. extern void ProcReleaseLocks(bool isCommit);
  390. extern void ProcQueueInit(PROC_QUEUE *queue);
  391. extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
  392. extern PGPROC *ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
  393. extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
  394. extern void CheckDeadLockAlert(void);
  395. extern bool IsWaitingForLock(void);
  396. extern void LockErrorCleanup(void);
  397. extern void ProcWaitForSignal(uint32 wait_event_info);
  398. extern void ProcSendSignal(int pgprocno);
  399. extern PGPROC *AuxiliaryPidGetProc(int pid);
  400. extern void BecomeLockGroupLeader(void);
  401. extern bool BecomeLockGroupMember(PGPROC *leader, int pid);
  402. #endif /* _PROC_H_ */