123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494 |
- /*-------------------------------------------------------------------------
- *
- * predicate_internals.h
- * POSTGRES internal predicate locking definitions.
- *
- *
- * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/include/storage/predicate_internals.h
- *
- *-------------------------------------------------------------------------
- */
- #ifndef PREDICATE_INTERNALS_H
- #define PREDICATE_INTERNALS_H
- #include "storage/lock.h"
- #include "storage/lwlock.h"
- /*
- * Commit sequence number: an unsigned 64-bit counter used to order the
- * commits of serializable transactions. Some values are reserved; see the
- * definitions that follow.
- */
- typedef uint64 SerCommitSeqNo;
- /*
- * Reserved commit sequence numbers:
- * - 0 is reserved to indicate a non-existent SLRU entry; it cannot be
- * used as a SerCommitSeqNo, even an invalid one
- * - InvalidSerCommitSeqNo is used to indicate a transaction that
- * hasn't committed yet, so use a number greater than all valid
- * ones (PG_UINT64_MAX) to make comparison do the expected thing
- * - RecoverySerCommitSeqNo is used to refer to transactions that
- * happened before a crash/recovery, since we restart the sequence
- * at that point. It's earlier than all normal sequence numbers,
- * and is only used by recovered prepared transactions
- * - FirstNormalSerCommitSeqNo is the lowest value above the reserved
- * low values 0 and RecoverySerCommitSeqNo
- */
- #define InvalidSerCommitSeqNo ((SerCommitSeqNo) PG_UINT64_MAX)
- #define RecoverySerCommitSeqNo ((SerCommitSeqNo) 1)
- #define FirstNormalSerCommitSeqNo ((SerCommitSeqNo) 2)
- /*
- * The SERIALIZABLEXACT struct contains information needed for each
- * serializable database transaction to support SSI techniques.
- *
- * A home-grown list is maintained in shared memory to manage these.
- * An entry is used when the serializable transaction acquires a snapshot.
- * Unless the transaction is rolled back, this entry must generally remain
- * until all concurrent transactions have completed. (There are special
- * optimizations for READ ONLY transactions which often allow them to be
- * cleaned up earlier.) A transaction which is rolled back is cleaned up
- * as soon as possible.
- *
- * Eligibility for cleanup of committed transactions is generally determined
- * by comparing the transaction's finishedBefore field to
- * SxactGlobalXmin (a field of PredXactListData).
- */
- typedef struct SERIALIZABLEXACT
- {
- VirtualTransactionId vxid; /* The executing process always has one of
- * these. */
- /*
- * We use two numbers to track the order that transactions commit. Before
- * commit, a transaction is marked as prepared, and prepareSeqNo is set.
- * Shortly after commit, it's marked as committed, and commitSeqNo is set.
- * This doesn't give a strict commit order, but these two values together
- * are good enough for us, as we can always err on the safe side and
- * assume that there's a conflict, if we can't be sure of the exact
- * ordering of two commits.
- *
- * Note that a transaction is marked as prepared for a short period during
- * commit processing, even if two-phase commit is not used. But with
- * two-phase commit, a transaction can stay in prepared state for some
- * time.
- */
- SerCommitSeqNo prepareSeqNo;
- SerCommitSeqNo commitSeqNo;
- /*
- * Only one of these values is interesting at any given time, so they
- * share storage.
- */
- union
- {
- SerCommitSeqNo earliestOutConflictCommit; /* when committed with
- * conflict out */
- SerCommitSeqNo lastCommitBeforeSnapshot; /* when not committed or
- * no conflict out */
- } SeqNo;
- SHM_QUEUE outConflicts; /* list of write transactions whose data we
- * couldn't read. */
- SHM_QUEUE inConflicts; /* list of read transactions which couldn't
- * see our write. */
- SHM_QUEUE predicateLocks; /* list of associated PREDICATELOCK objects,
- * linked through their xactLink field */
- SHM_QUEUE finishedLink; /* list link in
- * FinishedSerializableTransactions */
- /*
- * perXactPredicateListLock is only used in parallel queries: it protects
- * this SERIALIZABLEXACT's predicate lock list against other workers of
- * the same session.
- */
- LWLock perXactPredicateListLock;
- /*
- * for r/o transactions: list of concurrent r/w transactions that we could
- * potentially have conflicts with, and vice versa for r/w transactions
- */
- SHM_QUEUE possibleUnsafeConflicts;
- TransactionId topXid; /* top level xid for the transaction, if one
- * exists; else invalid */
- TransactionId finishedBefore; /* invalid means still running; else the
- * struct expires when no serializable
- * xids are before this. */
- TransactionId xmin; /* the transaction's snapshot xmin */
- uint32 flags; /* OR'd combination of the SXACT_FLAG_* values
- * defined below */
- int pid; /* pid of associated process */
- int pgprocno; /* pgprocno of associated process */
- } SERIALIZABLEXACT;
- #define SXACT_FLAG_COMMITTED 0x00000001 /* already committed; each SXACT_FLAG_* value is one bit of SERIALIZABLEXACT.flags */
- #define SXACT_FLAG_PREPARED 0x00000002 /* about to commit (set briefly during commit even without two-phase commit) */
- #define SXACT_FLAG_ROLLED_BACK 0x00000004 /* already rolled back */
- #define SXACT_FLAG_DOOMED 0x00000008 /* will roll back */
- /*
- * The following flag actually means that the flagged transaction has a
- * conflict out *to a transaction which committed ahead of it*. It's hard
- * to get that into a name of a reasonable length.
- */
- #define SXACT_FLAG_CONFLICT_OUT 0x00000010
- #define SXACT_FLAG_READ_ONLY 0x00000020
- #define SXACT_FLAG_DEFERRABLE_WAITING 0x00000040
- #define SXACT_FLAG_RO_SAFE 0x00000080
- #define SXACT_FLAG_RO_UNSAFE 0x00000100
- #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200 /* existence of a conflict in, recorded as a flag rather than a list entry */
- #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400 /* existence of a conflict out, recorded as a flag rather than a list entry */
- /*
- * The following flag means the transaction has been partially released
- * already, but is being preserved because parallel workers might have a
- * reference to it. It will be recycled by the leader at end-of-transaction.
- */
- #define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800
- /*
- * The following types are used to provide an ad hoc list for holding
- * SERIALIZABLEXACT objects. An HTAB is overkill, since there is no need to
- * access these by key -- there are direct pointers to these objects where
- * needed. If a shared memory list is created, these types can probably be
- * eliminated in favor of using the general solution.
- *
- * Each list element embeds its SERIALIZABLEXACT by value, next to the queue
- * link. PredXactListElementDataSize is the MAXALIGN'd size of one element.
- */
- typedef struct PredXactListElementData
- {
- SHM_QUEUE link; /* NOTE(review): presumably chains the element onto one of PredXactListData's lists -- confirm in predicate.c */
- SERIALIZABLEXACT sxact; /* the transaction data, stored inline */
- } PredXactListElementData;
- typedef struct PredXactListElementData *PredXactListElement;
- #define PredXactListElementDataSize \
- ((Size)MAXALIGN(sizeof(PredXactListElementData)))
- typedef struct PredXactListData
- {
- SHM_QUEUE availableList; /* NOTE(review): presumably elements not currently in use -- confirm */
- SHM_QUEUE activeList; /* NOTE(review): presumably elements currently in use -- confirm */
- /*
- * These global variables are maintained when registering and cleaning up
- * serializable transactions. They must be global across all backends,
- * but are not needed outside the predicate.c source file. Protected by
- * SerializableXactHashLock.
- */
- TransactionId SxactGlobalXmin; /* global xmin for active serializable
- * transactions */
- int SxactGlobalXminCount; /* how many active serializable
- * transactions have this xmin */
- int WritableSxactCount; /* how many non-read-only serializable
- * transactions are active */
- SerCommitSeqNo LastSxactCommitSeqNo; /* a strictly monotonically
- * increasing number for commits
- * of serializable transactions */
- /* Protected by SerializableXactHashLock. */
- SerCommitSeqNo CanPartialClearThrough; /* can clear predicate locks and
- * inConflicts for committed
- * transactions through this seq
- * no */
- /* Protected by SerializableFinishedListLock. */
- SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
- * seq no */
- SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
- PredXactListElement element; /* pointer to PredXactListElementData; NOTE(review): presumably the start of the element storage -- confirm */
- } PredXactListData;
- typedef struct PredXactListData *PredXactList;
- #define PredXactListDataSize \
- ((Size)MAXALIGN(sizeof(PredXactListData))) /* MAXALIGN'd size of the list header struct */
- /*
- * The following types are used to provide lists of rw-conflicts between
- * pairs of transactions. Since exactly the same information is needed,
- * they are also used to record possible unsafe transaction relationships
- * for purposes of identifying safe snapshots for read-only transactions.
- *
- * When a RWConflictData is not in use to record either type of relationship
- * between a pair of transactions, it is kept on an "available" list. The
- * outLink field is used for maintaining that list.
- *
- * RWConflictDataSize is the MAXALIGN'd size of one RWConflictData.
- */
- typedef struct RWConflictData
- {
- SHM_QUEUE outLink; /* link for list of conflicts out from a sxact */
- SHM_QUEUE inLink; /* link for list of conflicts in to a sxact */
- SERIALIZABLEXACT *sxactOut; /* NOTE(review): presumably the sxact whose conflict-out list uses outLink -- confirm */
- SERIALIZABLEXACT *sxactIn; /* NOTE(review): presumably the sxact whose conflict-in list uses inLink -- confirm */
- } RWConflictData;
- typedef struct RWConflictData *RWConflict;
- #define RWConflictDataSize \
- ((Size)MAXALIGN(sizeof(RWConflictData)))
- typedef struct RWConflictPoolHeaderData
- {
- SHM_QUEUE availableList; /* NOTE(review): presumably the "available" list of unused RWConflictData entries, chained through outLink -- confirm */
- RWConflict element; /* pointer to RWConflictData; NOTE(review): presumably the start of the entry storage -- confirm */
- } RWConflictPoolHeaderData;
- typedef struct RWConflictPoolHeaderData *RWConflictPoolHeader;
- #define RWConflictPoolHeaderDataSize \
- ((Size)MAXALIGN(sizeof(RWConflictPoolHeaderData))) /* MAXALIGN'd size of the pool header struct */
- /*
- * The SERIALIZABLEXIDTAG struct identifies an xid assigned to a serializable
- * transaction or any of its subtransactions. It serves as the hash key of
- * SERIALIZABLEXID.
- */
- typedef struct SERIALIZABLEXIDTAG
- {
- TransactionId xid; /* the xid being identified */
- } SERIALIZABLEXIDTAG;
- /*
- * The SERIALIZABLEXID struct provides a link from a TransactionId for a
- * serializable transaction to the related SERIALIZABLEXACT record, even if
- * the transaction has completed and its connection has been closed.
- *
- * These are created as new top level transaction IDs are first assigned to
- * transactions which are participating in predicate locking. This may
- * never happen for a particular transaction if it doesn't write anything.
- * They are removed with their related serializable transaction objects.
- *
- * The SubTransGetTopmostTransaction function is used where necessary to get
- * from an XID which might be from a subtransaction to the top level XID.
- */
- typedef struct SERIALIZABLEXID
- {
- /* hash key */
- SERIALIZABLEXIDTAG tag;
- /* data */
- SERIALIZABLEXACT *myXact; /* pointer to the top level transaction data */
- } SERIALIZABLEXID;
- /*
- * The PREDICATELOCKTARGETTAG struct identifies a database object which can
- * be the target of predicate locks.
- *
- * Note that the hash function being used doesn't properly respect tag
- * length -- if the length of the structure isn't a multiple of four bytes it
- * will go to a four byte boundary past the end of the tag. If you change
- * this struct, make sure any slack space is initialized, so that any random
- * bytes in the middle or at the end are not included in the hash.
- *
- * TODO SSI: If we always use the same fields for the same type of value, we
- * should rename these. Holding off until it's clear there are no exceptions.
- * Since indexes are relations with blocks and tuples, it's looking likely that
- * the rename will be possible. If not, we may need to divide the last field
- * and use part of it for a target type, so that we know how to interpret the
- * data.
- */
- typedef struct PREDICATELOCKTARGETTAG
- {
- uint32 locktag_field1; /* a 32-bit ID field; database OID per the SET_ macros */
- uint32 locktag_field2; /* a 32-bit ID field; relation OID per the SET_ macros */
- uint32 locktag_field3; /* a 32-bit ID field; block number, or InvalidBlockNumber */
- uint32 locktag_field4; /* a 32-bit ID field; offset number, or InvalidOffsetNumber */
- } PREDICATELOCKTARGETTAG;
- /*
- * The PREDICATELOCKTARGET struct represents a database object on which there
- * are predicate locks.
- *
- * A hash list of these objects is maintained in shared memory. An entry is
- * added when a predicate lock is requested on an object which doesn't
- * already have one. An entry is removed when the last lock is removed from
- * its list.
- */
- typedef struct PREDICATELOCKTARGET
- {
- /* hash key */
- PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
- /* data */
- SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
- * predicate lock target, linked through
- * their targetLink field */
- } PREDICATELOCKTARGET;
- /*
- * The PREDICATELOCKTAG struct identifies an individual predicate lock.
- *
- * It is the combination of predicate lock target (which is a lockable
- * object) and a serializable transaction which has acquired a lock on that
- * target. It serves as the hash key of PREDICATELOCK.
- */
- typedef struct PREDICATELOCKTAG
- {
- PREDICATELOCKTARGET *myTarget; /* the lockable object */
- SERIALIZABLEXACT *myXact; /* the transaction holding the lock */
- } PREDICATELOCKTAG;
- /*
- * The PREDICATELOCK struct represents an individual lock.
- *
- * An entry can be created here when the related database object is read, or
- * by promotion of multiple finer-grained targets. All entries related to a
- * serializable transaction are removed when that serializable transaction is
- * cleaned up. Entries can also be removed when they are combined into a
- * single coarser-grained lock entry.
- *
- * Each entry sits on two lists at once: its target's list (via targetLink)
- * and its transaction's list (via xactLink).
- */
- typedef struct PREDICATELOCK
- {
- /* hash key */
- PREDICATELOCKTAG tag; /* unique identifier of lock */
- /* data */
- SHM_QUEUE targetLink; /* list link in PREDICATELOCKTARGET's list of
- * predicate locks */
- SHM_QUEUE xactLink; /* list link in SERIALIZABLEXACT's list of
- * predicate locks */
- SerCommitSeqNo commitSeqNo; /* only used for summarized predicate locks */
- } PREDICATELOCK;
- /*
- * The LOCALPREDICATELOCK struct represents a local copy of data which is
- * also present in the PREDICATELOCK table, organized for fast access without
- * needing to acquire a LWLock. It is strictly for optimization.
- *
- * Each serializable transaction creates its own local hash table to hold a
- * collection of these. This information is used to determine when a number
- * of fine-grained locks should be promoted to a single coarser-grained lock.
- * The information is maintained more-or-less in parallel to the
- * PREDICATELOCK data, but because this data is not protected by locks and is
- * only used in an optimization heuristic, it is allowed to drift in a few
- * corner cases where maintaining exact data would be expensive.
- *
- * The hash table is created when the serializable transaction acquires its
- * snapshot, and its memory is released upon completion of the transaction.
- *
- * Entries are keyed by the same tag type that PREDICATELOCKTARGET uses.
- */
- typedef struct LOCALPREDICATELOCK
- {
- /* hash key */
- PREDICATELOCKTARGETTAG tag; /* unique identifier of lockable object */
- /* data */
- bool held; /* is lock held, or just its children? */
- int childLocks; /* number of child locks currently held */
- } LOCALPREDICATELOCK;
- /*
- * The types of predicate locks which can be acquired.
- *
- * GET_PREDICATELOCKTARGETTAG_TYPE derives one of these values from which
- * fields of a PREDICATELOCKTARGETTAG hold valid values.
- */
- typedef enum PredicateLockTargetType
- {
- PREDLOCKTAG_RELATION, /* tag has neither a valid block nor a valid offset */
- PREDLOCKTAG_PAGE, /* tag has a valid block number but no valid offset */
- PREDLOCKTAG_TUPLE /* tag has a valid offset number */
- /* TODO SSI: Other types may be needed for index locking */
- } PredicateLockTargetType;
- /*
- * This structure is used to quickly capture a copy of all predicate
- * locks. This is currently used only by the pg_lock_status function,
- * which in turn is used by the pg_locks view. It is the return type of
- * GetPredicateLockStatusData().
- */
- typedef struct PredicateLockData
- {
- int nelements; /* NOTE(review): presumably the number of entries in each array below -- confirm */
- PREDICATELOCKTARGETTAG *locktags; /* lock target tags */
- SERIALIZABLEXACT *xacts; /* SERIALIZABLEXACT structs (not pointers to them); NOTE(review): presumably xacts[i] pairs with locktags[i] -- confirm */
- } PredicateLockData;
- /*
- * These macros define how we map logical IDs of lockable objects into the
- * physical fields of PREDICATELOCKTARGETTAG. Use these to set up values,
- * rather than accessing the fields directly. Note multiple eval of target!
- *
- * Field usage: field1 = database OID, field2 = relation OID, field3 = block
- * number, field4 = offset number. Coarser-grained tags fill the unused
- * trailing fields with InvalidBlockNumber / InvalidOffsetNumber, which is
- * what GET_PREDICATELOCKTARGETTAG_TYPE tests to classify a tag. Every macro
- * assigns all four fields, and each one is a single comma expression.
- */
- #define SET_PREDICATELOCKTARGETTAG_RELATION(locktag,dboid,reloid) \
- ((locktag).locktag_field1 = (dboid), \
- (locktag).locktag_field2 = (reloid), \
- (locktag).locktag_field3 = InvalidBlockNumber, \
- (locktag).locktag_field4 = InvalidOffsetNumber)
- #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
- ((locktag).locktag_field1 = (dboid), \
- (locktag).locktag_field2 = (reloid), \
- (locktag).locktag_field3 = (blocknum), \
- (locktag).locktag_field4 = InvalidOffsetNumber)
- #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
- ((locktag).locktag_field1 = (dboid), \
- (locktag).locktag_field2 = (reloid), \
- (locktag).locktag_field3 = (blocknum), \
- (locktag).locktag_field4 = (offnum))
- #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
- ((Oid) (locktag).locktag_field1) /* field1 as an Oid: the dboid stored by the SET_ macros */
- #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
- ((Oid) (locktag).locktag_field2) /* field2 as an Oid: the reloid stored by the SET_ macros */
- #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
- ((BlockNumber) (locktag).locktag_field3) /* field3 as a BlockNumber; InvalidBlockNumber for relation tags */
- #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
- ((OffsetNumber) (locktag).locktag_field4) /* field4 as an OffsetNumber; InvalidOffsetNumber unless a tuple tag */
- /*
- * Classify a tag by its finest-grained valid field: a valid offset number
- * means a tuple lock; otherwise a valid block number means a page lock;
- * otherwise it is a relation lock. Note multiple eval of locktag!
- */
- #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
- (((locktag).locktag_field4 == InvalidOffsetNumber) ? \
- (((locktag).locktag_field3 == InvalidBlockNumber) ? \
- PREDLOCKTAG_RELATION : PREDLOCKTAG_PAGE) : PREDLOCKTAG_TUPLE)
- /*
- * Two-phase commit statefile records. There are two types: for each
- * transaction, we generate one per-transaction record and a variable
- * number of per-predicate-lock records.
- *
- * This enum is the discriminator stored in TwoPhasePredicateRecord.type.
- */
- typedef enum TwoPhasePredicateRecordType
- {
- TWOPHASEPREDICATERECORD_XACT, /* per-transaction record */
- TWOPHASEPREDICATERECORD_LOCK /* per-predicate-lock record */
- } TwoPhasePredicateRecordType;
- /*
- * Per-transaction information to reconstruct a SERIALIZABLEXACT. Not
- * much is needed because most of it is not meaningful for a recovered
- * prepared transaction.
- *
- * In particular, we do not record the in and out conflict lists for a
- * prepared transaction because the associated SERIALIZABLEXACTs will
- * not be available after recovery. Instead, we simply record the
- * existence of each type of conflict by setting the transaction's
- * summary conflict in/out flag.
- */
- typedef struct TwoPhasePredicateXactRecord
- {
- TransactionId xmin; /* NOTE(review): presumably the SERIALIZABLEXACT's xmin -- confirm */
- uint32 flags; /* NOTE(review): presumably SXACT_FLAG_* bits, including the summary conflict flags -- confirm */
- } TwoPhasePredicateXactRecord;
- /* Per-lock state: one record identifies one predicate lock target by its tag */
- typedef struct TwoPhasePredicateLockRecord
- {
- PREDICATELOCKTARGETTAG target; /* tag of the locked object */
- uint32 filler; /* to avoid length change in back-patched fix */
- } TwoPhasePredicateLockRecord;
- typedef struct TwoPhasePredicateRecord
- {
- TwoPhasePredicateRecordType type; /* record kind; NOTE(review): presumably selects the valid member of data -- confirm */
- union
- {
- TwoPhasePredicateXactRecord xactRecord; /* per-transaction payload */
- TwoPhasePredicateLockRecord lockRecord; /* per-predicate-lock payload */
- } data;
- } TwoPhasePredicateRecord;
- /*
- * Define a macro to use for an "empty" SERIALIZABLEXACT reference: a NULL
- * pointer cast to SERIALIZABLEXACT *.
- */
- #define InvalidSerializableXact ((SERIALIZABLEXACT *) NULL)
- /*
- * Declarations of functions needing awareness of predicate locking
- * internals.
- *
- * GetPredicateLockStatusData takes no arguments and returns a pointer to a
- * PredicateLockData. GetSafeSnapshotBlockingPids takes a pid plus an int
- * output buffer and its size, and returns an int.
- */
- extern PredicateLockData *GetPredicateLockStatusData(void);
- extern int GetSafeSnapshotBlockingPids(int blocked_pid,
- int *output, int output_size);
- #endif /* PREDICATE_INTERNALS_H */
|