/*-------------------------------------------------------------------------
 *
 * tableam.h
 *    POSTGRES table access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/tableam.h
 *
 * NOTES
 *    See tableam.sgml for higher level documentation.
 *
 *-------------------------------------------------------------------------
 */
#ifndef TABLEAM_H
#define TABLEAM_H

#include "access/relscan.h"
#include "access/sdir.h"
#include "access/xact.h"
#include "utils/guc.h"
#include "utils/rel.h"
#include "utils/snapshot.h"

#define DEFAULT_TABLE_ACCESS_METHOD "heap"

/* GUCs */
extern PGDLLIMPORT char *default_table_access_method;
extern PGDLLIMPORT bool synchronize_seqscans;

struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;

/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
    /* one of SO_TYPE_* may be specified */
    SO_TYPE_SEQSCAN = 1 << 0,
    SO_TYPE_BITMAPSCAN = 1 << 1,
    SO_TYPE_SAMPLESCAN = 1 << 2,
    SO_TYPE_TIDSCAN = 1 << 3,
    SO_TYPE_TIDRANGESCAN = 1 << 4,
    SO_TYPE_ANALYZE = 1 << 5,

    /* several of SO_ALLOW_* may be specified */
    /* allow or disallow use of access strategy */
    SO_ALLOW_STRAT = 1 << 6,
    /* report location to syncscan logic? */
    SO_ALLOW_SYNC = 1 << 7,
    /* verify visibility page-at-a-time? */
    SO_ALLOW_PAGEMODE = 1 << 8,

    /* unregister snapshot at scan end? */
    SO_TEMP_SNAPSHOT = 1 << 9
} ScanOptions;

/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
    /*
     * Signals that the action succeeded (i.e. update/delete performed, lock
     * was acquired)
     */
    TM_Ok,

    /* The affected tuple wasn't visible to the relevant snapshot */
    TM_Invisible,

    /* The affected tuple was already modified by the calling backend */
    TM_SelfModified,

    /*
     * The affected tuple was updated by another transaction. This includes
     * the case where the tuple was moved to another partition.
     */
    TM_Updated,

    /* The affected tuple was deleted by another transaction */
    TM_Deleted,

    /*
     * The affected tuple is currently being modified by another session.
     * This will only be returned if table_{update,delete,lock_tuple} are
     * instructed not to wait.
     */
    TM_BeingModified,

    /* lock couldn't be acquired, action skipped. Only used by lock_tuple */
    TM_WouldBlock
} TM_Result;

/*
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 * because the target tuple is already outdated, they fill in this struct to
 * provide information to the caller about what happened.
 *
 * ctid is the target's ctid link: it is the same as the target's TID if the
 * target was deleted, or the location of the replacement tuple if the target
 * was updated.
 *
 * xmax is the outdating transaction's XID. If the caller wants to visit the
 * replacement tuple, it must check that this matches before believing the
 * replacement is really a match.
 *
 * cmax is the outdating command's CID, but only when the failure code is
 * TM_SelfModified (i.e., something in the current transaction outdated the
 * tuple); otherwise cmax is zero. (We make this restriction because
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 * transactions.)
 */
typedef struct TM_FailureData
{
    ItemPointerData ctid;
    TransactionId xmax;
    CommandId cmax;
    bool traversed;
} TM_FailureData;
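
/*
 * Illustrative usage (editor's sketch, hypothetical caller code): how a
 * caller might interpret a TM_Result together with the TM_FailureData filled
 * in by table_tuple_delete() on failure. `rel`, `tid`, `cid` and `snapshot`
 * are assumed to be set up by the caller.
 *
 *     TM_FailureData tmfd;
 *     TM_Result res;
 *
 *     res = table_tuple_delete(rel, tid, cid, snapshot, InvalidSnapshot,
 *                              true, &tmfd, false);
 *     switch (res)
 *     {
 *         case TM_Ok:
 *             break;          // deleted
 *         case TM_Updated:
 *             // chase the update chain via tmfd.ctid, verifying tmfd.xmax
 *             break;
 *         case TM_SelfModified:
 *             // tmfd.cmax is valid here; possibly raise an error
 *             break;
 *         default:
 *             break;
 *     }
 */
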
/*
 * State used when calling table_index_delete_tuples().
 *
 * Represents the status of table tuples, referenced by table TID and taken
 * by the index AM from index tuples. State consists of the high level
 * parameters of the deletion operation, plus two mutable palloc()'d arrays
 * of information about the status of individual table tuples. These are
 * conceptually one single array. Using two arrays keeps the TM_IndexDelete
 * struct small, which makes sorting the first array (the deltids array)
 * fast.
 *
 * Some index AM callers perform simple index tuple deletion (by specifying
 * bottomup = false), and include only known-dead deltids. These known-dead
 * entries are all marked knowndeletable = true directly (typically these are
 * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
 *
 * Callers that specify bottomup = true are "bottom-up index deletion"
 * callers. The considerations for the tableam are more subtle with these
 * callers because they ask the tableam to perform highly speculative work,
 * and might only expect the tableam to check a small fraction of all
 * entries. A caller is not allowed to specify knowndeletable = true for any
 * entry because everything is highly speculative. A bottom-up caller
 * provides context and hints to the tableam -- see comments below for
 * details on how index AMs and tableams should coordinate during bottom-up
 * index deletion.
 *
 * Simple index deletion callers may ask the tableam to perform speculative
 * work, too. This is a little like bottom-up deletion, but not too much.
 * The tableam will only perform speculative work when it's practically free
 * to do so in passing for the simple deletion caller (while always
 * performing whatever work is needed to enable knowndeletable/LP_DEAD index
 * tuples to be deleted within the index AM). This is the real reason why
 * it's possible for a simple index deletion caller to specify
 * knowndeletable = false up front (this means "check if it's possible for
 * me to delete the corresponding index tuple when it's cheap to do so in
 * passing"). The index AM should only include "extra" entries for index
 * tuples whose TIDs point to a table block that the tableam is expected to
 * have to visit anyway (in the case of a block-oriented tableam). The
 * tableam isn't strictly obligated to check these "extra" TIDs, but a
 * block-based AM should always manage to do so in practice.
 *
 * The final contents of the deltids/status arrays are interesting to
 * callers that ask the tableam to perform speculative work (i.e. when _any_
 * items have knowndeletable set to false up front). These index AM callers
 * will naturally need to consult the final state to determine which index
 * tuples are in fact deletable.
 *
 * The index AM can keep track of which index tuple relates to which deltid
 * by setting idxoffnum (and/or relying on each entry being uniquely
 * identifiable using tid), which is important when the final contents of
 * the array will need to be interpreted -- the array can shrink from its
 * initial size after tableam processing and/or have entries in a new order
 * (the tableam may sort the deltids array for its own reasons). Bottom-up
 * callers may find that the final ndeltids is 0 on return from the call to
 * the tableam, in which case no index tuple deletions are possible. Simple
 * deletion callers can rely on any entries they know to be deletable
 * appearing in the final array as deletable.
 */
typedef struct TM_IndexDelete
{
    ItemPointerData tid;        /* table TID from index tuple */
    int16 id;                   /* Offset into TM_IndexStatus array */
} TM_IndexDelete;

typedef struct TM_IndexStatus
{
    OffsetNumber idxoffnum;     /* Index AM page offset number */
    bool knowndeletable;        /* Currently known to be deletable? */

    /* Bottom-up index deletion specific fields follow */
    bool promising;             /* Promising (duplicate) index tuple? */
    int16 freespace;            /* Space freed in index if deleted */
} TM_IndexStatus;

/*
 * Index AM/tableam coordination is central to the design of bottom-up index
 * deletion. The index AM provides hints about where to look to the tableam
 * by marking some entries as "promising". The index AM does this with
 * duplicate index tuples that are strongly suspected to be old versions
 * left behind by UPDATEs that did not logically modify indexed values. The
 * index AM may find it helpful to only mark entries as promising when
 * they're thought to have been affected by such an UPDATE in the recent
 * past.
 *
 * Bottom-up index deletion casts a wide net at first, usually by including
 * all TIDs on a target index page. It is up to the tableam to worry about
 * the cost of checking transaction status information. The tableam is in
 * control, but needs careful guidance from the index AM. The index AM
 * requests that the bottomupfreespace target be met, while the tableam
 * measures progress towards that goal by tallying the per-entry freespace
 * value for known deletable entries. (All !bottomup callers can just set
 * these space related fields to zero.)
 */
typedef struct TM_IndexDeleteOp
{
    Relation irel;              /* Target index relation */
    BlockNumber iblknum;        /* Index block number (for error reports) */
    bool bottomup;              /* Bottom-up (not simple) deletion? */
    int bottomupfreespace;      /* Bottom-up space target */

    /* Mutable per-TID information follows (index AM initializes entries) */
    int ndeltids;               /* Current # of deltids/status elements */
    TM_IndexDelete *deltids;
    TM_IndexStatus *status;
} TM_IndexDeleteOp;
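
/*
 * Illustrative usage (editor's sketch, hypothetical index AM code): setting
 * up a TM_IndexDeleteOp for simple (not bottom-up) deletion of known-dead
 * entries. `irel`, `buf`, `ndeletable`, `deadtids` and `deadoffsets` are
 * assumptions standing in for the index AM's own state.
 *
 *     TM_IndexDeleteOp delstate;
 *
 *     delstate.irel = irel;
 *     delstate.iblknum = BufferGetBlockNumber(buf);
 *     delstate.bottomup = false;
 *     delstate.bottomupfreespace = 0;    // only used by bottom-up callers
 *     delstate.ndeltids = 0;
 *     delstate.deltids = palloc(ndeletable * sizeof(TM_IndexDelete));
 *     delstate.status = palloc(ndeletable * sizeof(TM_IndexStatus));
 *
 *     for (int i = 0; i < ndeletable; i++)
 *     {
 *         TM_IndexDelete *d = &delstate.deltids[delstate.ndeltids];
 *         TM_IndexStatus *s = &delstate.status[delstate.ndeltids];
 *
 *         d->tid = deadtids[i];          // table TID from index tuple
 *         d->id = delstate.ndeltids;     // offset into status array
 *         s->idxoffnum = deadoffsets[i];
 *         s->knowndeletable = true;      // e.g. from LP_DEAD bits
 *         s->promising = false;          // bottom-up only
 *         s->freespace = 0;              // bottom-up only
 *         delstate.ndeltids++;
 *     }
 */
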
/* "options" flag bits for table_tuple_insert */
/* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
#define TABLE_INSERT_SKIP_FSM       0x0002
#define TABLE_INSERT_FROZEN         0x0004
#define TABLE_INSERT_NO_LOGICAL     0x0008
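
/*
 * Illustrative usage (editor's note): callers combine these flags as a
 * bitmask in the `options` argument of table_tuple_insert() and
 * table_multi_insert(). For example, a COPY FREEZE style bulk load might
 * pass
 *
 *     int options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
 */
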
/* flag bits for table_tuple_lock */
/* Follow tuples whose update is in progress if lock modes don't conflict */
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION       (1 << 1)

/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
                                    ItemPointer tid,
                                    Datum *values,
                                    bool *isnull,
                                    bool tupleIsAlive,
                                    void *state);
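
/*
 * Illustrative usage (editor's sketch, hypothetical index AM code): the
 * shape of an IndexBuildCallback, invoked once per table tuple during
 * table_index_build_scan(). The MyAmBuildState type is an assumption.
 *
 *     static void
 *     myam_build_callback(Relation index, ItemPointer tid, Datum *values,
 *                         bool *isnull, bool tupleIsAlive, void *state)
 *     {
 *         MyAmBuildState *buildstate = (MyAmBuildState *) state;
 *
 *         // form an index tuple from values/isnull, tag it with tid,
 *         // and accumulate it in buildstate
 *     }
 */
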
/*
 * API struct for a table AM. Note this must be allocated in a
 * server-lifetime manner, typically as a static const struct, which then
 * gets returned by FormData_pg_am.amhandler.
 *
 * In most cases it's not appropriate to call the callbacks directly; use
 * the table_* wrapper functions instead.
 *
 * GetTableAmRoutine() asserts that required callbacks are filled in;
 * remember to update it when adding a callback.
 */
typedef struct TableAmRoutine
{
    /* this must be set to T_TableAmRoutine */
    NodeTag type;

    /* ------------------------------------------------------------------------
     * Slot related callbacks.
     * ------------------------------------------------------------------------
     */

    /*
     * Return slot implementation suitable for storing a tuple of this AM.
     */
    const TupleTableSlotOps *(*slot_callbacks) (Relation rel);

    /* ------------------------------------------------------------------------
     * Table scan callbacks.
     * ------------------------------------------------------------------------
     */

    /*
     * Start a scan of `rel`. The callback has to return a TableScanDesc,
     * which will typically be embedded in a larger, AM specific, struct.
     *
     * If nkeys != 0, the results need to be filtered by those scan keys.
     *
     * pscan, if not NULL, will have already been initialized with
     * parallelscan_initialize(), and has to be for the same relation. Will
     * only be set coming from table_beginscan_parallel().
     *
     * `flags` is a bitmask indicating the type of scan (ScanOptions's
     * SO_TYPE_*, currently only one may be specified), options controlling
     * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
     * specified, an AM may ignore unsupported ones) and whether the snapshot
     * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
     */
    TableScanDesc (*scan_begin) (Relation rel,
                                 Snapshot snapshot,
                                 int nkeys, struct ScanKeyData *key,
                                 ParallelTableScanDesc pscan,
                                 uint32 flags);

    /*
     * Release resources and deallocate scan. If TableScanDesc.temp_snap,
     * TableScanDesc.rs_snapshot needs to be unregistered.
     */
    void (*scan_end) (TableScanDesc scan);

    /*
     * Restart relation scan. If set_params is set to true, allow_{strat,
     * sync, pagemode} (see scan_begin) changes should be taken into account.
     */
    void (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
                         bool set_params, bool allow_strat,
                         bool allow_sync, bool allow_pagemode);

    /*
     * Return next tuple from `scan`, store in slot.
     */
    bool (*scan_getnextslot) (TableScanDesc scan,
                              ScanDirection direction,
                              TupleTableSlot *slot);

    /*-----------
     * Optional functions to provide scanning for ranges of ItemPointers.
     * Implementations must either provide both of these functions, or
     * neither of them.
     *
     * Implementations of scan_set_tidrange must themselves handle
     * ItemPointers of any value, i.e., they must handle each of the
     * following:
     *
     * 1) mintid or maxtid is beyond the end of the table; and
     * 2) mintid is above maxtid; and
     * 3) item offset for mintid or maxtid is beyond the maximum offset
     *    allowed by the AM.
     *
     * Implementations can assume that scan_set_tidrange is always called
     * before scan_getnextslot_tidrange, or after scan_rescan and before any
     * further calls to scan_getnextslot_tidrange.
     */
    void (*scan_set_tidrange) (TableScanDesc scan,
                               ItemPointer mintid,
                               ItemPointer maxtid);

    /*
     * Return next tuple from `scan` that's in the range of TIDs defined by
     * scan_set_tidrange.
     */
    bool (*scan_getnextslot_tidrange) (TableScanDesc scan,
                                       ScanDirection direction,
                                       TupleTableSlot *slot);

    /* ------------------------------------------------------------------------
     * Parallel table scan related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * Estimate the size of shared memory needed for a parallel scan of this
     * relation. The snapshot does not need to be accounted for.
     */
    Size (*parallelscan_estimate) (Relation rel);

    /*
     * Initialize ParallelTableScanDesc for a parallel scan of this
     * relation. `pscan` will be sized according to parallelscan_estimate()
     * for the same relation.
     */
    Size (*parallelscan_initialize) (Relation rel,
                                     ParallelTableScanDesc pscan);

    /*
     * Reinitialize `pscan` for a new scan. `rel` will be the same relation
     * as when `pscan` was initialized by parallelscan_initialize.
     */
    void (*parallelscan_reinitialize) (Relation rel,
                                       ParallelTableScanDesc pscan);

    /* ------------------------------------------------------------------------
     * Index Scan Callbacks
     * ------------------------------------------------------------------------
     */

    /*
     * Prepare to fetch tuples from the relation, as needed when fetching
     * tuples for an index scan. The callback has to return an
     * IndexFetchTableData, which the AM will typically embed in a larger
     * structure with additional information.
     *
     * Tuples for an index scan can then be fetched via index_fetch_tuple.
     */
    struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);

    /*
     * Reset index fetch. Typically this will release cross index fetch
     * resources held in IndexFetchTableData.
     */
    void (*index_fetch_reset) (struct IndexFetchTableData *data);

    /*
     * Release resources and deallocate index fetch.
     */
    void (*index_fetch_end) (struct IndexFetchTableData *data);

    /*
     * Fetch tuple at `tid` into `slot`, after doing a visibility test
     * according to `snapshot`. If a tuple was found and passed the
     * visibility test, return true, false otherwise.
     *
     * Note that AMs that do not necessarily update indexes when indexed
     * columns do not change need to return the current/correct version of
     * the tuple that is visible to the snapshot, even if the tid points to
     * an older version of the tuple.
     *
     * *call_again is false on the first call to index_fetch_tuple for a
     * tid. If there potentially is another tuple matching the tid,
     * *call_again needs to be set to true by index_fetch_tuple, signaling
     * to the caller that index_fetch_tuple should be called again for the
     * same tid.
     *
     * *all_dead, if all_dead is not NULL, should be set to true by
     * index_fetch_tuple iff it is guaranteed that no backend needs to see
     * that tuple. Index AMs can use that to avoid returning that tid in
     * future searches.
     */
    bool (*index_fetch_tuple) (struct IndexFetchTableData *scan,
                               ItemPointer tid,
                               Snapshot snapshot,
                               TupleTableSlot *slot,
                               bool *call_again, bool *all_dead);

    /* ------------------------------------------------------------------------
     * Callbacks for non-modifying operations on individual tuples
     * ------------------------------------------------------------------------
     */

    /*
     * Fetch tuple at `tid` into `slot`, after doing a visibility test
     * according to `snapshot`. If a tuple was found and passed the
     * visibility test, returns true, false otherwise.
     */
    bool (*tuple_fetch_row_version) (Relation rel,
                                     ItemPointer tid,
                                     Snapshot snapshot,
                                     TupleTableSlot *slot);

    /*
     * Is tid valid for a scan of this relation?
     */
    bool (*tuple_tid_valid) (TableScanDesc scan,
                             ItemPointer tid);

    /*
     * Return the latest version of the tuple at `tid`, by updating `tid` to
     * point at the newest version.
     */
    void (*tuple_get_latest_tid) (TableScanDesc scan,
                                  ItemPointer tid);

    /*
     * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
     * the appropriate type for the AM.
     */
    bool (*tuple_satisfies_snapshot) (Relation rel,
                                      TupleTableSlot *slot,
                                      Snapshot snapshot);

    /* see table_index_delete_tuples() */
    TransactionId (*index_delete_tuples) (Relation rel,
                                          TM_IndexDeleteOp *delstate);

    /* ------------------------------------------------------------------------
     * Manipulations of physical tuples.
     * ------------------------------------------------------------------------
     */

    /* see table_tuple_insert() for reference about parameters */
    void (*tuple_insert) (Relation rel, TupleTableSlot *slot,
                          CommandId cid, int options,
                          struct BulkInsertStateData *bistate);

    /* see table_tuple_insert_speculative() for reference about parameters */
    void (*tuple_insert_speculative) (Relation rel,
                                      TupleTableSlot *slot,
                                      CommandId cid,
                                      int options,
                                      struct BulkInsertStateData *bistate,
                                      uint32 specToken);

    /* see table_tuple_complete_speculative() for reference about parameters */
    void (*tuple_complete_speculative) (Relation rel,
                                        TupleTableSlot *slot,
                                        uint32 specToken,
                                        bool succeeded);

    /* see table_multi_insert() for reference about parameters */
    void (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
                          CommandId cid, int options,
                          struct BulkInsertStateData *bistate);

    /* see table_tuple_delete() for reference about parameters */
    TM_Result (*tuple_delete) (Relation rel,
                               ItemPointer tid,
                               CommandId cid,
                               Snapshot snapshot,
                               Snapshot crosscheck,
                               bool wait,
                               TM_FailureData *tmfd,
                               bool changingPart);

    /* see table_tuple_update() for reference about parameters */
    TM_Result (*tuple_update) (Relation rel,
                               ItemPointer otid,
                               TupleTableSlot *slot,
                               CommandId cid,
                               Snapshot snapshot,
                               Snapshot crosscheck,
                               bool wait,
                               TM_FailureData *tmfd,
                               LockTupleMode *lockmode,
                               bool *update_indexes);

    /* see table_tuple_lock() for reference about parameters */
    TM_Result (*tuple_lock) (Relation rel,
                             ItemPointer tid,
                             Snapshot snapshot,
                             TupleTableSlot *slot,
                             CommandId cid,
                             LockTupleMode mode,
                             LockWaitPolicy wait_policy,
                             uint8 flags,
                             TM_FailureData *tmfd);

    /*
     * Perform operations necessary to complete insertions made via
     * tuple_insert and multi_insert with a BulkInsertState specified.
     * In-tree access methods no longer use this.
     *
     * Typically callers of tuple_insert and multi_insert will just pass all
     * the flags that apply to them, and each AM has to decide which of them
     * make sense for it, and then only take actions in finish_bulk_insert
     * for those flags, and ignore others.
     *
     * Optional callback.
     */
    void (*finish_bulk_insert) (Relation rel, int options);

    /* ------------------------------------------------------------------------
     * DDL related functionality.
     * ------------------------------------------------------------------------
     */

    /*
     * This callback needs to create a new relation filenode for `rel`, with
     * appropriate durability behaviour for `persistence`.
     *
     * Note that only the subset of the relcache filled by
     * RelationBuildLocalRelation() can be relied upon, and that the
     * relation's catalog entries will either not yet exist (new relation),
     * or will still reference the old relfilenode.
     *
     * As output, *freezeXid and *minmulti must be set to the values
     * appropriate for pg_class.{relfrozenxid, relminmxid}. For AMs that
     * don't need those fields to be filled, they can be set to
     * InvalidTransactionId and InvalidMultiXactId, respectively.
     *
     * See also table_relation_set_new_filenode().
     */
    void (*relation_set_new_filenode) (Relation rel,
                                       const RelFileNode *newrnode,
                                       char persistence,
                                       TransactionId *freezeXid,
                                       MultiXactId *minmulti);

    /*
     * This callback needs to remove all contents from `rel`'s current
     * relfilenode. No provisions for transactional behaviour need to be
     * made. Often this can be implemented by truncating the underlying
     * storage to its minimal size.
     *
     * See also table_relation_nontransactional_truncate().
     */
    void (*relation_nontransactional_truncate) (Relation rel);

    /*
     * See table_relation_copy_data().
     *
     * This can typically be implemented by directly copying the underlying
     * storage, unless it contains references to the tablespace internally.
     */
    void (*relation_copy_data) (Relation rel,
                                const RelFileNode *newrnode);

    /* See table_relation_copy_for_cluster() */
    void (*relation_copy_for_cluster) (Relation NewTable,
                                       Relation OldTable,
                                       Relation OldIndex,
                                       bool use_sort,
                                       TransactionId OldestXmin,
                                       TransactionId *xid_cutoff,
                                       MultiXactId *multi_cutoff,
                                       double *num_tuples,
                                       double *tups_vacuumed,
                                       double *tups_recently_dead);

    /*
     * React to a VACUUM command on the relation. The VACUUM can be
     * triggered by a user or by autovacuum. The specific actions performed
     * by the AM will depend heavily on the individual AM.
     *
     * On entry a transaction is already established, and the relation is
     * locked with a ShareUpdateExclusive lock.
     *
     * Note that neither VACUUM FULL (and CLUSTER) nor ANALYZE go through
     * this routine, even if (for ANALYZE) it is part of the same VACUUM
     * command.
     *
     * There probably, in the future, needs to be a separate callback to
     * integrate with autovacuum's scheduling.
     */
    void (*relation_vacuum) (Relation rel,
                             struct VacuumParams *params,
                             BufferAccessStrategy bstrategy);

    /*
     * Prepare to analyze block `blockno` of `scan`. The scan has been
     * started with table_beginscan_analyze(). See also
     * table_scan_analyze_next_block().
     *
     * The callback may acquire resources like locks that are held until
     * table_scan_analyze_next_tuple() returns false. For example, it can
     * make sense to hold a lock until all tuples on a block have been
     * analyzed by scan_analyze_next_tuple.
     *
     * The callback can return false if the block is not suitable for
     * sampling, e.g. because it's a metapage that could never contain
     * tuples.
     *
     * XXX: This obviously is primarily suited for block-based AMs. It's
     * not clear what a good interface for non block based AMs would be, so
     * there isn't one yet.
     */
    bool (*scan_analyze_next_block) (TableScanDesc scan,
                                     BlockNumber blockno,
                                     BufferAccessStrategy bstrategy);

    /*
     * See table_scan_analyze_next_tuple().
     *
     * Not every AM might have a meaningful concept of dead rows, in which
     * case it's OK to not increment *deadrows - but note that that may
     * influence autovacuum scheduling (see comment for relation_vacuum
     * callback).
     */
    bool (*scan_analyze_next_tuple) (TableScanDesc scan,
                                     TransactionId OldestXmin,
                                     double *liverows,
                                     double *deadrows,
                                     TupleTableSlot *slot);

    /* see table_index_build_range_scan for reference about parameters */
    double (*index_build_range_scan) (Relation table_rel,
                                      Relation index_rel,
                                      struct IndexInfo *index_info,
                                      bool allow_sync,
                                      bool anyvisible,
                                      bool progress,
                                      BlockNumber start_blockno,
                                      BlockNumber numblocks,
                                      IndexBuildCallback callback,
                                      void *callback_state,
                                      TableScanDesc scan);

    /* see table_index_validate_scan for reference about parameters */
    void (*index_validate_scan) (Relation table_rel,
                                 Relation index_rel,
                                 struct IndexInfo *index_info,
                                 Snapshot snapshot,
                                 struct ValidateIndexState *state);

    /* ------------------------------------------------------------------------
     * Miscellaneous functions.
     * ------------------------------------------------------------------------
     */

    /*
     * See table_relation_size().
     *
     * Note that currently a few callers use the MAIN_FORKNUM size to figure
     * out the range of potentially interesting blocks (brin, analyze). It's
     * probable that we'll need to revise the interface for those at some
     * point.
     */
    uint64 (*relation_size) (Relation rel, ForkNumber forkNumber);

    /*
     * This callback should return true if the relation requires a TOAST
     * table and false if it does not. It may wish to examine the relation's
     * tuple descriptor before making a decision, but if it uses some other
     * method of storing large values (or if it does not support them) it
     * can simply return false.
     */
    bool (*relation_needs_toast_table) (Relation rel);

    /*
     * This callback should return the OID of the table AM that implements
     * TOAST tables for this AM. If the relation_needs_toast_table callback
     * always returns false, this callback is not required.
     */
    Oid (*relation_toast_am) (Relation rel);

    /*
     * This callback is invoked when detoasting a value stored in a toast
     * table implemented by this AM. See table_relation_fetch_toast_slice()
     * for more details.
     */
    void (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
                                        int32 attrsize,
                                        int32 sliceoffset,
                                        int32 slicelength,
                                        struct varlena *result);

    /* ------------------------------------------------------------------------
     * Planner related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * See table_relation_estimate_size().
     *
     * While block oriented, it shouldn't be too hard for an AM that doesn't
     * internally use blocks to convert into a usable representation.
     *
     * This differs from the relation_size callback by returning size
     * estimates (both relation size and tuple count) for planning purposes,
     * rather than returning a currently correct estimate.
     */
    void (*relation_estimate_size) (Relation rel, int32 *attr_widths,
                                    BlockNumber *pages, double *tuples,
                                    double *allvisfrac);

    /* ------------------------------------------------------------------------
     * Executor related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * Prepare to fetch / check / return tuples from `tbmres->blockno` as
     * part of a bitmap table scan. `scan` was started via
     * table_beginscan_bm(). Return false if there are no tuples to be
     * found on the page, true otherwise.
     *
     * This will typically read and pin the target block, and do the
     * necessary work to allow scan_bitmap_next_tuple() to return tuples
     * (e.g. it might make sense to perform tuple visibility checks at this
     * time). For some AMs it will make more sense to do all the work
     * referencing `tbmres` contents here, for others it might be better to
     * defer more work to scan_bitmap_next_tuple.
     *
     * If `tbmres->ntuples` is -1, the bitmap is lossy for this page and all
     * visible tuples on the page have to be returned, otherwise the tuples
     * at offsets in `tbmres->offsets` need to be returned.
     *
     * XXX: Currently this may only be implemented if the AM uses md.c as
     * its storage manager, and uses ItemPointer->ip_blkid in a manner that
     * maps blockids directly to the underlying storage.
     * nodeBitmapHeapscan.c performs prefetching directly using that
     * interface. This probably needs to be rectified at a later point.
     *
     * XXX: Currently this may only be implemented if the AM uses the
     * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
     * perform prefetching. This probably needs to be rectified at a later
     * point.
     *
     * Optional callback, but either both scan_bitmap_next_block and
     * scan_bitmap_next_tuple need to exist, or neither.
     */
    bool (*scan_bitmap_next_block) (TableScanDesc scan,
                                    struct TBMIterateResult *tbmres);

    /*
     * Fetch the next tuple of a bitmap table scan into `slot` and return
     * true if a visible tuple was found, false otherwise.
     *
     * For some AMs it will make more sense to do all the work referencing
     * `tbmres` contents in scan_bitmap_next_block, for others it might be
     * better to defer more work to this callback.
     *
     * Optional callback, but either both scan_bitmap_next_block and
     * scan_bitmap_next_tuple need to exist, or neither.
     */
    bool (*scan_bitmap_next_tuple) (TableScanDesc scan,
                                    struct TBMIterateResult *tbmres,
                                    TupleTableSlot *slot);

    /*
     * Prepare to fetch tuples from the next block in a sample scan. Return
     * false if the sample scan is finished, true otherwise. `scan` was
     * started via table_beginscan_sampling().
     *
     * Typically this will first determine the target block by calling the
     * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
     * perform a sequential scan over all blocks. The determined block is
     * then typically read and pinned.
     *
     * As the TsmRoutine interface is block based, a block needs to be
     * passed to NextSampleBlock(). If that's not appropriate for an AM, it
     * internally needs to perform mapping between the internal and a block
     * based representation.
     *
     * Note that it's not acceptable to hold deadlock prone resources such
     * as lwlocks until scan_sample_next_tuple() has exhausted the tuples on
     * the block - the tuple is likely to be returned to an upper query
     * node, and the next call could be off a long while. Holding buffer
     * pins and such is obviously OK.
     *
     * Currently it is required to implement this interface, as there's no
     * alternative way (unlike e.g. bitmap scans) to implement sample scans.
     * If infeasible to implement, the AM may raise an error.
     */
    bool (*scan_sample_next_block) (TableScanDesc scan,
                                    struct SampleScanState *scanstate);

    /*
     * This callback, only called after scan_sample_next_block has returned
     * true, should determine the next tuple to be returned from the
     * selected block using the TsmRoutine's NextSampleTuple() callback.
     *
     * The callback needs to perform visibility checks, and only return
     * visible tuples. That obviously can mean calling NextSampleTuple()
     * multiple times.
     *
     * The TsmRoutine interface assumes that there's a maximum offset on a
     * given page, so if that doesn't apply to an AM, it needs to emulate
     * that assumption somehow.
     */
    bool (*scan_sample_next_tuple) (TableScanDesc scan,
                                    struct SampleScanState *scanstate,
                                    TupleTableSlot *slot);
} TableAmRoutine;
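
/*
 * Illustrative usage (editor's sketch, hypothetical AM): per the comment
 * above the struct, an AM exposes its TableAmRoutine through a handler
 * function, typically over a static const struct. All `myam_*` names are
 * assumptions.
 *
 *     static const TableAmRoutine myam_methods = {
 *         .type = T_TableAmRoutine,
 *         .slot_callbacks = myam_slot_callbacks,
 *         .scan_begin = myam_scan_begin,
 *         // ... remaining required callbacks ...
 *     };
 *
 *     Datum
 *     myam_handler(PG_FUNCTION_ARGS)
 *     {
 *         PG_RETURN_POINTER(&myam_methods);
 *     }
 */
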
/* ----------------------------------------------------------------------------
 * Slot functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Returns slot callbacks suitable for holding tuples of the appropriate type
 * for the relation. Works for tables, views, foreign tables and partitioned
 * tables.
 */
extern const TupleTableSlotOps *table_slot_callbacks(Relation rel);

/*
 * Returns a slot using the callbacks returned by table_slot_callbacks(), and
 * registers it on *reglist.
 */
extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);

/* ----------------------------------------------------------------------------
 * Table scan functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Start a scan of `rel`. Returned tuples pass a visibility test of
 * `snapshot`, and if nkeys != 0, the results are filtered by those scan
 * keys.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
                int nkeys, struct ScanKeyData *key)
{
    uint32 flags = SO_TYPE_SEQSCAN |
        SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * Like table_beginscan(), but for scanning catalogs. It'll automatically use
 * a snapshot appropriate for scanning catalog relations.
 */
extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys,
                                             struct ScanKeyData *key);

/*
 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 * that lets the caller control whether a nondefault buffer access strategy
 * can be used, and whether syncscan can be chosen (possibly resulting in the
 * scan not starting from block zero). Both of these default to true with
 * plain table_beginscan.
 */
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
                      int nkeys, struct ScanKeyData *key,
                      bool allow_strat, bool allow_sync)
{
    uint32 flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;

    if (allow_strat)
        flags |= SO_ALLOW_STRAT;
    if (allow_sync)
        flags |= SO_ALLOW_SYNC;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_bm is an alternative entry point for setting up a
 * TableScanDesc for a bitmap heap scan. Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality
 * to make it worth using the same data structure.
 */
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
                   int nkeys, struct ScanKeyData *key)
{
    uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_sampling is an alternative entry point for setting up a
 * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by table_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 */
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
                         int nkeys, struct ScanKeyData *key,
                         bool allow_strat, bool allow_sync,
                         bool allow_pagemode)
{
    uint32 flags = SO_TYPE_SAMPLESCAN;

    if (allow_strat)
        flags |= SO_ALLOW_STRAT;
    if (allow_sync)
        flags |= SO_ALLOW_SYNC;
    if (allow_pagemode)
        flags |= SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_tid is an alternative entry point for setting up a
 * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using the
 * same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_tid(Relation rel, Snapshot snapshot)
{
    uint32 flags = SO_TYPE_TIDSCAN;

    return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}

/*
 * table_beginscan_analyze is an alternative entry point for setting up a
 * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
    uint32 flags = SO_TYPE_ANALYZE;

    return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}

/*
 * End relation scan.
 */
static inline void
table_endscan(TableScanDesc scan)
{
    scan->rs_rd->rd_tableam->scan_end(scan);
}

/*
 * Restart a relation scan.
 */
static inline void
table_rescan(TableScanDesc scan,
             struct ScanKeyData *key)
{
    scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
}

/*
 * Restart a relation scan after changing params.
 *
 * This call allows changing the buffer strategy, syncscan, and pagemode
 * options before starting a fresh scan. Note that although the actual use
 * of syncscan might change (effectively, enabling or disabling reporting),
 * the previously selected startblock will be kept.
 */
static inline void
table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
                        bool allow_strat, bool allow_sync,
                        bool allow_pagemode)
{
    scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
                                         allow_strat, allow_sync,
                                         allow_pagemode);
}

/*
 * Update the snapshot used by the scan.
 */
extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);

/*
 * Return next tuple from `scan`, store in slot.
 */
static inline bool
table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction,
                       TupleTableSlot *slot)
{
    slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);

    /*
     * We don't expect direct calls to table_scan_getnextslot with valid
     * CheckXidAlive for catalog or regular tables. See detailed comments in
     * xact.c where these variables are declared.
     */
    if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
        elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");

    return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}
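
/*
 * Illustrative usage (editor's sketch, hypothetical caller code): a plain
 * sequential scan using the wrappers above; `rel` and `snapshot` are
 * assumed to be supplied by the caller.
 *
 *     TupleTableSlot *slot = table_slot_create(rel, NULL);
 *     TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
 *
 *     while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
 *     {
 *         // process the tuple in slot
 *     }
 *
 *     table_endscan(scan);
 *     ExecDropSingleTupleTableSlot(slot);
 */
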
/* ----------------------------------------------------------------------------
 * TID Range scanning related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
 * for a TID range scan.
 */
static inline TableScanDesc
table_beginscan_tidrange(Relation rel, Snapshot snapshot,
                         ItemPointer mintid,
                         ItemPointer maxtid)
{
    TableScanDesc sscan;
    uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;

    sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);

    /* Set the range of TIDs to scan */
    sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);

    return sscan;
}

/*
 * table_rescan_tidrange resets the scan position and sets the minimum and
 * maximum TID range to scan for a TableScanDesc created by
 * table_beginscan_tidrange.
 */
static inline void
table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
                      ItemPointer maxtid)
{
    /* Ensure table_beginscan_tidrange() was used. */
    Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

    sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
    sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
}

/*
 * Fetch the next tuple from `sscan` for a TID range scan created by
 * table_beginscan_tidrange(). Stores the tuple in `slot` and returns true,
 * or returns false if no more tuples exist in the range.
 */
static inline bool
table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
                                TupleTableSlot *slot)
{
    /* Ensure table_beginscan_tidrange() was used. */
    Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);

    return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
                                                               direction,
                                                               slot);
}
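
/*
 * Illustrative usage (editor's sketch, hypothetical caller code): scanning
 * the first ten blocks of a table by TID range. `rel`, `snapshot` and
 * `slot` are assumed to be supplied by the caller.
 *
 *     ItemPointerData mintid;
 *     ItemPointerData maxtid;
 *
 *     ItemPointerSet(&mintid, 0, 1);
 *     ItemPointerSet(&maxtid, 9, MaxOffsetNumber);
 *
 *     TableScanDesc scan = table_beginscan_tidrange(rel, snapshot,
 *                                                   &mintid, &maxtid);
 *
 *     while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection,
 *                                            slot))
 *     {
 *         // process the tuple in slot
 *     }
 *
 *     table_endscan(scan);
 */
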
/* ----------------------------------------------------------------------------
 * Parallel table scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the size of shared memory needed for a parallel scan of this
 * relation.
 */
extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);

/*
 * Initialize ParallelTableScanDesc for a parallel scan of this relation.
 * `pscan` needs to be sized according to parallelscan_estimate() for the
 * same relation. Call this just once in the leader process; then,
 * individual workers attach via table_beginscan_parallel.
 */
extern void table_parallelscan_initialize(Relation rel,
                                          ParallelTableScanDesc pscan,
                                          Snapshot snapshot);

/*
 * Begin a parallel scan. `pscan` needs to have been initialized with
 * table_parallelscan_initialize(), for the same relation. The
 * initialization does not need to have happened in this backend.
 *
 * Caller must hold a suitable lock on the relation.
 */
extern TableScanDesc table_beginscan_parallel(Relation rel,
                                              ParallelTableScanDesc pscan);

/*
 * Restart a parallel scan. Call this in the leader process. Caller is
 * responsible for making sure that all workers have finished the scan
 * beforehand.
 */
static inline void
table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
{
    rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
}
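
/*
 * Illustrative division of labour (editor's sketch, hypothetical caller
 * code): shared-memory allocation details are elided; `shm_alloc` is a
 * hypothetical placeholder for the caller's DSM machinery.
 *
 *     In the leader, once:
 *         Size sz = table_parallelscan_estimate(rel, snapshot);
 *         ParallelTableScanDesc pscan = shm_alloc(sz);   // hypothetical
 *         table_parallelscan_initialize(rel, pscan, snapshot);
 *
 *     In the leader and in each worker:
 *         TableScanDesc scan = table_beginscan_parallel(rel, pscan);
 *         // ... fetch tuples with table_scan_getnextslot() ...
 *         table_endscan(scan);
 */
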
/* ----------------------------------------------------------------------------
 * Index scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch tuples from the relation, as needed when fetching tuples
 * for an index scan.
 *
 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 */
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
    return rel->rd_tableam->index_fetch_begin(rel);
}

/*
 * Reset index fetch. Typically this will release cross index fetch resources
 * held in IndexFetchTableData.
 */
static inline void
table_index_fetch_reset(struct IndexFetchTableData *scan)
{
    scan->rel->rd_tableam->index_fetch_reset(scan);
}

/*
 * Release resources and deallocate index fetch.
 */
static inline void
table_index_fetch_end(struct IndexFetchTableData *scan)
{
    scan->rel->rd_tableam->index_fetch_end(scan);
}

/*
 * Fetches, as part of an index scan, the tuple at `tid` into `slot`, after
 * doing a visibility test according to `snapshot`. If a tuple was found and
 * passed the visibility test, returns true, false otherwise. Note that *tid
 * may be modified when we return true (see later remarks on multiple row
 * versions reachable via a single index entry).
 *
 * *call_again needs to be false on the first call to
 * table_index_fetch_tuple() for a tid. If there potentially is another
 * tuple matching the tid, *call_again will be set to true, signaling that
 * table_index_fetch_tuple() should be called again for the same tid.
 *
 * *all_dead, if all_dead is not NULL, will be set to true by
 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to
 * see that tuple. Index AMs can use that to avoid returning that tid in
 * future searches.
 *
 * The difference between this function and table_tuple_fetch_row_version()
 * is that this function returns the currently visible version of a row if
 * the AM supports storing multiple row versions reachable via a single
 * index entry (like heap's HOT), whereas table_tuple_fetch_row_version()
 * only evaluates the tuple exactly at `tid`. Outside of index entry ->
 * table tuple lookups, table_tuple_fetch_row_version() is what's usually
 * needed.
 */
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
                        ItemPointer tid,
                        Snapshot snapshot,
                        TupleTableSlot *slot,
                        bool *call_again, bool *all_dead)
{
    /*
     * We don't expect direct calls to table_index_fetch_tuple with valid
     * CheckXidAlive for catalog or regular tables. See detailed comments in
     * xact.c where these variables are declared.
     */
    if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
        elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");

    return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
                                                    slot, call_again,
                                                    all_dead);
}
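
/*
 * Illustrative usage (editor's sketch, hypothetical caller code): probing
 * the row versions reachable from one index entry, the way an index scan
 * does. `rel`, `tid`, `snapshot` and `slot` are assumed to be supplied by
 * the caller, with `tid` taken from an index tuple.
 *
 *     IndexFetchTableData *fetch = table_index_fetch_begin(rel);
 *     bool call_again = false;
 *     bool all_dead = false;
 *
 *     do
 *     {
 *         if (table_index_fetch_tuple(fetch, &tid, snapshot, slot,
 *                                     &call_again, &all_dead))
 *         {
 *             // visible version found; process slot
 *             break;
 *         }
 *     } while (call_again);
 *
 *     table_index_fetch_end(fetch);
 */
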

/*
 * This is a convenience wrapper around table_index_fetch_tuple() which
 * returns whether there are table tuple items corresponding to an index
 * entry. This likely is only useful to verify if there's a conflict in a
 * unique index.
 */
extern bool table_index_fetch_tuple_check(Relation rel,
										  ItemPointer tid,
										  Snapshot snapshot,
										  bool *all_dead);

/* ------------------------------------------------------------------------
 * Functions for non-modifying operations on individual tuples
 * ------------------------------------------------------------------------
 */

/*
 * Fetch the tuple at `tid` into `slot`, after doing a visibility test
 * according to `snapshot`. If a tuple was found and passed the visibility
 * test, returns true, false otherwise.
 *
 * See table_index_fetch_tuple's comment for the difference between these
 * functions. It is correct to use this function outside of index
 * entry -> table tuple lookups.
 */
static inline bool
table_tuple_fetch_row_version(Relation rel,
							  ItemPointer tid,
							  Snapshot snapshot,
							  TupleTableSlot *slot)
{
	/*
	 * We don't expect direct calls to table_tuple_fetch_row_version with
	 * valid CheckXidAlive for catalog or regular tables. See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");

	return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}
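
/*
 * Illustrative sketch (assumptions: `rel`, `tid`, and a compatible `slot`
 * already exist, and GetActiveSnapshot() from snapmgr.h is the snapshot the
 * caller wants):
 *
 *	bool		found;
 *
 *	found = table_tuple_fetch_row_version(rel, &tid,
 *										  GetActiveSnapshot(), slot);
 *	// if `found`, the version of the row at `tid` is now stored in `slot`
 */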

/*
 * Verify that `tid` is a potentially valid tuple identifier. That doesn't
 * mean that the pointed-to row needs to exist or be visible, but that
 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
 * table_tuple_fetch_row_version()) should not error out if called with that
 * tid.
 *
 * `scan` needs to have been started via table_beginscan().
 */
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
	return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}

/*
 * Return the latest version of the tuple at `tid`, by updating `tid` to
 * point at the newest version.
 */
extern void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid);

/*
 * Return true iff the tuple in slot satisfies the snapshot.
 *
 * This assumes the slot's tuple is valid, and of the appropriate type for the
 * AM.
 *
 * Some AMs might modify the data underlying the tuple as a side-effect. If
 * so, they ought to mark the relevant buffer dirty.
 */
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
							   Snapshot snapshot)
{
	return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
}

/*
 * Determine which index tuples are safe to delete based on their table TID.
 *
 * Determines which entries from the index AM caller's TM_IndexDeleteOp state
 * point to vacuumable table tuples. Entries that are found by the table AM
 * to be vacuumable are naturally safe for the index AM to delete, and so get
 * directly marked as deletable. See comments above TM_IndexDelete and
 * TM_IndexDeleteOp for full details.
 *
 * Returns a latestRemovedXid transaction ID that the caller generally places
 * in its index deletion WAL record. This might be used during subsequent
 * REDO of the WAL record when in Hot Standby mode -- a recovery conflict for
 * the index deletion operation might be required on the standby.
 */
static inline TransactionId
table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
{
	return rel->rd_tableam->index_delete_tuples(rel, delstate);
}

/* ----------------------------------------------------------------------------
 * Functions for manipulations of physical tuples.
 * ----------------------------------------------------------------------------
 */

/*
 * Insert a tuple from a slot via the table AM's insert routine.
 *
 * The options bitmask allows the caller to specify options that may change
 * the behaviour of the AM. The AM will ignore options that it does not
 * support.
 *
 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not
 * reuse free space in the relation. This can save some cycles when we know
 * the relation is new and doesn't contain useful amounts of free space.
 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
 * RelationGetBufferForTuple. See that method for more information.
 *
 * TABLE_INSERT_FROZEN should only be specified for inserts into
 * relfilenodes created during the current subtransaction and when
 * there are no prior snapshots or pre-existing portals open.
 * This causes rows to be frozen, which is an MVCC violation and
 * requires explicit options chosen by the user.
 *
 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 * information for the tuple. This should solely be used during table
 * rewrites where RelationIsLogicallyLogged(relation) is not yet accurate for
 * the new relation.
 *
 * Note that most of these options will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple. If
 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
 *
 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 * insertion. But note that any toasting of fields within the slot is NOT
 * reflected in the slot's contents.
 */
static inline void
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
				   int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->tuple_insert(rel, slot, cid, options,
								  bistate);
}
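
/*
 * Minimal single-row insert sketch (illustrative; executor-driven index
 * maintenance and error handling omitted). Assumes an open, suitably locked
 * relation `rel`; table_slot_create(), ExecClearTuple(),
 * ExecStoreVirtualTuple(), and GetCurrentCommandId() come from the usual
 * executor/xact headers.
 *
 *	TupleTableSlot *slot = table_slot_create(rel, NULL);
 *
 *	ExecClearTuple(slot);
 *	// ... fill slot->tts_values / slot->tts_isnull for each attribute ...
 *	ExecStoreVirtualTuple(slot);
 *
 *	table_tuple_insert(rel, slot, GetCurrentCommandId(true),
 *					   0, NULL);	// no options, no bulk-insert state
 */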

/*
 * Perform a "speculative insertion". These can be backed out afterwards
 * without aborting the whole transaction. Other sessions can wait for the
 * speculative insertion to be confirmed, turning it into a regular tuple, or
 * aborted, as if it never existed. Speculatively inserted tuples behave as
 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 *
 * A transaction having performed a speculative insertion has to either abort,
 * or finish the speculative insertion with
 * table_tuple_complete_speculative(succeeded = ...).
 */
static inline void
table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
							   CommandId cid, int options,
							   struct BulkInsertStateData *bistate,
							   uint32 specToken)
{
	rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
											  bistate, specToken);
}

/*
 * Complete "speculative insertion" started in the same transaction. If
 * succeeded is true, the tuple is fully inserted, if false, it's removed.
 */
static inline void
table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
								 uint32 specToken, bool succeeded)
{
	rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
												succeeded);
}
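
/*
 * Illustrative flow for a speculative insertion, roughly as the executor
 * drives INSERT .. ON CONFLICT (a sketch; the token lock functions are
 * assumed to come from lmgr.h, and `conflict` stands for the caller's own
 * unique-index probe result):
 *
 *	uint32		specToken;
 *
 *	specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
 *	table_tuple_insert_speculative(rel, slot, GetCurrentCommandId(true),
 *								   0, NULL, specToken);
 *	// ... insert index entries, probing for a unique-index conflict ...
 *	table_tuple_complete_speculative(rel, slot, specToken, !conflict);
 *	SpeculativeInsertionLockRelease(GetCurrentTransactionId());
 */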

/*
 * Insert multiple tuples into a table.
 *
 * This is like table_tuple_insert(), but inserts multiple tuples in one
 * operation. That's often faster than calling table_tuple_insert() in a
 * loop, because e.g. the AM can reduce WAL logging and page locking
 * overhead.
 *
 * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
 * in `slots`, the parameters for table_multi_insert() are the same as for
 * table_tuple_insert().
 *
 * Note: this leaks memory into the current memory context. You can create a
 * temporary context before calling this, if that's a problem.
 */
static inline void
table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
				   CommandId cid, int options, struct BulkInsertStateData *bistate)
{
	rel->rd_tableam->multi_insert(rel, slots, nslots,
								  cid, options, bistate);
}

/*
 * Delete a tuple.
 *
 * NB: do not call this directly unless prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_delete instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	tid - TID of tuple to be deleted
 *	cid - delete command ID (used for visibility test, and stored into
 *		cmax if successful)
 *	crosscheck - if not InvalidSnapshot, also check tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *	changingPart - true iff the tuple is being moved to another partition
 *		table due to an update of the partition key. Otherwise, false.
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
 * for additional info.
 */
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
				   Snapshot snapshot, Snapshot crosscheck, bool wait,
				   TM_FailureData *tmfd, bool changingPart)
{
	return rel->rd_tableam->tuple_delete(rel, tid, cid,
										 snapshot, crosscheck,
										 wait, tmfd, changingPart);
}
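
/*
 * Sketch of the caller-side result handling (illustrative; `rel`, `tid`,
 * and `snapshot` are assumed, and a real caller reacts to concurrency
 * failures, e.g. via EvalPlanQual, rather than just branching):
 *
 *	TM_FailureData tmfd;
 *	TM_Result	result;
 *
 *	result = table_tuple_delete(rel, &tid, GetCurrentCommandId(true),
 *								snapshot, InvalidSnapshot,
 *								true, &tmfd, false);	// wait; no partition move
 *	switch (result)
 *	{
 *		case TM_Ok:
 *			break;				// tuple deleted
 *		case TM_SelfModified:
 *		case TM_Updated:
 *		case TM_BeingModified:
 *			// concurrent modification: inspect tmfd and recheck/retry
 *			break;
 *		default:
 *			elog(ERROR, "unexpected table_tuple_delete result");
 *	}
 */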

/*
 * Update a tuple.
 *
 * NB: do not call this directly unless you are prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_update instead.
 *
 * Input parameters:
 *	relation - table to be modified (caller must hold suitable lock)
 *	otid - TID of old tuple to be replaced
 *	slot - newly constructed tuple data to store
 *	cid - update command ID (used for visibility test, and stored into
 *		cmax/cmin if successful)
 *	crosscheck - if not InvalidSnapshot, also check old tuple against this
 *	wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *	tmfd - filled in failure cases (see below)
 *	lockmode - filled with lock mode acquired on tuple
 *	update_indexes - in success cases this is set to true if new index entries
 *		are required for this tuple
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * update it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * On success, the slot's tts_tid and tts_tableOid are updated to match the
 * new stored tuple; in particular, slot->tts_tid is set to the TID where the
 * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
 * update was done. However, any TOAST changes in the new tuple's data are
 * not reflected back into the slot's contents.
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
 * for additional info.
 */
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
				   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
				   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
				   bool *update_indexes)
{
	return rel->rd_tableam->tuple_update(rel, otid, slot,
										 cid, snapshot, crosscheck,
										 wait, tmfd,
										 lockmode, update_indexes);
}
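
/*
 * Illustrative success-path sketch (assuming `rel`, `otid`, a filled
 * `newslot`, and `snapshot`; failure handling as for table_tuple_delete()):
 *
 *	TM_FailureData tmfd;
 *	LockTupleMode lockmode;
 *	bool		update_indexes;
 *
 *	if (table_tuple_update(rel, &otid, newslot, GetCurrentCommandId(true),
 *						   snapshot, InvalidSnapshot, true, &tmfd,
 *						   &lockmode, &update_indexes) == TM_Ok &&
 *		update_indexes)
 *	{
 *		// insert new index entries for the tuple now at newslot->tts_tid
 *	}
 */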

/*
 * Lock a tuple in the specified mode.
 *
 * Input parameters:
 *	relation: relation containing tuple (caller must hold suitable lock)
 *	tid: TID of tuple to lock
 *	snapshot: snapshot to use for visibility determinations
 *	cid: current command ID (used for visibility test, and stored into
 *		tuple's cmax if lock is successful)
 *	mode: lock mode desired
 *	wait_policy: what to do if tuple lock is not available
 *	flags:
 *		If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
 *		also lock descendant tuples if lock modes don't conflict.
 *		If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
 *		latest version.
 *
 * Output parameters:
 *	*slot: contains the target tuple
 *	*tmfd: filled in failure cases (see below)
 *
 * Function result may be:
 *	TM_Ok: lock was successfully acquired
 *	TM_Invisible: lock failed because tuple was never visible to us
 *	TM_SelfModified: lock failed because tuple updated by self
 *	TM_Updated: lock failed because tuple updated by other xact
 *	TM_Deleted: lock failed because tuple deleted by other xact
 *	TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
 *
 * In the failure cases other than TM_Invisible and TM_Deleted, the routine
 * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
 * comments for struct TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
				 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
				 LockWaitPolicy wait_policy, uint8 flags,
				 TM_FailureData *tmfd)
{
	return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
									   cid, mode, wait_policy,
									   flags, tmfd);
}

/*
 * Perform operations necessary to complete insertions made via
 * tuple_insert and multi_insert with a BulkInsertState specified.
 */
static inline void
table_finish_bulk_insert(Relation rel, int options)
{
	/* optional callback */
	if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
		rel->rd_tableam->finish_bulk_insert(rel, options);
}
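
/*
 * Batched-insert sketch (illustrative; GetBulkInsertState() and
 * FreeBulkInsertState() are assumed from heapam.h, and the batching of
 * `slots` is left to the caller, much as COPY FROM does it):
 *
 *	BulkInsertState bistate = GetBulkInsertState();
 *
 *	// repeatedly: fill `slots` with up to `nslots` tuples, then
 *	table_multi_insert(rel, slots, nslots, GetCurrentCommandId(true),
 *					   TABLE_INSERT_SKIP_FSM, bistate);
 *
 *	table_finish_bulk_insert(rel, TABLE_INSERT_SKIP_FSM);
 *	FreeBulkInsertState(bistate);
 */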

/* ------------------------------------------------------------------------
 * DDL related functionality.
 * ------------------------------------------------------------------------
 */

/*
 * Create storage for `rel` in `newrnode`, with persistence set to
 * `persistence`.
 *
 * This is used both during relation creation and various DDL operations to
 * create a new relfilenode that can be filled from scratch. When creating
 * new storage for an existing relfilenode, this should be called before the
 * relcache entry has been updated.
 *
 * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
 * that pg_class.{relfrozenxid, relminmxid} have to be set to.
 */
static inline void
table_relation_set_new_filenode(Relation rel,
								const RelFileNode *newrnode,
								char persistence,
								TransactionId *freezeXid,
								MultiXactId *minmulti)
{
	rel->rd_tableam->relation_set_new_filenode(rel, newrnode, persistence,
											   freezeXid, minmulti);
}

/*
 * Remove all table contents from `rel`, in a non-transactional manner.
 * Non-transactional meaning that there's no need to support rollbacks. This
 * commonly is only used to perform truncations for relfilenodes created in
 * the current transaction.
 */
static inline void
table_relation_nontransactional_truncate(Relation rel)
{
	rel->rd_tableam->relation_nontransactional_truncate(rel);
}

/*
 * Copy data from `rel` into the new relfilenode `newrnode`. The new
 * relfilenode may not have storage associated before this function is
 * called. This is only supposed to be used for low level operations like
 * changing a relation's tablespace.
 */
static inline void
table_relation_copy_data(Relation rel, const RelFileNode *newrnode)
{
	rel->rd_tableam->relation_copy_data(rel, newrnode);
}

/*
 * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
 * FULL.
 *
 * Additional Input parameters:
 * - use_sort - if true, the table contents are sorted appropriately for
 *   `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
 *   in that index's order; if false and OldIndex is InvalidOid, no sorting is
 *   performed
 * - OldIndex - see use_sort
 * - OldestXmin - computed by vacuum_set_xid_limits(), even when
 *   not needed for the relation's AM
 * - *xid_cutoff - ditto
 * - *multi_cutoff - ditto
 *
 * Output parameters:
 * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
 * - *multi_cutoff - rel's new relminmxid value, may be invalid
 * - *tups_vacuumed - stats, for logging, if appropriate for AM
 * - *tups_recently_dead - stats, for logging, if appropriate for AM
 */
static inline void
table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
								Relation OldIndex,
								bool use_sort,
								TransactionId OldestXmin,
								TransactionId *xid_cutoff,
								MultiXactId *multi_cutoff,
								double *num_tuples,
								double *tups_vacuumed,
								double *tups_recently_dead)
{
	OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
													use_sort, OldestXmin,
													xid_cutoff, multi_cutoff,
													num_tuples, tups_vacuumed,
													tups_recently_dead);
}

/*
 * Perform VACUUM on the relation. The VACUUM can be triggered by a user or
 * by autovacuum. The specific actions performed by the AM will depend
 * heavily on the individual AM.
 *
 * On entry a transaction needs to have already been established, and the
 * table is locked with a ShareUpdateExclusive lock.
 *
 * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through this
 * routine, even if (for ANALYZE) it is part of the same VACUUM command.
 */
static inline void
table_relation_vacuum(Relation rel, struct VacuumParams *params,
					  BufferAccessStrategy bstrategy)
{
	rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
}

/*
 * Prepare to analyze block `blockno` of `scan`. The scan needs to have been
 * started with table_beginscan_analyze(). Note that this routine might
 * acquire resources like locks that are held until
 * table_scan_analyze_next_tuple() returns false.
 *
 * Returns false if block is unsuitable for sampling, true otherwise.
 */
static inline bool
table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
							  BufferAccessStrategy bstrategy)
{
	return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno,
															bstrategy);
}

/*
 * Iterate over tuples in the block selected with
 * table_scan_analyze_next_block() (which needs to have returned true, and
 * this routine may not have returned false for the same block before). If a
 * tuple that's suitable for sampling is found, true is returned and a tuple
 * is stored in `slot`.
 *
 * *liverows and *deadrows are incremented according to the encountered
 * tuples.
 */
static inline bool
table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
							  double *liverows, double *deadrows,
							  TupleTableSlot *slot)
{
	return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
															liverows, deadrows,
															slot);
}
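
/*
 * Sampling-loop sketch, as ANALYZE might drive it (illustrative; the scan
 * is assumed to have been started with table_beginscan_analyze(), and
 * `blockno` to come from the caller's block sampler):
 *
 *	double		liverows = 0,
 *				deadrows = 0;
 *
 *	if (table_scan_analyze_next_block(scan, blockno, bstrategy))
 *	{
 *		while (table_scan_analyze_next_tuple(scan, OldestXmin,
 *											 &liverows, &deadrows, slot))
 *		{
 *			// consider the tuple in `slot` for the row sample
 *		}
 *	}
 */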

/*
 * table_index_build_scan - scan the table to find tuples to be indexed
 *
 * This is called back from an access-method-specific index build procedure
 * after the AM has done whatever setup it needs. The parent table relation
 * is scanned to find tuples that should be entered into the index. Each
 * such tuple is passed to the AM's callback routine, which does the right
 * things to add it to the new index. After we return, the AM's index
 * build procedure does whatever cleanup it needs.
 *
 * The total count of live tuples is returned. This is for updating pg_class
 * statistics. (It's annoying not to be able to do that here, but we want to
 * merge that update with others; see index_update_stats.) Note that the
 * index AM itself must keep track of the number of index tuples; we don't do
 * so here because the AM might reject some of the tuples for its own reasons,
 * such as being unable to store NULLs.
 *
 * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
 * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
 *
 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
 * any potentially broken HOT chains. Currently, we set this if there are any
 * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
 * very hard to detect whether they're really incompatible with the chain tip.
 * This only really makes sense for the heap AM; it might need to be
 * generalized for other AMs later.
 */
static inline double
table_index_build_scan(Relation table_rel,
					   Relation index_rel,
					   struct IndexInfo *index_info,
					   bool allow_sync,
					   bool progress,
					   IndexBuildCallback callback,
					   void *callback_state,
					   TableScanDesc scan)
{
	return table_rel->rd_tableam->index_build_range_scan(table_rel,
														 index_rel,
														 index_info,
														 allow_sync,
														 false,
														 progress,
														 0,
														 InvalidBlockNumber,
														 callback,
														 callback_state,
														 scan);
}

/*
 * As table_index_build_scan(), except that instead of scanning the complete
 * table, only the given number of blocks are scanned. Scan to end-of-rel can
 * be signaled by passing InvalidBlockNumber as numblocks. Note that
 * restricting the range to scan cannot be done when requesting syncscan.
 *
 * When "anyvisible" mode is requested, all tuples visible to any transaction
 * are indexed and counted as live, including those inserted or deleted by
 * transactions that are still in progress.
 */
static inline double
table_index_build_range_scan(Relation table_rel,
							 Relation index_rel,
							 struct IndexInfo *index_info,
							 bool allow_sync,
							 bool anyvisible,
							 bool progress,
							 BlockNumber start_blockno,
							 BlockNumber numblocks,
							 IndexBuildCallback callback,
							 void *callback_state,
							 TableScanDesc scan)
{
	return table_rel->rd_tableam->index_build_range_scan(table_rel,
														 index_rel,
														 index_info,
														 allow_sync,
														 anyvisible,
														 progress,
														 start_blockno,
														 numblocks,
														 callback,
														 callback_state,
														 scan);
}
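
/*
 * Sketch of how an index AM's build procedure might use this (illustrative;
 * `my_build_callback` and `buildstate` are hypothetical names, and forming
 * and inserting the index tuple is AM-specific):
 *
 *	static void
 *	my_build_callback(Relation index, ItemPointer tid, Datum *values,
 *					  bool *isnull, bool tupleIsAlive, void *state)
 *	{
 *		// form an index tuple from values/isnull and add it, tagged with tid
 *	}
 *
 *	reltuples = table_index_build_scan(table_rel, index_rel, index_info,
 *									   true, true,	// allow_sync, progress
 *									   my_build_callback,
 *									   (void *) buildstate, NULL);
 */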

/*
 * table_index_validate_scan - second table scan for concurrent index build
 *
 * See validate_index() for an explanation.
 */
static inline void
table_index_validate_scan(Relation table_rel,
						  Relation index_rel,
						  struct IndexInfo *index_info,
						  Snapshot snapshot,
						  struct ValidateIndexState *state)
{
	table_rel->rd_tableam->index_validate_scan(table_rel,
											   index_rel,
											   index_info,
											   snapshot,
											   state);
}

/* ----------------------------------------------------------------------------
 * Miscellaneous functionality
 * ----------------------------------------------------------------------------
 */

/*
 * Return the current size of `rel` in bytes. If `forkNumber` is
 * InvalidForkNumber, return the relation's overall size, otherwise the size
 * for the indicated fork.
 *
 * Note that the overall size might not be the equivalent of the sum of sizes
 * for the individual forks for some AMs, e.g. because the AM's storage does
 * not neatly map onto the builtin types of forks.
 */
static inline uint64
table_relation_size(Relation rel, ForkNumber forkNumber)
{
	return rel->rd_tableam->relation_size(rel, forkNumber);
}

/*
 * table_relation_needs_toast_table - does this relation need a toast table?
 */
static inline bool
table_relation_needs_toast_table(Relation rel)
{
	return rel->rd_tableam->relation_needs_toast_table(rel);
}

/*
 * Return the OID of the AM that should be used to implement the TOAST table
 * for this relation.
 */
static inline Oid
table_relation_toast_am(Relation rel)
{
	return rel->rd_tableam->relation_toast_am(rel);
}

/*
 * Fetch all or part of a TOAST value from a TOAST table.
 *
 * If this AM is never used to implement a TOAST table, then this callback
 * is not needed. But, if toasted values are ever stored in a table of this
 * type, then you will need this callback.
 *
 * toastrel is the relation in which the toasted value is stored.
 *
 * valueid identifies which toast value is to be fetched. For the heap,
 * this corresponds to the values stored in the chunk_id column.
 *
 * attrsize is the total size of the toast value to be fetched.
 *
 * sliceoffset is the offset within the toast value of the first byte that
 * should be fetched.
 *
 * slicelength is the number of bytes from the toast value that should be
 * fetched.
 *
 * result is caller-allocated space into which the fetched bytes should be
 * stored.
 */
static inline void
table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
								 int32 attrsize, int32 sliceoffset,
								 int32 slicelength, struct varlena *result)
{
	toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
													 attrsize,
													 sliceoffset, slicelength,
													 result);
}

/* ----------------------------------------------------------------------------
 * Planner related functionality
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the current size of the relation, as an AM specific workhorse for
 * estimate_rel_size(). Look there for an explanation of the parameters.
 */
static inline void
table_relation_estimate_size(Relation rel, int32 *attr_widths,
							 BlockNumber *pages, double *tuples,
							 double *allvisfrac)
{
	rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
											allvisfrac);
}

/* ----------------------------------------------------------------------------
 * Executor related functionality
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
 * a bitmap table scan. `scan` needs to have been started via
 * table_beginscan_bm(). Returns false if there are no tuples to be found on
 * the page, true otherwise.
 *
 * Note that this callback is optionally implemented, so it should only be
 * used after verifying its presence (at plan time or similar).
 */
static inline bool
table_scan_bitmap_next_block(TableScanDesc scan,
							 struct TBMIterateResult *tbmres)
{
	/*
	 * We don't expect direct calls to table_scan_bitmap_next_block with valid
	 * CheckXidAlive for catalog or regular tables. See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");

	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
														   tbmres);
}

/*
 * Fetch the next tuple of a bitmap table scan into `slot` and return true if
 * a visible tuple was found, false otherwise.
 * table_scan_bitmap_next_block() needs to previously have selected a
 * block (i.e. returned true), and no previous
 * table_scan_bitmap_next_tuple() for the same block may have
 * returned false.
 */
static inline bool
table_scan_bitmap_next_tuple(TableScanDesc scan,
							 struct TBMIterateResult *tbmres,
							 TupleTableSlot *slot)
{
	/*
	 * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
	 * CheckXidAlive for catalog or regular tables. See detailed comments in
	 * xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");

	return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
														   tbmres,
														   slot);
}
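
/*
 * Bitmap-scan driving loop, roughly as the executor runs it (illustrative;
 * `tbmiterator` is assumed to come from tbm_begin_iterate(), and the scan
 * to have been started via table_beginscan_bm()):
 *
 *	TBMIterateResult *tbmres;
 *
 *	while ((tbmres = tbm_iterate(tbmiterator)) != NULL)
 *	{
 *		if (!table_scan_bitmap_next_block(scan, tbmres))
 *			continue;			// no visible tuples on this page
 *
 *		while (table_scan_bitmap_next_tuple(scan, tbmres, slot))
 *		{
 *			// process the visible tuple in `slot`
 *		}
 *	}
 */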

/*
 * Prepare to fetch tuples from the next block in a sample scan. Returns
 * false if the sample scan is finished, true otherwise. `scan` needs to have
 * been started via table_beginscan_sampling().
 *
 * This will call the TsmRoutine's NextSampleBlock() callback if necessary
 * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
 * underlying relation.
 */
static inline bool
table_scan_sample_next_block(TableScanDesc scan,
							 struct SampleScanState *scanstate)
{
	/*
	 * We don't expect direct calls to table_scan_sample_next_block with
	 * valid CheckXidAlive for catalog or regular tables. See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");

	return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
}

/*
 * Fetch the next sample tuple into `slot` and return true if a visible tuple
 * was found, false otherwise. table_scan_sample_next_block() needs to
 * previously have selected a block (i.e. returned true), and no previous
 * table_scan_sample_next_tuple() for the same block may have returned false.
 *
 * This will call the TsmRoutine's NextSampleTuple() callback.
 */
static inline bool
table_scan_sample_next_tuple(TableScanDesc scan,
							 struct SampleScanState *scanstate,
							 TupleTableSlot *slot)
{
	/*
	 * We don't expect direct calls to table_scan_sample_next_tuple with
	 * valid CheckXidAlive for catalog or regular tables. See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
		elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");

	return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
														   slot);
}
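
/*
 * Sample-scan loop sketch (illustrative; `scan` assumed started via
 * table_beginscan_sampling() and `scanstate` set up by the executor's
 * TABLESAMPLE machinery):
 *
 *	while (table_scan_sample_next_block(scan, scanstate))
 *	{
 *		while (table_scan_sample_next_tuple(scan, scanstate, slot))
 *		{
 *			// emit the sampled tuple in `slot`
 *		}
 *	}
 */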

/* ----------------------------------------------------------------------------
 * Functions to make modifications a bit simpler.
 * ----------------------------------------------------------------------------
 */

extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
									  Snapshot snapshot);
extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
									  TupleTableSlot *slot, Snapshot snapshot,
									  bool *update_indexes);

/* ----------------------------------------------------------------------------
 * Helper functions to implement parallel scans for block oriented AMs.
 * ----------------------------------------------------------------------------
 */

extern Size table_block_parallelscan_estimate(Relation rel);
extern Size table_block_parallelscan_initialize(Relation rel,
												ParallelTableScanDesc pscan);
extern void table_block_parallelscan_reinitialize(Relation rel,
												  ParallelTableScanDesc pscan);
extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
													 ParallelBlockTableScanWorker pbscanwork,
													 ParallelBlockTableScanDesc pbscan);
extern void table_block_parallelscan_startblock_init(Relation rel,
													 ParallelBlockTableScanWorker pbscanwork,
													 ParallelBlockTableScanDesc pbscan);

/* ----------------------------------------------------------------------------
 * Helper functions to implement relation sizing for block oriented AMs.
 * ----------------------------------------------------------------------------
 */

extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
extern void table_block_relation_estimate_size(Relation rel,
											   int32 *attr_widths,
											   BlockNumber *pages,
											   double *tuples,
											   double *allvisfrac,
											   Size overhead_bytes_per_tuple,
											   Size usable_bytes_per_page);

/* ----------------------------------------------------------------------------
 * Functions in tableamapi.c
 * ----------------------------------------------------------------------------
 */

extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
extern bool check_default_table_access_method(char **newval, void **extra,
											  GucSource source);

#endif							/* TABLEAM_H */