From 86e3f8885a59090d825611fc0d20a8e7fadc8a1b Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 7 Jan 2026 14:21:48 +0900 Subject: [PATCH v29 2/7] Refactor code for in-core "local" sequences This commit restructures the code of in-core sequences into a new set of files: - seqdesc.c is renamed to seqlocaldesc.c. - sequence_xlog.c is renamed to seqlocal_xlog.c, holding the code of the WAL replay logic. - seqlocalam.c to store a set of routines called from sequence.c, finishing the separation between the main sequence logic and the in-core sequences. - seqlocalam.h to store the AM-specific structures and routines related to the in-core sequences. WAL records are renamed to "SequenceLocal" with structures, variables and file structures mapping to that. --- src/backend/access/rmgrdesc/Makefile | 2 +- src/backend/access/rmgrdesc/meson.build | 2 +- .../rmgrdesc/{seqdesc.c => seqlocaldesc.c} | 20 +- src/backend/access/sequence/Makefile | 4 +- src/backend/access/sequence/meson.build | 2 + .../sequence/seqlocal_xlog.c} | 34 +- src/backend/access/sequence/seqlocalam.c | 635 ++++++++++++++++++ src/backend/access/transam/rmgr.c | 2 +- src/backend/commands/Makefile | 1 - src/backend/commands/meson.build | 1 - src/backend/commands/sequence.c | 564 +--------------- src/bin/pg_waldump/.gitignore | 2 +- src/bin/pg_waldump/rmgrdesc.c | 2 +- src/bin/pg_waldump/t/001_basic.pl | 2 +- src/include/access/rmgrlist.h | 2 +- src/include/access/seqlocal_xlog.h | 45 ++ src/include/access/seqlocalam.h | 32 + src/include/commands/sequence_xlog.h | 45 -- 18 files changed, 778 insertions(+), 619 deletions(-) rename src/backend/access/rmgrdesc/{seqdesc.c => seqlocaldesc.c} (64%) rename src/backend/{commands/sequence_xlog.c => access/sequence/seqlocal_xlog.c} (67%) create mode 100644 src/backend/access/sequence/seqlocalam.c create mode 100644 src/include/access/seqlocal_xlog.h create mode 100644 src/include/access/seqlocalam.h delete mode 100644 src/include/commands/sequence_xlog.h diff --git 
a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile index cd95eec37f1..e5900ed77af 100644 --- a/src/backend/access/rmgrdesc/Makefile +++ b/src/backend/access/rmgrdesc/Makefile @@ -24,7 +24,7 @@ OBJS = \ relmapdesc.o \ replorigindesc.o \ rmgrdesc_utils.o \ - seqdesc.o \ + seqlocaldesc.o \ smgrdesc.o \ spgdesc.o \ standbydesc.o \ diff --git a/src/backend/access/rmgrdesc/meson.build b/src/backend/access/rmgrdesc/meson.build index d9000ccd9fd..7a59bb08237 100644 --- a/src/backend/access/rmgrdesc/meson.build +++ b/src/backend/access/rmgrdesc/meson.build @@ -17,7 +17,7 @@ rmgr_desc_sources = files( 'relmapdesc.c', 'replorigindesc.c', 'rmgrdesc_utils.c', - 'seqdesc.c', + 'seqlocaldesc.c', 'smgrdesc.c', 'spgdesc.c', 'standbydesc.c', diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqlocaldesc.c similarity index 64% rename from src/backend/access/rmgrdesc/seqdesc.c rename to src/backend/access/rmgrdesc/seqlocaldesc.c index c9fc6dc1850..7aeb3f7cf4b 100644 --- a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqlocaldesc.c @@ -1,44 +1,44 @@ /*------------------------------------------------------------------------- * - * seqdesc.c - * rmgr descriptor routines for commands/sequence.c + * seqlocaldesc.c + * rmgr descriptor routines for sequence/seqlocal_xlog.c * * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * src/backend/access/rmgrdesc/seqdesc.c + * src/backend/access/rmgrdesc/seqlocaldesc.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "commands/sequence_xlog.h" +#include "access/seqlocal_xlog.h" void -seq_desc(StringInfo buf, XLogReaderState *record) +seq_local_desc(StringInfo buf, XLogReaderState *record) { char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - xl_seq_rec 
*xlrec = (xl_seq_rec *) rec; + xl_seq_local_rec *xlrec = (xl_seq_local_rec *) rec; - if (info == XLOG_SEQ_LOG) + if (info == XLOG_SEQ_LOCAL_LOG) appendStringInfo(buf, "rel %u/%u/%u", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber); } const char * -seq_identify(uint8 info) +seq_local_identify(uint8 info) { const char *id = NULL; switch (info & ~XLR_INFO_MASK) { - case XLOG_SEQ_LOG: - id = "LOG"; + case XLOG_SEQ_LOCAL_LOG: + id = "SEQ_LOCAL_LOG"; break; } diff --git a/src/backend/access/sequence/Makefile b/src/backend/access/sequence/Makefile index 9f9d31f5425..2a3c8542cb3 100644 --- a/src/backend/access/sequence/Makefile +++ b/src/backend/access/sequence/Makefile @@ -12,6 +12,8 @@ subdir = src/backend/access/sequence top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = sequence.o +OBJS = seqlocalam.o \ + seqlocal_xlog.o \ + sequence.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/sequence/meson.build b/src/backend/access/sequence/meson.build index 40cc02c770c..dea3b597a88 100644 --- a/src/backend/access/sequence/meson.build +++ b/src/backend/access/sequence/meson.build @@ -1,5 +1,7 @@ # Copyright (c) 2022-2026, PostgreSQL Global Development Group backend_sources += files( + 'seqlocalam.c', + 'seqlocal_xlog.c', 'sequence.c', ) diff --git a/src/backend/commands/sequence_xlog.c b/src/backend/access/sequence/seqlocal_xlog.c similarity index 67% rename from src/backend/commands/sequence_xlog.c rename to src/backend/access/sequence/seqlocal_xlog.c index d0aed48e268..40c72cef19a 100644 --- a/src/backend/commands/sequence_xlog.c +++ b/src/backend/access/sequence/seqlocal_xlog.c @@ -1,26 +1,26 @@ /*------------------------------------------------------------------------- * - * sequence.c - * RMGR WAL routines for sequences. 
+ * seqlocal_xlog.c + * WAL replay logic for local sequence access manager * * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * src/backend/commands/sequence_xlog.c + * src/backend/access/sequence/seqlocal_xlog.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/bufmask.h" +#include "access/seqlocal_xlog.h" #include "access/xlogutils.h" -#include "commands/sequence_xlog.h" -#include "storage/bufmgr.h" +#include "storage/block.h" void -seq_redo(XLogReaderState *record) +seq_local_redo(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; @@ -29,11 +29,11 @@ seq_redo(XLogReaderState *record) Page localpage; char *item; Size itemsz; - xl_seq_rec *xlrec = (xl_seq_rec *) XLogRecGetData(record); - sequence_magic *sm; + xl_seq_local_rec *xlrec = (xl_seq_local_rec *) XLogRecGetData(record); + seq_local_magic *sm; - if (info != XLOG_SEQ_LOG) - elog(PANIC, "seq_redo: unknown op code %u", info); + if (info != XLOG_SEQ_LOCAL_LOG) + elog(PANIC, "seq_local_redo: unknown op code %u", info); buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); @@ -49,15 +49,15 @@ seq_redo(XLogReaderState *record) */ localpage = (Page) palloc(BufferGetPageSize(buffer)); - PageInit(localpage, BufferGetPageSize(buffer), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(localpage); - sm->magic = SEQ_MAGIC; + PageInit(localpage, BufferGetPageSize(buffer), sizeof(seq_local_magic)); + sm = (seq_local_magic *) PageGetSpecialPointer(localpage); + sm->magic = SEQ_LOCAL_MAGIC; - item = (char *) xlrec + sizeof(xl_seq_rec); - itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_rec); + item = (char *) xlrec + sizeof(xl_seq_local_rec); + itemsz = XLogRecGetDataLen(record) - sizeof(xl_seq_local_rec); if (PageAddItem(localpage, 
item, itemsz, FirstOffsetNumber, false, false) == InvalidOffsetNumber) - elog(PANIC, "seq_redo: failed to add item to page"); + elog(PANIC, "seq_local_redo: failed to add item to page"); PageSetLSN(localpage, lsn); @@ -72,7 +72,7 @@ seq_redo(XLogReaderState *record) * Mask a Sequence page before performing consistency checks on it. */ void -seq_mask(char *page, BlockNumber blkno) +seq_local_mask(char *page, BlockNumber blkno) { mask_page_lsn_and_checksum(page); diff --git a/src/backend/access/sequence/seqlocalam.c b/src/backend/access/sequence/seqlocalam.c new file mode 100644 index 00000000000..2b7bfb68d51 --- /dev/null +++ b/src/backend/access/sequence/seqlocalam.c @@ -0,0 +1,635 @@ +/*------------------------------------------------------------------------- + * + * seqlocalam.c + * Local sequence access manager + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/sequence/seqlocalam.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/seqlocalam.h" +#include "access/seqlocal_xlog.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "access/xloginsert.h" +#include "access/xlogutils.h" +#include "catalog/storage_xlog.h" +#include "commands/tablecmds.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" + + +/* Format of tuples stored in heap table associated to local sequences */ +typedef struct FormData_pg_seq_local_data +{ + int64 last_value; + int64 log_cnt; + bool is_called; +} FormData_pg_seq_local_data; + +typedef FormData_pg_seq_local_data *Form_pg_seq_local_data; + +/* + * Columns of a local sequence relation + */ +#define SEQ_LOCAL_COL_LASTVAL 1 +#define SEQ_LOCAL_COL_LOG 2 +#define SEQ_LOCAL_COL_CALLED 3 + +#define SEQ_LOCAL_COL_FIRSTCOL 
SEQ_LOCAL_COL_LASTVAL +#define SEQ_LOCAL_COL_LASTCOL SEQ_LOCAL_COL_CALLED + + +/* + * We don't want to log each fetching of a value from a sequence, + * so we pre-log a few fetches in advance. In the event of + * crash we can lose (skip over) as many values as we pre-logged. + */ +#define SEQ_LOCAL_LOG_VALS 32 + +static Form_pg_seq_local_data read_seq_tuple(Relation rel, + Buffer *buf, + HeapTuple seqdatatuple); +static void fill_seq_with_data(Relation rel, HeapTuple tuple); +static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, + ForkNumber forkNum); + +/* + * Given an opened sequence relation, lock the page buffer and find the tuple + * + * *buf receives the reference to the pinned-and-ex-locked buffer + * *seqdatatuple receives the reference to the sequence tuple proper + * (this arg should point to a local variable of type HeapTupleData) + * + * Function's return value points to the data payload of the tuple + */ +static Form_pg_seq_local_data +read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) +{ + Page page; + ItemId lp; + seq_local_magic *sm; + Form_pg_seq_local_data seq; + + *buf = ReadBuffer(rel, 0); + LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(*buf); + sm = (seq_local_magic *) PageGetSpecialPointer(page); + + if (sm->magic != SEQ_LOCAL_MAGIC) + elog(ERROR, "bad magic number in sequence \"%s\": %08X", + RelationGetRelationName(rel), sm->magic); + + lp = PageGetItemId(page, FirstOffsetNumber); + Assert(ItemIdIsNormal(lp)); + + /* Note we currently only bother to set these two fields of *seqdatatuple */ + seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); + seqdatatuple->t_len = ItemIdGetLength(lp); + + /* + * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on + * a sequence, which would leave a non-frozen XID in the sequence tuple's + * xmax, which eventually leads to clog access failures or worse. If we + * see this has happened, clean up after it. 
We treat this like a hint + * bit update, ie, don't bother to WAL-log it, since we can certainly do + * this again if the update gets lost. + */ + Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); + if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + { + HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; + seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; + MarkBufferDirtyHint(*buf, true); + } + + seq = (Form_pg_seq_local_data) GETSTRUCT(seqdatatuple); + + return seq; +} + +/* + * Initialize a sequence's relation with the specified tuple as content + * + * This handles unlogged sequences by writing to both the main and the init + * fork as necessary. + */ +static void +fill_seq_with_data(Relation rel, HeapTuple tuple) +{ + fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM); + + if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) + { + SMgrRelation srel; + + srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); + smgrcreate(srel, INIT_FORKNUM, false); + log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); + fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); + FlushRelationBuffers(rel); + smgrclose(srel); + } +} + +/* + * Initialize a sequence's relation fork with the specified tuple as content + */ +static void +fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) +{ + Buffer buf; + Page page; + seq_local_magic *sm; + OffsetNumber offnum; + + /* Initialize first page of relation with special magic number */ + + buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, + EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); + Assert(BufferGetBlockNumber(buf) == 0); + + page = BufferGetPage(buf); + + PageInit(page, BufferGetPageSize(buf), sizeof(seq_local_magic)); + sm = (seq_local_magic *) PageGetSpecialPointer(page); + sm->magic = SEQ_LOCAL_MAGIC; + + /* Now insert sequence tuple */ + + /* + * Since VACUUM does not process sequences, we 
have to force the tuple to + * have xmin = FrozenTransactionId now. Otherwise it would become + * invisible to SELECTs after 2G transactions. It is okay to do this + * because if the current transaction aborts, no other xact will ever + * examine the sequence tuple anyway. + */ + HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); + HeapTupleHeaderSetXminFrozen(tuple->t_data); + HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); + HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); + tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; + ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); + + /* check the comment above nextval_internal()'s equivalent call. */ + if (RelationNeedsWAL(rel)) + GetTopTransactionId(); + + START_CRIT_SECTION(); + + MarkBufferDirty(buf); + + offnum = PageAddItem(page, tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, false); + if (offnum != FirstOffsetNumber) + elog(ERROR, "failed to add sequence tuple to page"); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM) + { + xl_seq_local_rec xlrec; + XLogRecPtr recptr; + + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); + + xlrec.locator = rel->rd_locator; + + XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec)); + XLogRegisterData(tuple->t_data, tuple->t_len); + + recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); +} + +/* + * seq_local_nextval() + * + * Allocate a new value for a local sequence, based on the sequence + * configuration. 
+ */ +int64 +seq_local_nextval(Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last) +{ + int64 result; + int64 fetch; + int64 next; + int64 rescnt = 0; + int64 log; + Buffer buf; + HeapTupleData seqdatatuple; + Form_pg_seq_local_data seq; + Page page; + bool logit = false; + + /* lock page buffer and read tuple */ + seq = read_seq_tuple(rel, &buf, &seqdatatuple); + page = BufferGetPage(buf); + + *last = next = result = seq->last_value; + fetch = cache; + log = seq->log_cnt; + + if (!seq->is_called) + { + rescnt++; /* return last_value if not is_called */ + fetch--; + } + + /* + * Decide whether we should emit a WAL log record. If so, force up the + * fetch count to grab SEQ_LOCAL_LOG_VALS more values than we actually + * need to cache. (These will then be usable without logging.) + * + * If this is the first nextval after a checkpoint, we must force a new + * WAL record to be written anyway, else replay starting from the + * checkpoint would fail to advance the sequence past the logged values. + * In this case we may as well fetch extra values. 
+ */ + if (log < fetch || !seq->is_called) + { + /* forced log to satisfy local demand for values */ + fetch = log = fetch + SEQ_LOCAL_LOG_VALS; + logit = true; + } + else + { + XLogRecPtr redoptr = GetRedoRecPtr(); + + if (PageGetLSN(page) <= redoptr) + { + /* last update of seq was before checkpoint */ + fetch = log = fetch + SEQ_LOCAL_LOG_VALS; + logit = true; + } + } + + while (fetch) /* try to fetch cache [+ log ] numbers */ + { + /* + * Check MAXVALUE for ascending sequences and MINVALUE for descending + * sequences + */ + if (incby > 0) + { + /* ascending sequence */ + if ((maxv >= 0 && next > maxv - incby) || + (maxv < 0 && next + incby > maxv)) + { + if (rescnt > 0) + break; /* stop fetching */ + if (!cycle) + ereport(ERROR, + (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), + errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")", + RelationGetRelationName(rel), + maxv))); + next = minv; + } + else + next += incby; + } + else + { + /* descending sequence */ + if ((minv < 0 && next < minv - incby) || + (minv >= 0 && next + incby < minv)) + { + if (rescnt > 0) + break; /* stop fetching */ + if (!cycle) + ereport(ERROR, + (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), + errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")", + RelationGetRelationName(rel), + minv))); + next = maxv; + } + else + next += incby; + } + fetch--; + if (rescnt < cache) + { + log--; + rescnt++; + *last = next; + if (rescnt == 1) /* if it's first result - */ + result = next; /* it's what to return */ + } + } + + log -= fetch; /* adjust for any unfetched numbers */ + Assert(log >= 0); + + /* + * If something needs to be WAL logged, acquire an xid, so this + * transaction's commit will trigger a WAL flush and wait for syncrep. + * It's sufficient to ensure the toplevel transaction has an xid, no need + * to assign xids subxacts, that'll already trigger an appropriate wait. 
+ * (Have to do that here, so we're outside the critical section) + */ + if (logit && RelationNeedsWAL(rel)) + GetTopTransactionId(); + + /* ready to change the on-disk (or really, in-buffer) tuple */ + START_CRIT_SECTION(); + + /* + * We must mark the buffer dirty before doing XLogInsert(); see notes in + * SyncOneBuffer(). However, we don't apply the desired changes just yet. + * This looks like a violation of the buffer update protocol, but it is in + * fact safe because we hold exclusive lock on the buffer. Any other + * process, including a checkpoint, that tries to examine the buffer + * contents will block until we release the lock, and then will see the + * final state that we install below. + */ + MarkBufferDirty(buf); + + /* XLOG stuff */ + if (logit && RelationNeedsWAL(rel)) + { + xl_seq_local_rec xlrec; + XLogRecPtr recptr; + + /* + * We don't log the current state of the tuple, but rather the state + * as it would appear after "log" more fetches. This lets us skip + * that many future WAL records, at the cost that we lose those + * sequence values if we crash. + */ + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); + + /* set values that will be saved in xlog */ + seq->last_value = next; + seq->is_called = true; + seq->log_cnt = 0; + + xlrec.locator = rel->rd_locator; + + XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec)); + XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); + + recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG); + + PageSetLSN(page, recptr); + } + + /* Now update sequence tuple to the intended final state */ + seq->last_value = *last; /* last fetched number */ + seq->is_called = true; + seq->log_cnt = log; /* how much is logged */ + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); + + return result; +} + +/* + * seq_local_get_table_am() + * + * Return the table access method used by this sequence. 
+ */ +const char * +seq_local_get_table_am(void) +{ + return DEFAULT_TABLE_ACCESS_METHOD; +} + +/* + * seq_local_init() + * + * Add the sequence attributes to the relation created for this sequence + * AM and insert a tuple of metadata into the sequence relation, based on + * the information guessed from pg_sequences. This is the first tuple + * inserted after the relation has been created, filling in its heap + * table. + */ +void +seq_local_init(Relation rel, int64 last_value, bool is_called) +{ + Datum value[SEQ_LOCAL_COL_LASTCOL]; + bool null[SEQ_LOCAL_COL_LASTCOL]; + List *elts = NIL; + List *atcmds = NIL; + ListCell *lc; + TupleDesc tupdesc; + HeapTuple tuple; + + /* + * Create relation (and fill value[] and null[] for the initial tuple). + */ + for (int i = SEQ_LOCAL_COL_FIRSTCOL; i <= SEQ_LOCAL_COL_LASTCOL; i++) + { + ColumnDef *coldef = NULL; + + switch (i) + { + case SEQ_LOCAL_COL_LASTVAL: + coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); + value[i - 1] = Int64GetDatumFast(last_value); + break; + case SEQ_LOCAL_COL_LOG: + coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); + value[i - 1] = Int64GetDatum(0); + break; + case SEQ_LOCAL_COL_CALLED: + coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid); + value[i - 1] = BoolGetDatum(is_called); + break; + } + + coldef->is_not_null = true; + null[i - 1] = false; + elts = lappend(elts, coldef); + } + + /* Add all the attributes to the sequence */ + foreach(lc, elts) + { + AlterTableCmd *atcmd; + + atcmd = makeNode(AlterTableCmd); + atcmd->subtype = AT_AddColumnToSequence; + atcmd->def = (Node *) lfirst(lc); + atcmds = lappend(atcmds, atcmd); + } + + /* + * No recursion needed. Note that EventTriggerAlterTableStart() should + * have been called. 
+ */ + AlterTableInternal(RelationGetRelid(rel), atcmds, false); + CommandCounterIncrement(); + + tupdesc = RelationGetDescr(rel); + tuple = heap_form_tuple(tupdesc, value, null); + fill_seq_with_data(rel, tuple); +} + +/* + * seq_local_setval() + * + * Callback for setval(). + */ +void +seq_local_setval(Relation rel, int64 next, bool iscalled) +{ + Buffer buf; + HeapTupleData seqdatatuple; + Form_pg_seq_local_data seq; + + /* lock page buffer and read tuple */ + seq = read_seq_tuple(rel, &buf, &seqdatatuple); + + /* ready to change the on-disk (or really, in-buffer) tuple */ + START_CRIT_SECTION(); + seq->last_value = next; /* last fetched number */ + seq->is_called = iscalled; + seq->log_cnt = 0; + + MarkBufferDirty(buf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_seq_local_rec xlrec; + XLogRecPtr recptr; + Page page = BufferGetPage(buf); + + XLogBeginInsert(); + XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); + + xlrec.locator = rel->rd_locator; + XLogRegisterData(&xlrec, sizeof(xl_seq_local_rec)); + XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); + + recptr = XLogInsert(RM_SEQ_LOCAL_ID, XLOG_SEQ_LOCAL_LOG); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buf); +} + +/* + * seq_local_reset() + * + * Perform a hard reset on the local sequence, rewriting its heap data + * entirely. + */ +void +seq_local_reset(Relation rel, int64 startv, bool is_called, bool reset_state) +{ + Form_pg_seq_local_data seq; + Buffer buf; + HeapTupleData seqdatatuple; + HeapTuple tuple; + + /* lock buffer page and read tuple */ + (void) read_seq_tuple(rel, &buf, &seqdatatuple); + + /* + * Copy the existing sequence tuple. 
+ */ + tuple = heap_copytuple(&seqdatatuple); + + /* Now we're done with the old page */ + UnlockReleaseBuffer(buf); + + /* + * Modify the copied tuple to execute the restart (compare the RESTART + * action in AlterSequence) + */ + seq = (Form_pg_seq_local_data) GETSTRUCT(tuple); + seq->last_value = startv; + seq->is_called = is_called; + if (reset_state) + seq->log_cnt = 0; + + /* + * Create a new storage file for the sequence. + */ + RelationSetNewRelfilenumber(rel, rel->rd_rel->relpersistence); + + /* + * Ensure sequence's relfrozenxid is at 0, since it won't contain any + * unfrozen XIDs. Same with relminmxid, since a sequence will never + * contain multixacts. + */ + Assert(rel->rd_rel->relfrozenxid == InvalidTransactionId); + Assert(rel->rd_rel->relminmxid == InvalidMultiXactId); + + /* + * Insert the modified tuple into the new storage file. + */ + fill_seq_with_data(rel, tuple); +} + +/* + * seq_local_get_state() + * + * Retrieve the state of a local sequence. + */ +void +seq_local_get_state(Relation rel, + int64 *last_value, + bool *is_called, + XLogRecPtr *page_lsn) +{ + Buffer buf; + Page page; + HeapTupleData seqdatatuple; + Form_pg_seq_local_data seq; + + /* lock page buffer and read tuple */ + seq = read_seq_tuple(rel, &buf, &seqdatatuple); + page = BufferGetPage(buf); + + *last_value = seq->last_value; + *is_called = seq->is_called; + *page_lsn = PageGetLSN(page); + + UnlockReleaseBuffer(buf); +} + +/* + * seq_local_change_persistence() + * + * Persistence change for the local sequence Relation. 
+ */ +void +seq_local_change_persistence(Relation rel, char newrelpersistence) +{ + Buffer buf; + HeapTupleData seqdatatuple; + + (void) read_seq_tuple(rel, &buf, &seqdatatuple); + RelationSetNewRelfilenumber(rel, newrelpersistence); + fill_seq_with_data(rel, &seqdatatuple); + UnlockReleaseBuffer(buf); +} diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index 4fda03a3cfc..1892b6f2754 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -29,11 +29,11 @@ #include "access/heapam_xlog.h" #include "access/multixact.h" #include "access/nbtxlog.h" +#include "access/seqlocal_xlog.h" #include "access/spgxlog.h" #include "access/xact.h" #include "catalog/storage_xlog.h" #include "commands/dbcommands_xlog.h" -#include "commands/sequence_xlog.h" #include "commands/tablespace.h" #include "replication/decode.h" #include "replication/message.h" diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index 5b9d084977e..44c3a500b97 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -55,7 +55,6 @@ OBJS = \ schemacmds.o \ seclabel.o \ sequence.o \ - sequence_xlog.o \ statscmds.o \ subscriptioncmds.o \ tablecmds.o \ diff --git a/src/backend/commands/meson.build b/src/backend/commands/meson.build index 9f258d566eb..1109727100a 100644 --- a/src/backend/commands/meson.build +++ b/src/backend/commands/meson.build @@ -43,7 +43,6 @@ backend_sources += files( 'schemacmds.c', 'seclabel.c', 'sequence.c', - 'sequence_xlog.c', 'statscmds.c', 'subscriptioncmds.c', 'tablecmds.c', diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index c77e16df4dd..af66e332b19 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/multixact.h" #include "access/relation.h" +#include "access/seqlocalam.h" #include "access/sequence.h" #include "access/table.h" #include 
"access/transam.h" @@ -31,7 +32,6 @@ #include "catalog/storage_xlog.h" #include "commands/defrem.h" #include "commands/sequence.h" -#include "commands/sequence_xlog.h" #include "commands/tablecmds.h" #include "funcapi.h" #include "miscadmin.h" @@ -50,13 +50,6 @@ #include "utils/varlena.h" -/* - * We don't want to log each fetching of a value from a sequence, - * so we pre-log a few fetches in advance. In the event of - * crash we can lose (skip over) as many values as we pre-logged. - */ -#define SEQ_LOG_VALS 32 - /* * We store a SeqTable item for every sequence we have touched in the current * session. This is needed to hold onto nextval/currval state. (We can't @@ -86,13 +79,9 @@ static HTAB *seqhashtab = NULL; /* hash table for SeqTable items */ */ static SeqTableData *last_used_seq = NULL; -static void fill_seq_with_data(Relation rel, HeapTuple tuple); -static void fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum); static Relation lock_and_open_sequence(SeqTable seq); static void create_seq_hashtable(void); static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel); -static Form_pg_sequence_data read_seq_tuple(Relation rel, - Buffer *buf, HeapTuple seqdatatuple); static void init_params(ParseState *pstate, List *options, bool for_identity, bool isInit, Form_pg_sequence seqform, @@ -123,14 +112,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) Relation rel; HeapTuple tuple; TupleDesc tupDesc; - Datum value[SEQ_COL_LASTCOL]; - bool null[SEQ_COL_LASTCOL]; - List *elts = NIL; - List *atcmds = NIL; - ListCell *lc; Datum pgs_values[Natts_pg_sequence]; bool pgs_nulls[Natts_pg_sequence]; - int i; /* * If if_not_exists was given and a relation with the same name already @@ -163,35 +146,6 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) &seqform, &last_value, &reset_state, &is_called, &need_seq_rewrite, &owned_by); - /* - * Create relation (and fill value[] and null[] for the tuple) - */ - for (i = SEQ_COL_FIRSTCOL; 
i <= SEQ_COL_LASTCOL; i++) - { - ColumnDef *coldef = NULL; - - switch (i) - { - case SEQ_COL_LASTVAL: - coldef = makeColumnDef("last_value", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatumFast(last_value); - break; - case SEQ_COL_LOG: - coldef = makeColumnDef("log_cnt", INT8OID, -1, InvalidOid); - value[i - 1] = Int64GetDatum((int64) 0); - break; - case SEQ_COL_CALLED: - coldef = makeColumnDef("is_called", BOOLOID, -1, InvalidOid); - value[i - 1] = BoolGetDatum(false); - break; - } - - coldef->is_not_null = true; - null[i - 1] = false; - - elts = lappend(elts, coldef); - } - stmt->relation = seq->sequence; stmt->inhRelations = NIL; stmt->constraints = NIL; @@ -212,29 +166,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq) rel = sequence_open(seqoid, AccessExclusiveLock); - /* Add all the attributes to the sequence */ - foreach(lc, elts) - { - AlterTableCmd *atcmd; - - atcmd = makeNode(AlterTableCmd); - atcmd->subtype = AT_AddColumnToSequence; - atcmd->def = (Node *) lfirst(lc); - atcmds = lappend(atcmds, atcmd); - } - - /* - * No recursion needed. Note that EventTriggerAlterTableStart() should - * have been called. - */ - AlterTableInternal(RelationGetRelid(rel), atcmds, false); - CommandCounterIncrement(); - - tupDesc = RelationGetDescr(rel); - - /* now initialize the sequence's data */ - tuple = heap_form_tuple(tupDesc, value, null); - fill_seq_with_data(rel, tuple); + /* now initialize the sequence table structure and its data */ + seq_local_init(rel, last_value, is_called); /* process OWNED BY if given */ if (owned_by) @@ -283,10 +216,6 @@ ResetSequence(Oid seq_relid) { Relation seq_rel; SeqTable elm; - Form_pg_sequence_data seq; - Buffer buf; - HeapTupleData seqdatatuple; - HeapTuple tuple; HeapTuple pgstuple; Form_pg_sequence pgsform; int64 startv; @@ -297,7 +226,6 @@ ResetSequence(Oid seq_relid) * indeed a sequence. 
*/ init_sequence(seq_relid, &elm, &seq_rel); - (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple); pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid)); if (!HeapTupleIsValid(pgstuple)) @@ -306,40 +234,8 @@ ResetSequence(Oid seq_relid) startv = pgsform->seqstart; ReleaseSysCache(pgstuple); - /* - * Copy the existing sequence tuple. - */ - tuple = heap_copytuple(&seqdatatuple); - - /* Now we're done with the old page */ - UnlockReleaseBuffer(buf); - - /* - * Modify the copied tuple to execute the restart (compare the RESTART - * action in AlterSequence) - */ - seq = (Form_pg_sequence_data) GETSTRUCT(tuple); - seq->last_value = startv; - seq->is_called = false; - seq->log_cnt = 0; - - /* - * Create a new storage file for the sequence. - */ - RelationSetNewRelfilenumber(seq_rel, seq_rel->rd_rel->relpersistence); - - /* - * Ensure sequence's relfrozenxid is at 0, since it won't contain any - * unfrozen XIDs. Same with relminmxid, since a sequence will never - * contain multixacts. - */ - Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId); - Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId); - - /* - * Insert the modified tuple into the new storage file. - */ - fill_seq_with_data(seq_rel, tuple); + /* Sequence state is forcibly reset here. */ + seq_local_reset(seq_rel, startv, false, true); /* Clear local cache so that we don't think we have cached numbers */ /* Note that we do not change the currval() state */ @@ -348,105 +244,6 @@ ResetSequence(Oid seq_relid) sequence_close(seq_rel, NoLock); } -/* - * Initialize a sequence's relation with the specified tuple as content - * - * This handles unlogged sequences by writing to both the main and the init - * fork as necessary. 
- */ -static void -fill_seq_with_data(Relation rel, HeapTuple tuple) -{ - fill_seq_fork_with_data(rel, tuple, MAIN_FORKNUM); - - if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) - { - SMgrRelation srel; - - srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); - smgrcreate(srel, INIT_FORKNUM, false); - log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); - fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); - FlushRelationBuffers(rel); - smgrclose(srel); - } -} - -/* - * Initialize a sequence's relation fork with the specified tuple as content - */ -static void -fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) -{ - Buffer buf; - Page page; - sequence_magic *sm; - OffsetNumber offnum; - - /* Initialize first page of relation with special magic number */ - - buf = ExtendBufferedRel(BMR_REL(rel), forkNum, NULL, - EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK); - Assert(BufferGetBlockNumber(buf) == 0); - - page = BufferGetPage(buf); - - PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic)); - sm = (sequence_magic *) PageGetSpecialPointer(page); - sm->magic = SEQ_MAGIC; - - /* Now insert sequence tuple */ - - /* - * Since VACUUM does not process sequences, we have to force the tuple to - * have xmin = FrozenTransactionId now. Otherwise it would become - * invisible to SELECTs after 2G transactions. It is okay to do this - * because if the current transaction aborts, no other xact will ever - * examine the sequence tuple anyway. - */ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); - HeapTupleHeaderSetXminFrozen(tuple->t_data); - HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; - ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); - - /* check the comment above nextval_internal()'s equivalent call. 
*/ - if (RelationNeedsWAL(rel)) - GetTopTransactionId(); - - START_CRIT_SECTION(); - - MarkBufferDirty(buf); - - offnum = PageAddItem(page, tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, false); - if (offnum != FirstOffsetNumber) - elog(ERROR, "failed to add sequence tuple to page"); - - /* XLOG stuff */ - if (RelationNeedsWAL(rel) || forkNum == INIT_FORKNUM) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - xlrec.locator = rel->rd_locator; - - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(tuple->t_data, tuple->t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); -} - /* * AlterSequence * @@ -458,10 +255,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) Oid relid; SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData datatuple; Form_pg_sequence seqform; - Form_pg_sequence_data newdataform; bool need_seq_rewrite; List *owned_by; ObjectAddress address; @@ -470,7 +264,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) bool reset_state = false; bool is_called; int64 last_value; - HeapTuple newdatatuple; + XLogRecPtr page_lsn; /* Open and lock sequence, and check for ownership along the way. 
*/ relid = RangeVarGetRelidExtended(stmt->sequence, @@ -497,16 +291,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) seqform = (Form_pg_sequence) GETSTRUCT(seqtuple); - /* lock page buffer and read tuple into new sequence structure */ - (void) read_seq_tuple(seqrel, &buf, &datatuple); - - /* copy the existing sequence data tuple, so it can be modified locally */ - newdatatuple = heap_copytuple(&datatuple); - newdataform = (Form_pg_sequence_data) GETSTRUCT(newdatatuple); - last_value = newdataform->last_value; - is_called = newdataform->is_called; - - UnlockReleaseBuffer(buf); + /* Read sequence data */ + seq_local_get_state(seqrel, &last_value, &is_called, &page_lsn); /* Check and set new values */ init_params(pstate, stmt->options, stmt->for_identity, false, @@ -516,32 +302,10 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) /* If needed, rewrite the sequence relation itself */ if (need_seq_rewrite) { - /* check the comment above nextval_internal()'s equivalent call. */ if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - /* - * Create a new storage file for the sequence, making the state - * changes transactional. - */ - RelationSetNewRelfilenumber(seqrel, seqrel->rd_rel->relpersistence); - - /* - * Ensure sequence's relfrozenxid is at 0, since it won't contain any - * unfrozen XIDs. Same with relminmxid, since a sequence will never - * contain multixacts. - */ - Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId); - Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId); - - /* - * Insert the modified tuple into the new storage file. 
- */ - newdataform->last_value = last_value; - newdataform->is_called = is_called; - if (reset_state) - newdataform->log_cnt = 0; - fill_seq_with_data(seqrel, newdatatuple); + seq_local_reset(seqrel, last_value, is_called, reset_state); } /* Clear local cache so that we don't think we have cached numbers */ @@ -570,8 +334,6 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) { SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData seqdatatuple; /* * ALTER SEQUENCE acquires this lock earlier. If we're processing an @@ -586,10 +348,7 @@ SequenceChangePersistence(Oid relid, char newrelpersistence) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - (void) read_seq_tuple(seqrel, &buf, &seqdatatuple); - RelationSetNewRelfilenumber(seqrel, newrelpersistence); - fill_seq_with_data(seqrel, &seqdatatuple); - UnlockReleaseBuffer(buf); + seq_local_change_persistence(seqrel, newrelpersistence); sequence_close(seqrel, NoLock); } @@ -652,24 +411,15 @@ nextval_internal(Oid relid, bool check_permissions) { SeqTable elm; Relation seqrel; - Buffer buf; - Page page; HeapTuple pgstuple; Form_pg_sequence pgsform; - HeapTupleData seqdatatuple; - Form_pg_sequence_data seq; int64 incby, maxv, minv, cache, - log, - fetch, last; - int64 result, - next, - rescnt = 0; + int64 result; bool cycle; - bool logit = false; /* open and lock sequence */ init_sequence(relid, &elm, &seqrel); @@ -714,105 +464,9 @@ nextval_internal(Oid relid, bool check_permissions) cycle = pgsform->seqcycle; ReleaseSysCache(pgstuple); - /* lock page buffer and read tuple */ - seq = read_seq_tuple(seqrel, &buf, &seqdatatuple); - page = BufferGetPage(buf); - - last = next = result = seq->last_value; - fetch = cache; - log = seq->log_cnt; - - if (!seq->is_called) - { - rescnt++; /* return last_value if not is_called */ - fetch--; - } - - /* - * Decide whether we should emit a WAL log record. If so, force up the - * fetch count to grab SEQ_LOG_VALS more values than we actually need to - * cache. 
(These will then be usable without logging.) - * - * If this is the first nextval after a checkpoint, we must force a new - * WAL record to be written anyway, else replay starting from the - * checkpoint would fail to advance the sequence past the logged values. - * In this case we may as well fetch extra values. - */ - if (log < fetch || !seq->is_called) - { - /* forced log to satisfy local demand for values */ - fetch = log = fetch + SEQ_LOG_VALS; - logit = true; - } - else - { - XLogRecPtr redoptr = GetRedoRecPtr(); - - if (PageGetLSN(page) <= redoptr) - { - /* last update of seq was before checkpoint */ - fetch = log = fetch + SEQ_LOG_VALS; - logit = true; - } - } - - while (fetch) /* try to fetch cache [+ log ] numbers */ - { - /* - * Check MAXVALUE for ascending sequences and MINVALUE for descending - * sequences - */ - if (incby > 0) - { - /* ascending sequence */ - if ((maxv >= 0 && next > maxv - incby) || - (maxv < 0 && next + incby > maxv)) - { - if (rescnt > 0) - break; /* stop fetching */ - if (!cycle) - ereport(ERROR, - (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), - errmsg("nextval: reached maximum value of sequence \"%s\" (%" PRId64 ")", - RelationGetRelationName(seqrel), - maxv))); - next = minv; - } - else - next += incby; - } - else - { - /* descending sequence */ - if ((minv < 0 && next < minv - incby) || - (minv >= 0 && next + incby < minv)) - { - if (rescnt > 0) - break; /* stop fetching */ - if (!cycle) - ereport(ERROR, - (errcode(ERRCODE_SEQUENCE_GENERATOR_LIMIT_EXCEEDED), - errmsg("nextval: reached minimum value of sequence \"%s\" (%" PRId64 ")", - RelationGetRelationName(seqrel), - minv))); - next = maxv; - } - else - next += incby; - } - fetch--; - if (rescnt < cache) - { - log--; - rescnt++; - last = next; - if (rescnt == 1) /* if it's first result - */ - result = next; /* it's what to return */ - } - } - - log -= fetch; /* adjust for any unfetched numbers */ - Assert(log >= 0); + /* retrieve next value from the access method */ + 
result = seq_local_nextval(seqrel, incby, maxv, minv, cache, cycle, + &last); /* save info in local cache */ elm->increment = incby; @@ -822,69 +476,6 @@ nextval_internal(Oid relid, bool check_permissions) last_used_seq = elm; - /* - * If something needs to be WAL logged, acquire an xid, so this - * transaction's commit will trigger a WAL flush and wait for syncrep. - * It's sufficient to ensure the toplevel transaction has an xid, no need - * to assign xids subxacts, that'll already trigger an appropriate wait. - * (Have to do that here, so we're outside the critical section) - */ - if (logit && RelationNeedsWAL(seqrel)) - GetTopTransactionId(); - - /* ready to change the on-disk (or really, in-buffer) tuple */ - START_CRIT_SECTION(); - - /* - * We must mark the buffer dirty before doing XLogInsert(); see notes in - * SyncOneBuffer(). However, we don't apply the desired changes just yet. - * This looks like a violation of the buffer update protocol, but it is in - * fact safe because we hold exclusive lock on the buffer. Any other - * process, including a checkpoint, that tries to examine the buffer - * contents will block until we release the lock, and then will see the - * final state that we install below. - */ - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (logit && RelationNeedsWAL(seqrel)) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - - /* - * We don't log the current state of the tuple, but rather the state - * as it would appear after "log" more fetches. This lets us skip - * that many future WAL records, at the cost that we lose those - * sequence values if we crash. 
- */ - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - /* set values that will be saved in xlog */ - seq->last_value = next; - seq->is_called = true; - seq->log_cnt = 0; - - xlrec.locator = seqrel->rd_locator; - - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); - - recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - /* Now update sequence tuple to the intended final state */ - seq->last_value = last; /* last fetched number */ - seq->is_called = true; - seq->log_cnt = log; /* how much is logged */ - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); - sequence_close(seqrel, NoLock); return result; @@ -974,9 +565,6 @@ SetSequence(Oid relid, int64 next, bool iscalled) { SeqTable elm; Relation seqrel; - Buffer buf; - HeapTupleData seqdatatuple; - Form_pg_sequence_data seq; HeapTuple pgstuple; Form_pg_sequence pgsform; int64 maxv, @@ -1010,9 +598,6 @@ SetSequence(Oid relid, int64 next, bool iscalled) */ PreventCommandIfParallelMode("setval()"); - /* lock page buffer and read tuple */ - seq = read_seq_tuple(seqrel, &buf, &seqdatatuple); - if ((next < minv) || (next > maxv)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), @@ -1034,37 +619,8 @@ SetSequence(Oid relid, int64 next, bool iscalled) if (RelationNeedsWAL(seqrel)) GetTopTransactionId(); - /* ready to change the on-disk (or really, in-buffer) tuple */ - START_CRIT_SECTION(); - - seq->last_value = next; /* last fetched number */ - seq->is_called = iscalled; - seq->log_cnt = 0; - - MarkBufferDirty(buf); - - /* XLOG stuff */ - if (RelationNeedsWAL(seqrel)) - { - xl_seq_rec xlrec; - XLogRecPtr recptr; - Page page = BufferGetPage(buf); - - XLogBeginInsert(); - XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT); - - xlrec.locator = seqrel->rd_locator; - XLogRegisterData(&xlrec, sizeof(xl_seq_rec)); - XLogRegisterData(seqdatatuple.t_data, seqdatatuple.t_len); - - recptr = XLogInsert(RM_SEQ_ID, 
XLOG_SEQ_LOG); - - PageSetLSN(page, recptr); - } - - END_CRIT_SECTION(); - - UnlockReleaseBuffer(buf); + /* Call the access method callback */ + seq_local_setval(seqrel, next, iscalled); sequence_close(seqrel, NoLock); } @@ -1205,62 +761,6 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel) } -/* - * Given an opened sequence relation, lock the page buffer and find the tuple - * - * *buf receives the reference to the pinned-and-ex-locked buffer - * *seqdatatuple receives the reference to the sequence tuple proper - * (this arg should point to a local variable of type HeapTupleData) - * - * Function's return value points to the data payload of the tuple - */ -static Form_pg_sequence_data -read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) -{ - Page page; - ItemId lp; - sequence_magic *sm; - Form_pg_sequence_data seq; - - *buf = ReadBuffer(rel, 0); - LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE); - - page = BufferGetPage(*buf); - sm = (sequence_magic *) PageGetSpecialPointer(page); - - if (sm->magic != SEQ_MAGIC) - elog(ERROR, "bad magic number in sequence \"%s\": %08X", - RelationGetRelationName(rel), sm->magic); - - lp = PageGetItemId(page, FirstOffsetNumber); - Assert(ItemIdIsNormal(lp)); - - /* Note we currently only bother to set these two fields of *seqdatatuple */ - seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); - seqdatatuple->t_len = ItemIdGetLength(lp); - - /* - * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on - * a sequence, which would leave a non-frozen XID in the sequence tuple's - * xmax, which eventually leads to clog access failures or worse. If we - * see this has happened, clean up after it. We treat this like a hint - * bit update, ie, don't bother to WAL-log it, since we can certainly do - * this again if the update gets lost. 
- */ - Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) - { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); - seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; - seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; - MarkBufferDirtyHint(*buf, true); - } - - seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple); - - return seq; -} - /* * init_params: process the options list of CREATE or ALTER SEQUENCE, and * store the values into appropriate fields of seqform, for changes that go @@ -1850,19 +1350,16 @@ pg_get_sequence_data(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - Buffer buf; - HeapTupleData seqtuple; - Form_pg_sequence_data seq; - Page page; + bool is_called; + int64 last_value; + XLogRecPtr page_lsn; - seq = read_seq_tuple(seqrel, &buf, &seqtuple); - page = BufferGetPage(buf); + seq_local_get_state(seqrel, &last_value, &is_called, + &page_lsn); - values[0] = Int64GetDatum(seq->last_value); - values[1] = BoolGetDatum(seq->is_called); - values[2] = LSNGetDatum(PageGetLSN(page)); - - UnlockReleaseBuffer(buf); + values[0] = Int64GetDatum(last_value); + values[1] = BoolGetDatum(is_called); + values[2] = LSNGetDatum(page_lsn); } else memset(isnull, true, sizeof(isnull)); @@ -1890,6 +1387,7 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) Relation seqrel; bool is_called = false; int64 result = 0; + XLogRecPtr page_lsn = InvalidXLogRecPtr; /* open and lock sequence */ init_sequence(relid, &elm, &seqrel); @@ -1907,17 +1405,9 @@ pg_sequence_last_value(PG_FUNCTION_ARGS) !RELATION_IS_OTHER_TEMP(seqrel) && (RelationIsPermanent(seqrel) || !RecoveryInProgress())) { - Buffer buf; - HeapTupleData seqtuple; - Form_pg_sequence_data seq; - - seq = read_seq_tuple(seqrel, &buf, &seqtuple); - - is_called = seq->is_called; - result = seq->last_value; - - UnlockReleaseBuffer(buf); + 
seq_local_get_state(seqrel, &result, &is_called, &page_lsn); } + sequence_close(seqrel, NoLock); if (is_called) diff --git a/src/bin/pg_waldump/.gitignore b/src/bin/pg_waldump/.gitignore index ec51f41c767..6709e87914d 100644 --- a/src/bin/pg_waldump/.gitignore +++ b/src/bin/pg_waldump/.gitignore @@ -16,7 +16,7 @@ /relmapdesc.c /replorigindesc.c /rmgrdesc_utils.c -/seqdesc.c +/seqlocaldesc.c /smgrdesc.c /spgdesc.c /standbydesc.c diff --git a/src/bin/pg_waldump/rmgrdesc.c b/src/bin/pg_waldump/rmgrdesc.c index 931ab8b979e..770a56fd3b3 100644 --- a/src/bin/pg_waldump/rmgrdesc.c +++ b/src/bin/pg_waldump/rmgrdesc.c @@ -19,12 +19,12 @@ #include "access/multixact.h" #include "access/nbtxlog.h" #include "access/rmgr.h" +#include "access/seqlocal_xlog.h" #include "access/spgxlog.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "catalog/storage_xlog.h" #include "commands/dbcommands_xlog.h" -#include "commands/sequence_xlog.h" #include "commands/tablespace.h" #include "replication/message.h" #include "replication/origin.h" diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index 53b2f016b80..beb44ca6da4 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -73,7 +73,7 @@ Btree Hash Gin Gist -Sequence +SequenceLocal SPGist BRIN CommitTs diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index ae32ef16d67..1bc806e6134 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -40,7 +40,7 @@ PG_RMGR(RM_BTREE_ID, "Btree", btree_redo, btree_desc, btree_identify, btree_xlog PG_RMGR(RM_HASH_ID, "Hash", hash_redo, hash_desc, hash_identify, NULL, NULL, hash_mask, NULL) PG_RMGR(RM_GIN_ID, "Gin", gin_redo, gin_desc, gin_identify, gin_xlog_startup, gin_xlog_cleanup, gin_mask, NULL) PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_startup, gist_xlog_cleanup, gist_mask, NULL) -PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, 
seq_identify, NULL, NULL, seq_mask, NULL) +PG_RMGR(RM_SEQ_LOCAL_ID, "SequenceLocal", seq_local_redo, seq_local_desc, seq_local_identify, NULL, NULL, seq_local_mask, NULL) PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup, spg_mask, NULL) PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL, brin_mask, NULL) PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL, NULL, NULL) diff --git a/src/include/access/seqlocal_xlog.h b/src/include/access/seqlocal_xlog.h new file mode 100644 index 00000000000..4f2441a8ca5 --- /dev/null +++ b/src/include/access/seqlocal_xlog.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * seqlocal_xlog.h + * Local sequence WAL definitions. + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/seqlocal_xlog.h + * + *------------------------------------------------------------------------- + */ + +#ifndef SEQLOCAL_XLOG_H +#define SEQLOCAL_XLOG_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" + +/* Record identifier */ +#define XLOG_SEQ_LOCAL_LOG 0x00 + +/* + * The "special area" of a local sequence's buffer page looks like this. 
+ */ +#define SEQ_LOCAL_MAGIC 0x1717 + +typedef struct seq_local_magic +{ + uint32 magic; +} seq_local_magic; + +/* Sequence WAL record */ +typedef struct xl_seq_local_rec +{ + RelFileLocator locator; + /* SEQUENCE TUPLE DATA FOLLOWS AT THE END */ +} xl_seq_local_rec; + +extern void seq_local_redo(XLogReaderState *record); +extern void seq_local_desc(StringInfo buf, XLogReaderState *record); +extern const char *seq_local_identify(uint8 info); +extern void seq_local_mask(char *page, BlockNumber blkno); + +#endif /* SEQLOCAL_XLOG_H */ diff --git a/src/include/access/seqlocalam.h b/src/include/access/seqlocalam.h new file mode 100644 index 00000000000..2ce54c2b778 --- /dev/null +++ b/src/include/access/seqlocalam.h @@ -0,0 +1,32 @@ +/*------------------------------------------------------------------------- + * + * seqlocalam.h + * Local sequence access method. + * + * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/seqlocalam.h + * + *------------------------------------------------------------------------- + */ +#ifndef SEQLOCALAM_H +#define SEQLOCALAM_H + +#include "utils/rel.h" + +/* access routines */ +extern int64 seq_local_nextval(Relation rel, int64 incby, int64 maxv, + int64 minv, int64 cache, bool cycle, + int64 *last); +extern const char *seq_local_get_table_am(void); +extern void seq_local_init(Relation rel, int64 last_value, bool is_called); +extern void seq_local_setval(Relation rel, int64 next, bool iscalled); +extern void seq_local_reset(Relation rel, int64 startv, bool is_called, + bool reset_state); +extern void seq_local_get_state(Relation rel, int64 *last_value, + bool *is_called, XLogRecPtr *page_lsn); +extern void seq_local_change_persistence(Relation rel, + char newrelpersistence); + +#endif /* SEQLOCALAM_H */ diff --git a/src/include/commands/sequence_xlog.h b/src/include/commands/sequence_xlog.h deleted file mode 100644 
index b0495f41b43..00000000000 --- a/src/include/commands/sequence_xlog.h +++ /dev/null @@ -1,45 +0,0 @@ -/*------------------------------------------------------------------------- - * - * sequence_xlog.h - * Sequence WAL definitions. - * - * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/include/commands/sequence_xlog.h - * - *------------------------------------------------------------------------- - */ - -#ifndef SEQUENCE_XLOG_H -#define SEQUENCE_XLOG_H - -#include "access/xlogreader.h" -#include "lib/stringinfo.h" - -/* Record identifier */ -#define XLOG_SEQ_LOG 0x00 - -/* - * The "special area" of a sequence's buffer page looks like this. - */ -#define SEQ_MAGIC 0x1717 - -typedef struct sequence_magic -{ - uint32 magic; -} sequence_magic; - -/* Sequence WAL record */ -typedef struct xl_seq_rec -{ - RelFileLocator locator; - /* SEQUENCE TUPLE DATA FOLLOWS AT THE END */ -} xl_seq_rec; - -extern void seq_redo(XLogReaderState *record); -extern void seq_desc(StringInfo buf, XLogReaderState *record); -extern const char *seq_identify(uint8 info); -extern void seq_mask(char *page, BlockNumber blkno); - -#endif /* SEQUENCE_XLOG_H */ -- 2.54.0