From f083a9bdf091f2092dd2aff89fa1c5b415ab7d73 Mon Sep 17 00:00:00 2001 From: bdrouvotAWS Date: Thu, 12 Jan 2023 11:09:24 +0000 Subject: [PATCH v39 1/6] Add info in WAL records in preparation for logical slot conflict handling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Overall design: 1. We want to enable logical decoding on standbys, but replay of WAL from the primary might remove data that is needed by logical decoding, causing replication conflicts much as hot standby does. 2. Our chosen strategy for dealing with this type of replication slot is to invalidate logical slots for which needed data has been removed. 3. To do this we need the latestRemovedXid for each change, just as we do for physical replication conflicts, but we also need to know whether any particular change was to data that logical replication might access. 4. We can't rely on the standby's relcache entries for this purpose in any way, because the startup process can't access catalog contents. 5. Therefore every WAL record that potentially removes data from the index or heap must carry a flag indicating whether or not it is one that might be accessed during logical decoding. Why do we need this for logical decoding on standby? First, let's forget about logical decoding on standby and recall that on a primary database, any catalog rows that may be needed by a logical decoding replication slot are not removed. This is done thanks to the catalog_xmin associated with the logical replication slot. But, with logical decoding on standby, in the following cases: - hot_standby_feedback is off - hot_standby_feedback is on but there is no physical slot between the primary and the standby. 
Then, hot_standby_feedback will work, but only while the connection is alive (for example a node restart would break it). Then, the primary may delete system catalog rows that could be needed by the logical decoding on the standby (as it does not know about the catalog_xmin on the standby). So, it’s mandatory to identify those rows and invalidate the slots that may need them if any. Identifying those rows is the purpose of this commit. Implementation: When a WAL replay on standby indicates that a catalog table tuple is to be deleted by an xid that is greater than a logical slot's catalog_xmin, then that means the slot's catalog_xmin conflicts with the xid, and we need to handle the conflict. While subsequent commits will do the actual conflict handling, this commit adds a new field isCatalogRel in such WAL records (and a new bit set in the xl_heap_visible flags field), that is true for catalog tables, so as to arrange for conflict handling. Due to this new field being added, xl_hash_vacuum_one_page and gistxlogDelete now contain the offsets to be deleted as a FLEXIBLE_ARRAY_MEMBER. This is needed to ensure correct alignment. It's not needed in the other structs where isCatalogRel has been added. To introduce the new isCatalogRel field for indexes, indisusercatalog has been added to pg_index. It allows us to check if there is a risk of conflict on indexes (without having to table_open() the linked table, thereby avoiding any risk of deadlock on it). 
Author: Andres Freund (in an older version), Amit Khandekar, Bertrand Drouvot Reviewed-By: Bertrand Drouvot, Andres Freund, Robert Haas, Fabrizio de Royes Mello --- contrib/test_decoding/expected/ddl.out | 65 +++++++++++++++++++++++++ contrib/test_decoding/sql/ddl.sql | 23 +++++++++ doc/src/sgml/catalogs.sgml | 11 +++++ src/backend/access/common/reloptions.c | 2 +- src/backend/access/gist/gistxlog.c | 11 ++--- src/backend/access/hash/hash_xlog.c | 12 ++--- src/backend/access/hash/hashinsert.c | 1 + src/backend/access/heap/heapam.c | 5 +- src/backend/access/heap/pruneheap.c | 1 + src/backend/access/heap/visibilitymap.c | 3 +- src/backend/access/nbtree/nbtpage.c | 2 + src/backend/access/spgist/spgvacuum.c | 1 + src/backend/catalog/index.c | 10 ++-- src/backend/commands/tablecmds.c | 55 ++++++++++++++++++++- src/include/access/gistxlog.h | 11 +++-- src/include/access/hash_xlog.h | 8 +-- src/include/access/heapam_xlog.h | 8 +-- src/include/access/nbtxlog.h | 6 ++- src/include/access/spgxlog.h | 1 + src/include/access/visibilitymapdefs.h | 9 ++-- src/include/catalog/pg_index.h | 2 + src/include/utils/rel.h | 14 +++++- 22 files changed, 217 insertions(+), 44 deletions(-) 25.7% contrib/test_decoding/expected/ 11.0% contrib/test_decoding/sql/ 4.3% doc/src/sgml/ 3.7% src/backend/access/gist/ 3.7% src/backend/access/hash/ 5.1% src/backend/access/heap/ 14.9% src/backend/commands/ 5.2% src/backend/ 20.6% src/include/access/ 4.3% src/include/utils/ diff --git a/contrib/test_decoding/expected/ddl.out b/contrib/test_decoding/expected/ddl.out index 9a28b5ddc5..48fb44c575 100644 --- a/contrib/test_decoding/expected/ddl.out +++ b/contrib/test_decoding/expected/ddl.out @@ -483,6 +483,7 @@ CREATE TABLE replication_metadata ( ) WITH (user_catalog_table = true) ; +CREATE INDEX replication_metadata_idx1 on replication_metadata(relation); \d+ replication_metadata Table "public.replication_metadata" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ 
-492,11 +493,19 @@ WITH (user_catalog_table = true) options | text[] | | | | extended | | Indexes: "replication_metadata_pkey" PRIMARY KEY, btree (id) + "replication_metadata_idx1" btree (relation) Options: user_catalog_table=true +SELECT bool_and(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; + bool_and +---------- + t +(1 row) + INSERT INTO replication_metadata(relation, options) VALUES ('foo', ARRAY['a', 'b']); ALTER TABLE replication_metadata RESET (user_catalog_table); +CREATE INDEX replication_metadata_idx2 on replication_metadata(relation); \d+ replication_metadata Table "public.replication_metadata" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ -506,10 +515,19 @@ ALTER TABLE replication_metadata RESET (user_catalog_table); options | text[] | | | | extended | | Indexes: "replication_metadata_pkey" PRIMARY KEY, btree (id) + "replication_metadata_idx1" btree (relation) + "replication_metadata_idx2" btree (relation) + +SELECT bool_or(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; + bool_or +--------- + f +(1 row) INSERT INTO replication_metadata(relation, options) VALUES ('bar', ARRAY['a', 'b']); ALTER TABLE replication_metadata SET (user_catalog_table = true); +CREATE INDEX replication_metadata_idx3 on replication_metadata(relation); \d+ replication_metadata Table "public.replication_metadata" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ -519,15 +537,52 @@ ALTER TABLE replication_metadata SET (user_catalog_table = true); options | text[] | | | | extended | | Indexes: "replication_metadata_pkey" PRIMARY KEY, btree (id) + "replication_metadata_idx1" btree (relation) + "replication_metadata_idx2" btree (relation) + "replication_metadata_idx3" btree (relation) Options: user_catalog_table=true +SELECT bool_and(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; + bool_and 
+---------- + t +(1 row) + INSERT INTO replication_metadata(relation, options) VALUES ('blub', NULL); +-- Also checking that indisusercatalog is set correctly when a table is created with user_catalog_table = false +CREATE TABLE replication_metadata_false ( + id serial primary key, + relation name NOT NULL, + options text[] +) +WITH (user_catalog_table = false) +; +CREATE INDEX replication_metadata_false_idx1 on replication_metadata_false(relation); +\d+ replication_metadata_false + Table "public.replication_metadata_false" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +----------+---------+-----------+----------+--------------------------------------------------------+----------+--------------+------------- + id | integer | | not null | nextval('replication_metadata_false_id_seq'::regclass) | plain | | + relation | name | | not null | | plain | | + options | text[] | | | | extended | | +Indexes: + "replication_metadata_false_pkey" PRIMARY KEY, btree (id) + "replication_metadata_false_idx1" btree (relation) +Options: user_catalog_table=false + +SELECT bool_or(indisusercatalog) from pg_index where indrelid = 'replication_metadata_false'::regclass; + bool_or +--------- + f +(1 row) + -- make sure rewrites don't work ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int; ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text; ERROR: cannot rewrite table "replication_metadata" used as a catalog table ALTER TABLE replication_metadata SET (user_catalog_table = false); +CREATE INDEX replication_metadata_idx4 on replication_metadata(relation); \d+ replication_metadata Table "public.replication_metadata" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ -538,8 +593,18 @@ ALTER TABLE replication_metadata SET (user_catalog_table = false); rewritemeornot | integer | | | | plain | | Indexes: "replication_metadata_pkey" PRIMARY KEY, btree (id) + "replication_metadata_idx1" 
btree (relation) + "replication_metadata_idx2" btree (relation) + "replication_metadata_idx3" btree (relation) + "replication_metadata_idx4" btree (relation) Options: user_catalog_table=false +SELECT bool_or(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; + bool_or +--------- + f +(1 row) + INSERT INTO replication_metadata(relation, options) VALUES ('zaphod', NULL); SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); diff --git a/contrib/test_decoding/sql/ddl.sql b/contrib/test_decoding/sql/ddl.sql index 4f76bed72c..51baac5c4e 100644 --- a/contrib/test_decoding/sql/ddl.sql +++ b/contrib/test_decoding/sql/ddl.sql @@ -276,29 +276,52 @@ CREATE TABLE replication_metadata ( ) WITH (user_catalog_table = true) ; + +CREATE INDEX replication_metadata_idx1 on replication_metadata(relation); + \d+ replication_metadata +SELECT bool_and(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; INSERT INTO replication_metadata(relation, options) VALUES ('foo', ARRAY['a', 'b']); ALTER TABLE replication_metadata RESET (user_catalog_table); +CREATE INDEX replication_metadata_idx2 on replication_metadata(relation); \d+ replication_metadata +SELECT bool_or(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; INSERT INTO replication_metadata(relation, options) VALUES ('bar', ARRAY['a', 'b']); ALTER TABLE replication_metadata SET (user_catalog_table = true); +CREATE INDEX replication_metadata_idx3 on replication_metadata(relation); \d+ replication_metadata +SELECT bool_and(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; INSERT INTO replication_metadata(relation, options) VALUES ('blub', NULL); +-- Also checking that indisusercatalog is set correctly when a table is created with user_catalog_table = false +CREATE TABLE replication_metadata_false ( + id serial primary key, + relation name NOT 
NULL, + options text[] +) +WITH (user_catalog_table = false) +; + +CREATE INDEX replication_metadata_false_idx1 on replication_metadata_false(relation); +\d+ replication_metadata_false +SELECT bool_or(indisusercatalog) from pg_index where indrelid = 'replication_metadata_false'::regclass; + -- make sure rewrites don't work ALTER TABLE replication_metadata ADD COLUMN rewritemeornot int; ALTER TABLE replication_metadata ALTER COLUMN rewritemeornot TYPE text; ALTER TABLE replication_metadata SET (user_catalog_table = false); +CREATE INDEX replication_metadata_idx4 on replication_metadata(relation); \d+ replication_metadata +SELECT bool_or(indisusercatalog) from pg_index where indrelid = 'replication_metadata'::regclass; INSERT INTO replication_metadata(relation, options) VALUES ('zaphod', NULL); diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index c1e4048054..22616a0579 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -4447,6 +4447,17 @@ SCRAM-SHA-256$<iteration count>:&l + + + indisusercatalog bool + + + If true, the index is linked to a table that is declared as an additional + catalog table for purposes of logical replication (that is, a table that has user_catalog_table + set to true). 
+ + + indisreplident bool diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 14c23101ad..f5368e3a5b 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -120,7 +120,7 @@ static relopt_bool boolRelOpts[] = RELOPT_KIND_HEAP, AccessExclusiveLock }, - false + HEAP_DEFAULT_USER_CATALOG_TABLE }, { { diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index f65864254a..59e31fcc12 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -177,6 +177,7 @@ gistRedoDeleteRecord(XLogReaderState *record) gistxlogDelete *xldata = (gistxlogDelete *) XLogRecGetData(record); Buffer buffer; Page page; + OffsetNumber *toDelete = xldata->offsets; /* * If we have any conflict processing to do, it must happen before we @@ -203,14 +204,7 @@ gistRedoDeleteRecord(XLogReaderState *record) { page = (Page) BufferGetPage(buffer); - if (XLogRecGetDataLen(record) > SizeOfGistxlogDelete) - { - OffsetNumber *todelete; - - todelete = (OffsetNumber *) ((char *) xldata + SizeOfGistxlogDelete); - - PageIndexMultiDelete(page, todelete, xldata->ntodelete); - } + PageIndexMultiDelete(page, toDelete, xldata->ntodelete); GistClearPageHasGarbage(page); GistMarkTuplesDeleted(page); @@ -608,6 +602,7 @@ gistXLogPageReuse(Relation rel, BlockNumber blkno, FullTransactionId deleteXid) */ /* XLOG stuff */ + xlrec_reuse.isCatalogRel = RelationIsAccessibleInLogicalDecoding(rel); xlrec_reuse.locator = rel->rd_locator; xlrec_reuse.block = blkno; xlrec_reuse.snapshotConflictHorizon = deleteXid; diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c index f38b42efb9..08ceb91288 100644 --- a/src/backend/access/hash/hash_xlog.c +++ b/src/backend/access/hash/hash_xlog.c @@ -980,8 +980,10 @@ hash_xlog_vacuum_one_page(XLogReaderState *record) Page page; XLogRedoAction action; HashPageOpaque pageopaque; + OffsetNumber *toDelete; xldata 
= (xl_hash_vacuum_one_page *) XLogRecGetData(record); + toDelete = xldata->offsets; /* * If we have any conflict processing to do, it must happen before we @@ -1010,15 +1012,7 @@ hash_xlog_vacuum_one_page(XLogReaderState *record) { page = (Page) BufferGetPage(buffer); - if (XLogRecGetDataLen(record) > SizeOfHashVacuumOnePage) - { - OffsetNumber *unused; - - unused = (OffsetNumber *) ((char *) xldata + SizeOfHashVacuumOnePage); - - PageIndexMultiDelete(page, unused, xldata->ntuples); - } - + PageIndexMultiDelete(page, toDelete, xldata->ntuples); /* * Mark the page as not containing any LP_DEAD items. See comments in * _hash_vacuum_one_page() for details. diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index a604e31891..22656b24e2 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -432,6 +432,7 @@ _hash_vacuum_one_page(Relation rel, Relation hrel, Buffer metabuf, Buffer buf) xl_hash_vacuum_one_page xlrec; XLogRecPtr recptr; + xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(hrel); xlrec.snapshotConflictHorizon = snapshotConflictHorizon; xlrec.ntuples = ndeletable; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 388df94a44..0e37bad213 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6871,6 +6871,7 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer, nplans = heap_log_freeze_plan(tuples, ntuples, plans, offsets); xlrec.snapshotConflictHorizon = snapshotConflictHorizon; + xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(rel); xlrec.nplans = nplans; XLogBeginInsert(); @@ -8441,7 +8442,7 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate) * update the heap page's LSN. 
*/ XLogRecPtr -log_heap_visible(RelFileLocator rlocator, Buffer heap_buffer, Buffer vm_buffer, +log_heap_visible(Relation rel, Buffer heap_buffer, Buffer vm_buffer, TransactionId snapshotConflictHorizon, uint8 vmflags) { xl_heap_visible xlrec; @@ -8453,6 +8454,8 @@ log_heap_visible(RelFileLocator rlocator, Buffer heap_buffer, Buffer vm_buffer, xlrec.snapshotConflictHorizon = snapshotConflictHorizon; xlrec.flags = vmflags; + if (RelationIsAccessibleInLogicalDecoding(rel)) + xlrec.flags |= VISIBILITYMAP_IS_CATALOG_REL; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapVisible); diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 4e65cbcadf..3f0342351f 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -418,6 +418,7 @@ heap_page_prune(Relation relation, Buffer buffer, xl_heap_prune xlrec; XLogRecPtr recptr; + xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(relation); xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon; xlrec.nredirected = prstate.nredirected; xlrec.ndead = prstate.ndead; diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 1d1ca423a9..045c61edb8 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -283,8 +283,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, if (XLogRecPtrIsInvalid(recptr)) { Assert(!InRecovery); - recptr = log_heap_visible(rel->rd_locator, heapBuf, vmBuf, - cutoff_xid, flags); + recptr = log_heap_visible(rel, heapBuf, vmBuf, cutoff_xid, flags); /* * If data checksums are enabled (or wal_log_hints=on), we diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 3feee28d19..edc4fe866a 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -836,6 +836,7 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, FullTransactionId 
safexid) */ /* XLOG stuff */ + xlrec_reuse.isCatalogRel = RelationIsAccessibleInLogicalDecoding(rel); xlrec_reuse.locator = rel->rd_locator; xlrec_reuse.block = blkno; xlrec_reuse.snapshotConflictHorizon = safexid; @@ -1358,6 +1359,7 @@ _bt_delitems_delete(Relation rel, Buffer buf, XLogRecPtr recptr; xl_btree_delete xlrec_delete; + xlrec_delete.isCatalogRel = RelationIsAccessibleInLogicalDecoding(rel); xlrec_delete.snapshotConflictHorizon = snapshotConflictHorizon; xlrec_delete.ndeleted = ndeletable; xlrec_delete.nupdated = nupdatable; diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 3adb18f2d8..afd9275a10 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -503,6 +503,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer) spgxlogVacuumRedirect xlrec; GlobalVisState *vistest; + xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(index); xlrec.nToPlaceholder = 0; xlrec.snapshotConflictHorizon = InvalidTransactionId; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index e6579f2979..a038400fe1 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -123,7 +123,8 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid, bool isexclusion, bool immediate, bool isvalid, - bool isready); + bool isready, + bool is_user_catalog); static void index_update_stats(Relation rel, bool hasindex, double reltuples); @@ -545,7 +546,8 @@ UpdateIndexRelation(Oid indexoid, bool isexclusion, bool immediate, bool isvalid, - bool isready) + bool isready, + bool is_user_catalog) { int2vector *indkey; oidvector *indcollation; @@ -622,6 +624,7 @@ UpdateIndexRelation(Oid indexoid, values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false); values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready); values[Anum_pg_index_indislive - 1] = BoolGetDatum(true); + values[Anum_pg_index_indisusercatalog - 1] = BoolGetDatum(is_user_catalog); 
values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false); values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey); values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation); @@ -1020,7 +1023,8 @@ index_create(Relation heapRelation, isprimary, is_exclusion, (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0, !concurrent && !invalid, - !concurrent); + !concurrent, + RelationIsUsedAsCatalogTable(heapRelation)); /* * Register relcache invalidation on the indexes' heap relation, to diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 1fbdad4b64..327fcbcc6c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -103,6 +103,7 @@ #include "utils/syscache.h" #include "utils/timestamp.h" #include "utils/typcache.h" +#include "utils/rel.h" /* * ON COMMIT action list @@ -14162,6 +14163,10 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, Datum repl_val[Natts_pg_class]; bool repl_null[Natts_pg_class]; bool repl_repl[Natts_pg_class]; + ListCell *cell; + List *rel_options; + bool catalog_table_val = HEAP_DEFAULT_USER_CATALOG_TABLE; + bool catalog_table = false; static char *validnsps[] = HEAP_RELOPT_NAMESPACES; if (defList == NIL && operation != AT_ReplaceRelOptions) @@ -14228,7 +14233,6 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, { Query *view_query = get_view_query(rel); List *view_options = untransformRelOptions(newOptions); - ListCell *cell; bool check_option = false; foreach(cell, view_options) @@ -14256,6 +14260,20 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, } } + /* If user_catalog_table is part of the new options, record its new value */ + rel_options = untransformRelOptions(newOptions); + + foreach(cell, rel_options) + { + DefElem *defel = (DefElem *) lfirst(cell); + + if (strcmp(defel->defname, "user_catalog_table") == 0) + { + catalog_table = true; + catalog_table_val = 
defGetBoolean(defel); + } + } + /* * All we need do here is update the pg_class row; the new options will be * propagated into relcaches during post-commit cache inval. @@ -14282,6 +14300,41 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, ReleaseSysCache(tuple); + /* Update the indexes if there is a need to */ + if (catalog_table || operation == AT_ResetRelOptions) + { + Relation pg_index; + HeapTuple pg_index_tuple; + Form_pg_index pg_index_form; + ListCell *index; + + pg_index = table_open(IndexRelationId, RowExclusiveLock); + + foreach(index, RelationGetIndexList(rel)) + { + Oid thisIndexOid = lfirst_oid(index); + + pg_index_tuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(thisIndexOid)); + if (!HeapTupleIsValid(pg_index_tuple)) + elog(ERROR, "cache lookup failed for index %u", thisIndexOid); + pg_index_form = (Form_pg_index) GETSTRUCT(pg_index_tuple); + + /* Modify the index only if user_catalog_table differ */ + if (catalog_table_val != pg_index_form->indisusercatalog) + { + pg_index_form->indisusercatalog = catalog_table_val; + CatalogTupleUpdate(pg_index, &pg_index_tuple->t_self, pg_index_tuple); + InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0, + InvalidOid, true); + } + + heap_freetuple(pg_index_tuple); + } + + table_close(pg_index, RowExclusiveLock); + } + /* repeat the whole exercise for the toast table, if there's one */ if (OidIsValid(rel->rd_rel->reltoastrelid)) { diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index 09f9b0f8c6..191f0e5808 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -51,13 +51,13 @@ typedef struct gistxlogDelete { TransactionId snapshotConflictHorizon; uint16 ntodelete; /* number of deleted offsets */ + bool isCatalogRel; - /* - * In payload of blk 0 : todelete OffsetNumbers - */ + /* TODELETE OFFSET NUMBERS */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; } gistxlogDelete; -#define SizeOfGistxlogDelete 
(offsetof(gistxlogDelete, ntodelete) + sizeof(uint16)) +#define SizeOfGistxlogDelete offsetof(gistxlogDelete, offsets) /* * Backup Blk 0: If this operation completes a page split, by inserting a @@ -100,9 +100,10 @@ typedef struct gistxlogPageReuse RelFileLocator locator; BlockNumber block; FullTransactionId snapshotConflictHorizon; + bool isCatalogRel; } gistxlogPageReuse; -#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, snapshotConflictHorizon) + sizeof(FullTransactionId)) +#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, isCatalogRel) + sizeof(bool)) extern void gist_redo(XLogReaderState *record); extern void gist_desc(StringInfo buf, XLogReaderState *record); diff --git a/src/include/access/hash_xlog.h b/src/include/access/hash_xlog.h index a2f0f39213..4a79e0c0a4 100644 --- a/src/include/access/hash_xlog.h +++ b/src/include/access/hash_xlog.h @@ -252,12 +252,12 @@ typedef struct xl_hash_vacuum_one_page { TransactionId snapshotConflictHorizon; int ntuples; - - /* TARGET OFFSET NUMBERS FOLLOW AT THE END */ + bool isCatalogRel; + /* TARGET OFFSET NUMBERS */ + OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; } xl_hash_vacuum_one_page; -#define SizeOfHashVacuumOnePage \ - (offsetof(xl_hash_vacuum_one_page, ntuples) + sizeof(int)) +#define SizeOfHashVacuumOnePage offsetof(xl_hash_vacuum_one_page, offsets) extern void hash_redo(XLogReaderState *record); extern void hash_desc(StringInfo buf, XLogReaderState *record); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 8cb0d8da19..1d43181a40 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -245,10 +245,11 @@ typedef struct xl_heap_prune TransactionId snapshotConflictHorizon; uint16 nredirected; uint16 ndead; + bool isCatalogRel; /* OFFSET NUMBERS are in the block reference 0 */ } xl_heap_prune; -#define SizeOfHeapPrune (offsetof(xl_heap_prune, ndead) + sizeof(uint16)) +#define SizeOfHeapPrune (offsetof(xl_heap_prune, 
isCatalogRel) + sizeof(bool)) /* * The vacuum page record is similar to the prune record, but can only mark @@ -344,12 +345,13 @@ typedef struct xl_heap_freeze_page { TransactionId snapshotConflictHorizon; uint16 nplans; + bool isCatalogRel; /* FREEZE PLANS FOLLOW */ /* OFFSET NUMBER ARRAY FOLLOWS */ } xl_heap_freeze_page; -#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, nplans) + sizeof(uint16)) +#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, isCatalogRel) + sizeof(bool)) /* * This is what we need to know about setting a visibility map bit @@ -408,7 +410,7 @@ extern void heap2_desc(StringInfo buf, XLogReaderState *record); extern const char *heap2_identify(uint8 info); extern void heap_xlog_logical_rewrite(XLogReaderState *r); -extern XLogRecPtr log_heap_visible(RelFileLocator rlocator, Buffer heap_buffer, +extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer, Buffer vm_buffer, TransactionId snapshotConflictHorizon, uint8 vmflags); diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index edd1333d9b..99d87d7189 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -188,9 +188,10 @@ typedef struct xl_btree_reuse_page RelFileLocator locator; BlockNumber block; FullTransactionId snapshotConflictHorizon; + bool isCatalogRel; } xl_btree_reuse_page; -#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page)) +#define SizeOfBtreeReusePage (offsetof(xl_btree_reuse_page, isCatalogRel) + sizeof(bool)) /* * xl_btree_vacuum and xl_btree_delete records describe deletion of index @@ -235,13 +236,14 @@ typedef struct xl_btree_delete TransactionId snapshotConflictHorizon; uint16 ndeleted; uint16 nupdated; + bool isCatalogRel; /* DELETED TARGET OFFSET NUMBERS FOLLOW */ /* UPDATED TARGET OFFSET NUMBERS FOLLOW */ /* UPDATED TUPLES METADATA (xl_btree_update) ARRAY FOLLOWS */ } xl_btree_delete; -#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nupdated) + sizeof(uint16)) +#define 
SizeOfBtreeDelete (offsetof(xl_btree_delete, isCatalogRel) + sizeof(bool)) /* * The offsets that appear in xl_btree_update metadata are offsets into the diff --git a/src/include/access/spgxlog.h b/src/include/access/spgxlog.h index b9d6753533..29a6aa57a9 100644 --- a/src/include/access/spgxlog.h +++ b/src/include/access/spgxlog.h @@ -240,6 +240,7 @@ typedef struct spgxlogVacuumRedirect uint16 nToPlaceholder; /* number of redirects to make placeholders */ OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */ TransactionId snapshotConflictHorizon; /* newest XID of removed redirects */ + bool isCatalogRel; /* offsets of redirect tuples to make placeholders follow */ OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]; diff --git a/src/include/access/visibilitymapdefs.h b/src/include/access/visibilitymapdefs.h index 9165b9456b..b27fdc0aef 100644 --- a/src/include/access/visibilitymapdefs.h +++ b/src/include/access/visibilitymapdefs.h @@ -17,9 +17,10 @@ #define BITS_PER_HEAPBLOCK 2 /* Flags for bit map */ -#define VISIBILITYMAP_ALL_VISIBLE 0x01 -#define VISIBILITYMAP_ALL_FROZEN 0x02 -#define VISIBILITYMAP_VALID_BITS 0x03 /* OR of all valid visibilitymap - * flags bits */ +#define VISIBILITYMAP_ALL_VISIBLE 0x01 +#define VISIBILITYMAP_ALL_FROZEN 0x02 +#define VISIBILITYMAP_VALID_BITS 0x03 /* OR of all valid visibilitymap + * flags bits */ +#define VISIBILITYMAP_IS_CATALOG_REL 0x04 #endif /* VISIBILITYMAPDEFS_H */ diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index b0592571da..f5f5de1603 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -43,6 +43,8 @@ CATALOG(pg_index,2610,IndexRelationId) BKI_SCHEMA_MACRO bool indcheckxmin; /* must we wait for xmin to be old? */ bool indisready; /* is this index ready for inserts? */ bool indislive; /* is this index alive at all? */ + bool indisusercatalog; /* is this index linked to a user catalog + * relation? 
*/ bool indisreplident; /* is this index the identity for replication? */ /* variable-length fields start here, but we allow direct access to indkey */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index af9785038d..2ef192c169 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -27,6 +27,7 @@ #include "storage/smgr.h" #include "utils/relcache.h" #include "utils/reltrigger.h" +#include "catalog/catalog.h" /* @@ -343,6 +344,7 @@ typedef struct StdRdOptions #define HEAP_MIN_FILLFACTOR 10 #define HEAP_DEFAULT_FILLFACTOR 100 +#define HEAP_DEFAULT_USER_CATALOG_TABLE false /* * RelationGetToastTupleTarget @@ -385,6 +387,15 @@ typedef struct StdRdOptions (relation)->rd_rel->relkind == RELKIND_MATVIEW) ? \ ((StdRdOptions *) (relation)->rd_options)->user_catalog_table : false) +/* + * IndexIsLinkedToUserCatalogTable + * Returns whether the relation should be treated as an index linked to + * a user catalog table from the pov of logical decoding. + */ +#define IndexIsLinkedToUserCatalogTable(relation) \ + ((relation)->rd_rel->relkind == RELKIND_INDEX && \ + (relation)->rd_index->indisusercatalog) + /* * RelationGetParallelWorkers * Returns the relation's parallel_workers reloption setting. @@ -682,7 +693,8 @@ RelationCloseSmgr(Relation relation) #define RelationIsAccessibleInLogicalDecoding(relation) \ (XLogLogicalInfoActive() && \ RelationNeedsWAL(relation) && \ - (IsCatalogRelation(relation) || RelationIsUsedAsCatalogTable(relation))) + (IsCatalogRelation(relation) || RelationIsUsedAsCatalogTable(relation) || \ + IndexIsLinkedToUserCatalogTable(relation))) /* * RelationIsLogicallyLogged -- 2.34.1