From 8d62f2e3dc8c236401fc4f1f3960a3a656b4c36d Mon Sep 17 00:00:00 2001 From: Petr Jelinek Date: Thu, 15 Dec 2016 15:20:08 +0100 Subject: [PATCH] Logical replication support for initial data copy --- contrib/file_fdw/file_fdw.c | 5 +- doc/src/sgml/catalogs.sgml | 77 +++ doc/src/sgml/config.sgml | 25 + doc/src/sgml/logical-replication.sgml | 59 +- doc/src/sgml/logicaldecoding.sgml | 2 +- doc/src/sgml/monitoring.sgml | 9 +- doc/src/sgml/protocol.sgml | 22 +- doc/src/sgml/ref/alter_subscription.sgml | 47 +- doc/src/sgml/ref/create_subscription.sgml | 39 ++ src/backend/catalog/Makefile | 1 + src/backend/catalog/heap.c | 6 + src/backend/catalog/pg_subscription.c | 289 +++++++++ src/backend/catalog/system_views.sql | 1 + src/backend/commands/copy.c | 23 +- src/backend/commands/subscriptioncmds.c | 376 +++++++++-- src/backend/parser/gram.y | 26 +- src/backend/postmaster/pgstat.c | 6 + .../libpqwalreceiver/libpqwalreceiver.c | 251 +++++++- src/backend/replication/logical/Makefile | 2 +- src/backend/replication/logical/launcher.c | 130 +++- src/backend/replication/logical/relation.c | 7 + src/backend/replication/logical/snapbuild.c | 85 ++- src/backend/replication/logical/tablesync.c | 708 +++++++++++++++++++++ src/backend/replication/logical/worker.c | 193 ++++-- src/backend/replication/repl_gram.y | 75 ++- src/backend/replication/repl_scanner.l | 7 +- src/backend/replication/walsender.c | 150 ++++- src/backend/tcop/postgres.c | 5 +- src/backend/utils/adt/misc.c | 20 + src/backend/utils/cache/syscache.c | 23 + src/backend/utils/misc/guc.c | 12 + src/include/catalog/indexing.h | 6 + src/include/catalog/pg_proc.h | 5 +- src/include/catalog/pg_subscription_rel.h | 79 +++ src/include/commands/copy.h | 5 +- src/include/nodes/nodes.h | 1 + src/include/nodes/parsenodes.h | 12 + src/include/nodes/replnodes.h | 11 +- src/include/pgstat.h | 4 +- src/include/replication/logical.h | 15 +- src/include/replication/logicallauncher.h | 1 + src/include/replication/snapbuild.h | 1 + src/include/replication/walreceiver.h | 25 +- src/include/replication/walsender.h | 12 +- src/include/replication/worker_internal.h | 24 +- src/include/utils/syscache.h | 2 + src/test/regress/expected/object_address.out | 3 +- src/test/regress/expected/rules.out | 3 +- src/test/regress/expected/sanity_check.out | 1 + src/test/regress/expected/subscription.out | 44 +- src/test/regress/sql/object_address.sql | 2 +- src/test/regress/sql/subscription.sql | 8 +- src/test/subscription/t/001_rep_changes.pl | 36 +- src/test/subscription/t/002_types.pl | 6 + src/test/subscription/t/003_constraints.pl | 2 +- src/test/subscription/t/004_sync.pl | 159 +++++ 56 files changed, 2854 insertions(+), 294 deletions(-) create mode 100644 src/backend/replication/logical/tablesync.c create mode 100644 src/include/catalog/pg_subscription_rel.h create mode 100644 src/test/subscription/t/004_sync.pl diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 735b794..277639f 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -662,6 +662,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags) node->ss.ss_currentRelation, filename, is_program, + NULL, NIL, options); @@ -737,6 +738,7 @@ fileReScanForeignScan(ForeignScanState *node) node->ss.ss_currentRelation, festate->filename, festate->is_program, + NULL, NIL, festate->options); } @@ -1100,7 +1102,8 @@ file_acquire_sample_rows(Relation onerel, int elevel, /* * Create CopyState from FDW options. 
*/ - cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NIL, options); + cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NULL, NIL, + options); /* * Use per-tuple memory context to prevent leak of memory used to read diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 41e3e1b..daa85f2 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -301,6 +301,11 @@ + pg_subscription_rel + relation state mapping for subscriptions + + + pg_tablespace tablespaces within this database cluster @@ -6406,6 +6411,78 @@ + + <structname>pg_subscription_rel</structname> + + + pg_subscription_rel + + + + The catalog pg_subscription_rel contains the + status for each replicated relation in each subscription. This is a + many-to-many mapping. + + + + This catalog only contains tables known to subscription after running + either CREATE SUBSCRIPTION or + ALTER SUBSCRIPTION ... REFRESH commands. + + + + <structname>pg_subscription_rel</structname> Columns + + + + + Name + Type + References + Description + + + + + + srsubid + oid + pg_subscription.oid + Reference to subscription + + + + srrelid + oid + pg_class.oid + Reference to relation + + + + srsubstate + char + + + i = initialize, + d = data is being copied, + s = synchronized, + r = ready (normal replication) + + + + + srsublsn + pg_lsn + + + End LSN for s and r states. + + + + +
+
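As an illustration of how the new catalog is intended to be read, the per-table synchronization state of a subscription can be inspected with a query along these lines (a sketch only; the subscription name mysub is made up):

  SELECT sr.srrelid::regclass AS table_name,
         sr.srsubstate        AS state,    -- i, d, s or r, as described above
         sr.srsublsn          AS end_lsn   -- only set in the s and r states
    FROM pg_subscription_rel sr
    JOIN pg_subscription s ON s.oid = sr.srsubid
   WHERE s.subname = 'mysub';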
+ <structname>pg_tablespace</structname>
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index cd82c04..5a0fb2d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml
@@ -3449,6 +3449,31 @@ ANY num_sync ( + max_sync_workers_per_subscription (integer) + + max_sync_workers_per_subscription configuration parameter + + + + + Maximum number of synchronization workers per subscription. This + parameter controls the amount of parallelism of the initial data copy + during subscription initialization or when new tables are added. + + + Currently, there can be only one synchronization worker per table. + + + The synchronization workers are taken from the pool defined by + max_logical_replication_workers. + + + The default value is 2. + + + +
diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index 7b351f2..f75304c 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml
@@ -24,11 +24,13 @@ - Logical replication sends changes on the publisher to the subscriber as - they occur in real-time. The subscriber applies the data in the same order - as the publisher so that transactional consistency is guaranteed for - publications within a single subscription. This method of data replication - is sometimes referred to as transactional replication. + Logical replication typically starts with a snapshot of the data on + the publisher database. Once that is done, the changes on the publisher + are sent to the subscriber as they occur in real-time. The subscriber + applies the data in the same order as the publisher so that transactional + consistency is guaranteed for publications within a single subscription. + This method of data replication is sometimes referred to as transactional + replication.
@@ -159,7 +161,9 @@ Each subscription will receive changes via one replication slot (see - ). + ). Additional temporary + replication slots may be required for the initial data synchronization + of pre-existing table data.
@@ -264,9 +268,25 @@ to replica, which produces the usual effects on triggers and constraints. + + + Initial Snapshot + + The initial data in existing subscribed tables are snapshotted and + copied by a parallel instance of a special kind of apply process. + This process will create its own temporary replication slot and + copy the existing data. Once the existing data is copied, the worker + enters synchronization mode, which ensures that the table is brought + up to a synchronized state with the main apply process by streaming + any changes that happened during the initial data copy using standard + logical replication. Once the synchronization is done, control + of replication of the table is given back to the main apply + process, where replication continues as normal. + + - + Monitoring
@@ -287,7 +307,9 @@ Normally, there is a single apply process running for an enabled subscription. A disabled subscription or a crashed subscription will have - zero rows in this view. + zero rows in this view. If the initial data synchronization of any + table is in progress, there will be additional workers for the tables + being synchronized.
@@ -333,10 +355,11 @@ On the publisher side, wal_level must be set to logical, and max_replication_slots - must be set to at least the number of subscriptions expected to connect. - And max_wal_senders should be set to at least the same - as max_replication_slots plus the number of physical - replicas that are connected at the same time.
+ must be set to at least the number of subscriptions expected to connect, + with some reserve for table synchronization. And + max_wal_senders should be set to at least the same as + max_replication_slots plus the number of physical + replicas that are connected at the same time.
@@ -344,9 +367,9 @@ to be set. In this case it should be set to at least the number of subscriptions that will be added to the subscriber. max_logical_replication_workers must be set to at - least the number of subscriptions. Additionally the - max_worker_processes may need to be adjusted to - accommodate for replication workers, at least + least the number of subscriptions, again with some reserve for table + synchronization. Additionally the max_worker_processes + may need to be adjusted to accommodate for replication workers, at least (max_logical_replication_workers + 1). Note that some extensions and parallel queries also take worker slots from max_worker_processes.
@@ -389,8 +412,10 @@ CREATE SUBSCRIPTION mysub CONNECTION 'dbname=foo host=bar user=repuser' PUBLICAT - The above will start the replication process of changes to - users and departments tables. + The above will start the replication process, which synchronizes the + initial table contents of the users and + departments tables and then starts replicating + incremental changes to those tables.
diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 03c2c69..e1cc1b9 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml
@@ -272,7 +272,7 @@ $ pg_recvlogical -d postgres --slot test --drop-slot Exported Snapshots When a new replication slot is created using the streaming replication interface, - a snapshot is exported + a snapshot can be exported (see ), which will show exactly the state of the database after which all changes will be included in the change stream. This can be used to create a new replica by
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index fad5cb0..3d3761e 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml
@@ -1585,6 +1585,12 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i Process ID of the subscription worker process + relid + Oid + OID of the relation that the worker is synchronizing; this is always + NULL for the main apply worker + + received_lsn pg_lsn Last transaction log position received, the initial value of
@@ -1620,7 +1626,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i The pg_stat_subscription view will contain one row per subscription for main worker (with null PID if the worker is - not running). + not running), and additional rows for workers handling the initial data + copy of the subscribed tables.
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 589b881..ae67ca4 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml
@@ -1458,7 +1458,7 @@ The commands accepted in walsender mode are: - CREATE_REPLICATION_SLOT slot_name [ TEMPORARY ] { PHYSICAL [ RESERVE_WAL ] | LOGICAL output_plugin } + CREATE_REPLICATION_SLOT slot_name [ TEMPORARY ] { PHYSICAL [ RESERVE_WAL ] | LOGICAL output_plugin [ EXPORT_SNAPSHOT | NOEXPORT_SNAPSHOT | USE_SNAPSHOT ] } CREATE_REPLICATION_SLOT
@@ -1509,6 +1509,26 @@ The commands accepted in walsender mode are: + + + EXPORT_SNAPSHOT + NOEXPORT_SNAPSHOT + USE_SNAPSHOT + + + Decides what to do with the snapshot created during logical slot + initialization. EXPORT_SNAPSHOT (the + default) will export the snapshot for use in other sessions. This + option can't be used inside a transaction. + USE_SNAPSHOT will use the snapshot for the current + transaction executing the command. This option must be used in a + transaction, and CREATE_REPLICATION_SLOT must + be the first command run in that transaction. Finally, + NOEXPORT_SNAPSHOT will just use the snapshot for logical + decoding as normal but won't do anything else with it. + + +
diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml index 032ecbb..b34386d 100644 --- a/doc/src/sgml/ref/alter_subscription.sgml +++ b/doc/src/sgml/ref/alter_subscription.sgml
@@ -21,15 +21,21 @@ PostgreSQL documentation -ALTER SUBSCRIPTION name WITH ( option [, ... ] ) ] +ALTER SUBSCRIPTION name WITH ( suboption [, ... ] ) ] -where option can be: +where suboption can be: - SLOT NAME = slot_name + SLOT NAME = slot_name + +ALTER SUBSCRIPTION name SET PUBLICATION publication_name [, ...] WITH ( puboption [, ... ] ) +ALTER SUBSCRIPTION name REFRESH PUBLICATION WITH ( puboption [, ... ] ) + +where puboption can be: + + COPY DATA | NOCOPY DATA ALTER SUBSCRIPTION name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } ALTER SUBSCRIPTION name CONNECTION 'conninfo' -ALTER SUBSCRIPTION name SET PUBLICATION publication_name [, ...] ALTER SUBSCRIPTION name ENABLE ALTER SUBSCRIPTION name DISABLE
@@ -65,7 +71,6 @@ ALTER SUBSCRIPTION name DISABLE CONNECTION 'conninfo' - SET PUBLICATION publication_name SLOT NAME = slot_name
@@ -77,6 +82,37 @@ ALTER SUBSCRIPTION name DISABLE + SET PUBLICATION publication_name + + + Changes the list of subscribed publications. See + for more information. + + + This clause will also execute REFRESH PUBLICATION. + + + + + + REFRESH PUBLICATION + + + Fetch missing table information from the publisher. This will start + replication of tables that were added to the subscribed publications + since the last invocation of REFRESH PUBLICATION or + since CREATE SUBSCRIPTION. + + + The COPY DATA and NOCOPY DATA + options specify whether the existing data in the publications being + subscribed to should be copied. COPY DATA is the + default. + + + + + ENABLE
@@ -95,6 +131,7 @@ ALTER SUBSCRIPTION name DISABLE +
diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index 9bed262..91127ea 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml
@@ -28,6 +28,8 @@ CREATE SUBSCRIPTION subscription_name
@@ -129,6 +131,43 @@ CREATE SUBSCRIPTION subscription_name + + + COPY DATA + NOCOPY DATA + + + Specifies whether the existing data in the publications being + subscribed to should be copied once the replication starts. + COPY DATA is the default. + + + + + + NOCONNECT + + + Instructs CREATE SUBSCRIPTION to skip the initial + connection to the publisher. This will change the default values of + other options to DISABLED, + NOCREATE SLOT and NOCOPY DATA. + + + It is not allowed to combine NOCONNECT with + ENABLED, CREATE SLOT or + COPY DATA. + + + Since no connection is made when this option is specified, the tables + are not subscribed, so after you enable the subscription nothing will + be replicated. It is required to run + ALTER SUBSCRIPTION ... REFRESH PUBLICATION in order for + the tables to be subscribed.
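As a usage sketch of the workflow just described (the subscription, publication and connection string are hypothetical, and it is assumed that a replication slot for the subscription has already been created on the publisher):

  CREATE SUBSCRIPTION mysub
         CONNECTION 'host=publisher dbname=foo'
         PUBLICATION mypub
         WITH (NOCONNECT);

  -- later, once the slot exists on the publisher:
  ALTER SUBSCRIPTION mysub ENABLE;
  ALTER SUBSCRIPTION mysub REFRESH PUBLICATION WITH (COPY DATA);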
+ + + + diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 3136858..159cab5 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -44,6 +44,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \ pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \ pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \ + pg_subscription_rel.h toasting.h indexing.h \ toasting.h indexing.h \ ) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 41c0056..d49dcdc 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -52,6 +52,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_subscription_rel.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "catalog/pg_type_fn.h" @@ -1832,6 +1833,11 @@ heap_drop_with_catalog(Oid relid) relation_close(rel, NoLock); /* + * Remove any associated relation synchronization states. + */ + RemoveSubscriptionRel(InvalidOid, relid); + + /* * Forget any ON COMMIT action for the rel */ remove_on_commit_action(relid); diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c index 20fdd6a..8850b7e 100644 --- a/src/backend/catalog/pg_subscription.c +++ b/src/backend/catalog/pg_subscription.c @@ -19,15 +19,20 @@ #include "access/genam.h" #include "access/heapam.h" #include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/indexing.h" #include "catalog/pg_type.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" #include "nodes/makefuncs.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/pg_lsn.h" +#include "utils/rel.h" #include "utils/syscache.h" @@ -206,3 +211,287 @@ textarray_to_stringlist(ArrayType *textarray) return res; } + +/* + * Set the state of a subscription table. + */ +Oid +SetSubscriptionRelState(Oid subid, Oid relid, char state, + XLogRecPtr sublsn) +{ + Relation rel; + HeapTuple tup; + Oid subrelid; + bool nulls[Natts_pg_subscription_rel]; + Datum values[Natts_pg_subscription_rel]; + + /* Prevent concurrent changes. */ + rel = heap_open(SubscriptionRelRelationId, ShareRowExclusiveLock); + + /* Try finding existing mapping. */ + tup = SearchSysCacheCopy2(SUBSCRIPTIONRELMAP, + ObjectIdGetDatum(relid), + ObjectIdGetDatum(subid)); + + /* + * If the record for given table does not exist yet create new + * record, otherwise update the existing one. + */ + if (!HeapTupleIsValid(tup)) + { + /* Form the tuple. */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_subscription_rel_srsubid - 1] = ObjectIdGetDatum(subid); + values[Anum_pg_subscription_rel_srrelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_subscription_rel_srsubstate - 1] = CharGetDatum(state); + if (sublsn != InvalidXLogRecPtr) + values[Anum_pg_subscription_rel_srsublsn - 1] = LSNGetDatum(sublsn); + else + nulls[Anum_pg_subscription_rel_srsublsn - 1] = true; + + tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + /* Insert tuple into catalog. */ + subrelid = CatalogTupleInsert(rel, tup); + + heap_freetuple(tup); + } + else + { + bool replaces[Natts_pg_subscription_rel]; + + /* Update the tuple. 
*/ + memset(values, 0, sizeof(values)); + memset(nulls, true, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + replaces[Anum_pg_subscription_rel_srsubstate - 1] = true; + nulls[Anum_pg_subscription_rel_srsubstate - 1] = false; + values[Anum_pg_subscription_rel_srsubstate - 1] = CharGetDatum(state); + + replaces[Anum_pg_subscription_rel_srsublsn - 1] = true; + if (sublsn != InvalidXLogRecPtr) + { + nulls[Anum_pg_subscription_rel_srsublsn - 1] = false; + values[Anum_pg_subscription_rel_srsublsn - 1] = LSNGetDatum(sublsn); + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, + replaces); + + /* Update the catalog. */ + CatalogTupleUpdate(rel, &tup->t_self, tup); + + subrelid = HeapTupleGetOid(tup); + } + + /* Cleanup. */ + heap_close(rel, NoLock); + + /* Make the changes visible. */ + CommandCounterIncrement(); + + return subrelid; +} + +/* + * Get state of subscription table. + * + * Returns SUBREL_STATE_UNKNOWN when not found and missing_ok is true. + */ +char +GetSubscriptionRelState(Oid subid, Oid relid, XLogRecPtr *sublsn, + bool missing_ok) +{ + Relation rel; + HeapTuple tup; + char substate; + bool isnull; + Datum d; + + rel = heap_open(SubscriptionRelRelationId, AccessShareLock); + + /* Try finding the mapping. */ + tup = SearchSysCache2(SUBSCRIPTIONRELMAP, + ObjectIdGetDatum(relid), + ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + { + if (missing_ok) + { + heap_close(rel, RowExclusiveLock); + *sublsn = InvalidXLogRecPtr; + return '\0'; + } + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription table %u in subscription %u does not exist", + relid, subid))); + } + + /* Get the state. */ + d = SysCacheGetAttr(SUBSCRIPTIONRELMAP, tup, + Anum_pg_subscription_rel_srsubstate, &isnull); + Assert(!isnull); + substate = DatumGetChar(d); + d = SysCacheGetAttr(SUBSCRIPTIONRELMAP, tup, + Anum_pg_subscription_rel_srsublsn, &isnull); + if (isnull) + *sublsn = InvalidXLogRecPtr; + else + *sublsn = DatumGetLSN(d); + + /* Cleanup */ + ReleaseSysCache(tup); + heap_close(rel, AccessShareLock); + + return substate; +} + +/* + * Drop subscription relation mapping. These can be for a particular + * subscription, or for a particular relation, or both. + */ +void +RemoveSubscriptionRel(Oid subid, Oid relid) +{ + Relation rel; + HeapScanDesc scan; + ScanKeyData skey[2]; + HeapTuple tup; + int nkeys = 0; + + /* Prevent concurrent changes (see SetSubscriptionRelState()). */ + rel = heap_open(SubscriptionRelRelationId, ShareRowExclusiveLock); + + if (OidIsValid(subid)) + { + ScanKeyInit(&skey[nkeys++], + Anum_pg_subscription_rel_srsubid, + BTEqualStrategyNumber, + F_OIDEQ, + ObjectIdGetDatum(subid)); + } + + if (OidIsValid(relid)) + { + ScanKeyInit(&skey[nkeys++], + Anum_pg_subscription_rel_srrelid, + BTEqualStrategyNumber, + F_OIDEQ, + ObjectIdGetDatum(relid)); + } + + /* Do the search and delete what we found. */ + scan = heap_beginscan_catalog(rel, nkeys, skey); + while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection))) + { + simple_heap_delete(rel, &tup->t_self); + } + heap_endscan(scan); + + heap_close(rel, ShareRowExclusiveLock); +} + + +/* + * Get all relations for subscription. + * + * Returned list is palloced in current memory context. 
+ */ +List * +GetSubscriptionRelations(Oid subid) +{ + List *res = NIL; + Relation rel; + HeapTuple tup; + int nkeys = 0; + ScanKeyData skey[2]; + SysScanDesc scan; + + rel = heap_open(SubscriptionRelRelationId, AccessShareLock); + + ScanKeyInit(&skey[nkeys++], + Anum_pg_subscription_rel_srsubid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(subid)); + + scan = systable_beginscan(rel, InvalidOid, false, + NULL, nkeys, skey); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_subscription_rel subrel; + SubscriptionRelState *relstate; + + subrel = (Form_pg_subscription_rel) GETSTRUCT(tup); + + relstate = (SubscriptionRelState *)palloc(sizeof(SubscriptionRelState)); + relstate->relid = subrel->srrelid; + relstate->state = subrel->srsubstate; + relstate->lsn = subrel->srsublsn; + + res = lappend(res, relstate); + } + + /* Cleanup */ + systable_endscan(scan); + heap_close(rel, AccessShareLock); + + return res; +} + +/* + * Get all relations for subscription that are not in a ready state. + * + * Returned list is palloced in current memory context. + */ +List * +GetSubscriptionNotReadyRelations(Oid subid) +{ + List *res = NIL; + Relation rel; + HeapTuple tup; + int nkeys = 0; + ScanKeyData skey[2]; + SysScanDesc scan; + + rel = heap_open(SubscriptionRelRelationId, AccessShareLock); + + ScanKeyInit(&skey[nkeys++], + Anum_pg_subscription_rel_srsubid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(subid)); + + ScanKeyInit(&skey[nkeys++], + Anum_pg_subscription_rel_srsubstate, + BTEqualStrategyNumber, F_CHARNE, + CharGetDatum(SUBREL_STATE_READY)); + + scan = systable_beginscan(rel, InvalidOid, false, + NULL, nkeys, skey); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_subscription_rel subrel; + SubscriptionRelState *relstate; + + subrel = (Form_pg_subscription_rel) GETSTRUCT(tup); + + relstate = (SubscriptionRelState *)palloc(sizeof(SubscriptionRelState)); + relstate->relid = subrel->srrelid; + relstate->state = subrel->srsubstate; + relstate->lsn = subrel->srsublsn; + + res = lappend(res, relstate); + } + + /* Cleanup */ + systable_endscan(scan); + heap_close(rel, AccessShareLock); + + return res; +} diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index ba980de..1377977 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -733,6 +733,7 @@ CREATE VIEW pg_stat_subscription AS su.oid AS subid, su.subname, st.pid, + st.relid, st.received_lsn, st.last_msg_send_time, st.last_msg_receipt_time, diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 01a63c8..2ad1678 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -60,7 +60,8 @@ typedef enum CopyDest { COPY_FILE, /* to/from file (or a piped program) */ COPY_OLD_FE, /* to/from frontend (2.0 protocol) */ - COPY_NEW_FE /* to/from frontend (3.0 protocol) */ + COPY_NEW_FE, /* to/from frontend (3.0 protocol) */ + COPY_CALLBACK /* to/from callback function */ } CopyDest; /* @@ -109,6 +110,7 @@ typedef struct CopyStateData List *attnumlist; /* integer list of attnums to copy */ char *filename; /* filename, or NULL for STDIN/STDOUT */ bool is_program; /* is 'filename' a program to popen? */ + copy_data_source_cb data_source_cb; /* function for reading data*/ bool binary; /* binary format? */ bool oids; /* include OIDs? */ bool freeze; /* freeze rows on loading? 
*/ @@ -299,7 +301,6 @@ static uint64 DoCopyTo(CopyState cstate); static uint64 CopyTo(CopyState cstate); static void CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum *values, bool *nulls); -static uint64 CopyFrom(CopyState cstate); static void CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid, int hi_options, ResultRelInfo *resultRelInfo, TupleTableSlot *myslot, @@ -529,6 +530,9 @@ CopySendEndOfRow(CopyState cstate) /* Dump the accumulated row as one CopyData message */ (void) pq_putmessage('d', fe_msgbuf->data, fe_msgbuf->len); break; + case COPY_CALLBACK: + Assert(false); /* Not yet supported. */ + break; } resetStringInfo(fe_msgbuf); @@ -643,6 +647,9 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread) bytesread += avail; } break; + case COPY_CALLBACK: + bytesread = cstate->data_source_cb(databuf, minread, maxread); + break; } return bytesread; @@ -968,7 +975,7 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, PreventCommandIfParallelMode("COPY FROM"); cstate = BeginCopyFrom(pstate, rel, stmt->filename, stmt->is_program, - stmt->attlist, stmt->options); + NULL, stmt->attlist, stmt->options); cstate->range_table = range_table; *processed = CopyFrom(cstate); /* copy from file to database */ EndCopyFrom(cstate); @@ -2285,7 +2292,7 @@ limit_printout_length(const char *str) /* * Copy FROM file to relation. */ -static uint64 +uint64 CopyFrom(CopyState cstate) { HeapTuple tuple; @@ -2877,6 +2884,7 @@ BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename, bool is_program, + copy_data_source_cb data_source_cb, List *attnamelist, List *options) { @@ -2991,7 +2999,12 @@ BeginCopyFrom(ParseState *pstate, cstate->num_defaults = num_defaults; cstate->is_program = is_program; - if (pipe) + if (data_source_cb) + { + cstate->copy_dest = COPY_CALLBACK; + cstate->data_source_cb = data_source_cb; + } + else if (pipe) { Assert(!is_program); /* the grammar does not allow this */ if (whereToSendOutput == DestRemote) diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 0036d99..ef33ece 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -20,11 +20,14 @@ #include "access/htup_details.h" #include "access/xact.h" +#include "catalog/dependency.h" #include "catalog/indexing.h" +#include "catalog/namespace.h" #include "catalog/objectaccess.h" #include "catalog/objectaddress.h" #include "catalog/pg_type.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" #include "commands/defrem.h" #include "commands/event_trigger.h" @@ -33,11 +36,13 @@ #include "replication/logicallauncher.h" #include "replication/origin.h" #include "replication/walreceiver.h" +#include "replication/walsender.h" #include "replication/worker_internal.h" #include "storage/lmgr.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" @@ -49,17 +54,17 @@ * accomodate that. 
*/ static void -parse_subscription_options(List *options, char **conninfo, - List **publications, bool *enabled_given, - bool *enabled, bool *create_slot, char **slot_name) +parse_subscription_options(List *options, bool *connect, bool *enabled_given, + bool *enabled, bool *create_slot, char **slot_name, + bool *copy_data) { ListCell *lc; + bool connect_given = false; bool create_slot_given = false; + bool copy_data_given = false; - if (conninfo) - *conninfo = NULL; - if (publications) - *publications = NIL; + if (connect) + *connect = true; if (enabled) { *enabled_given = false; @@ -69,29 +74,23 @@ parse_subscription_options(List *options, char **conninfo, *create_slot = true; if (slot_name) *slot_name = NULL; + if (copy_data) + *copy_data = true; /* Parse options */ foreach (lc, options) { DefElem *defel = (DefElem *) lfirst(lc); - if (strcmp(defel->defname, "conninfo") == 0 && conninfo) + if (strcmp(defel->defname, "noconnect") == 0 && connect) { - if (*conninfo) + if (connect_given) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); - *conninfo = defGetString(defel); - } - else if (strcmp(defel->defname, "publication") == 0 && publications) - { - if (*publications) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("conflicting or redundant options"))); - - *publications = defGetStringList(defel); + connect_given = true; + *connect = !defGetBoolean(defel); } else if (strcmp(defel->defname, "enabled") == 0 && enabled) { @@ -142,9 +141,57 @@ parse_subscription_options(List *options, char **conninfo, *slot_name = defGetString(defel); } + else if (strcmp(defel->defname, "copy data") == 0 && copy_data) + { + if (copy_data_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + copy_data_given = true; + *copy_data = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "nocopy data") == 0 && copy_data) + { + if (copy_data_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + copy_data_given = true; + *copy_data = !defGetBoolean(defel); + } else elog(ERROR, "unrecognized option: %s", defel->defname); } + + /* + * We've been explicitly asked to not connect, that requires some + * additional processing. + */ + if (connect && !*connect) + { + /* Check for incompatible options from the user. */ + if (*enabled_given && *enabled) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("noconnect and enabled are mutually exclusive options"))); + + if (create_slot_given && *create_slot) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("noconnect and create slot are mutually exclusive options"))); + + if (copy_data_given && *copy_data) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("noconnect and copy data are mutually exclusive options"))); + + /* Change the defaults of other options. */ + *enabled = false; + *create_slot = false; + *copy_data = false; + } } /* @@ -214,8 +261,10 @@ CreateSubscription(CreateSubscriptionStmt *stmt, bool isTopLevel) Datum values[Natts_pg_subscription]; Oid owner = GetUserId(); HeapTuple tup; + bool connect; bool enabled_given; bool enabled; + bool copy_data; char *conninfo; char *slotname; char originname[NAMEDATALEN]; @@ -226,9 +275,8 @@ CreateSubscription(CreateSubscriptionStmt *stmt, bool isTopLevel) * Parse and check options. * Connection and publication should not be specified here. 
*/ - parse_subscription_options(stmt->options, NULL, NULL, - &enabled_given, &enabled, - &create_slot, &slotname); + parse_subscription_options(stmt->options, &connect, &enabled_given, + &enabled, &create_slot, &slotname, ©_data); /* * Since creating a replication slot is not transactional, rolling back @@ -297,14 +345,17 @@ CreateSubscription(CreateSubscriptionStmt *stmt, bool isTopLevel) replorigin_create(originname); /* - * If requested, create the replication slot on remote side for our - * newly created subscription. + * Connect to remote side to execute requested commands and fetch table + * info. */ - if (create_slot) + if (connect) { XLogRecPtr lsn; char *err; WalReceiverConn *wrconn; + List *tables; + ListCell *lc; + char table_state; /* Try to connect to the publisher. */ wrconn = walrcv_connect(conninfo, true, stmt->subname, &err); @@ -314,10 +365,40 @@ CreateSubscription(CreateSubscriptionStmt *stmt, bool isTopLevel) PG_TRY(); { - walrcv_create_slot(wrconn, slotname, false, &lsn); + /* + * If requested, create the replication slot on remote side for our + * newly created subscription. + */ + if (create_slot) + { + walrcv_create_slot(wrconn, slotname, false, + CRS_NOEXPORT_SNAPSHOT, &lsn); + ereport(NOTICE, + (errmsg("created replication slot \"%s\" on publisher", + slotname))); + } + + /* Set sync state based on if we were asked to do data copy or not. */ + table_state = copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY; + + /* + * Get the table list from publisher and build local table status + * info. + */ + tables = walrcv_table_list(wrconn, publications); + foreach (lc, tables) + { + RangeVar *rv = (RangeVar *) lfirst(lc); + Oid relid; + + relid = RangeVarGetRelid(rv, AccessShareLock, true); + + SetSubscriptionRelState(subid, relid, table_state, + InvalidXLogRecPtr); + } + ereport(NOTICE, - (errmsg("created replication slot \"%s\" on publisher", - slotname))); + (errmsg("synchronized table states"))); } PG_CATCH(); { @@ -330,6 +411,11 @@ CreateSubscription(CreateSubscriptionStmt *stmt, bool isTopLevel) /* And we are done with the remote side. */ walrcv_disconnect(wrconn); } + else + ereport(WARNING, + (errmsg("tables were not subscribed, you will have to run " + "ALTER SUBSCRIPTION ... REFRESH PUBLICATION to " + "subscribe the tables"))); heap_close(rel, RowExclusiveLock); @@ -342,6 +428,108 @@ CreateSubscription(CreateSubscriptionStmt *stmt, bool isTopLevel) return myself; } +static void +AlterSubscription_refresh(Subscription *sub, bool copy_data) +{ + char *err; + List *pubrel_names; + List *subrel_states; + Oid *subrel_local_oids; + Oid *pubrel_local_oids; + ListCell *lc; + int off; + + /* Load the library providing us libpq calls. */ + load_file("libpqwalreceiver", false); + + /* Try to connect to the publisher. */ + wrconn = walrcv_connect(sub->conninfo, true, sub->name, &err); + if (!wrconn) + ereport(ERROR, + (errmsg("could not connect to the publisher: %s", err))); + + /* Get the table list from publisher. */ + pubrel_names = walrcv_table_list(wrconn, sub->publications); + + /* We are done with the remote side, close connection. */ + walrcv_disconnect(wrconn); + + /* Get local table list. */ + subrel_states = GetSubscriptionRelations(sub->oid); + + /* + * Build qsorted array of local table oids for faster lookup. + * This can potentially contain all tables in the database so + * speed of lookup is important. 
+ */ + subrel_local_oids = palloc(list_length(subrel_states) * sizeof(Oid)); + off = 0; + foreach(lc, subrel_states) + { + SubscriptionRelState *relstate = (SubscriptionRelState *) lfirst(lc); + subrel_local_oids[off++] = relstate->relid; + } + qsort(subrel_local_oids, list_length(subrel_states), + sizeof(Oid), oid_cmp); + + /* + * Walk over the remote tables and try to match them to locally + * known tables. If the table is not known locally create a new state + * for it. + * + * Also builds array of local oids of remote tables for the next step. + */ + off = 0; + pubrel_local_oids = palloc(list_length(pubrel_names) * sizeof(Oid)); + + foreach (lc, pubrel_names) + { + RangeVar *rv = (RangeVar *) lfirst(lc); + Oid relid; + + relid = RangeVarGetRelid(rv, AccessShareLock, false); + pubrel_local_oids[off++] = relid; + + if (!bsearch(&relid, subrel_local_oids, + list_length(subrel_states), sizeof(Oid), oid_cmp)) + { + SetSubscriptionRelState(sub->oid, relid, + copy_data ? SUBREL_STATE_INIT : SUBREL_STATE_READY, + InvalidXLogRecPtr); + ereport(NOTICE, + (errmsg("added subscription for table %s.%s", + quote_identifier(rv->schemaname), + quote_identifier(rv->relname)))); + } + } + + /* + * Next remove state for tables we should not care about anymore using + * the data we collected above + */ + qsort(pubrel_local_oids, list_length(pubrel_names), + sizeof(Oid), oid_cmp); + + for (off = 0; off < list_length(subrel_states); off++) + { + Oid relid = subrel_local_oids[off]; + + if (!bsearch(&relid, pubrel_local_oids, + list_length(pubrel_names), sizeof(Oid), oid_cmp)) + { + char *namespace; + + RemoveSubscriptionRel(sub->oid, relid); + + namespace = get_namespace_name(get_rel_namespace(relid)); + ereport(NOTICE, + (errmsg("removed subscription for table %s.%s", + quote_identifier(namespace), + quote_identifier(get_rel_name(relid))))); + } + } +} + /* * Alter the existing subscription. */ @@ -355,11 +543,7 @@ AlterSubscription(AlterSubscriptionStmt *stmt) Datum values[Natts_pg_subscription]; HeapTuple tup; Oid subid; - bool enabled_given; - bool enabled; - char *conninfo; - char *slot_name; - List *publications; + bool update_tuple = false; rel = heap_open(SubscriptionRelationId, RowExclusiveLock); @@ -380,52 +564,107 @@ AlterSubscription(AlterSubscriptionStmt *stmt) subid = HeapTupleGetOid(tup); - /* Parse options. */ - parse_subscription_options(stmt->options, &conninfo, &publications, - &enabled_given, &enabled, - NULL, &slot_name); - /* Form a new tuple. 
*/ memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replaces, false, sizeof(replaces)); - if (enabled_given) - { - values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled); - replaces[Anum_pg_subscription_subenabled - 1] = true; - } - if (conninfo) + switch (stmt->kind) { - values[Anum_pg_subscription_subconninfo - 1] = - CStringGetTextDatum(conninfo); - replaces[Anum_pg_subscription_subconninfo - 1] = true; - } - if (slot_name) - { - values[Anum_pg_subscription_subslotname - 1] = - DirectFunctionCall1(namein, CStringGetDatum(slot_name)); - replaces[Anum_pg_subscription_subslotname - 1] = true; - } - if (publications != NIL) - { - values[Anum_pg_subscription_subpublications - 1] = - publicationListToArray(publications); - replaces[Anum_pg_subscription_subpublications - 1] = true; + case ALTER_SUBSCRIPTION_OPTIONS: + { + char *slot_name; + + parse_subscription_options(stmt->options, NULL, NULL, NULL, + NULL, &slot_name, NULL); + + values[Anum_pg_subscription_subslotname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(slot_name)); + replaces[Anum_pg_subscription_subslotname - 1] = true; + + update_tuple = true; + break; + } + + case ALTER_SUBSCRIPTION_ENABLED: + { + bool enabled, + enabled_given; + + parse_subscription_options(stmt->options, NULL, + &enabled_given, &enabled, NULL, + NULL, NULL); + Assert(enabled_given); + + values[Anum_pg_subscription_subenabled - 1] = + BoolGetDatum(enabled); + replaces[Anum_pg_subscription_subenabled - 1] = true; + + update_tuple = true; + break; + } + + case ALTER_SUBSCRIPTION_CONNECTION: + values[Anum_pg_subscription_subconninfo - 1] = + CStringGetTextDatum(stmt->conninfo); + replaces[Anum_pg_subscription_subconninfo - 1] = true; + update_tuple = true; + break; + + case ALTER_SUBSCRIPTION_PUBLICATION: + { + bool copy_data; + Subscription *sub = GetSubscription(subid, false); + + parse_subscription_options(stmt->options, NULL, NULL, NULL, + NULL, NULL, ©_data); + + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(stmt->publication); + replaces[Anum_pg_subscription_subpublications - 1] = true; + + update_tuple = true; + + /* Make sure refresh sees the new list of publications. */ + sub->publications = stmt->publication; + AlterSubscription_refresh(sub, copy_data); + + break; + } + + case ALTER_SUBSCRIPTION_REFRESH: + { + bool copy_data; + Subscription *sub = GetSubscription(subid, false); + + parse_subscription_options(stmt->options, NULL, NULL, NULL, + NULL, NULL, ©_data); + + AlterSubscription_refresh(sub, copy_data); + + break; + } + + default: + elog(ERROR, "unrecognized ALTER SUBSCRIPTION kind %d", + stmt->kind); } - tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, - replaces); + /* Update the catalog if needed. */ + if (update_tuple) + { + tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, + replaces); - /* Update the catalog. */ - CatalogTupleUpdate(rel, &tup->t_self, tup); + CatalogTupleUpdate(rel, &tup->t_self, tup); - ObjectAddressSet(myself, SubscriptionRelationId, subid); + heap_freetuple(tup); + } - /* Cleanup. 
*/ - heap_freetuple(tup); heap_close(rel, RowExclusiveLock); + ObjectAddressSet(myself, SubscriptionRelationId, subid); + InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0); return myself; @@ -528,11 +767,14 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) /* Clean up dependencies */ deleteSharedDependencyRecordsFor(SubscriptionRelationId, subid, 0); + /* Remove any associated relation synchronization states. */ + RemoveSubscriptionRel(subid, InvalidOid); + /* Protect against launcher restarting the worker. */ LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE); /* Kill the apply worker so that the slot becomes accessible. */ - logicalrep_worker_stop(subid); + logicalrep_worker_stop(subid, InvalidOid); LWLockRelease(LogicalRepLauncherLock); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 29fde33..6ba9585 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -9130,6 +9130,7 @@ AlterSubscriptionStmt: { AlterSubscriptionStmt *n = makeNode(AlterSubscriptionStmt); + n->kind = ALTER_SUBSCRIPTION_OPTIONS; n->subname = $3; n->options = $5; $$ = (Node *)n; @@ -9138,24 +9139,35 @@ AlterSubscriptionStmt: { AlterSubscriptionStmt *n = makeNode(AlterSubscriptionStmt); + n->kind = ALTER_SUBSCRIPTION_CONNECTION; n->subname = $3; - n->options = list_make1(makeDefElem("conninfo", - (Node *)makeString($5), @1)); + n->conninfo = $5; $$ = (Node *)n; } - | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list + | ALTER SUBSCRIPTION name REFRESH PUBLICATION opt_definition { AlterSubscriptionStmt *n = makeNode(AlterSubscriptionStmt); + n->kind = ALTER_SUBSCRIPTION_REFRESH; n->subname = $3; - n->options = list_make1(makeDefElem("publication", - (Node *)$6, @1)); + n->options = $6; + $$ = (Node *)n; + } + | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list opt_definition + { + AlterSubscriptionStmt *n = + makeNode(AlterSubscriptionStmt); + n->kind = ALTER_SUBSCRIPTION_PUBLICATION; + n->subname = $3; + n->publication = $6; + n->options = $7; $$ = (Node *)n; } | ALTER SUBSCRIPTION name ENABLE_P { AlterSubscriptionStmt *n = makeNode(AlterSubscriptionStmt); + n->kind = ALTER_SUBSCRIPTION_ENABLED; n->subname = $3; n->options = list_make1(makeDefElem("enabled", (Node *)makeInteger(TRUE), @1)); @@ -9165,11 +9177,13 @@ AlterSubscriptionStmt: { AlterSubscriptionStmt *n = makeNode(AlterSubscriptionStmt); + n->kind = ALTER_SUBSCRIPTION_ENABLED; n->subname = $3; n->options = list_make1(makeDefElem("enabled", (Node *)makeInteger(FALSE), @1)); $$ = (Node *)n; - } ; + } + ; /***************************************************************************** * diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 2fb9a8b..a445eb5 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3401,6 +3401,12 @@ pgstat_get_wait_ipc(WaitEventIPC w) case WAIT_EVENT_SYNC_REP: event_name = "SyncRep"; break; + case WAIT_EVENT_LOGICAL_SYNC_DATA: + event_name = "LogicalSyncData"; + break; + case WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE: + event_name = "LogicalSyncStateChange"; + break; /* no default case, so that compiler will warn */ } diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 048d2aa..98ce43d 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -24,6 +24,7 @@ #include "access/xlog.h" #include "mb/pg_wchar.h" #include 
"miscadmin.h" +#include "nodes/makefuncs.h" #include "pgstat.h" #include "replication/logicalproto.h" #include "replication/walreceiver.h" @@ -68,9 +69,15 @@ static void libpqrcv_send(WalReceiverConn *conn, const char *buffer, static char *libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, bool temporary, + CRSSnapshotAction snapaction, XLogRecPtr *lsn); static bool libpqrcv_command(WalReceiverConn *conn, const char *cmd, char **err); +static List *libpqrcv_table_list(WalReceiverConn *conn, List *publications); +static void libpqrcv_table_info(WalReceiverConn *conn, const char *nspname, + const char *relname, LogicalRepRelation *lrel); +static void libpqrcv_table_copy(WalReceiverConn *conn, + const char *nspname, const char *relname); static void libpqrcv_disconnect(WalReceiverConn *conn); static WalReceiverFunctionsType PQWalReceiverFunctions = { @@ -85,6 +92,9 @@ static WalReceiverFunctionsType PQWalReceiverFunctions = { libpqrcv_send, libpqrcv_create_slot, libpqrcv_command, + libpqrcv_table_list, + libpqrcv_table_info, + libpqrcv_table_copy, libpqrcv_disconnect }; @@ -428,10 +438,8 @@ libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli) * next timeline's ID, or just CommandComplete if the server was shut * down. * - * If we had not yet received CopyDone from the backend, PGRES_COPY_IN - * would also be possible. However, at the moment this function is only - * called after receiving CopyDone from the backend - the walreceiver - * never terminates replication on its own initiative. + * If we had not yet received CopyDone from the backend, PGRES_COPY_OUT + * is also possible in case we aborted the copy in mid-stream. */ res = PQgetResult(conn->streamConn); if (PQresultStatus(res) == PGRES_TUPLES_OK) @@ -611,6 +619,203 @@ libpqrcv_PQexec(PGconn *streamConn, const char *query) } /* + * Obtain list of tables that belong to given replication sets. + */ +static List * +libpqrcv_table_list(WalReceiverConn *conn, List *publications) +{ + StringInfoData cmd; + PGresult *res; + int i; + ListCell *lc; + bool first; + List *tablelist = NIL; + + Assert(conn->logical); + Assert(list_length(publications) > 0); + + initStringInfo(&cmd); + appendStringInfo(&cmd, "SELECT DISTINCT t.schemaname, t.tablename\n" + " FROM pg_catalog.pg_publication_tables t\n" + " WHERE t.pubname IN ("); + first = true; + foreach (lc, publications) + { + char *pubname = strVal(lfirst(lc)); + + if (first) + first = false; + else + appendStringInfoString(&cmd, ", "); + + appendStringInfo(&cmd, "%s", quote_literal_cstr(pubname)); + } + appendStringInfoString(&cmd, ")"); + + res = libpqrcv_PQexec(conn->streamConn, cmd.data); + pfree(cmd.data); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + PQclear(res); + ereport(ERROR, + (errmsg("could not receive list of replicated tables from the publisher: %s", + PQerrorMessage(conn->streamConn)))); + } + if (PQnfields(res) != 2) + { + int nfields = PQnfields(res); + PQclear(res); + ereport(ERROR, + (errmsg("invalid response from publisher"), + errdetail("Expected 2 fields, got %d fields.", nfields))); + } + + for (i = 0; i < PQntuples(res); i++) + { + RangeVar *rv; + + rv = makeRangeVar(pstrdup(PQgetvalue(res, i, 0)), + pstrdup(PQgetvalue(res, i, 1)), -1); + + tablelist = lappend(tablelist, rv); + } + + PQclear(res); + + return tablelist; +} + +/* + * Fetch table info of a named table from the publisher and fill the lrel. 
+ */ +static void +libpqrcv_table_info(WalReceiverConn *conn, const char *nspname, + const char *relname, LogicalRepRelation *lrel) +{ + StringInfoData cmd; + PGresult *res; + int i; + + Assert(conn->logical); + + /* First fetch the oid of the table. */ + initStringInfo(&cmd); + appendStringInfo(&cmd, "SELECT c.oid, c.relreplident" + " FROM pg_catalog.pg_class c," + " pg_catalog.pg_namespace n" + " WHERE n.nspname = %s" + " AND c.relname = %s" + " AND c.relkind = 'r'", + quote_literal_cstr(nspname), + quote_literal_cstr(relname)); + + res = libpqrcv_PQexec(conn->streamConn, cmd.data); + pfree(cmd.data); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + PQclear(res); + ereport(ERROR, + (errmsg("could not fetch table info for table %s from publisher: %s", + quote_qualified_identifier(nspname, relname), + PQerrorMessage(conn->streamConn)))); + } + if (PQntuples(res) != 1) + { + PQclear(res); + ereport(ERROR, + (errmsg("table %s not found on publisher", + quote_qualified_identifier(nspname, relname)))); + } + + lrel->remoteid = atooid(PQgetvalue(res, 0, 0)); + lrel->replident = *PQgetvalue(res, 0, 1); + PQclear(res); + + lrel->nspname = pstrdup(nspname); + lrel->relname = pstrdup(relname); + + /* Now fetch columns. */ + initStringInfo(&cmd); + appendStringInfo(&cmd, + "SELECT a.attname," + " a.atttypid," + " a.atttypmod," + " a.attnum = ANY(i.indkey)" + " FROM pg_catalog.pg_attribute a" + " LEFT JOIN pg_catalog.pg_index i" + " ON (i.indexrelid = pg_get_replica_identity_index(%u))" + " WHERE a.attnum > 0::pg_catalog.int2" + " AND NOT a.attisdropped" + " AND a.attrelid = %u" + " ORDER BY a.attnum", + lrel->remoteid, lrel->remoteid); + + res = libpqrcv_PQexec(conn->streamConn, cmd.data); + pfree(cmd.data); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + PQclear(res); + ereport(ERROR, + (errmsg("could not fetch table info for table %s: %s", + quote_qualified_identifier(nspname, relname), + PQerrorMessage(conn->streamConn)))); + } + if (PQnfields(res) != 4) + { + int nfields = PQnfields(res); + PQclear(res); + ereport(ERROR, + (errmsg("invalid response from publisher"), + errdetail("Expected 4 fields, got %d fields.", nfields))); + } + + lrel->natts = PQntuples(res); + lrel->attnames = palloc(lrel->natts * sizeof(char *)); + lrel->atttyps = palloc(lrel->natts * sizeof(Oid)); + lrel->attkeys = NULL; + for (i = 0; i < lrel->natts; i++) + { + lrel->attnames[i] = pstrdup(PQgetvalue(res, i, 0)); + lrel->atttyps[i] = atooid(PQgetvalue(res, i, 1)); + if (strcmp(PQgetvalue(res, i, 3), "t") != 0) + lrel->attkeys = bms_add_member(lrel->attkeys, i); + } + PQclear(res); +} + +/* + * Start copy proccess of the existing data in a table. + */ +static void +libpqrcv_table_copy(WalReceiverConn *conn, const char *nspname, + const char *relname) +{ + StringInfoData cmd; + PGresult *res; + + Assert(conn->logical); + + initStringInfo(&cmd); + appendStringInfo(&cmd, "COPY %s TO STDOUT", + quote_qualified_identifier(nspname, relname)); + + res = libpqrcv_PQexec(conn->streamConn, cmd.data); + pfree(cmd.data); + + if (PQresultStatus(res) != PGRES_COPY_OUT) + { + PQclear(res); + ereport(ERROR, + (errmsg("could not start initial table contents copy: %s", + PQerrorMessage(conn->streamConn)))); + } + PQclear(res); +} + +/* * Disconnect connection to primary, if any. 
*/ static void @@ -672,8 +877,19 @@ libpqrcv_receive(WalReceiverConn *conn, char **buffer, PGresult *res; res = PQgetResult(conn->streamConn); - if (PQresultStatus(res) == PGRES_COMMAND_OK || - PQresultStatus(res) == PGRES_COPY_IN) + if (PQresultStatus(res) == PGRES_COMMAND_OK) + { + PQclear(res); + + /* Verify that there are no more results */ + res = PQgetResult(conn->streamConn); + if (res != NULL) + ereport(ERROR, + (errmsg("unexpected result after CommandComplete: %s", + PQerrorMessage(conn->streamConn)))); + return -1; + } + else if (PQresultStatus(res) == PGRES_COPY_IN) { PQclear(res); return -1; @@ -718,7 +934,8 @@ libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes) */ static char * libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, - bool temporary, XLogRecPtr *lsn) + bool temporary, CRSSnapshotAction snapaction, + XLogRecPtr *lsn) { PGresult *res; StringInfoData cmd; @@ -726,13 +943,27 @@ libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, initStringInfo(&cmd); - appendStringInfo(&cmd, "CREATE_REPLICATION_SLOT \"%s\" ", slotname); + appendStringInfo(&cmd, "CREATE_REPLICATION_SLOT \"%s\"", slotname); if (temporary) - appendStringInfo(&cmd, "TEMPORARY "); + appendStringInfo(&cmd, " TEMPORARY"); if (conn->logical) - appendStringInfo(&cmd, "LOGICAL pgoutput"); + { + appendStringInfo(&cmd, " LOGICAL pgoutput"); + switch (snapaction) + { + case CRS_EXPORT_SNAPSHOT: + appendStringInfo(&cmd, " EXPORT_SNAPSHOT"); + break; + case CRS_NOEXPORT_SNAPSHOT: + appendStringInfo(&cmd, " NOEXPORT_SNAPSHOT"); + break; + case CRS_USE_SNAPSHOT: + appendStringInfo(&cmd, " USE_SNAPSHOT"); + break; + } + } res = libpqrcv_PQexec(conn->streamConn, cmd.data); pfree(cmd.data); diff --git a/src/backend/replication/logical/Makefile b/src/backend/replication/logical/Makefile index 259befa..bb417b0 100644 --- a/src/backend/replication/logical/Makefile +++ b/src/backend/replication/logical/Makefile @@ -15,6 +15,6 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) OBJS = decode.o launcher.o logical.o logicalfuncs.o message.o origin.o \ - proto.o relation.o reorderbuffer.o snapbuild.o worker.o + proto.o relation.o reorderbuffer.o snapbuild.o tablesync.o worker.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 39530f96..3e724de 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -27,6 +27,7 @@ #include "access/xact.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" #include "libpq/pqsignal.h" @@ -56,6 +57,8 @@ #define DEFAULT_NAPTIME_PER_CYCLE 180000L int max_logical_replication_workers = 4; +int max_sync_workers_per_subscription = 2; + LogicalRepWorker *MyLogicalRepWorker = NULL; typedef struct LogicalRepCtxStruct @@ -198,20 +201,22 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, /* * Walks the workers array and searches for one that matches given - * subscription id. + * subscription id and relid. */ LogicalRepWorker * -logicalrep_worker_find(Oid subid) +logicalrep_worker_find(Oid subid, Oid relid, bool only_running) { int i; LogicalRepWorker *res = NULL; Assert(LWLockHeldByMe(LogicalRepWorkerLock)); + /* Search for attached worker for a given subscription id. 
*/ for (i = 0; i < max_logical_replication_workers; i++) { LogicalRepWorker *w = &LogicalRepCtx->workers[i]; - if (w->subid == subid && w->proc && IsBackendPid(w->proc->pid)) + if (w->subid == subid && w->relid == relid && + (!only_running || (w->proc && IsBackendPid(w->proc->pid)))) { res = w; break; @@ -225,7 +230,8 @@ logicalrep_worker_find(Oid subid) * Start new apply background worker. */ void -logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid) +logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid, + Oid relid) { BackgroundWorker bgw; BackgroundWorkerHandle *bgw_handle; @@ -270,10 +276,18 @@ logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid) } /* Prepare the worker info. */ - memset(worker, 0, sizeof(LogicalRepWorker)); + worker->proc = NULL; worker->dbid = dbid; worker->userid = userid; worker->subid = subid; + worker->relid = relid; + worker->relstate = SUBREL_STATE_UNKNOWN; + worker->relstate_lsn = InvalidXLogRecPtr; + worker->last_lsn = InvalidXLogRecPtr; + TIMESTAMP_NOBEGIN(worker->last_send_time); + TIMESTAMP_NOBEGIN(worker->last_recv_time); + worker->reply_lsn = InvalidXLogRecPtr; + TIMESTAMP_NOBEGIN(worker->reply_time); LWLockRelease(LogicalRepWorkerLock); @@ -285,6 +299,13 @@ logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid) snprintf(bgw.bgw_name, BGW_MAXLEN, "logical replication worker for subscription %u", subid); + if (OidIsValid(relid)) + snprintf(bgw.bgw_name, BGW_MAXLEN, + "logical replication worker %u sync %u", subid, relid); + else + snprintf(bgw.bgw_name, BGW_MAXLEN, + "logical replication worker %u", subid); + bgw.bgw_restart_time = BGW_NEVER_RESTART; bgw.bgw_notify_pid = MyProcPid; bgw.bgw_main_arg = slot; @@ -310,7 +331,7 @@ logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid) * not being started during this function call. */ void -logicalrep_worker_stop(Oid subid) +logicalrep_worker_stop(Oid subid, Oid relid) { LogicalRepWorker *worker; @@ -318,7 +339,7 @@ logicalrep_worker_stop(Oid subid) LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); - worker = logicalrep_worker_find(subid); + worker = logicalrep_worker_find(subid, relid, false); /* No worker, nothing to do. */ if (!worker) @@ -401,6 +422,32 @@ logicalrep_worker_stop(Oid subid) } /* + * Wake up (using latch) the logical replication worker. + */ +void +logicalrep_worker_wakeup(Oid subid, Oid relid) +{ + LogicalRepWorker *worker; + + LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); + worker = logicalrep_worker_find(subid, relid, true); + LWLockRelease(LogicalRepWorkerLock); + + if (worker) + SetLatch(&worker->proc->procLatch); +} + +/* + * Wake up (using latch) the logical replication worker. + */ +void +logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker) +{ + if (worker) + SetLatch(&worker->proc->procLatch); +} + +/* * Attach to a slot. */ void @@ -463,6 +510,29 @@ logicalrep_worker_sigterm(SIGNAL_ARGS) } /* + * Count the number of registered (not necessarily running) sync workers + * for a subscription. + */ +int +logicalrep_sync_worker_count(Oid subid) +{ + int i; + int res = 0; + + Assert(LWLockHeldByMe(LogicalRepWorkerLock)); + + /* Search for attached worker for a given subscription id. 
*/ + for (i = 0; i < max_logical_replication_workers; i++) + { + LogicalRepWorker *w = &LogicalRepCtx->workers[i]; + if (w->subid == subid && OidIsValid(w->relid)) + res++; + } + + return res; +} + +/* * ApplyLauncherShmemSize * Compute space needed for replication launcher shared memory */ @@ -517,7 +587,20 @@ ApplyLauncherShmemInit(void) &found); if (!found) + { + int slot; + memset(LogicalRepCtx, 0, ApplyLauncherShmemSize()); + + /* Find unused worker slot. */ + for (slot = 0; slot < max_logical_replication_workers; slot++) + { + LogicalRepWorker *worker = &LogicalRepCtx->workers[slot]; + + memset(worker, 0, sizeof(LogicalRepWorker)); + SpinLockInit(&worker->relmutex); + } + } } /* @@ -615,12 +698,13 @@ ApplyLauncherMain(Datum main_arg) LogicalRepWorker *w; LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); - w = logicalrep_worker_find(sub->oid); + w = logicalrep_worker_find(sub->oid, InvalidOid, false); LWLockRelease(LogicalRepWorkerLock); if (sub->enabled && w == NULL) { - logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner); + logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, + sub->owner, InvalidOid); last_start_time = now; wait_time = wal_retrieve_retry_interval; /* Limit to one worker per mainloop cycle. */ @@ -674,7 +758,7 @@ ApplyLauncherMain(Datum main_arg) Datum pg_stat_get_subscription(PG_FUNCTION_ARGS) { -#define PG_STAT_GET_SUBSCRIPTION_COLS 7 +#define PG_STAT_GET_SUBSCRIPTION_COLS 8 Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0); int i; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; @@ -733,27 +817,31 @@ pg_stat_get_subscription(PG_FUNCTION_ARGS) MemSet(nulls, 0, sizeof(nulls)); values[0] = ObjectIdGetDatum(worker.subid); - values[1] = Int32GetDatum(worker_pid); + if (!OidIsValid(worker.relid)) + nulls[1] = true; + else + values[1] = ObjectIdGetDatum(worker.relid); + values[2] = Int32GetDatum(worker_pid); if (XLogRecPtrIsInvalid(worker.last_lsn)) - nulls[2] = true; + nulls[3] = true; else - values[2] = LSNGetDatum(worker.last_lsn); + values[3] = LSNGetDatum(worker.last_lsn); if (worker.last_send_time == 0) - nulls[3] = true; + nulls[4] = true; else - values[3] = TimestampTzGetDatum(worker.last_send_time); + values[4] = TimestampTzGetDatum(worker.last_send_time); if (worker.last_recv_time == 0) - nulls[4] = true; + nulls[5] = true; else - values[4] = TimestampTzGetDatum(worker.last_recv_time); + values[5] = TimestampTzGetDatum(worker.last_recv_time); if (XLogRecPtrIsInvalid(worker.reply_lsn)) - nulls[5] = true; + nulls[6] = true; else - values[5] = LSNGetDatum(worker.reply_lsn); + values[6] = LSNGetDatum(worker.reply_lsn); if (worker.reply_time == 0) - nulls[6] = true; + nulls[7] = true; else - values[6] = TimestampTzGetDatum(worker.reply_time); + values[7] = TimestampTzGetDatum(worker.reply_time); tuplestore_putvalues(tupstore, tupdesc, values, nulls); diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c index d8dc0c7..875a081 100644 --- a/src/backend/replication/logical/relation.c +++ b/src/backend/replication/logical/relation.c @@ -19,6 +19,7 @@ #include "access/heapam.h" #include "access/sysattr.h" #include "catalog/namespace.h" +#include "catalog/pg_subscription_rel.h" #include "nodes/makefuncs.h" #include "replication/logicalrelation.h" #include "replication/worker_internal.h" @@ -357,6 +358,12 @@ logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode) else entry->localrel = heap_open(entry->localreloid, lockmode); + if (entry->state != SUBREL_STATE_READY) + 
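With the relid column added above, pg_stat_get_subscription() now returns eight columns; per the matching pg_proc.h change later in the patch, the output layout is summarized below (relid is NULL for the main apply worker):

    /*
     * subid, relid, pid, received_lsn, last_msg_send_time,
     * last_msg_receipt_time, latest_end_lsn, latest_end_time
     */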
entry->state = GetSubscriptionRelState(MySubscription->oid, + entry->localreloid, + &entry->statelsn, + true); + return entry; } diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index c800aa5..c7e184e 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -497,51 +497,32 @@ SnapBuildBuildSnapshot(SnapBuild *builder, TransactionId xid) } /* - * Export a snapshot so it can be set in another session with SET TRANSACTION - * SNAPSHOT. - * - * For that we need to start a transaction in the current backend as the - * importing side checks whether the source transaction is still open to make - * sure the xmin horizon hasn't advanced since then. + * Build the initial slot snapshot and convert it to normal snapshot that + * is understood by HeapTupleSatisfiesMVCC. * - * After that we convert a locally built snapshot into the normal variant - * understood by HeapTupleSatisfiesMVCC et al. + * The snapshot will be usable directly in current transaction or exported + * for loading in different transaction. */ -const char * -SnapBuildExportSnapshot(SnapBuild *builder) +Snapshot +SnapBuildInitalSnapshot(SnapBuild *builder) { Snapshot snap; - char *snapname; TransactionId xid; TransactionId *newxip; int newxcnt = 0; + Assert(!FirstSnapshotSet); + Assert(XactIsoLevel = XACT_REPEATABLE_READ); + if (builder->state != SNAPBUILD_CONSISTENT) - elog(ERROR, "cannot export a snapshot before reaching a consistent state"); + elog(ERROR, "cannot build and initial slot snapshot before reaching a consistent state"); if (!builder->committed.includes_all_transactions) - elog(ERROR, "cannot export a snapshot, not all transactions are monitored anymore"); + elog(ERROR, "cannot build and initial slot snapshot, not all transactions are monitored anymore"); /* so we don't overwrite the existing value */ if (TransactionIdIsValid(MyPgXact->xmin)) - elog(ERROR, "cannot export a snapshot when MyPgXact->xmin already is valid"); - - if (IsTransactionOrTransactionBlock()) - elog(ERROR, "cannot export a snapshot from within a transaction"); - - if (SavedResourceOwnerDuringExport) - elog(ERROR, "can only export one snapshot at a time"); - - SavedResourceOwnerDuringExport = CurrentResourceOwner; - ExportInProgress = true; - - StartTransactionCommand(); - - Assert(!FirstSnapshotSet); - - /* There doesn't seem to a nice API to set these */ - XactIsoLevel = XACT_REPEATABLE_READ; - XactReadOnly = true; + elog(ERROR, "cannot build and initial slot snapshot when MyPgXact->xmin already is valid"); snap = SnapBuildBuildSnapshot(builder, GetTopTransactionId()); @@ -576,7 +557,9 @@ SnapBuildExportSnapshot(SnapBuild *builder) if (test == NULL) { if (newxcnt >= GetMaxSnapshotXidCount()) - elog(ERROR, "snapshot too large"); + ereport(ERROR, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("initial slot snapshot too large"))); newxip[newxcnt++] = xid; } @@ -587,9 +570,43 @@ SnapBuildExportSnapshot(SnapBuild *builder) snap->xcnt = newxcnt; snap->xip = newxip; + return snap; +} + +/* + * Export a snapshot so it can be set in another session with SET TRANSACTION + * SNAPSHOT. + * + * For that we need to start a transaction in the current backend as the + * importing side checks whether the source transaction is still open to make + * sure the xmin horizon hasn't advanced since then. 
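The new SnapBuildInitalSnapshot() entry point leaves transaction setup to its caller; a minimal sketch of the expected calling pattern, pieced together from the two call sites in this patch (SnapBuildExportSnapshot() below and the walsender USE_SNAPSHOT path), with error handling omitted:

    /* caller must be in a transaction at REPEATABLE READ with no snapshot set yet */
    snap = SnapBuildInitalSnapshot(ctx->snapshot_builder);

    /* then either export it for use by another session ... */
    snapname = ExportSnapshot(snap);

    /* ... or install it in the current transaction */
    RestoreTransactionSnapshot(snap, MyProc);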
+ */ +const char * +SnapBuildExportSnapshot(SnapBuild *builder) +{ + Snapshot snap; + char *snapname; + + if (IsTransactionOrTransactionBlock()) + elog(ERROR, "cannot export a snapshot from within a transaction"); + + if (SavedResourceOwnerDuringExport) + elog(ERROR, "can only export one snapshot at a time"); + + SavedResourceOwnerDuringExport = CurrentResourceOwner; + ExportInProgress = true; + + StartTransactionCommand(); + + /* There doesn't seem to a nice API to set these */ + XactIsoLevel = XACT_REPEATABLE_READ; + XactReadOnly = true; + + snap = SnapBuildInitalSnapshot(builder); + /* - * now that we've built a plain snapshot, use the normal mechanisms for - * exporting it + * now that we've built a plain snapshot, make it active and use the + * normal mechanisms for exporting it */ snapname = ExportSnapshot(snap); diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c new file mode 100644 index 0000000..99352ed --- /dev/null +++ b/src/backend/replication/logical/tablesync.c @@ -0,0 +1,708 @@ +/*------------------------------------------------------------------------- + * tablesync.c + * PostgreSQL logical replication + * + * Copyright (c) 2012-2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/replication/logical/tablesync.c + * + * NOTES + * This file contains code for initial table data synchronization for + * logical replication. + * + * The initial data synchronization is done separately for each table, + * in separate apply worker that only fetches the initial snapshot data + * from the publisher and then synchronizes the position in stream with + * the main apply worker. + * + * The are several reasons for doing the synchronization this way: + * - It allows us to parallelize the initial data synchronization + * which lowers the time needed for it to happen. + * - The initial synchronization does not have to hold the xid and LSN + * for the time it takes to copy data of all tables, causing less + * bloat and lower disk consumption compared to doing the + * synchronization in single process for whole database. + * - It allows us to synchronize the tables added after the initial + * synchronization has finished. + * + * The stream position synchronization works in multiple steps. + * - Sync finishes copy and sets table state as SYNCWAIT and waits + * for state to change in a loop. + * - Apply periodically checks tables that are synchronizing for SYNCWAIT. + * When the desired state appears it will compare its position in the + * stream with the SYNCWAIT position and based on that changes the + * state to based on following rules: + * - if the apply is in front of the sync in the wal stream the new + * state is set to CATCHUP and apply loops until the sync process + * catches up to the same LSN as apply + * - if the sync if in front of the apply in the wal stream the new + * state is set to SYNCDONE + * - if both apply and sync are at the same position in the wal stream + * the state of the table is set to READY + * - If the state was set to CATCHUP sync will read the stream and + * apply changes until it catches up to the specified stream + * position and then sets state to READY and signals apply that it + * can stop waiting and exits, if the state was set to something + * else than CATCHUP the sync process will simply end. 
+ * - If the state was set to SYNCDONE by apply, the apply will + * continue tracking the table until it reaches the SYNCDONE stream + * position at which point it sets state to READY and stops tracking. + * + * The catalog pg_subscription_rel is used to keep information about + * subscribed tables and their state and some transient state during + * data synchronization is kept in shared memory. + * + * Example flows look like this: + * - Apply is in front: + * sync:8 + * -> set SYNCWAIT + * apply:10 + * -> set CATCHUP + * -> enter wait-loop + * sync:10 + * -> set READY + * -> exit + * apply:10 + * -> exit wait-loop + * -> continue rep + * - Sync in front: + * sync:10 + * -> set SYNCWAIT + * apply:8 + * -> set SYNCDONE + * -> continue per-table filtering + * sync:10 + * -> exit + * apply:10 + * -> set READY + * -> stop per-table filtering + * -> continue rep + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "pgstat.h" + +#include "access/xact.h" + +#include "catalog/pg_subscription_rel.h" + +#include "commands/copy.h" + +#include "replication/logicallauncher.h" +#include "replication/logicalrelation.h" +#include "replication/walreceiver.h" +#include "replication/worker_internal.h" + +#include "storage/ipc.h" + +#include "utils/lsyscache.h" +#include "utils/memutils.h" + +static List *table_states = NIL; +static bool table_states_valid = false; + +StringInfo copybuf = NULL; + +/* + * Exit routine for synchronization worker. + */ +static void +finish_sync_worker(char *slotname) +{ + /* Commit any outstanding trasnsaction, */ + if (IsTransactionState()) + CommitTransactionCommand(); + + /* And flush all writes. */ + XLogFlush(GetXLogWriteRecPtr()); + + /* Find the main apply worker and signal it. */ + logicalrep_worker_wakeup(MyLogicalRepWorker->subid, InvalidOid); + + ereport(LOG, + (errmsg("logical replication synchronization worker finished processing"))); + + /* Stop gracefully */ + walrcv_disconnect(wrconn); + proc_exit(0); +} + +/* + * Wait until the table synchronization change. + * + * Returns false if the relation subscription state disappeared. + */ +static bool +wait_for_sync_status_change(Oid relid, char origstate) +{ + int rc; + char state = origstate; + + while (!got_SIGTERM) + { + LogicalRepWorker *worker; + + LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); + worker = logicalrep_worker_find(MyLogicalRepWorker->subid, + relid, false); + if (!worker) + { + LWLockRelease(LogicalRepWorkerLock); + return false; + } + state = worker->relstate; + LWLockRelease(LogicalRepWorkerLock); + + if (state == SUBREL_STATE_UNKNOWN) + return false; + + if (state != origstate) + return true; + + rc = WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + 10000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE); + + /* emergency bailout if postmaster has died */ + if (rc & WL_POSTMASTER_DEATH) + proc_exit(1); + + ResetLatch(&MyProc->procLatch); + } + + return false; +} + +/* + * Callback from syscache invalidation. + */ +void +invalidate_syncing_table_states(Datum arg, int cacheid, uint32 hashvalue) +{ + table_states_valid = false; +} + +/* + * Handle table synchronization cooperation from the synchronization + * worker. + * + * If the sync worker is in catch up mode and reached the predetermined + * synchronization point in wal stream, it will mark the table as ready and + * finish. 
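As a quick reference for the states discussed above, the characters used in pg_subscription_rel.srsubstate (defined in pg_subscription_rel.h later in the patch) are:

    SUBREL_STATE_INIT      'i'   catalog: table known, copy not started
    SUBREL_STATE_DATASYNC  'd'   catalog: initial data copy in progress
    SUBREL_STATE_SYNCWAIT  'w'   shared memory only: copy done, waiting for apply
    SUBREL_STATE_CATCHUP   'c'   shared memory only: sync catching up to apply
    SUBREL_STATE_SYNCDONE  's'   catalog: synced ahead of apply, still tracked
    SUBREL_STATE_READY     'r'   catalog: normal replication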
+ */ +static void +process_syncing_tables_for_sync(char *slotname, XLogRecPtr end_lsn) +{ + TimeLineID tli; + + Assert(IsTransactionState()); + + /* + * Synchronization worker has catched up with apply. Update the table + * state and finish. + */ + SpinLockAcquire(&MyLogicalRepWorker->relmutex); + + if (MyLogicalRepWorker->relstate == SUBREL_STATE_CATCHUP && + end_lsn >= MyLogicalRepWorker->relstate_lsn) + { + MyLogicalRepWorker->relstate = + (end_lsn == MyLogicalRepWorker->relstate_lsn) + ? SUBREL_STATE_READY + : SUBREL_STATE_SYNCDONE; + MyLogicalRepWorker->relstate_lsn = end_lsn; + + SpinLockRelease(&MyLogicalRepWorker->relmutex); + + /* The synchronization is done so write it into catalog. */ + SetSubscriptionRelState(MyLogicalRepWorker->subid, + MyLogicalRepWorker->relid, + MyLogicalRepWorker->relstate, + MyLogicalRepWorker->relstate_lsn); + + walrcv_endstreaming(wrconn, &tli); + finish_sync_worker(slotname); + } + else + SpinLockRelease(&MyLogicalRepWorker->relmutex); +} + +/* + * Handle table synchronization cooperation from the apply worker. + * + * Walk over all subscription tables that are individually tracked by apply + * process (currently all that have state other than SUBREL_STATE_READY) and + * manage synchronization for them. + * + * In case there are tables that need synchronized and are not being + * synchronized yet (and there are free slots for sync workers) it will start + * sync workers for them. + * + * For tables that are being synchronized already, it will check if sync + * workers either need action from the apply worker or have finished. + * + * The usual action needed by apply is to mark table for catchup and wait for + * the catchup to happen. In case that sync worker got in front of apply + * worker it will mark the table as synced but not ready yet as it needs to be + * tracked until apply reaches the same position to which it was synced. + * + * In case the synchronization position is reached the table can be marked + * as ready and no longer tracked. + */ +static void +process_syncing_tables_for_apply(char *slotname, XLogRecPtr end_lsn) +{ + ListCell *lc; + + Assert(!IsTransactionState()); + + /* We need up to date sync state info for subscription tables here. */ + if (!table_states_valid) + { + MemoryContext oldctx; + List *rstates; + ListCell *lc; + SubscriptionRelState *rstate; + + /* Clean the old list. */ + list_free_deep(table_states); + table_states = NIL; + + StartTransactionCommand(); + + /* Fetch all non-ready tables. */ + rstates = GetSubscriptionNotReadyRelations(MySubscription->oid); + + /* Allocate the tracking info in a permanent memory context. */ + oldctx = MemoryContextSwitchTo(CacheMemoryContext); + foreach(lc, rstates) + { + rstate = palloc(sizeof(SubscriptionRelState)); + memcpy(rstate, lfirst(lc), sizeof(SubscriptionRelState)); + table_states = lappend(table_states, rstate); + } + MemoryContextSwitchTo(oldctx); + + CommitTransactionCommand(); + + table_states_valid = true; + } + + /* Process all tables that are being synchronized. */ + foreach(lc, table_states) + { + SubscriptionRelState *rstate = (SubscriptionRelState *)lfirst(lc); + + /* + * Apply has caught up to the position where the table sync + * has finished, time to mark the table as ready so that + * apply will just continue to replicate it normally. 
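The handoff described above boils down to a three-way comparison between the apply worker's position and the LSN at which the sync worker finished its copy; a sketch of the decision implemented further down in process_syncing_tables_for_apply() (variable names are illustrative):

    if (apply_lsn > sync_lsn)        /* apply is ahead */
        new_state = SUBREL_STATE_CATCHUP;    /* sync must replay up to apply_lsn */
    else if (apply_lsn == sync_lsn)  /* same position */
        new_state = SUBREL_STATE_READY;
    else                             /* sync is ahead */
        new_state = SUBREL_STATE_SYNCDONE;   /* apply keeps tracking until it catches up */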
+ */ + if (rstate->state == SUBREL_STATE_SYNCDONE && + end_lsn >= rstate->lsn) + { + rstate->state = SUBREL_STATE_READY; + rstate->lsn = end_lsn; + StartTransactionCommand(); + SetSubscriptionRelState(MyLogicalRepWorker->subid, + rstate->relid, rstate->state, + rstate->lsn); + CommitTransactionCommand(); + + continue; + } + else + { + LogicalRepWorker *worker; + int nworkers = 0; + + LWLockAcquire(LogicalRepWorkerLock, LW_SHARED); + worker = logicalrep_worker_find(MyLogicalRepWorker->subid, + rstate->relid, false); + if (worker) + { + SpinLockAcquire(&worker->relmutex); + rstate->state = worker->relstate; + rstate->lsn = worker->relstate_lsn; + SpinLockRelease(&worker->relmutex); + } + else + nworkers = logicalrep_sync_worker_count(MyLogicalRepWorker->subid); + LWLockRelease(LogicalRepWorkerLock); + + /* + * There is a worker synchronizing the relation and waiting for + * apply to do something. + */ + if (worker && rstate->state == SUBREL_STATE_SYNCWAIT) + { + /* + * When the synchronization process is at the catchup phase. + * + * There are three possible synchronization situations here. + * a) Apply is infront of the table sync, in this case we + * tell the table sync to catch up. + * b) Apply is behind the table sync, in that case we tell + * the table sync to mark the table as syncdone and finish. + * c) Apply and table sync are at the same position, in which + * case we tell table sync to mark the table as ready and + * finish. + * + * In any case we'll need to wait for tablesync to change + * the state in catalog and only then continue ourselves. + */ + if (end_lsn > rstate->lsn) + { + rstate->state = SUBREL_STATE_CATCHUP; + rstate->lsn = end_lsn; + } + else if (end_lsn == rstate->lsn) + { + rstate->state = SUBREL_STATE_READY; + rstate->lsn = end_lsn; + } + else + rstate->state = SUBREL_STATE_SYNCDONE; + + SpinLockAcquire(&worker->relmutex); + worker->relstate = rstate->state; + worker->relstate_lsn = rstate->lsn; + SpinLockRelease(&worker->relmutex); + + /* Signal the worker as it may be waiting for us. */ + logicalrep_worker_wakeup_ptr(worker); + + /* + * Enter busy loop and wait for synchronization status + * change. + */ + wait_for_sync_status_change(rstate->relid, rstate->state); + } + + /* + * If there is no sync worker registered for the table and + * there is some free sync worker slot, start new sync worker + * for the table. + */ + if (!worker && nworkers < max_sync_workers_per_subscription) + { + logicalrep_worker_launch(MyLogicalRepWorker->dbid, + MySubscription->oid, + MySubscription->name, + MyLogicalRepWorker->userid, + rstate->relid); + } + } + } +} + +/* + * Process state possible change(s) of tables that are being synchronized. + */ +void +process_syncing_tables(char *slotname, XLogRecPtr end_lsn) +{ + if (OidIsValid(MyLogicalRepWorker->relid)) + process_syncing_tables_for_sync(slotname, end_lsn); + else + process_syncing_tables_for_apply(slotname, end_lsn); +} + +/* + * Create list of columns for COPY based on logical relation mapping. + */ +static List * +make_copy_attnamelist(LogicalRepRelMapEntry *rel) +{ + List *attnamelist = NIL; + TupleDesc desc = RelationGetDescr(rel->localrel); + int i; + + for (i = 0; i < desc->natts; i++) + { + int remoteattnum = rel->attrmap[i]; + + /* Skip dropped attributes. */ + if (desc->attrs[i]->attisdropped) + continue; + + /* Skip attributes that are missing on remote side. 
*/ + if (remoteattnum < 0) + continue; + + attnamelist = lappend(attnamelist, + makeString(rel->remoterel.attnames[remoteattnum])); + } + + return attnamelist; +} + +/* + * Callback for the COPY FROM which reads from the remote connection + * and passes the data back to our local COPY. + */ +static int +copy_read_data(void *outbuf, int minread, int maxread) +{ + int bytesread = 0; + int avail; + + /* If there are some leftover data from previous read, use them. */ + avail = copybuf->len - copybuf->cursor; + if (avail) + { + if (avail > maxread) + avail = maxread; + memcpy(outbuf, ©buf->data[copybuf->cursor], avail); + copybuf->cursor += avail; + maxread -= avail; + bytesread += avail; + } + + while (!got_SIGTERM && maxread > 0 && bytesread < minread) + { + pgsocket fd = PGINVALID_SOCKET; + int rc; + int len; + char *buf = NULL; + + for (;;) + { + /* Try read the data. */ + len = walrcv_receive(wrconn, &buf, &fd); + + CHECK_FOR_INTERRUPTS(); + + if (len == 0) + { + break; + } + else if (len < 0) + { + return bytesread; + } + else + { + /* Process the data */ + copybuf->data = buf; + copybuf->len = len; + copybuf->cursor = 0; + + avail = copybuf->len - copybuf->cursor; + if (avail > maxread) + avail = maxread; + memcpy(outbuf, ©buf->data[copybuf->cursor], avail); + outbuf = (void *) ((char *) outbuf + avail); + copybuf->cursor += avail; + maxread -= avail; + bytesread += avail; + } + + if (maxread <= 0 || bytesread >= minread) + return bytesread; + } + + /* + * Wait for more data or latch. + */ + rc = WaitLatchOrSocket(&MyProc->procLatch, + WL_SOCKET_READABLE | WL_LATCH_SET | + WL_TIMEOUT | WL_POSTMASTER_DEATH, + fd, 1000L, WAIT_EVENT_LOGICAL_SYNC_DATA); + + /* Emergency bailout if postmaster has died */ + if (rc & WL_POSTMASTER_DEATH) + proc_exit(1); + + ResetLatch(&MyProc->procLatch); + } + + /* Check for exit condition. */ + if (got_SIGTERM) + proc_exit(0); + + return bytesread; +} + +/* + * Copy existing data of a table from publisher. + * + * Caller is responsible for locking the local relation. + */ +static void +copy_table(Relation rel) +{ + LogicalRepRelMapEntry *relmapentry; + LogicalRepRelation lrel; + CopyState cstate; + List *attnamelist; + + /* Get the publisher relation info. */ + walrcv_table_info(wrconn, + get_namespace_name(RelationGetNamespace(rel)), + RelationGetRelationName(rel), &lrel); + + /* Put the relation into relmap. */ + logicalrep_relmap_update(&lrel); + + /* Map the publisher relation to local one. */ + relmapentry = logicalrep_rel_open(lrel.remoteid, NoLock); + Assert(rel == relmapentry->localrel); + + /* Start copy on the publisher. */ + walrcv_table_copy(wrconn, + get_namespace_name(RelationGetNamespace(rel)), + RelationGetRelationName(rel)); + + copybuf = makeStringInfo(); + + /* Create CopyState for ingestion of the data from publisher. */ + attnamelist = make_copy_attnamelist(relmapentry); + cstate = BeginCopyFrom(NULL, rel, NULL, false, copy_read_data, attnamelist, NIL); + + /* Do the copy */ + (void) CopyFrom(cstate); + + logicalrep_rel_close(relmapentry, NoLock); + + CommandCounterIncrement(); +} + +/* + * Start syncing the table in the sync worker. + * + * The returned slot name is palloced in current memory context. + */ +char * +LogicalRepSyncTableStart(XLogRecPtr *origin_startpos) +{ + char slotname[NAMEDATALEN]; + char *err; + + /* Check the state of the table synchronization. 
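copy_table() above feeds the local COPY machinery through the data-source callback newly added to BeginCopyFrom(); a rough sketch of how a caller wires this up (only BeginCopyFrom/CopyFrom/EndCopyFrom are existing APIs here, the callback below is a stub whose contract follows copy_read_data() above):

    static int
    my_read_cb(void *outbuf, int minread, int maxread)
    {
        /* a real callback fills outbuf with up to maxread bytes, trying to
         * satisfy at least minread, and returns the byte count */
        return 0;
    }

    cstate = BeginCopyFrom(NULL, rel, NULL /* no file */, false /* not a program */,
                           my_read_cb, attnamelist, NIL /* no COPY options */);
    (void) CopyFrom(cstate);
    EndCopyFrom(cstate);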
*/ + StartTransactionCommand(); + SpinLockAcquire(&MyLogicalRepWorker->relmutex); + MyLogicalRepWorker->relstate = + GetSubscriptionRelState(MyLogicalRepWorker->subid, + MyLogicalRepWorker->relid, + &MyLogicalRepWorker->relstate_lsn, + false); + SpinLockRelease(&MyLogicalRepWorker->relmutex); + CommitTransactionCommand(); + + /* + * We are limited to 63 characters of the name length so we cut the + * original slot name to 36 chars because the "_sync_" adds 6, each + * each unsigned integer (oid) has maximum of 10 characters and we have + * one additional "_" separator between slot name and subscription oid. + */ + snprintf(slotname, NAMEDATALEN, "%.36s_%u_sync_%u", + MySubscription->slotname, MySubscription->oid, + MyLogicalRepWorker->relid); + + wrconn = walrcv_connect(MySubscription->conninfo, true, slotname, &err); + if (wrconn == NULL) + ereport(ERROR, + (errmsg("could not connect to the publisher: %s", err))); + + switch (MyLogicalRepWorker->relstate) + { + case SUBREL_STATE_INIT: + case SUBREL_STATE_DATASYNC: + { + Relation rel; + + SpinLockAcquire(&MyLogicalRepWorker->relmutex); + MyLogicalRepWorker->relstate = SUBREL_STATE_DATASYNC; + MyLogicalRepWorker->relstate_lsn = InvalidXLogRecPtr; + SpinLockRelease(&MyLogicalRepWorker->relmutex); + + /* Update the state and make it visible to others. */ + StartTransactionCommand(); + SetSubscriptionRelState(MyLogicalRepWorker->subid, + MyLogicalRepWorker->relid, + MyLogicalRepWorker->relstate, + MyLogicalRepWorker->relstate_lsn); + CommitTransactionCommand(); + + /* + * We want to do the table data sync in single + * transaction. + */ + StartTransactionCommand(); + + /* + * Use standard write lock here. It might be better to + * disallow access to table while it's being synchronized. + * But we don't want to block the main apply process from + * working and it has to open relation in RowExclusiveLock + * when remapping remote relation id to local one. + */ + rel = heap_open(MyLogicalRepWorker->relid, RowExclusiveLock); + + /* + * Create temporary slot for the sync process. + * We do this inside transaction so that we can use the + * snapshot made by the slot to get existing data. + */ + if (!walrcv_command(wrconn, + "BEGIN READ ONLY ISOLATION LEVEL " + "REPEATABLE READ", + &err)) + ereport(ERROR, + (errmsg("table copy could not start transaction on publisher"), + errdetail("The error was: %s", err))); + + walrcv_create_slot(wrconn, slotname, true, + CRS_USE_SNAPSHOT, origin_startpos); + + copy_table(rel); + + if (!walrcv_command(wrconn, "ROLLBACK", &err)) + ereport(ERROR, + (errmsg("table copy could not finish transaction on publisher"), + errdetail("The error was: %s", err))); + + heap_close(rel, NoLock); + + /* + * We are done with the initial data synchronization, + * update the state. + */ + SpinLockAcquire(&MyLogicalRepWorker->relmutex); + MyLogicalRepWorker->relstate = SUBREL_STATE_SYNCWAIT; + MyLogicalRepWorker->relstate_lsn = *origin_startpos; + SpinLockRelease(&MyLogicalRepWorker->relmutex); + + /* + * Wait for main apply worker to either tell us to + * catchup or that we are done. + */ + wait_for_sync_status_change(MyLogicalRepWorker->relid, + MyLogicalRepWorker->relstate); + if (MyLogicalRepWorker->relstate != SUBREL_STATE_CATCHUP) + { + /* Update the new state. 
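The slot-name arithmetic above works out as follows; for example, with a subscription slot named "mysub", subscription OID 16394 and table OID 16399 (values made up for illustration), the "%.36s_%u_sync_%u" format yields:

    mysub_16394_sync_16399

and in the worst case 36 + 1 + 10 + 6 + 10 = 63 characters, which just fits within NAMEDATALEN - 1.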
*/ + SetSubscriptionRelState(MyLogicalRepWorker->subid, + MyLogicalRepWorker->relid, + MyLogicalRepWorker->relstate, + MyLogicalRepWorker->relstate_lsn); + finish_sync_worker(slotname); + } + break; + } + case SUBREL_STATE_SYNCDONE: + case SUBREL_STATE_READY: + /* Nothing to do here but finish. */ + finish_sync_worker(slotname); + default: + elog(ERROR, "unknown relation state \"%c\"", + MyLogicalRepWorker->relstate); + } + + return pstrdup(slotname); +} diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 535aa2d..5383364 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -32,6 +32,7 @@ #include "catalog/namespace.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" #include "commands/trigger.h" @@ -101,14 +102,15 @@ typedef struct SlotErrCallbackArg } SlotErrCallbackArg; static MemoryContext ApplyContext = NULL; -static MemoryContext ApplyCacheContext = NULL; +MemoryContext ApplyCacheContext = NULL; WalReceiverConn *wrconn = NULL; Subscription *MySubscription = NULL; bool MySubscriptionValid = false; -bool in_remote_transaction = false; +static char *myslotname = NULL; +bool in_remote_transaction = false; static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply); @@ -117,6 +119,23 @@ static void store_flush_position(XLogRecPtr remote_lsn); static void reread_subscription(void); /* + * Should this worker apply changes for given relation. + * + * This is mainly needed for initial relation data sync as that runs in + * separate worker process running in parallel and we need some way to skip + * changes coming to the main apply worker during the sync of a table. + */ +static bool +should_apply_changes_for_rel(LogicalRepRelMapEntry *rel) +{ + return (MyLogicalRepWorker->relid == InvalidOid && + (rel->state == SUBREL_STATE_READY || + (rel->state == SUBREL_STATE_SYNCDONE && + rel->statelsn <= replorigin_session_origin_lsn))) || + MyLogicalRepWorker->relid == rel->localreloid; +} + +/* * Make sure that we started local transaction. * * Also switches to ApplyContext as necessary. @@ -418,7 +437,8 @@ apply_handle_commit(StringInfo s) Assert(commit_data.commit_lsn == replorigin_session_origin_lsn); Assert(commit_data.committime == replorigin_session_origin_timestamp); - if (IsTransactionState()) + /* The synchronization worker runs in single transaction. */ + if (IsTransactionState() && MyLogicalRepWorker->relid == InvalidOid) { CommitTransactionCommand(); @@ -427,6 +447,9 @@ apply_handle_commit(StringInfo s) in_remote_transaction = false; + /* Proccess any tables that are being synchronized in parallel. */ + process_syncing_tables(myslotname, commit_data.end_lsn); + pgstat_report_activity(STATE_IDLE, NULL); } @@ -442,7 +465,8 @@ apply_handle_origin(StringInfo s) * ORIGIN message can only come inside remote transaction and before * any actual writes. */ - if (!in_remote_transaction || IsTransactionState()) + if (!in_remote_transaction || + (IsTransactionState() && MyLogicalRepWorker->relid == InvalidOid)) ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("ORIGIN message sent out of order"))); @@ -515,6 +539,15 @@ apply_handle_insert(StringInfo s) relid = logicalrep_read_insert(s, &newtup); rel = logicalrep_rel_open(relid, RowExclusiveLock); + if (!should_apply_changes_for_rel(rel)) + { + /* + * The relation can't become interesting in the middle of the + * transaction so it's safe to unlock it. 
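A compact restatement of should_apply_changes_for_rel() introduced above, for readers following the per-table filtering:

    /*
     * main apply worker (MyLogicalRepWorker->relid == InvalidOid):
     *     apply only if the relation is READY, or SYNCDONE with
     *     statelsn <= replorigin_session_origin_lsn
     * table sync worker:
     *     apply only changes for the relation it is synchronizing
     */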
+ */ + logicalrep_rel_close(rel, RowExclusiveLock); + return; + } /* Initialize the executor state. */ estate = create_estate_for_relation(rel); @@ -607,6 +640,15 @@ apply_handle_update(StringInfo s) relid = logicalrep_read_update(s, &has_oldtup, &oldtup, &newtup); rel = logicalrep_rel_open(relid, RowExclusiveLock); + if (!should_apply_changes_for_rel(rel)) + { + /* + * The relation can't become interesting in the middle of the + * transaction so it's safe to unlock it. + */ + logicalrep_rel_close(rel, RowExclusiveLock); + return; + } /* Check if we can do the update. */ check_relation_updatable(rel); @@ -716,6 +758,15 @@ apply_handle_delete(StringInfo s) relid = logicalrep_read_delete(s, &oldtup); rel = logicalrep_rel_open(relid, RowExclusiveLock); + if (!should_apply_changes_for_rel(rel)) + { + /* + * The relation can't become interesting in the middle of the + * transaction so it's safe to unlock it. + */ + logicalrep_rel_close(rel, RowExclusiveLock); + return; + } /* Check if we can do the delete. */ check_relation_updatable(rel); @@ -927,10 +978,8 @@ UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply) * Apply main loop. */ static void -ApplyLoop(void) +LogicalRepApplyLoop(XLogRecPtr last_received) { - XLogRecPtr last_received = InvalidXLogRecPtr; - /* Init the ApplyContext which we use for easier cleanup. */ ApplyContext = AllocSetContextCreate(TopMemoryContext, "ApplyContext", @@ -1014,15 +1063,18 @@ ApplyLoop(void) } else if (c == 'k') { - XLogRecPtr endpos; + XLogRecPtr end_lsn; TimestampTz timestamp; bool reply_requested; - endpos = pq_getmsgint64(&s); + end_lsn = pq_getmsgint64(&s); timestamp = pq_getmsgint64(&s); reply_requested = pq_getmsgbyte(&s); - send_feedback(endpos, reply_requested, false); + if (last_received < end_lsn) + last_received = end_lsn; + + send_feedback(last_received, reply_requested, false); UpdateWorkerStats(last_received, timestamp, true); } /* other message types are purposefully ignored */ @@ -1030,6 +1082,9 @@ ApplyLoop(void) len = walrcv_receive(wrconn, &buf, &fd); } + + /* confirm all writes at once */ + send_feedback(last_received, false, false); } if (!in_remote_transaction) @@ -1038,15 +1093,13 @@ ApplyLoop(void) * If we didn't get any transactions for a while there might be * unconsumed invalidation messages in the queue, consume them now. */ - StartTransactionCommand(); - /* Check for subscription change */ + AcceptInvalidationMessages(); if (!MySubscriptionValid) reread_subscription(); - CommitTransactionCommand(); - } - /* confirm all writes at once */ - send_feedback(last_received, false, false); + /* Process any table synchronization changes. */ + process_syncing_tables(myslotname, last_received); + } /* Cleanup the memory. */ MemoryContextResetAndDeleteChildren(ApplyContext); @@ -1054,7 +1107,11 @@ ApplyLoop(void) /* Check if we need to exit the streaming loop. */ if (endofstream) + { + TimeLineID tli; + walrcv_endstreaming(wrconn, &tli); break; + } /* * Wait for more data or latch. @@ -1222,6 +1279,14 @@ reread_subscription(void) { MemoryContext oldctx; Subscription *newsub; + bool started_tx = false; + + /* This function might be called inside or outside of transaction. */ + if (!IsTransactionState()) + { + StartTransactionCommand(); + started_tx = true; + } /* Ensure allocations in permanent context. 
*/ oldctx = MemoryContextSwitchTo(ApplyCacheContext); @@ -1319,6 +1384,9 @@ reread_subscription(void) MemoryContextSwitchTo(oldctx); + if (started_tx) + CommitTransactionCommand(); + MySubscriptionValid = true; } @@ -1339,11 +1407,7 @@ ApplyWorkerMain(Datum main_arg) int worker_slot = DatumGetObjectId(main_arg); MemoryContext oldctx; char originname[NAMEDATALEN]; - RepOriginId originid; XLogRecPtr origin_startpos; - char *err; - int server_version; - TimeLineID startpointTLI; WalRcvStreamOptions options; /* Attach to slot */ @@ -1402,49 +1466,90 @@ ApplyWorkerMain(Datum main_arg) subscription_change_cb, (Datum) 0); - ereport(LOG, - (errmsg("logical replication apply for subscription \"%s\" has started", - MySubscription->name))); - - /* Setup replication origin tracking. */ - snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid); - originid = replorigin_by_name(originname, true); - if (!OidIsValid(originid)) - originid = replorigin_create(originname); - replorigin_session_setup(originid); - replorigin_session_origin = originid; - origin_startpos = replorigin_session_get_progress(false); + if (OidIsValid(MyLogicalRepWorker->relid)) + elog(LOG, "logical replication sync for subscription %s, table %s started", + MySubscription->name, get_rel_name(MyLogicalRepWorker->relid)); + else + elog(LOG, "logical replication apply for subscription %s started", + MySubscription->name); CommitTransactionCommand(); /* Connect to the origin and start the replication. */ elog(DEBUG1, "connecting to publisher using connection string \"%s\"", MySubscription->conninfo); - wrconn = walrcv_connect(MySubscription->conninfo, true, - MySubscription->name, &err); - if (wrconn == NULL) - ereport(ERROR, - (errmsg("could not connect to the publisher: %s", err))); + + if (OidIsValid(MyLogicalRepWorker->relid)) + { + char *syncslotname; + + /* This is table synchroniation worker, call initial sync. */ + syncslotname = LogicalRepSyncTableStart(&origin_startpos); + + /* The slot name needs to be allocated in permanent memory context. */ + oldctx = MemoryContextSwitchTo(ApplyCacheContext); + myslotname = pstrdup(syncslotname); + MemoryContextSwitchTo(oldctx); + + pfree(syncslotname); + } + else + { + /* This is main apply worker */ + RepOriginId originid; + TimeLineID startpointTLI; + char *err; + int server_version; + + myslotname = MySubscription->slotname; + + /* Setup replication origin tracking. */ + StartTransactionCommand(); + snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid); + originid = replorigin_by_name(originname, true); + if (!OidIsValid(originid)) + originid = replorigin_create(originname); + replorigin_session_setup(originid); + replorigin_session_origin = originid; + origin_startpos = replorigin_session_get_progress(false); + CommitTransactionCommand(); + + wrconn = walrcv_connect(MySubscription->conninfo, true, myslotname, + &err); + if (wrconn == NULL) + ereport(ERROR, + (errmsg("could not connect to the publisher: %s", err))); + + /* + * We don't really use the output identify_system for anything + * but it does some initializations on the upstream so let's still + * call it. + */ + (void) walrcv_identify_system(wrconn, &startpointTLI, + &server_version); + + } /* - * We don't really use the output identify_system for anything - * but it does some initializations on the upstream so let's still - * call it. + * Setup callback for syscache so that we know when something + * changes in the subscription relation state. 
*/ - (void) walrcv_identify_system(wrconn, &startpointTLI, &server_version); + CacheRegisterSyscacheCallback(SUBSCRIPTIONRELOID, + invalidate_syncing_table_states, + (Datum) 0); /* Build logical replication streaming options. */ options.logical = true; options.startpoint = origin_startpos; - options.slotname = MySubscription->slotname; + options.slotname = myslotname; options.proto.logical.proto_version = LOGICALREP_PROTO_VERSION_NUM; options.proto.logical.publication_names = MySubscription->publications; - /* Start streaming from the slot. */ + /* Start normal logical streaming replication. */ walrcv_startstreaming(wrconn, &options); /* Run the main loop. */ - ApplyLoop(); + LogicalRepApplyLoop(origin_startpos); walrcv_disconnect(wrconn); diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index b35d0f0..0755b88 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -25,6 +25,7 @@ /* Result of the parsing is returned here */ Node *replication_parse_result; +static SQLCmd *make_sqlcmd(void); /* * Bison doesn't allocate anything that needs to live across parser calls, @@ -57,6 +58,7 @@ Node *replication_parse_result; %token SCONST IDENT %token UCONST %token RECPTR +%token T_WORD /* Keyword tokens. */ %token K_BASE_BACKUP @@ -79,11 +81,14 @@ Node *replication_parse_result; %token K_SLOT %token K_RESERVE_WAL %token K_TEMPORARY +%token K_EXPORT_SNAPSHOT +%token K_NOEXPORT_SNAPSHOT +%token K_USE_SNAPSHOT %type command %type base_backup start_replication start_logical_replication create_replication_slot drop_replication_slot identify_system - timeline_history show + timeline_history show sql_cmd %type base_backup_opt_list %type base_backup_opt %type opt_timeline @@ -91,7 +96,9 @@ Node *replication_parse_result; %type plugin_opt_elem %type plugin_opt_arg %type opt_slot var_name -%type opt_reserve_wal opt_temporary +%type opt_temporary +%type create_slot_opt_list +%type create_slot_opt %% @@ -114,6 +121,7 @@ command: | drop_replication_slot | timeline_history | show + | sql_cmd ; /* @@ -202,18 +210,18 @@ base_backup_opt: create_replication_slot: /* CREATE_REPLICATION_SLOT slot TEMPORARY PHYSICAL RESERVE_WAL */ - K_CREATE_REPLICATION_SLOT IDENT opt_temporary K_PHYSICAL opt_reserve_wal + K_CREATE_REPLICATION_SLOT IDENT opt_temporary K_PHYSICAL create_slot_opt_list { CreateReplicationSlotCmd *cmd; cmd = makeNode(CreateReplicationSlotCmd); cmd->kind = REPLICATION_KIND_PHYSICAL; cmd->slotname = $2; cmd->temporary = $3; - cmd->reserve_wal = $5; + cmd->options = $5; $$ = (Node *) cmd; } /* CREATE_REPLICATION_SLOT slot TEMPORARY LOGICAL plugin */ - | K_CREATE_REPLICATION_SLOT IDENT opt_temporary K_LOGICAL IDENT + | K_CREATE_REPLICATION_SLOT IDENT opt_temporary K_LOGICAL IDENT create_slot_opt_list { CreateReplicationSlotCmd *cmd; cmd = makeNode(CreateReplicationSlotCmd); @@ -221,10 +229,42 @@ create_replication_slot: cmd->slotname = $2; cmd->temporary = $3; cmd->plugin = $5; + cmd->options = $6; $$ = (Node *) cmd; } ; +create_slot_opt_list: + create_slot_opt_list create_slot_opt + { $$ = lappend($1, $2); } + | /* EMPTY */ + { $$ = NIL; } + ; + +create_slot_opt: + K_EXPORT_SNAPSHOT + { + $$ = makeDefElem("export_snapshot", + (Node *)makeInteger(TRUE), -1); + } + | K_NOEXPORT_SNAPSHOT + { + $$ = makeDefElem("export_snapshot", + (Node *)makeInteger(FALSE), -1); + } + | K_USE_SNAPSHOT + { + $$ = makeDefElem("use_snapshot", + (Node *)makeInteger(TRUE), -1); + } + | K_RESERVE_WAL + { + $$ = makeDefElem("reserve_wal", + (Node 
*)makeInteger(TRUE), -1); + } + ; + + /* DROP_REPLICATION_SLOT slot */ drop_replication_slot: K_DROP_REPLICATION_SLOT IDENT @@ -291,11 +331,6 @@ opt_physical: | /* EMPTY */ ; -opt_reserve_wal: - K_RESERVE_WAL { $$ = true; } - | /* EMPTY */ { $$ = false; } - ; - opt_temporary: K_TEMPORARY { $$ = true; } | /* EMPTY */ { $$ = false; } @@ -348,6 +383,26 @@ plugin_opt_arg: SCONST { $$ = (Node *) makeString($1); } | /* EMPTY */ { $$ = NULL; } ; + +sql_cmd: + IDENT { $$ = (Node *) make_sqlcmd(); } + ; %% +static SQLCmd * +make_sqlcmd(void) +{ + SQLCmd *cmd = makeNode(SQLCmd); + int tok; + + /* Just move lexer to the end of command. */ + for (;;) + { + tok = yylex(); + if (tok == ';' || tok == 0) + break; + } + return cmd; +} + #include "repl_scanner.c" diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index 37f8579..52ae7b3 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -100,6 +100,9 @@ RESERVE_WAL { return K_RESERVE_WAL; } LOGICAL { return K_LOGICAL; } SLOT { return K_SLOT; } TEMPORARY { return K_TEMPORARY; } +EXPORT_SNAPSHOT { return K_EXPORT_SNAPSHOT; } +NOEXPORT_SNAPSHOT { return K_NOEXPORT_SNAPSHOT; } +USE_SNAPSHOT { return K_USE_SNAPSHOT; } "," { return ','; } ";" { return ';'; } @@ -178,9 +181,7 @@ TEMPORARY { return K_TEMPORARY; } } . { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error: unexpected character \"%s\"", yytext))); + return T_WORD; } %% diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 9cf9eb0..ae596cf 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -51,6 +51,7 @@ #include "catalog/pg_type.h" #include "commands/dbcommands.h" +#include "commands/defrem.h" #include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -733,6 +734,59 @@ logical_read_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int req } /* + * Process extra options given to CREATE_REPLICATION_SLOT. + */ +static void +parseCreateReplSlotOptions(CreateReplicationSlotCmd *cmd, + bool *reserve_wal, + CRSSnapshotAction *snapshot_action) +{ + ListCell *lc; + bool snapshot_action_given = false; + bool reserve_wal_given = false; + + /* Parse options */ + foreach (lc, cmd->options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "export_snapshot") == 0) + { + if (snapshot_action_given || cmd->kind != REPLICATION_KIND_LOGICAL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + snapshot_action_given = true; + *snapshot_action = defGetBoolean(defel) ? CRS_EXPORT_SNAPSHOT : + CRS_NOEXPORT_SNAPSHOT; + } + else if (strcmp(defel->defname, "use_snapshot") == 0) + { + if (snapshot_action_given || cmd->kind != REPLICATION_KIND_LOGICAL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + snapshot_action_given = true; + *snapshot_action = CRS_USE_SNAPSHOT; + } + else if (strcmp(defel->defname, "reserve_wal") == 0) + { + if (reserve_wal_given || cmd->kind != REPLICATION_KIND_PHYSICAL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + reserve_wal_given = true; + *reserve_wal = true; + } + else + elog(ERROR, "unrecognized option: %s", defel->defname); + } +} + +/* * Create a new replication slot. 
*/ static void @@ -741,6 +795,8 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) const char *snapshot_name = NULL; char xpos[MAXFNAMELEN]; char *slot_name; + bool reserve_wal = false; + CRSSnapshotAction snapshot_action = CRS_EXPORT_SNAPSHOT; DestReceiver *dest; TupOutputState *tstate; TupleDesc tupdesc; @@ -749,6 +805,8 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) Assert(!MyReplicationSlot); + parseCreateReplSlotOptions(cmd, &reserve_wal, &snapshot_action); + /* setup state for XLogReadPage */ sendTimeLineIsHistoric = false; sendTimeLine = ThisTimeLineID; @@ -777,6 +835,40 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) { LogicalDecodingContext *ctx; + /* + * Do options check early so that we can bail before calling the + * DecodingContextFindStartpoint which can take long time. + */ + if (snapshot_action == CRS_EXPORT_SNAPSHOT) + { + if (IsTransactionBlock()) + ereport(ERROR, + (errmsg("CREATE_REPLICATION_SLOT ... EXPORT_SNAPSHOT " + "must not be called inside a transaction"))); + } + else if (snapshot_action == CRS_USE_SNAPSHOT) + { + if (!IsTransactionBlock()) + ereport(ERROR, + (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT " + "must be called inside a transaction"))); + + if (XactIsoLevel != XACT_REPEATABLE_READ) + ereport(ERROR, + (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT " + "must be called in REPEATABLE READ isolation mode transaction"))); + + if (FirstSnapshotSet) + ereport(ERROR, + (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT " + "must be called before any query"))); + + if (IsSubTransaction()) + ereport(ERROR, + (errmsg("CREATE_REPLICATION_SLOT ... USE_SNAPSHOT " + "must not be called in a subtransaction"))); + } + ctx = CreateInitDecodingContext(cmd->plugin, NIL, logical_read_xlog_page, WalSndPrepareWrite, WalSndWriteData); @@ -794,10 +886,22 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) DecodingContextFindStartpoint(ctx); /* - * Export a plain (not of the snapbuild.c type) snapshot to the user - * that can be imported into another session. + * Export or use the snapshot if we've been asked to do so. + * + * NB. We will convert the snapbuild.c kind of snapshot to normal + * snapshot when doing this. */ - snapshot_name = SnapBuildExportSnapshot(ctx->snapshot_builder); + if (snapshot_action == CRS_EXPORT_SNAPSHOT) + { + snapshot_name = SnapBuildExportSnapshot(ctx->snapshot_builder); + } + else if (snapshot_action == CRS_USE_SNAPSHOT) + { + Snapshot snap; + + snap = SnapBuildInitalSnapshot(ctx->snapshot_builder); + RestoreTransactionSnapshot(snap, MyProc); + } /* don't need the decoding context anymore */ FreeDecodingContext(ctx); @@ -805,7 +909,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) if (!cmd->temporary) ReplicationSlotPersist(); } - else if (cmd->kind == REPLICATION_KIND_PHYSICAL && cmd->reserve_wal) + else if (cmd->kind == REPLICATION_KIND_PHYSICAL && reserve_wal) { ReplicationSlotReserveWal(); @@ -1217,8 +1321,11 @@ WalSndWaitForWal(XLogRecPtr loc) /* * Execute an incoming replication command. + * + * Returns true if the cmd_string was recognized as WalSender command, false + * if not. */ -void +bool exec_replication_command(const char *cmd_string) { int parse_rc; @@ -1258,6 +1365,25 @@ exec_replication_command(const char *cmd_string) cmd_node = replication_parse_result; /* + * CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot. If it was + * called outside of transaction the snapshot should be cleared here. 
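Putting the new checks together, a client that wants USE_SNAPSHOT (as the table sync workers in this patch do) is expected to issue, on a logical replication connection to a database, roughly the following sequence; the data-reading step is schematic:

    BEGIN READ ONLY ISOLATION LEVEL REPEATABLE READ;
    CREATE_REPLICATION_SLOT "slotname" TEMPORARY LOGICAL pgoutput USE_SNAPSHOT;
    -- read existing table contents here, under the slot's snapshot,
    -- before running anything else in the transaction
    ROLLBACK;   -- or COMMIT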
+ */ + if (!IsTransactionBlock()) + SnapBuildClearExportedSnapshot(); + + /* + * For aborted transactions, don't allow anything except pure SQL, + * the exec_simple_query() will handle it correctly. + */ + if (IsAbortedTransactionBlockState() && cmd_node->type != T_SQLCmd) + ereport(ERROR, + (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION), + errmsg("current transaction is aborted, " + "commands ignored until end of transaction block"))); + + CHECK_FOR_INTERRUPTS(); + + /* * Allocate buffers that will be used for each outgoing and incoming * message. We do this just once per command to reduce palloc overhead. */ @@ -1272,6 +1398,7 @@ exec_replication_command(const char *cmd_string) break; case T_BaseBackupCmd: + PreventTransactionChain(true, "BASE_BACKUP"); SendBaseBackup((BaseBackupCmd *) cmd_node); break; @@ -1287,6 +1414,8 @@ exec_replication_command(const char *cmd_string) { StartReplicationCmd *cmd = (StartReplicationCmd *) cmd_node; + PreventTransactionChain(true, "START_REPLICATION"); + if (cmd->kind == REPLICATION_KIND_PHYSICAL) StartReplication(cmd); else @@ -1295,6 +1424,7 @@ exec_replication_command(const char *cmd_string) } case T_TimeLineHistoryCmd: + PreventTransactionChain(true, "TIMELINE_HISTORY"); SendTimeLineHistory((TimeLineHistoryCmd *) cmd_node); break; @@ -1307,6 +1437,14 @@ exec_replication_command(const char *cmd_string) } break; + case T_SQLCmd: + if (MyDatabaseId == InvalidOid) + ereport(ERROR, + (errmsg("not connected to database"))); + + /* Tell the caller that this wasn't a WalSender command. */ + return false; + default: elog(ERROR, "unrecognized replication command node tag: %u", cmd_node->type); @@ -1318,6 +1456,8 @@ exec_replication_command(const char *cmd_string) /* Send CommandComplete message */ EndCommand("SELECT", DestRemote); + + return true; } /* diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index b07d6c6..b54ad50 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -4053,6 +4053,7 @@ PostgresMain(int argc, char *argv[], case 'Q': /* simple query */ { const char *query_string; + bool walsender_query = false; /* Set statement_timestamp() */ SetCurrentStatementStartTimestamp(); @@ -4061,8 +4062,8 @@ PostgresMain(int argc, char *argv[], pq_getmsgend(&input_message); if (am_walsender) - exec_replication_command(query_string); - else + walsender_query = exec_replication_command(query_string); + if (!walsender_query) exec_simple_query(query_string); send_ready_for_query = true; diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index ff6a25d..3d15aa2 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -988,3 +988,23 @@ pg_current_logfile_1arg(PG_FUNCTION_ARGS) { return pg_current_logfile(fcinfo); } + +/* + * SQL wrapper around RelationGetReplicaIndex(). 
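For the new SQL-callable wrapper defined just below, usage is a single lookup (the table name is a placeholder); it returns the OID of the relation's replica identity index, or NULL if there is none:

    SELECT pg_get_replica_identity_index('some_table'::regclass);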
+ */ +Datum +pg_get_replica_identity_index(PG_FUNCTION_ARGS) +{ + Oid reloid = PG_GETARG_OID(0); + Oid idxoid; + Relation rel; + + rel = heap_open(reloid, AccessShareLock); + idxoid = RelationGetReplicaIndex(rel); + heap_close(rel, AccessShareLock); + + if (OidIsValid(idxoid)) + PG_RETURN_OID(idxoid); + else + PG_RETURN_NULL(); +} diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index b1c0b4b..1c86e2d 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -62,6 +62,7 @@ #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_subscription_rel.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" #include "catalog/pg_ts_config.h" @@ -758,6 +759,28 @@ static const struct cachedesc cacheinfo[] = { }, 4 }, + {SubscriptionRelRelationId, /* SUBSCRIPTIONRELOID */ + SubscriptionRelOidIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 64 + }, + {SubscriptionRelRelationId, /* SUBSCRIPTIONRELMAP */ + SubscriptionRelMapIndexId, + 2, + { + Anum_pg_subscription_rel_srrelid, + Anum_pg_subscription_rel_srsubid, + 0, + 0 + }, + 64 + }, {TableSpaceRelationId, /* TABLESPACEOID */ TablespaceOidIndexId, 1, diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0707f66..cab1893 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2488,6 +2488,18 @@ static struct config_int ConfigureNamesInt[] = }, { + {"max_sync_workers_per_subscription", + PGC_POSTMASTER, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Maximum number of table synchronization workers per subscription."), + NULL, + }, + &max_sync_workers_per_subscription, + 2, 1, MAX_BACKENDS, + NULL, NULL, NULL + }, + + { {"log_rotation_age", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Automatic log file rotation will occur after N minutes."), NULL, diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 6bce732..1f727eb 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -349,6 +349,12 @@ DECLARE_UNIQUE_INDEX(pg_subscription_oid_index, 6114, on pg_subscription using b DECLARE_UNIQUE_INDEX(pg_subscription_subname_index, 6115, on pg_subscription using btree(subdbid oid_ops, subname name_ops)); #define SubscriptionNameIndexId 6115 +DECLARE_UNIQUE_INDEX(pg_subscription_rel_oid_index, 6116, on pg_subscription_rel using btree(oid oid_ops)); +#define SubscriptionRelOidIndexId 6116 + +DECLARE_UNIQUE_INDEX(pg_subscription_rel_map_index, 6117, on pg_subscription_rel using btree(srrelid oid_ops, srsubid oid_ops)); +#define SubscriptionRelMapIndexId 6117 + /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0c8b5c6..31e146d 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2019,6 +2019,9 @@ DESCR("is a relation insertable/updatable/deletable"); DATA(insert OID = 3843 ( pg_column_is_updatable PGNSP PGUID 12 10 0 0 0 f f f f t f s s 3 0 16 "2205 21 16" _null_ _null_ _null_ _null_ _null_ pg_column_is_updatable _null_ _null_ _null_ )); DESCR("is a column updatable"); +DATA(insert OID = 6120 ( pg_get_replica_identity_index PGNSP PGUID 12 10 0 0 0 f f f f t f s s 1 0 2205 "2205" _null_ _null_ _null_ _null_ _null_ pg_get_replica_identity_index _null_ _null_ _null_ )); +DESCR("oid of replica identity index if any"); + /* Deferrable unique constraint 
trigger */ DATA(insert OID = 1250 ( unique_key_recheck PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ unique_key_recheck _null_ _null_ _null_ )); DESCR("deferred UNIQUE constraint check"); @@ -2776,7 +2779,7 @@ DATA(insert OID = 3099 ( pg_stat_get_wal_senders PGNSP PGUID 12 1 10 0 0 f f f DESCR("statistics: information about currently active replication"); DATA(insert OID = 3317 ( pg_stat_get_wal_receiver PGNSP PGUID 12 1 0 0 0 f f f f f f s r 0 0 2249 "" "{23,25,3220,23,3220,23,1184,1184,3220,1184,25,25}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{pid,status,receive_start_lsn,receive_start_tli,received_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,conninfo}" _null_ _null_ pg_stat_get_wal_receiver _null_ _null_ _null_ )); DESCR("statistics: information about WAL receiver"); -DATA(insert OID = 6118 ( pg_stat_get_subscription PGNSP PGUID 12 1 0 0 0 f f f f f f s r 1 0 2249 "26" "{26,26,23,3220,1184,1184,3220,1184}" "{i,o,o,o,o,o,o,o}" "{subid,subid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}" _null_ _null_ pg_stat_get_subscription _null_ _null_ _null_ )); +DATA(insert OID = 6118 ( pg_stat_get_subscription PGNSP PGUID 12 1 0 0 0 f f f f f f s r 1 0 2249 "26" "{26,26,26,23,3220,1184,1184,3220,1184}" "{i,o,o,o,o,o,o,o,o}" "{subid,subid,relid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}" _null_ _null_ pg_stat_get_subscription _null_ _null_ _null_ )); DESCR("statistics: information about subscription"); DATA(insert OID = 2026 ( pg_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s r 0 0 23 "" _null_ _null_ _null_ _null_ _null_ pg_backend_pid _null_ _null_ _null_ )); DESCR("statistics: current backend PID"); diff --git a/src/include/catalog/pg_subscription_rel.h b/src/include/catalog/pg_subscription_rel.h new file mode 100644 index 0000000..5dc1c96 --- /dev/null +++ b/src/include/catalog/pg_subscription_rel.h @@ -0,0 +1,79 @@ +/* ------------------------------------------------------------------------- + * + * pg_subscription_rel.h + * Local info about tables that come from the publisher of a + * subscription (pg_subscription_rel). + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * ------------------------------------------------------------------------- + */ +#ifndef PG_SUBSCRIPTION_REL_H +#define PG_SUBSCRIPTION_REL_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_subscription_rel definition. 
cpp turns this into + * typedef struct FormData_pg_subscription_rel + * ---------------- + */ +#define SubscriptionRelRelationId 6102 +#define SubscriptionRelRelation_Rowtype_Id 6103 + +/* Workaround for genbki not knowing about XLogRecPtr */ +#define pg_lsn XLogRecPtr + +CATALOG(pg_subscription_rel,6102) BKI_ROWTYPE_OID(6103) +{ + Oid srsubid; /* Oid of subscription */ + Oid srrelid; /* Oid of relation */ + char srsubstate; /* state of the relation in subscription */ + pg_lsn srsublsn; /* remote lsn of the state change + * used for synchronization coordination */ +} FormData_pg_subscription_rel; + +typedef FormData_pg_subscription_rel *Form_pg_subscription_rel; + +/* ---------------- + * compiler constants for pg_subscription_rel + * ---------------- + */ +#define Natts_pg_subscription_rel 4 +#define Anum_pg_subscription_rel_srsubid 1 +#define Anum_pg_subscription_rel_srrelid 2 +#define Anum_pg_subscription_rel_srsubstate 3 +#define Anum_pg_subscription_rel_srsublsn 4 + +/* ---------------- + * substate constants + * ---------------- + */ +#define SUBREL_STATE_INIT 'i' /* initializing (sublsn NULL) */ +#define SUBREL_STATE_DATASYNC 'd' /* data is being synchronized (sublsn NULL) */ +#define SUBREL_STATE_SYNCDONE 's' /* synchronization finished infront of apply (sublsn set) */ +#define SUBREL_STATE_READY 'r' /* ready (sublsn set) */ + +/* These are never stored in the catalog, we only use them for IPC. */ +#define SUBREL_STATE_UNKNOWN '\0' /* unknown state */ +#define SUBREL_STATE_SYNCWAIT 'w' /* waiting for sync */ +#define SUBREL_STATE_CATCHUP 'c' /* catching up with apply */ + +typedef struct SubscriptionRelState +{ + Oid relid; + XLogRecPtr lsn; + char state; +} SubscriptionRelState; + +extern Oid SetSubscriptionRelState(Oid subid, Oid relid, char state, + XLogRecPtr sublsn); +extern char GetSubscriptionRelState(Oid subid, Oid relid, + XLogRecPtr *sublsn, bool missing_ok); +extern void RemoveSubscriptionRel(Oid subid, Oid relid); + +extern List *GetSubscriptionRelations(Oid subid); +extern List *GetSubscriptionNotReadyRelations(Oid subid); + +#endif /* PG_SUBSCRIPTION_REL_H */ diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index d63ca0f..f081f22 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -21,6 +21,7 @@ /* CopyStateData is private in commands/copy.c */ typedef struct CopyStateData *CopyState; +typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread); extern void DoCopy(ParseState *state, const CopyStmt *stmt, int stmt_location, int stmt_len, @@ -28,7 +29,7 @@ extern void DoCopy(ParseState *state, const CopyStmt *stmt, extern void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options); extern CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename, - bool is_program, List *attnamelist, List *options); + bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options); extern void EndCopyFrom(CopyState cstate); extern bool NextCopyFrom(CopyState cstate, ExprContext *econtext, Datum *values, bool *nulls, Oid *tupleOid); @@ -36,6 +37,8 @@ extern bool NextCopyFromRawFields(CopyState cstate, char ***fields, int *nfields); extern void CopyFromErrorCallback(void *arg); +extern uint64 CopyFrom(CopyState cstate); + extern DestReceiver *CreateCopyDestReceiver(void); #endif /* COPY_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 28aca92..3d62bc0 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ 
-479,6 +479,7 @@ typedef enum NodeTag T_DropReplicationSlotCmd, T_StartReplicationCmd, T_TimeLineHistoryCmd, + T_SQLCmd, /* * TAGS FOR RANDOM OTHER STUFF diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 5afc3eb..0e17b39 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3285,10 +3285,22 @@ typedef struct CreateSubscriptionStmt List *options; /* List of DefElem nodes */ } CreateSubscriptionStmt; +typedef enum AlterSubscriptionType +{ + ALTER_SUBSCRIPTION_OPTIONS, + ALTER_SUBSCRIPTION_CONNECTION, + ALTER_SUBSCRIPTION_PUBLICATION, + ALTER_SUBSCRIPTION_REFRESH, + ALTER_SUBSCRIPTION_ENABLED +} AlterSubscriptionType; + typedef struct AlterSubscriptionStmt { NodeTag type; + AlterSubscriptionType kind; /* ALTER_SUBSCRIPTION_OPTIONS, etc */ char *subname; /* Name of of the subscription */ + char *conninfo; /* Connection string to publisher */ + List *publication; /* One or more publication to subscribe to */ List *options; /* List of DefElem nodes */ } AlterSubscriptionStmt; diff --git a/src/include/nodes/replnodes.h b/src/include/nodes/replnodes.h index f27354f..92ada41 100644 --- a/src/include/nodes/replnodes.h +++ b/src/include/nodes/replnodes.h @@ -56,7 +56,7 @@ typedef struct CreateReplicationSlotCmd ReplicationKind kind; char *plugin; bool temporary; - bool reserve_wal; + List *options; } CreateReplicationSlotCmd; @@ -96,4 +96,13 @@ typedef struct TimeLineHistoryCmd TimeLineID timeline; } TimeLineHistoryCmd; +/* ---------------------- + * SQL commands + * ---------------------- + */ +typedef struct SQLCmd +{ + NodeTag type; +} SQLCmd; + #endif /* REPLNODES_H */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 0062fb8..3a7af37 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -788,7 +788,9 @@ typedef enum WAIT_EVENT_MQ_SEND, WAIT_EVENT_PARALLEL_FINISH, WAIT_EVENT_SAFE_SNAPSHOT, - WAIT_EVENT_SYNC_REP + WAIT_EVENT_SYNC_REP, + WAIT_EVENT_LOGICAL_SYNC_DATA, + WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE } WaitEventIPC; /* ---------- diff --git a/src/include/replication/logical.h b/src/include/replication/logical.h index fd34964..a5088c4 100644 --- a/src/include/replication/logical.h +++ b/src/include/replication/logical.h @@ -31,9 +31,11 @@ typedef struct LogicalDecodingContext /* memory context this is all allocated in */ MemoryContext context; - /* infrastructure pieces */ - XLogReaderState *reader; + /* The associated replication slot */ ReplicationSlot *slot; + + /* infrastructure pieces for decoding */ + XLogReaderState *reader; struct ReorderBuffer *reorder; struct SnapBuild *snapshot_builder; @@ -75,6 +77,7 @@ typedef struct LogicalDecodingContext TransactionId write_xid; } LogicalDecodingContext; + extern void CheckLogicalDecodingRequirements(void); extern LogicalDecodingContext *CreateInitDecodingContext(char *plugin, @@ -92,6 +95,14 @@ extern void DecodingContextFindStartpoint(LogicalDecodingContext *ctx); extern bool DecodingContextReady(LogicalDecodingContext *ctx); extern void FreeDecodingContext(LogicalDecodingContext *ctx); +extern LogicalDecodingContext *CreateCopyDecodingContext( + List *output_plugin_options, + LogicalOutputPluginWriterPrepareWrite prepare_write, + LogicalOutputPluginWriterWrite do_write); +extern void DecodingContextProccessTuple(LogicalDecodingContext *ctx, + Relation rel, HeapTuple tup); +extern List *DecodingContextGetTableList(LogicalDecodingContext *ctx); + extern void LogicalIncreaseXminForSlot(XLogRecPtr lsn, TransactionId xmin); extern void 
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn); diff --git a/src/include/replication/logicallauncher.h b/src/include/replication/logicallauncher.h index cfe3db1..060946a 100644 --- a/src/include/replication/logicallauncher.h +++ b/src/include/replication/logicallauncher.h @@ -13,6 +13,7 @@ #define LOGICALLAUNCHER_H extern int max_logical_replication_workers; +extern int max_sync_workers_per_subscription; extern void ApplyLauncherRegister(void); extern void ApplyLauncherMain(Datum main_arg); diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h index 5e824ae..091a9f9 100644 --- a/src/include/replication/snapbuild.h +++ b/src/include/replication/snapbuild.h @@ -59,6 +59,7 @@ extern void FreeSnapshotBuilder(SnapBuild *cache); extern void SnapBuildSnapDecRefcount(Snapshot snap); +extern Snapshot SnapBuildInitalSnapshot(SnapBuild *builder); extern const char *SnapBuildExportSnapshot(SnapBuild *snapstate); extern void SnapBuildClearExportedSnapshot(void); diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index 0857bdc..0f2d2eb 100644 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -15,6 +15,8 @@ #include "access/xlog.h" #include "access/xlogdefs.h" #include "fmgr.h" +#include "replication/logicalproto.h" +#include "replication/walsender.h" #include "storage/latch.h" #include "storage/spin.h" #include "pgtime.h" @@ -183,9 +185,19 @@ typedef void (*walrcv_send_fn) (WalReceiverConn *conn, const char *buffer, int nbytes); typedef char *(*walrcv_create_slot_fn) (WalReceiverConn *conn, const char *slotname, bool temporary, + CRSSnapshotAction snapaction, XLogRecPtr *lsn); typedef bool (*walrcv_command_fn) (WalReceiverConn *conn, const char *cmd, char **err); +typedef List *(*walrcv_table_list_fn) (WalReceiverConn *conn, + List *publications); +typedef void (*walrcv_table_info_fn) (WalReceiverConn *conn, + const char *nspname, + const char *relname, + LogicalRepRelation *lrel); +typedef void (*walrcv_table_copy_fn) (WalReceiverConn *conn, + const char *nspname, + const char *relname); typedef void (*walrcv_disconnect_fn) (WalReceiverConn *conn); typedef struct WalReceiverFunctionsType @@ -201,6 +213,9 @@ typedef struct WalReceiverFunctionsType walrcv_send_fn walrcv_send; walrcv_create_slot_fn walrcv_create_slot; walrcv_command_fn walrcv_command; + walrcv_table_list_fn walrcv_table_list; + walrcv_table_info_fn walrcv_table_info; + walrcv_table_copy_fn walrcv_table_copy; walrcv_disconnect_fn walrcv_disconnect; } WalReceiverFunctionsType; @@ -224,10 +239,16 @@ extern PGDLLIMPORT WalReceiverFunctionsType *WalReceiverFunctions; WalReceiverFunctions->walrcv_receive(conn, buffer, wait_fd) #define walrcv_send(conn, buffer, nbytes) \ WalReceiverFunctions->walrcv_send(conn, buffer, nbytes) -#define walrcv_create_slot(conn, slotname, temporary, lsn) \ - WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, lsn) +#define walrcv_create_slot(conn, slotname, temporary, snapaction, lsn) \ + WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, snapaction, lsn) #define walrcv_command(conn, cmd, err) \ WalReceiverFunctions->walrcv_command(conn, cmd, err) +#define walrcv_table_list(conn, publications) \ + WalReceiverFunctions->walrcv_table_list(conn, publications) +#define walrcv_table_info(conn, nspname, relname, lrel) \ + WalReceiverFunctions->walrcv_table_info(conn, nspname, relname, lrel) +#define walrcv_table_copy(conn, nspname, 
relname) \ + WalReceiverFunctions->walrcv_table_copy(conn, nspname, relname) #define walrcv_disconnect(conn) \ WalReceiverFunctions->walrcv_disconnect(conn) diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index fe23f66..2ca9038 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -16,6 +16,16 @@ #include "fmgr.h" +/* + * What to do with a snapshot in create replication slot command. + */ +typedef enum +{ + CRS_EXPORT_SNAPSHOT, + CRS_NOEXPORT_SNAPSHOT, + CRS_USE_SNAPSHOT +} CRSSnapshotAction; + /* global state */ extern bool am_walsender; extern bool am_cascading_walsender; @@ -28,7 +38,7 @@ extern int wal_sender_timeout; extern bool log_replication_commands; extern void InitWalSender(void); -extern void exec_replication_command(const char *query_string); +extern bool exec_replication_command(const char *query_string); extern void WalSndErrorCleanup(void); extern void WalSndSignals(void); extern Size WalSndShmemSize(void); diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 8cbf268..acecbd0 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -33,6 +33,9 @@ typedef struct LogicalRepWorker /* Used for initial table synchronization. */ Oid relid; + char relstate; + XLogRecPtr relstate_lsn; + slock_t relmutex; /* Stats. */ XLogRecPtr last_lsn; @@ -42,6 +45,9 @@ typedef struct LogicalRepWorker TimestampTz reply_time; } LogicalRepWorker; +/* Memory context for cached variables in apply worker. */ +MemoryContext ApplyCacheContext; + /* libpqreceiver connection */ extern struct WalReceiverConn *wrconn; @@ -53,12 +59,20 @@ extern bool in_remote_transaction; extern bool got_SIGTERM; extern void logicalrep_worker_attach(int slot); -extern LogicalRepWorker *logicalrep_worker_find(Oid subid); -extern int logicalrep_worker_count(Oid subid); -extern void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid); -extern void logicalrep_worker_stop(Oid subid); -extern void logicalrep_worker_wakeup(Oid subid); +extern LogicalRepWorker *logicalrep_worker_find(Oid subid, Oid relid, + bool only_running); +extern void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, + Oid userid, Oid relid); +extern void logicalrep_worker_stop(Oid subid, Oid relid); +extern void logicalrep_worker_wakeup(Oid subid, Oid relid); +extern void logicalrep_worker_wakeup_ptr(LogicalRepWorker *worker); + +extern int logicalrep_sync_worker_count(Oid subid); extern void logicalrep_worker_sigterm(SIGNAL_ARGS); +extern char *LogicalRepSyncTableStart(XLogRecPtr *origin_startpos); +void process_syncing_tables(char *slotname, XLogRecPtr end_lsn); +void invalidate_syncing_table_states(Datum arg, int cacheid, + uint32 hashvalue); #endif /* WORKER_INTERNAL_H */ diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 66f60d2..e3c7ef0 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -89,6 +89,8 @@ enum SysCacheIdentifier STATRELATTINH, SUBSCRIPTIONOID, SUBSCRIPTIONNAME, + SUBSCRIPTIONRELOID, + SUBSCRIPTIONRELMAP, TABLESPACEOID, TRFOID, TRFTYPELANG, diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out index 71f16fd..d8edbfd 100644 --- a/src/test/regress/expected/object_address.out +++ b/src/test/regress/expected/object_address.out @@ -37,7 +37,8 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( FROM SQL WITH FUNCTION 
varchar_transform(internal), TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; -CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT); +CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT); +WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... REFRESH PUBLICATION to subscribe the tables -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); ERROR: unrecognized object type "stone" diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index c661f1d..b7f02bd 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1847,13 +1847,14 @@ pg_stat_ssl| SELECT s.pid, pg_stat_subscription| SELECT su.oid AS subid, su.subname, st.pid, + st.relid, st.received_lsn, st.last_msg_send_time, st.last_msg_receipt_time, st.latest_end_lsn, st.latest_end_time FROM (pg_subscription su - LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid))); + LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, relid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid))); pg_stat_sys_indexes| SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 0af013f..ff3ef40 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -136,6 +136,7 @@ pg_shdescription|t pg_shseclabel|t pg_statistic|t pg_subscription|t +pg_subscription_rel|t pg_tablespace|t pg_transform|t pg_trigger|t diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out index a8a61ee..d2187ee 100644 --- a/src/test/regress/expected/subscription.out +++ b/src/test/regress/expected/subscription.out @@ -22,8 +22,9 @@ COMMIT; CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub; ERROR: invalid connection string syntax: missing "=" after "testconn" in connection info string -CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT); reset client_min_messages; +CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (NOCONNECT); +WARNING: tables were not subscribed, you will have to run ALTER SUBSCRIPTION ... 
REFRESH PUBLICATION to subscribe the tables \dRs+ List of subscriptions Name | Owner | Enabled | Publication | Conninfo @@ -31,47 +32,38 @@ reset client_min_messages; testsub | regress_subscription_user | f | {testpub} | dbname=doesnotexist (1 row) -ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3; -\dRs - List of subscriptions - Name | Owner | Enabled | Publication ----------+---------------------------+---------+--------------------- - testsub | regress_subscription_user | f | {testpub2,testpub3} -(1 row) - ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2'; -ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1; \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Conninfo ----------+---------------------------+---------+--------------------+---------------------- - testsub | regress_subscription_user | f | {testpub,testpub1} | dbname=doesnotexist2 + List of subscriptions + Name | Owner | Enabled | Publication | Conninfo +---------+---------------------------+---------+-------------+---------------------- + testsub | regress_subscription_user | f | {testpub} | dbname=doesnotexist2 (1 row) BEGIN; ALTER SUBSCRIPTION testsub ENABLE; \dRs - List of subscriptions - Name | Owner | Enabled | Publication ----------+---------------------------+---------+-------------------- - testsub | regress_subscription_user | t | {testpub,testpub1} + List of subscriptions + Name | Owner | Enabled | Publication +---------+---------------------------+---------+------------- + testsub | regress_subscription_user | t | {testpub} (1 row) ALTER SUBSCRIPTION testsub DISABLE; \dRs - List of subscriptions - Name | Owner | Enabled | Publication ----------+---------------------------+---------+-------------------- - testsub | regress_subscription_user | f | {testpub,testpub1} + List of subscriptions + Name | Owner | Enabled | Publication +---------+---------------------------+---------+------------- + testsub | regress_subscription_user | f | {testpub} (1 row) COMMIT; ALTER SUBSCRIPTION testsub RENAME TO testsub_foo; \dRs - List of subscriptions - Name | Owner | Enabled | Publication --------------+---------------------------+---------+-------------------- - testsub_foo | regress_subscription_user | f | {testpub,testpub1} + List of subscriptions + Name | Owner | Enabled | Publication +-------------+---------------------------+---------+------------- + testsub_foo | regress_subscription_user | f | {testpub} (1 row) -- fail - cannot do DROP SUBSCRIPTION DROP SLOT inside transaction block diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql index 0ace4dd..e8b09d3 100644 --- a/src/test/regress/sql/object_address.sql +++ b/src/test/regress/sql/object_address.sql @@ -40,7 +40,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( FROM SQL WITH FUNCTION varchar_transform(internal), TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; -CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT); +CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCONNECT); -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql index 0b6c8a3..3bdf79d 100644 --- a/src/test/regress/sql/subscription.sql +++ b/src/test/regress/sql/subscription.sql @@ -18,17 +18,13 @@ CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub WITH (CREA COMMIT; 
CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub; -CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT); reset client_min_messages; -\dRs+ - -ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3; +CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (NOCONNECT); -\dRs +\dRs+ ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2'; -ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1; \dRs+ diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl index b81028a..b80008f 100644 --- a/src/test/subscription/t/001_rep_changes.pl +++ b/src/test/subscription/t/001_rep_changes.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 11; +use Test::More tests => 13; # Initialize publisher node my $node_publisher = get_new_node('publisher'); @@ -19,7 +19,7 @@ $node_subscriber->start; $node_publisher->safe_psql('postgres', "CREATE TABLE tab_notrep AS SELECT generate_series(1,10) AS a"); $node_publisher->safe_psql('postgres', - "CREATE TABLE tab_ins (a int)"); + "CREATE TABLE tab_ins AS SELECT generate_series(1,1002) AS a"); $node_publisher->safe_psql('postgres', "CREATE TABLE tab_full AS SELECT generate_series(1,10) AS a"); $node_publisher->safe_psql('postgres', @@ -56,10 +56,20 @@ my $caughtup_query = $node_publisher->poll_query_until('postgres', $caughtup_query) or die "Timed out while waiting for subscriber to catch up"; +# Also wait for initial table sync to finish +my $synced_query = +"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + my $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_notrep"); is($result, qq(0), 'check non-replicated table is empty on subscriber'); +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_ins"); +is($result, qq(1002), 'check initial data was copied to subscriber'); + $node_publisher->safe_psql('postgres', "INSERT INTO tab_ins SELECT generate_series(1,50)"); $node_publisher->safe_psql('postgres', @@ -79,7 +89,7 @@ $node_publisher->poll_query_until('postgres', $caughtup_query) $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins"); -is($result, qq(50|1|50), 'check replicated inserts on subscriber'); +is($result, qq(1052|1|1002), 'check replicated inserts on subscriber'); $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep"); @@ -109,7 +119,7 @@ $node_publisher->poll_query_until('postgres', $caughtup_query) $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full"); -is($result, qq(10|1|100), 'update works with REPLICA IDENTITY FULL and duplicate tuples'); +is($result, qq(20|1|100), 'update works with REPLICA IDENTITY FULL and duplicate tuples'); # check that change of connection string and/or publication list causes # restart of subscription workers. 
Not all of these are registered as tests @@ -126,7 +136,7 @@ $node_publisher->poll_query_until('postgres', $oldpid = $node_publisher->safe_psql('postgres', "SELECT pid FROM pg_stat_replication WHERE application_name = '$appname';"); $node_subscriber->safe_psql('postgres', - "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only"); + "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only WITH (NOCOPY DATA)"); $node_publisher->poll_query_until('postgres', "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = '$appname';") or die "Timed out while waiting for apply to restart"; @@ -141,7 +151,7 @@ $node_publisher->poll_query_until('postgres', $caughtup_query) $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins"); -is($result, qq(150|1|1100), 'check replicated inserts after subscription publication change'); +is($result, qq(1152|1|1100), 'check replicated inserts after subscription publication change'); $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep"); @@ -154,6 +164,8 @@ $node_publisher->safe_psql('postgres', "ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_full"); $node_publisher->safe_psql('postgres', "DELETE FROM tab_ins WHERE a > 0"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION WITH (NOCOPY DATA)"); $node_publisher->safe_psql('postgres', "INSERT INTO tab_full VALUES(0)"); @@ -163,11 +175,11 @@ $node_publisher->poll_query_until('postgres', $caughtup_query) # note that data are different on provider and subscriber $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins"); -is($result, qq(50|1|50), 'check replicated deletes after alter publication'); +is($result, qq(1052|1|1002), 'check replicated deletes after alter publication'); $result = $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full"); -is($result, qq(11|0|100), 'check replicated insert after alter publication'); +is($result, qq(21|0|100), 'check replicated insert after alter publication'); # check restart on rename $oldpid = $node_publisher->safe_psql('postgres', @@ -190,8 +202,12 @@ $result = is($result, qq(0), 'check replication slot was dropped on publisher'); $result = - $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_replication_origin"); -is($result, qq(0), 'check replication origin was dropped on subscriber'); + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription_rel"); +is($result, qq(0), 'check subscription relation status was dropped on subscriber'); + +$result = + $node_publisher->safe_psql('postgres', "SELECT count(*) FROM pg_replication_slots"); +is($result, qq(0), 'check replication slot was dropped on publisher'); $node_subscriber->stop('fast'); $node_publisher->stop('fast'); diff --git a/src/test/subscription/t/002_types.pl b/src/test/subscription/t/002_types.pl index f44e1e6..ad15e85 100644 --- a/src/test/subscription/t/002_types.pl +++ b/src/test/subscription/t/002_types.pl @@ -111,6 +111,12 @@ my $caughtup_query = $node_publisher->poll_query_until('postgres', $caughtup_query) or die "Timed out while waiting for subscriber to catch up"; +# Wait for initial sync to finish as well +my $synced_query = +"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');"; +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + # Insert 
initial test data $node_publisher->safe_psql('postgres', qq( -- test_tbl_one_array_col diff --git a/src/test/subscription/t/003_constraints.pl b/src/test/subscription/t/003_constraints.pl index b785132..11b8254 100644 --- a/src/test/subscription/t/003_constraints.pl +++ b/src/test/subscription/t/003_constraints.pl @@ -34,7 +34,7 @@ $node_publisher->safe_psql('postgres', my $appname = 'tap_sub'; $node_subscriber->safe_psql('postgres', - "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub;"); + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (NOCOPY DATA)"); # Wait for subscriber to finish initialization my $caughtup_query = diff --git a/src/test/subscription/t/004_sync.pl b/src/test/subscription/t/004_sync.pl new file mode 100644 index 0000000..ba40578 --- /dev/null +++ b/src/test/subscription/t/004_sync.pl @@ -0,0 +1,159 @@ +# Basic logical replication test +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 7; + +# Initialize publisher node +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +# Create subscriber node +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create some preexisting content on publisher +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_rep (a int primary key)"); +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_rep SELECT generate_series(1,10)"); + +# Setup structure on subscriber +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_rep (a int primary key)"); + +# Setup logical replication +my $publisher_connstr = $node_publisher->connstr . 
' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub FOR ALL TABLES"); + +my $appname = 'tap_sub'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub"); + +# Wait for subscriber to finish initialization +my $caughtup_query = +"SELECT pg_current_wal_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$appname';"; +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +# Also wait for initial table sync to finish +my $synced_query = +"SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +my $result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep"); +is($result, qq(10), 'initial data synced for first sub'); + +# drop subscription so that there is unreplicated data +$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_rep SELECT generate_series(11,20)"); + +# recreate the subscription, it will try to do initial copy +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub"); + +# but it will be stuck on data copy as it will fail on constraint +my $started_query = +"SELECT srsubstate = 'd' FROM pg_subscription_rel;"; +$node_subscriber->poll_query_until('postgres', $started_query) + or die "Timed out while waiting for subscriber to start sync"; + +# remove the conflicting data +$node_subscriber->safe_psql('postgres', + "DELETE FROM tab_rep;"); + +# wait for sync to finish this time +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +# check that all data is synced +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep"); +is($result, qq(20), 'initial data synced for second sub'); + +# now check another subscription for the same node pair +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub2 CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub"); + +# wait for it to start +$node_subscriber->poll_query_until('postgres', "SELECT pid IS NOT NULL FROM pg_stat_subscription WHERE subname = 'tap_sub2' AND relid IS NULL") + or die "Timed out while waiting for subscriber to start"; + +# and drop both subscriptions +$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); +$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub2"); + +# check subscriptions are removed +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription"); +is($result, qq(0), 'second and third sub are dropped'); + +# remove the conflicting data +$node_subscriber->safe_psql('postgres', + "DELETE FROM tab_rep;"); + +# recreate the subscription again +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub"); + +# and wait for data sync to finish again +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +# check that all data is synced +$result = + 
$node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep"); +is($result, qq(20), 'initial data synced for fourth sub'); + +# add new table on subscriber +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_rep_next (a int)"); + +# setup structure with existing data on pubisher +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_rep_next (a) AS SELECT generate_series(1,10)"); + +# Wait for subscription to catch up +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next"); +is($result, qq(0), 'no data for table added after subscription initialized'); + +# ask for data sync +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub REFRESH PUBLICATION"); + +# wait for sync to finish +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next"); +is($result, qq(10), 'data for table added after subscription initialized are now synced'); + +# Add some data +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_rep_next SELECT generate_series(1,10)"); + +# Wait for subscription to catch up +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_rep_next"); +is($result, qq(20), 'changes for table added after subscription initialized replicated'); + +$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); + +$node_subscriber->stop('fast'); +$node_publisher->stop('fast'); -- 2.7.4