Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pgoutput.c
4 : * Logical Replication output plugin
5 : *
6 : * Copyright (c) 2012-2020, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/replication/pgoutput/pgoutput.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/tupconvert.h"
16 : #include "catalog/partition.h"
17 : #include "catalog/pg_publication.h"
18 : #include "commands/defrem.h"
19 : #include "fmgr.h"
20 : #include "replication/logical.h"
21 : #include "replication/logicalproto.h"
22 : #include "replication/origin.h"
23 : #include "replication/pgoutput.h"
24 : #include "utils/int8.h"
25 : #include "utils/inval.h"
26 : #include "utils/lsyscache.h"
27 : #include "utils/memutils.h"
28 : #include "utils/syscache.h"
29 : #include "utils/varlena.h"
30 :
31 260 : PG_MODULE_MAGIC;
32 :
33 : extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
34 :
35 : static void pgoutput_startup(LogicalDecodingContext *ctx,
36 : OutputPluginOptions *opt, bool is_init);
37 : static void pgoutput_shutdown(LogicalDecodingContext *ctx);
38 : static void pgoutput_begin_txn(LogicalDecodingContext *ctx,
39 : ReorderBufferTXN *txn);
40 : static void pgoutput_commit_txn(LogicalDecodingContext *ctx,
41 : ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
42 : static void pgoutput_change(LogicalDecodingContext *ctx,
43 : ReorderBufferTXN *txn, Relation rel,
44 : ReorderBufferChange *change);
45 : static void pgoutput_truncate(LogicalDecodingContext *ctx,
46 : ReorderBufferTXN *txn, int nrelations, Relation relations[],
47 : ReorderBufferChange *change);
48 : static bool pgoutput_origin_filter(LogicalDecodingContext *ctx,
49 : RepOriginId origin_id);
50 : static void pgoutput_prepare_txn(LogicalDecodingContext *ctx,
51 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
52 : static void pgoutput_commit_prepared_txn(LogicalDecodingContext *ctx,
53 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
54 : static void pgoutput_rollback_prepared_txn(LogicalDecodingContext *ctx,
55 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
56 : static void pgoutput_stream_start(struct LogicalDecodingContext *ctx,
57 : ReorderBufferTXN *txn);
58 : static void pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
59 : ReorderBufferTXN *txn);
60 : static void pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
61 : ReorderBufferTXN *txn,
62 : XLogRecPtr abort_lsn);
63 : static void pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
64 : ReorderBufferTXN *txn,
65 : XLogRecPtr commit_lsn);
66 : static void pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
67 : ReorderBufferTXN *txn, XLogRecPtr prepare_lsn);
68 :
69 : static bool publications_valid;
70 : static bool in_streaming;
71 :
72 : static List *LoadPublications(List *pubnames);
73 : static void publication_invalidation_cb(Datum arg, int cacheid,
74 : uint32 hashvalue);
75 : static void send_relation_and_attrs(Relation relation, TransactionId xid,
76 : LogicalDecodingContext *ctx);
77 :
78 : /*
79 : * Entry in the map used to remember which relation schemas we sent.
80 : *
81 : * The schema_sent flag indicates whether the current schema record has
82 : * already been sent to the subscriber (so it need not be sent again).
83 : *
84 : * The schema cache on the downstream side is, however, updated only at
85 : * commit time, and with streamed transactions the commit order may differ
86 : * from the order in which the transactions are sent. Also, (sub)transactions
87 : * might get aborted, so we need to send the schema for each (sub)transaction
88 : * so that we don't lose the schema information on abort. To handle this, we
89 : * maintain a list of xids (streamed_txns) for which we have already sent the
90 : * schema.
91 : *
92 : * For partitions, 'pubactions' considers not only the table's own
93 : * publications, but also those of all of its ancestors.
94 : */
95 : typedef struct RelationSyncEntry
96 : {
97 : Oid relid; /* relation oid */
98 :
99 : /*
100 : * Did we send the schema? If ancestor relid is set, its schema must also
101 : * have been sent for this to be true.
102 : */
103 : bool schema_sent;
104 : List *streamed_txns; /* streamed toplevel transactions with this
105 : * schema */
106 :
107 : bool replicate_valid;
108 : PublicationActions pubactions;
109 :
110 : /*
111 : * OID of the relation to publish changes as. For a partition, this may
112 : * be set to one of its ancestors whose schema will be used when
113 : * replicating changes, if publish_via_partition_root is set for the
114 : * publication.
115 : */
116 : Oid publish_as_relid;
117 :
118 : /*
119 : * Map used when replicating using an ancestor's schema to convert tuples
120 : * from the partition's type to the ancestor's; NULL if publish_as_relid
121 : * is the same as 'relid' or if unnecessary because the partition and the
122 : * ancestor have identical TupleDescs.
123 : */
124 : TupleConversionMap *map;
125 : } RelationSyncEntry;
126 :
127 : /* Map used to remember which relation schemas we sent. */
128 : static HTAB *RelationSyncCache = NULL;
129 :
130 : static void init_rel_sync_cache(MemoryContext decoding_context);
131 : static void cleanup_rel_sync_cache(TransactionId xid, bool is_commit);
132 : static RelationSyncEntry *get_rel_sync_entry(PGOutputData *data, Oid relid);
133 : static void rel_sync_cache_relation_cb(Datum arg, Oid relid);
134 : static void rel_sync_cache_publication_cb(Datum arg, int cacheid,
135 : uint32 hashvalue);
136 : static void set_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
137 : TransactionId xid);
138 : static bool get_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
139 : TransactionId xid);
140 :
141 : /*
142 : * Specify output plugin callbacks
143 : */
144 : void
145 388 : _PG_output_plugin_init(OutputPluginCallbacks *cb)
146 : {
147 : AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
148 :
149 388 : cb->startup_cb = pgoutput_startup;
150 388 : cb->begin_cb = pgoutput_begin_txn;
151 388 : cb->change_cb = pgoutput_change;
152 388 : cb->truncate_cb = pgoutput_truncate;
153 388 : cb->commit_cb = pgoutput_commit_txn;
154 :
155 388 : cb->prepare_cb = pgoutput_prepare_txn;
156 388 : cb->commit_prepared_cb = pgoutput_commit_prepared_txn;
157 388 : cb->rollback_prepared_cb = pgoutput_rollback_prepared_txn;
158 388 : cb->filter_by_origin_cb = pgoutput_origin_filter;
159 388 : cb->shutdown_cb = pgoutput_shutdown;
160 :
161 : /* transaction streaming */
162 388 : cb->stream_start_cb = pgoutput_stream_start;
163 388 : cb->stream_stop_cb = pgoutput_stream_stop;
164 388 : cb->stream_abort_cb = pgoutput_stream_abort;
165 388 : cb->stream_commit_cb = pgoutput_stream_commit;
166 388 : cb->stream_change_cb = pgoutput_change;
167 388 : cb->stream_truncate_cb = pgoutput_truncate;
168 : /* transaction streaming - two-phase commit */
169 388 : cb->stream_prepare_cb = pgoutput_stream_prepare_txn;
170 388 : }
171 :
172 : static void
173 196 : parse_output_parameters(List *options, uint32 *protocol_version,
174 : List **publication_names, bool *binary,
175 : bool *enable_streaming)
176 : {
177 : ListCell *lc;
178 196 : bool protocol_version_given = false;
179 196 : bool publication_names_given = false;
180 196 : bool binary_option_given = false;
181 196 : bool streaming_given = false;
182 :
183 196 : *binary = false;
184 :
185 630 : foreach(lc, options)
186 : {
187 434 : DefElem *defel = (DefElem *) lfirst(lc);
188 :
189 434 : Assert(defel->arg == NULL || IsA(defel->arg, String));
190 :
191 : /* Check each param, whether or not we recognize it */
192 434 : if (strcmp(defel->defname, "proto_version") == 0)
193 : {
194 : int64 parsed;
195 :
196 196 : if (protocol_version_given)
197 0 : ereport(ERROR,
198 : (errcode(ERRCODE_SYNTAX_ERROR),
199 : errmsg("conflicting or redundant options")));
200 196 : protocol_version_given = true;
201 :
202 196 : if (!scanint8(strVal(defel->arg), true, &parsed))
203 0 : ereport(ERROR,
204 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
205 : errmsg("invalid proto_version")));
206 :
207 196 : if (parsed > PG_UINT32_MAX || parsed < 0)
208 0 : ereport(ERROR,
209 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
210 : errmsg("proto_version \"%s\" out of range",
211 : strVal(defel->arg))));
212 :
213 196 : *protocol_version = (uint32) parsed;
214 : }
215 238 : else if (strcmp(defel->defname, "publication_names") == 0)
216 : {
217 196 : if (publication_names_given)
218 0 : ereport(ERROR,
219 : (errcode(ERRCODE_SYNTAX_ERROR),
220 : errmsg("conflicting or redundant options")));
221 196 : publication_names_given = true;
222 :
223 196 : if (!SplitIdentifierString(strVal(defel->arg), ',',
224 : publication_names))
225 0 : ereport(ERROR,
226 : (errcode(ERRCODE_INVALID_NAME),
227 : errmsg("invalid publication_names syntax")));
228 : }
229 42 : else if (strcmp(defel->defname, "binary") == 0)
230 : {
231 10 : if (binary_option_given)
232 0 : ereport(ERROR,
233 : (errcode(ERRCODE_SYNTAX_ERROR),
234 : errmsg("conflicting or redundant options")));
235 10 : binary_option_given = true;
236 :
237 10 : *binary = defGetBoolean(defel);
238 : }
239 32 : else if (strcmp(defel->defname, "streaming") == 0)
240 : {
241 32 : if (streaming_given)
242 0 : ereport(ERROR,
243 : (errcode(ERRCODE_SYNTAX_ERROR),
244 : errmsg("conflicting or redundant options")));
245 32 : streaming_given = true;
246 :
247 32 : *enable_streaming = defGetBoolean(defel);
248 : }
249 : else
250 0 : elog(ERROR, "unrecognized pgoutput option: %s", defel->defname);
251 : }
252 196 : }
253 :
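For context, the options parsed above arrive from the client as the option list of the walsender's START_REPLICATION command (or of the pg_logical_slot_*_binary_changes() SQL functions). Below is a minimal, hedged sketch of a libpq client exercising this path; the connection string, the slot name "myslot", and the publication name "mypub" are illustrative assumptions, not anything defined in this file, and proto_version '2' assumes the streaming-capable protocol version (LOGICALREP_PROTO_STREAM_VERSION_NUM) is 2.

#include <stdio.h>
#include "libpq-fe.h"

int
main(void)
{
    /* "replication=database" selects the logical replication protocol. */
    PGconn     *conn = PQconnectdb("dbname=postgres replication=database");
    PGresult   *res;

    if (PQstatus(conn) != CONNECTION_OK)
    {
        fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
        return 1;
    }

    /*
     * Stream changes from an existing pgoutput slot.  The option names are
     * the ones parse_output_parameters() accepts; the slot and publication
     * are assumed to exist already.
     */
    res = PQexec(conn,
                 "START_REPLICATION SLOT \"myslot\" LOGICAL 0/0 "
                 "(proto_version '2', publication_names '\"mypub\"', "
                 "binary 'true', streaming 'on')");
    if (PQresultStatus(res) != PGRES_COPY_BOTH)
        fprintf(stderr, "START_REPLICATION failed: %s", PQerrorMessage(conn));

    /* ... consume CopyData messages with PQgetCopyData() here ... */

    PQclear(res);
    PQfinish(conn);
    return 0;
}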
254 : /*
255 : * Initialize this plugin
256 : */
257 : static void
258 388 : pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
259 : bool is_init)
260 : {
261 388 : bool enable_streaming = false;
262 388 : PGOutputData *data = palloc0(sizeof(PGOutputData));
263 :
264 : /* Create our memory context for private allocations. */
265 388 : data->context = AllocSetContextCreate(ctx->context,
266 : "logical replication output context",
267 : ALLOCSET_DEFAULT_SIZES);
268 :
269 388 : ctx->output_plugin_private = data;
270 :
271 : /* This plugin uses binary protocol. */
272 388 : opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
273 :
274 : /*
275 : * This is replication start and not slot initialization.
276 : *
277 : * Parse and validate options passed by the client.
278 : */
279 388 : if (!is_init)
280 : {
281 : /* Parse the params and ERROR if we see any we don't recognize */
282 196 : parse_output_parameters(ctx->output_plugin_options,
283 : &data->protocol_version,
284 : &data->publication_names,
285 : &data->binary,
286 : &enable_streaming);
287 :
288 : /* Check if we support requested protocol */
289 196 : if (data->protocol_version > LOGICALREP_PROTO_MAX_VERSION_NUM)
290 0 : ereport(ERROR,
291 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
292 : errmsg("client sent proto_version=%d but we only support protocol %d or lower",
293 : data->protocol_version, LOGICALREP_PROTO_MAX_VERSION_NUM)));
294 :
295 196 : if (data->protocol_version < LOGICALREP_PROTO_MIN_VERSION_NUM)
296 0 : ereport(ERROR,
297 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
298 : errmsg("client sent proto_version=%d but we only support protocol %d or higher",
299 : data->protocol_version, LOGICALREP_PROTO_MIN_VERSION_NUM)));
300 :
301 196 : if (list_length(data->publication_names) < 1)
302 0 : ereport(ERROR,
303 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
304 : errmsg("publication_names parameter missing")));
305 :
306 : /*
307 : * Decide whether to enable streaming. It is disabled by default, in
308 : * which case we just update the flag in decoding context. Otherwise
309 : * we only allow it with sufficient version of the protocol, and when
310 : * the output plugin supports it.
311 : */
312 196 : if (!enable_streaming)
313 164 : ctx->streaming = false;
314 32 : else if (data->protocol_version < LOGICALREP_PROTO_STREAM_VERSION_NUM)
315 0 : ereport(ERROR,
316 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
317 : errmsg("requested proto_version=%d does not support streaming, need %d or higher",
318 : data->protocol_version, LOGICALREP_PROTO_STREAM_VERSION_NUM)));
319 32 : else if (!ctx->streaming)
320 0 : ereport(ERROR,
321 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
322 : errmsg("streaming requested, but not supported by output plugin")));
323 :
324 : /* Also remember we're currently not streaming any transaction. */
325 196 : in_streaming = false;
326 :
327 : /* Init publication state. */
328 196 : data->publications = NIL;
329 196 : publications_valid = false;
330 196 : CacheRegisterSyscacheCallback(PUBLICATIONOID,
331 : publication_invalidation_cb,
332 : (Datum) 0);
333 :
334 : /* Initialize relation schema cache. */
335 196 : init_rel_sync_cache(CacheMemoryContext);
336 : }
337 : else
338 : {
339 : /* Disable the streaming during the slot initialization mode. */
340 192 : ctx->streaming = false;
341 : }
342 388 : }
343 :
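The is_init branch above corresponds to slot creation rather than replication start, which is why it skips option parsing. Reusing the connection from the earlier sketch, creating such a slot over a replication connection might look like the following hedged snippet (the slot name is again an assumption for illustration):

/* Illustrative only: create the pgoutput slot that START_REPLICATION later uses. */
PGresult   *res = PQexec(conn,
                         "CREATE_REPLICATION_SLOT \"myslot\" LOGICAL pgoutput");

if (PQresultStatus(res) != PGRES_TUPLES_OK)
    fprintf(stderr, "slot creation failed: %s", PQerrorMessage(conn));
PQclear(res);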
344 : /*
345 : * BEGIN callback
346 : */
347 : static void
348 356 : pgoutput_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
349 : {
350 356 : bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
351 :
352 356 : OutputPluginPrepareWrite(ctx, !send_replication_origin);
353 356 : logicalrep_write_begin(ctx->out, txn);
354 :
355 356 : if (send_replication_origin)
356 : {
357 : char *origin;
358 :
359 : /* Message boundary */
360 0 : OutputPluginWrite(ctx, false);
361 0 : OutputPluginPrepareWrite(ctx, true);
362 :
363 : /*----------
364 : * XXX: which behaviour do we want here?
365 : *
366 : * Alternatives:
367 : * - don't send origin message if origin name not found
368 : * (that's what we do now)
369 : * - throw error - that will break replication, not good
370 : * - send some special "unknown" origin
371 : *----------
372 : */
373 0 : if (replorigin_by_oid(txn->origin_id, true, &origin))
374 0 : logicalrep_write_origin(ctx->out, origin, txn->origin_lsn);
375 : }
376 :
377 356 : OutputPluginWrite(ctx, true);
378 356 : }
379 :
380 : /*
381 : * COMMIT callback
382 : */
383 : static void
384 342 : pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
385 : XLogRecPtr commit_lsn)
386 : {
387 342 : OutputPluginUpdateProgress(ctx);
388 :
389 342 : OutputPluginPrepareWrite(ctx, true);
390 342 : logicalrep_write_commit(ctx->out, txn, commit_lsn);
391 342 : OutputPluginWrite(ctx, true);
392 342 : }
393 :
394 : /*
395 : * PREPARE callback
396 : */
397 : static void
398 14 : pgoutput_prepare_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
399 : XLogRecPtr prepare_lsn)
400 : {
401 14 : OutputPluginUpdateProgress(ctx);
402 :
403 14 : OutputPluginPrepareWrite(ctx, true);
404 14 : logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
405 14 : OutputPluginWrite(ctx, true);
406 14 : }
407 :
408 : /*
409 : * COMMIT PREPARED callback
410 : */
411 : static void
412 20 : pgoutput_commit_prepared_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
413 : XLogRecPtr prepare_lsn)
414 : {
415 20 : OutputPluginUpdateProgress(ctx);
416 :
417 20 : OutputPluginPrepareWrite(ctx, true);
418 20 : logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
419 20 : OutputPluginWrite(ctx, true);
420 20 : }
421 :
422 : /*
423 : * ROLLBACK PREPARED callback
424 : */
425 : static void
426 16 : pgoutput_rollback_prepared_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
427 : XLogRecPtr prepare_lsn)
428 : {
429 16 : OutputPluginUpdateProgress(ctx);
430 :
431 16 : OutputPluginPrepareWrite(ctx, true);
432 16 : logicalrep_write_prepare(ctx->out, txn, prepare_lsn);
433 16 : OutputPluginWrite(ctx, true);
434 16 : }
435 :
436 : /*
437 : * Write the current schema of the relation and its ancestor (if any) if not
438 : * done yet.
439 : */
440 : static void
441 318328 : maybe_send_schema(LogicalDecodingContext *ctx,
442 : ReorderBufferTXN *txn, ReorderBufferChange *change,
443 : Relation relation, RelationSyncEntry *relentry)
444 : {
445 : bool schema_sent;
446 318328 : TransactionId xid = InvalidTransactionId;
447 318328 : TransactionId topxid = InvalidTransactionId;
448 :
449 : /*
450 : * Remember the XID of the (sub)transaction for the change. We don't care
451 : * whether it's a top-level transaction or not (we have already sent that
452 : * XID at the start of the current streaming block).
453 : *
454 : * If we're not in a streaming block, just use InvalidTransactionId and
455 : * the write methods will not include it.
456 : */
457 318328 : if (in_streaming)
458 316830 : xid = change->txn->xid;
459 :
460 318328 : if (change->txn->toptxn)
461 25618 : topxid = change->txn->toptxn->xid;
462 : else
463 292710 : topxid = xid;
464 :
465 : /*
466 : * Do we need to send the schema? We do track streamed transactions
467 : * separately, because those may be applied later (and the regular
468 : * transactions won't see their effects until then) and in an order that
469 : * we don't know at this point.
470 : *
471 : * XXX There is scope for optimization here. Currently, we always send the
472 : * schema the first time in a streaming transaction, but we can probably
473 : * avoid that by checking 'relentry->schema_sent' flag. However, before
474 : * doing that we need to study its impact on the case where we have a mix
475 : * of streaming and non-streaming transactions.
476 : */
477 318328 : if (in_streaming)
478 316830 : schema_sent = get_schema_sent_in_streamed_txn(relentry, topxid);
479 : else
480 1498 : schema_sent = relentry->schema_sent;
481 :
482 318328 : if (schema_sent)
483 636460 : return;
484 :
485 : /* If needed, send the ancestor's schema first. */
486 196 : if (relentry->publish_as_relid != RelationGetRelid(relation))
487 : {
488 0 : Relation ancestor = RelationIdGetRelation(relentry->publish_as_relid);
489 0 : TupleDesc indesc = RelationGetDescr(relation);
490 0 : TupleDesc outdesc = RelationGetDescr(ancestor);
491 : MemoryContext oldctx;
492 :
493 : /* Map must live as long as the session does. */
494 0 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
495 0 : relentry->map = convert_tuples_by_name(CreateTupleDescCopy(indesc),
496 : CreateTupleDescCopy(outdesc));
497 0 : MemoryContextSwitchTo(oldctx);
498 0 : send_relation_and_attrs(ancestor, xid, ctx);
499 0 : RelationClose(ancestor);
500 : }
501 :
502 196 : send_relation_and_attrs(relation, xid, ctx);
503 :
504 196 : if (in_streaming)
505 68 : set_schema_sent_in_streamed_txn(relentry, topxid);
506 : else
507 128 : relentry->schema_sent = true;
508 : }
509 :
510 : /*
511 : * Sends a relation
512 : */
513 : static void
514 196 : send_relation_and_attrs(Relation relation, TransactionId xid,
515 : LogicalDecodingContext *ctx)
516 : {
517 196 : TupleDesc desc = RelationGetDescr(relation);
518 : int i;
519 :
520 : /*
521 : * Write out type info if needed. We do that only for user-created types.
522 : * We use FirstGenbkiObjectId as the cutoff, so that we only consider
523 : * objects with hand-assigned OIDs to be "built in", not for instance any
524 : * function or type defined in the information_schema. This is important
525 : * because only hand-assigned OIDs can be expected to remain stable across
526 : * major versions.
527 : */
528 628 : for (i = 0; i < desc->natts; i++)
529 : {
530 432 : Form_pg_attribute att = TupleDescAttr(desc, i);
531 :
532 432 : if (att->attisdropped || att->attgenerated)
533 2 : continue;
534 :
535 430 : if (att->atttypid < FirstGenbkiObjectId)
536 398 : continue;
537 :
538 32 : OutputPluginPrepareWrite(ctx, false);
539 32 : logicalrep_write_typ(ctx->out, xid, att->atttypid);
540 32 : OutputPluginWrite(ctx, false);
541 : }
542 :
543 196 : OutputPluginPrepareWrite(ctx, false);
544 196 : logicalrep_write_rel(ctx->out, xid, relation);
545 196 : OutputPluginWrite(ctx, false);
546 196 : }
547 :
548 : /*
549 : * Sends the decoded DML over the wire.
550 : *
551 : * This is called both in streaming and non-streaming modes.
552 : */
553 : static void
554 326506 : pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
555 : Relation relation, ReorderBufferChange *change)
556 : {
557 326506 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
558 : MemoryContext old;
559 : RelationSyncEntry *relentry;
560 326506 : TransactionId xid = InvalidTransactionId;
561 :
562 326506 : if (!is_publishable_relation(relation))
563 4 : return;
564 :
565 : /*
566 : * Remember the xid for the change in streaming mode. We need to send the
567 : * xid with each change in streaming mode so that the subscriber can
568 : * associate the change with its transaction and, on abort, discard the
569 : * corresponding changes.
570 : */
571 326502 : if (in_streaming)
572 316830 : xid = change->txn->xid;
573 :
574 326502 : relentry = get_rel_sync_entry(data, RelationGetRelid(relation));
575 :
576 : /* First check the table filter */
577 326502 : switch (change->action)
578 : {
579 : case REORDER_BUFFER_CHANGE_INSERT:
580 168716 : if (!relentry->pubactions.pubinsert)
581 6000 : return;
582 162716 : break;
583 : case REORDER_BUFFER_CHANGE_UPDATE:
584 83184 : if (!relentry->pubactions.pubupdate)
585 80 : return;
586 83104 : break;
587 : case REORDER_BUFFER_CHANGE_DELETE:
588 74602 : if (!relentry->pubactions.pubdelete)
589 2104 : return;
590 72498 : break;
591 : default:
592 0 : Assert(false);
593 : }
594 :
595 : /* Avoid leaking memory by using and resetting our own context */
596 318318 : old = MemoryContextSwitchTo(data->context);
597 :
598 318318 : maybe_send_schema(ctx, txn, change, relation, relentry);
599 :
600 : /* Send the data */
601 318318 : switch (change->action)
602 : {
603 : case REORDER_BUFFER_CHANGE_INSERT:
604 : {
605 162716 : HeapTuple tuple = &change->data.tp.newtuple->tuple;
606 :
607 : /* Switch relation if publishing via root. */
608 162716 : if (relentry->publish_as_relid != RelationGetRelid(relation))
609 : {
610 0 : Assert(relation->rd_rel->relispartition);
611 0 : relation = RelationIdGetRelation(relentry->publish_as_relid);
612 : /* Convert tuple if needed. */
613 0 : if (relentry->map)
614 0 : tuple = execute_attr_map_tuple(tuple, relentry->map);
615 : }
616 :
617 162716 : OutputPluginPrepareWrite(ctx, true);
618 162716 : logicalrep_write_insert(ctx->out, xid, relation, tuple,
619 162716 : data->binary);
620 162716 : OutputPluginWrite(ctx, true);
621 162716 : break;
622 : }
623 : case REORDER_BUFFER_CHANGE_UPDATE:
624 : {
625 166208 : HeapTuple oldtuple = change->data.tp.oldtuple ?
626 83104 : &change->data.tp.oldtuple->tuple : NULL;
627 83104 : HeapTuple newtuple = &change->data.tp.newtuple->tuple;
628 :
629 : /* Switch relation if publishing via root. */
630 83104 : if (relentry->publish_as_relid != RelationGetRelid(relation))
631 : {
632 0 : Assert(relation->rd_rel->relispartition);
633 0 : relation = RelationIdGetRelation(relentry->publish_as_relid);
634 : /* Convert tuples if needed. */
635 0 : if (relentry->map)
636 : {
637 0 : oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
638 0 : newtuple = execute_attr_map_tuple(newtuple, relentry->map);
639 : }
640 : }
641 :
642 83104 : OutputPluginPrepareWrite(ctx, true);
643 83104 : logicalrep_write_update(ctx->out, xid, relation, oldtuple,
644 83104 : newtuple, data->binary);
645 83104 : OutputPluginWrite(ctx, true);
646 83104 : break;
647 : }
648 : case REORDER_BUFFER_CHANGE_DELETE:
649 72498 : if (change->data.tp.oldtuple)
650 : {
651 72498 : HeapTuple oldtuple = &change->data.tp.oldtuple->tuple;
652 :
653 : /* Switch relation if publishing via root. */
654 72498 : if (relentry->publish_as_relid != RelationGetRelid(relation))
655 : {
656 0 : Assert(relation->rd_rel->relispartition);
657 0 : relation = RelationIdGetRelation(relentry->publish_as_relid);
658 : /* Convert tuple if needed. */
659 0 : if (relentry->map)
660 0 : oldtuple = execute_attr_map_tuple(oldtuple, relentry->map);
661 : }
662 :
663 72498 : OutputPluginPrepareWrite(ctx, true);
664 72498 : logicalrep_write_delete(ctx->out, xid, relation, oldtuple,
665 72498 : data->binary);
666 72498 : OutputPluginWrite(ctx, true);
667 : }
668 : else
669 0 : elog(DEBUG1, "didn't send DELETE change because of missing oldtuple");
670 72494 : break;
671 : default:
672 0 : Assert(false);
673 : }
674 :
675 : /* Cleanup */
676 318314 : MemoryContextSwitchTo(old);
677 318314 : MemoryContextReset(data->context);
678 : }
679 :
680 : static void
681 14 : pgoutput_truncate(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
682 : int nrelations, Relation relations[], ReorderBufferChange *change)
683 : {
684 14 : PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
685 : MemoryContext old;
686 : RelationSyncEntry *relentry;
687 : int i;
688 : int nrelids;
689 : Oid *relids;
690 14 : TransactionId xid = InvalidTransactionId;
691 :
692 : /* Remember the xid for the change in streaming mode. See pgoutput_change. */
693 14 : if (in_streaming)
694 0 : xid = change->txn->xid;
695 :
696 14 : old = MemoryContextSwitchTo(data->context);
697 :
698 14 : relids = palloc0(nrelations * sizeof(Oid));
699 14 : nrelids = 0;
700 :
701 36 : for (i = 0; i < nrelations; i++)
702 : {
703 22 : Relation relation = relations[i];
704 22 : Oid relid = RelationGetRelid(relation);
705 :
706 22 : if (!is_publishable_relation(relation))
707 0 : continue;
708 :
709 22 : relentry = get_rel_sync_entry(data, relid);
710 :
711 22 : if (!relentry->pubactions.pubtruncate)
712 12 : continue;
713 :
714 : /*
715 : * Don't send partitions if the publication wants to send only the
716 : * root tables through it.
717 : */
718 18 : if (relation->rd_rel->relispartition &&
719 8 : relentry->publish_as_relid != relid)
720 0 : continue;
721 :
722 10 : relids[nrelids++] = relid;
723 10 : maybe_send_schema(ctx, txn, change, relation, relentry);
724 : }
725 :
726 14 : if (nrelids > 0)
727 : {
728 6 : OutputPluginPrepareWrite(ctx, true);
729 12 : logicalrep_write_truncate(ctx->out,
730 : xid,
731 : nrelids,
732 : relids,
733 6 : change->data.truncate.cascade,
734 6 : change->data.truncate.restart_seqs);
735 6 : OutputPluginWrite(ctx, true);
736 : }
737 :
738 14 : MemoryContextSwitchTo(old);
739 14 : MemoryContextReset(data->context);
740 14 : }
741 :
742 : /*
743 : * Currently we always forward.
744 : */
745 : static bool
746 597700 : pgoutput_origin_filter(LogicalDecodingContext *ctx,
747 : RepOriginId origin_id)
748 : {
749 597700 : return false;
750 : }
751 :
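pgoutput currently forwards every change regardless of its replication origin, so the callback above always returns false. As a hedged illustration only (not pgoutput's behaviour), a plugin that wanted to publish just locally originated changes could instead ask the decoding machinery to filter out anything carrying a remote origin id:

/*
 * Illustrative sketch of an alternative filter: returning true tells the
 * decoding machinery to skip the change.
 */
static bool
example_origin_filter(LogicalDecodingContext *ctx, RepOriginId origin_id)
{
    return origin_id != InvalidRepOriginId;
}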
752 : /*
753 : * Shutdown the output plugin.
754 : *
755 : * Note, we don't need to clean up data->context, as it is a child context
756 : * of ctx->context and so will be cleaned up by the logical decoding machinery.
757 : */
758 : static void
759 312 : pgoutput_shutdown(LogicalDecodingContext *ctx)
760 : {
761 312 : if (RelationSyncCache)
762 : {
763 120 : hash_destroy(RelationSyncCache);
764 120 : RelationSyncCache = NULL;
765 : }
766 312 : }
767 :
768 : /*
769 : * Load publications from the list of publication names.
770 : */
771 : static List *
772 70 : LoadPublications(List *pubnames)
773 : {
774 70 : List *result = NIL;
775 : ListCell *lc;
776 :
777 142 : foreach(lc, pubnames)
778 : {
779 72 : char *pubname = (char *) lfirst(lc);
780 72 : Publication *pub = GetPublicationByName(pubname, false);
781 :
782 72 : result = lappend(result, pub);
783 : }
784 :
785 70 : return result;
786 : }
787 :
788 : /*
789 : * Publication cache invalidation callback.
790 : */
791 : static void
792 32 : publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue)
793 : {
794 32 : publications_valid = false;
795 :
796 : /*
797 : * Also invalidate per-relation cache so that next time the filtering info
798 : * is checked it will be updated with the new publication settings.
799 : */
800 32 : rel_sync_cache_publication_cb(arg, cacheid, hashvalue);
801 32 : }
802 :
803 : /*
804 : * START STREAM callback
805 : */
806 : static void
807 796 : pgoutput_stream_start(struct LogicalDecodingContext *ctx,
808 : ReorderBufferTXN *txn)
809 : {
810 796 : bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
811 :
812 : /* we can't nest streaming of transactions */
813 796 : Assert(!in_streaming);
814 :
815 : /*
816 : * If we already sent the first stream for this transaction then don't
817 : * send the origin id in the subsequent streams.
818 : */
819 796 : if (rbtxn_is_streamed(txn))
820 742 : send_replication_origin = false;
821 :
822 796 : OutputPluginPrepareWrite(ctx, !send_replication_origin);
823 796 : logicalrep_write_stream_start(ctx->out, txn->xid, !rbtxn_is_streamed(txn));
824 :
825 796 : if (send_replication_origin)
826 : {
827 : char *origin;
828 :
829 : /* Message boundary */
830 0 : OutputPluginWrite(ctx, false);
831 0 : OutputPluginPrepareWrite(ctx, true);
832 :
833 0 : if (replorigin_by_oid(txn->origin_id, true, &origin))
834 0 : logicalrep_write_origin(ctx->out, origin, InvalidXLogRecPtr);
835 : }
836 :
837 796 : OutputPluginWrite(ctx, true);
838 :
839 : /* we're streaming a chunk of transaction now */
840 796 : in_streaming = true;
841 796 : }
842 :
843 : /*
844 : * STOP STREAM callback
845 : */
846 : static void
847 792 : pgoutput_stream_stop(struct LogicalDecodingContext *ctx,
848 : ReorderBufferTXN *txn)
849 : {
850 : /* we should be streaming a transaction */
851 792 : Assert(in_streaming);
852 :
853 792 : OutputPluginPrepareWrite(ctx, true);
854 792 : logicalrep_write_stream_stop(ctx->out);
855 792 : OutputPluginWrite(ctx, true);
856 :
857 : /* we've stopped streaming a transaction */
858 792 : in_streaming = false;
859 792 : }
860 :
861 : /*
862 : * Notify downstream to discard the streamed transaction (along with all
863 : * its subtransactions, if it's a toplevel transaction).
864 : */
865 : static void
866 26 : pgoutput_stream_abort(struct LogicalDecodingContext *ctx,
867 : ReorderBufferTXN *txn,
868 : XLogRecPtr abort_lsn)
869 : {
870 : ReorderBufferTXN *toptxn;
871 :
872 : /*
873 : * The abort should happen outside a streaming block, even for streamed
874 : * transactions. The transaction has to be marked as streamed, though.
875 : */
876 26 : Assert(!in_streaming);
877 :
878 : /* determine the toplevel transaction */
879 26 : toptxn = (txn->toptxn) ? txn->toptxn : txn;
880 :
881 26 : Assert(rbtxn_is_streamed(toptxn));
882 :
883 26 : OutputPluginPrepareWrite(ctx, true);
884 26 : logicalrep_write_stream_abort(ctx->out, toptxn->xid, txn->xid);
885 26 : OutputPluginWrite(ctx, true);
886 :
887 26 : cleanup_rel_sync_cache(toptxn->xid, false);
888 26 : }
889 :
890 : /*
891 : * Notify downstream to apply the streamed transaction (along with all
892 : * its subtransactions).
893 : */
894 : static void
895 28 : pgoutput_stream_commit(struct LogicalDecodingContext *ctx,
896 : ReorderBufferTXN *txn,
897 : XLogRecPtr commit_lsn)
898 : {
899 : /*
900 : * The commit should happen outside a streaming block, even for streamed
901 : * transactions. The transaction has to be marked as streamed, though.
902 : */
903 28 : Assert(!in_streaming);
904 28 : Assert(rbtxn_is_streamed(txn));
905 :
906 28 : OutputPluginUpdateProgress(ctx);
907 :
908 28 : OutputPluginPrepareWrite(ctx, true);
909 28 : logicalrep_write_stream_commit(ctx->out, txn, commit_lsn);
910 28 : OutputPluginWrite(ctx, true);
911 :
912 28 : cleanup_rel_sync_cache(txn->xid, true);
913 28 : }
914 :
915 : /*
916 : * PREPARE callback (for streaming two-phase commit).
917 : *
918 : * Notify the downstream to prepare the transaction.
919 : */
920 : static void
921 18 : pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
922 : ReorderBufferTXN *txn,
923 : XLogRecPtr prepare_lsn)
924 : {
925 18 : Assert(rbtxn_is_streamed(txn));
926 :
927 18 : OutputPluginUpdateProgress(ctx);
928 18 : OutputPluginPrepareWrite(ctx, true);
929 18 : logicalrep_write_stream_prepare(ctx->out, txn, prepare_lsn);
930 18 : OutputPluginWrite(ctx, true);
931 18 : }
932 :
933 : /*
934 : * Initialize the relation schema sync cache for a decoding session.
935 : *
936 : * The hash table is destroyed at the end of a decoding session. While
937 : * relcache invalidation callbacks remain registered and will still be
938 : * invoked, they will just see the NULL hash table global and take no action.
939 : */
940 : static void
941 196 : init_rel_sync_cache(MemoryContext cachectx)
942 : {
943 : HASHCTL ctl;
944 : MemoryContext old_ctxt;
945 :
946 196 : if (RelationSyncCache != NULL)
947 196 : return;
948 :
949 : /* Make a new hash table for the cache */
950 196 : MemSet(&ctl, 0, sizeof(ctl));
951 196 : ctl.keysize = sizeof(Oid);
952 196 : ctl.entrysize = sizeof(RelationSyncEntry);
953 196 : ctl.hcxt = cachectx;
954 :
955 196 : old_ctxt = MemoryContextSwitchTo(cachectx);
956 196 : RelationSyncCache = hash_create("logical replication output relation cache",
957 : 128, &ctl,
958 : HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
959 196 : (void) MemoryContextSwitchTo(old_ctxt);
960 :
961 196 : Assert(RelationSyncCache != NULL);
962 :
963 196 : CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0);
964 196 : CacheRegisterSyscacheCallback(PUBLICATIONRELMAP,
965 : rel_sync_cache_publication_cb,
966 : (Datum) 0);
967 : }
968 :
969 : /*
970 : * We expect relatively small number of streamed transactions.
971 : */
972 : static bool
973 316830 : get_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
974 : {
975 : ListCell *lc;
976 :
977 629592 : foreach(lc, entry->streamed_txns)
978 : {
979 629524 : if (xid == (uint32) lfirst_int(lc))
980 316762 : return true;
981 : }
982 :
983 68 : return false;
984 : }
985 :
986 : /*
987 : * Record in the relation sync entry that we have already sent the schema
988 : * of the relation for the given xid.
989 : */
990 : static void
991 68 : set_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid)
992 : {
993 : MemoryContext oldctx;
994 :
995 68 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
996 :
997 68 : entry->streamed_txns = lappend_int(entry->streamed_txns, xid);
998 :
999 68 : MemoryContextSwitchTo(oldctx);
1000 68 : }
1001 :
1002 : /*
1003 : * Find or create entry in the relation schema cache.
1004 : *
1005 : * This looks up publications that the given relation is directly or
1006 : * indirectly part of (the latter if it's really the relation's ancestor that
1007 : * is part of a publication) and fills in the found entry with the information
1008 : * about which operations to publish and whether to use an ancestor's schema
1009 : * when publishing.
1010 : */
1011 : static RelationSyncEntry *
1012 326524 : get_rel_sync_entry(PGOutputData *data, Oid relid)
1013 : {
1014 : RelationSyncEntry *entry;
1015 326524 : bool am_partition = get_rel_relispartition(relid);
1016 326524 : char relkind = get_rel_relkind(relid);
1017 : bool found;
1018 : MemoryContext oldctx;
1019 :
1020 326524 : Assert(RelationSyncCache != NULL);
1021 :
1022 : /* Find cached relation info, creating if not found */
1023 326524 : entry = (RelationSyncEntry *) hash_search(RelationSyncCache,
1024 : (void *) &relid,
1025 : HASH_ENTER, &found);
1026 326524 : Assert(entry != NULL);
1027 :
1028 : /* Not found means schema wasn't sent */
1029 326524 : if (!found)
1030 : {
1031 : /* immediately make a new entry valid enough to satisfy callbacks */
1032 134 : entry->schema_sent = false;
1033 134 : entry->streamed_txns = NIL;
1034 134 : entry->replicate_valid = false;
1035 134 : entry->pubactions.pubinsert = entry->pubactions.pubupdate =
1036 134 : entry->pubactions.pubdelete = entry->pubactions.pubtruncate = false;
1037 134 : entry->publish_as_relid = InvalidOid;
1038 : }
1039 :
1040 : /* Validate the entry */
1041 326524 : if (!entry->replicate_valid)
1042 : {
1043 136 : List *pubids = GetRelationPublications(relid);
1044 : ListCell *lc;
1045 136 : Oid publish_as_relid = relid;
1046 :
1047 : /* Reload publications if needed before use. */
1048 136 : if (!publications_valid)
1049 : {
1050 70 : oldctx = MemoryContextSwitchTo(CacheMemoryContext);
1051 70 : if (data->publications)
1052 4 : list_free_deep(data->publications);
1053 :
1054 70 : data->publications = LoadPublications(data->publication_names);
1055 70 : MemoryContextSwitchTo(oldctx);
1056 70 : publications_valid = true;
1057 : }
1058 :
1059 : /*
1060 : * Build publication cache. We can't use one provided by relcache as
1061 : * relcache considers all publications the given relation is in, but here
1062 : * we only need to consider ones that the subscriber requested.
1063 : */
1064 164 : foreach(lc, data->publications)
1065 : {
1066 138 : Publication *pub = lfirst(lc);
1067 138 : bool publish = false;
1068 :
1069 138 : if (pub->alltables)
1070 : {
1071 62 : publish = true;
1072 62 : if (pub->pubviaroot && am_partition)
1073 0 : publish_as_relid = llast_oid(get_partition_ancestors(relid));
1074 : }
1075 :
1076 138 : if (!publish)
1077 : {
1078 76 : bool ancestor_published = false;
1079 :
1080 : /*
1081 : * For a partition, check if any of the ancestors are
1082 : * published. If so, note down the topmost ancestor that is
1083 : * published via this publication, which will be used as the
1084 : * relation via which to publish the partition's changes.
1085 : */
1086 76 : if (am_partition)
1087 : {
1088 6 : List *ancestors = get_partition_ancestors(relid);
1089 : ListCell *lc2;
1090 :
1091 : /*
1092 : * Find the "topmost" ancestor that is in this
1093 : * publication.
1094 : */
1095 12 : foreach(lc2, ancestors)
1096 : {
1097 6 : Oid ancestor = lfirst_oid(lc2);
1098 :
1099 6 : if (list_member_oid(GetRelationPublications(ancestor),
1100 : pub->oid))
1101 : {
1102 6 : ancestor_published = true;
1103 6 : if (pub->pubviaroot)
1104 0 : publish_as_relid = ancestor;
1105 : }
1106 : }
1107 : }
1108 :
1109 76 : if (list_member_oid(pubids, pub->oid) || ancestor_published)
1110 62 : publish = true;
1111 : }
1112 :
1113 : /*
1114 : * Don't publish changes for partitioned tables, because
1115 : * publishing those of their partitions suffices, unless partition
1116 : * changes won't be published due to pubviaroot being set.
1117 : */
1118 138 : if (publish &&
1119 2 : (relkind != RELKIND_PARTITIONED_TABLE || pub->pubviaroot))
1120 : {
1121 122 : entry->pubactions.pubinsert |= pub->pubactions.pubinsert;
1122 122 : entry->pubactions.pubupdate |= pub->pubactions.pubupdate;
1123 122 : entry->pubactions.pubdelete |= pub->pubactions.pubdelete;
1124 122 : entry->pubactions.pubtruncate |= pub->pubactions.pubtruncate;
1125 : }
1126 :
1127 248 : if (entry->pubactions.pubinsert && entry->pubactions.pubupdate &&
1128 220 : entry->pubactions.pubdelete && entry->pubactions.pubtruncate)
1129 110 : break;
1130 : }
1131 :
1132 136 : list_free(pubids);
1133 :
1134 136 : entry->publish_as_relid = publish_as_relid;
1135 136 : entry->replicate_valid = true;
1136 : }
1137 :
1138 326524 : return entry;
1139 : }
1140 :
1141 : /*
1142 : * Cleanup list of streamed transactions and update the schema_sent flag.
1143 : *
1144 : * When a streamed transaction commits or aborts, we need to remove the
1145 : * toplevel XID from the schema cache. If the transaction aborted, the
1146 : * subscriber will simply throw away the schema records we streamed, so
1147 : * we don't need to do anything else.
1148 : *
1149 : * If the transaction is committed, the subscriber will update the relation
1150 : * cache - so tweak the schema_sent flag accordingly.
1151 : */
1152 : static void
1153 54 : cleanup_rel_sync_cache(TransactionId xid, bool is_commit)
1154 : {
1155 : HASH_SEQ_STATUS hash_seq;
1156 : RelationSyncEntry *entry;
1157 : ListCell *lc;
1158 :
1159 54 : Assert(RelationSyncCache != NULL);
1160 :
1161 54 : hash_seq_init(&hash_seq, RelationSyncCache);
1162 162 : while ((entry = hash_seq_search(&hash_seq)) != NULL)
1163 : {
1164 : /*
1165 : * We can set the schema_sent flag for an entry that has the committed
1166 : * xid in its list, as that ensures the subscriber already has the
1167 : * corresponding schema and we don't need to send it again unless there
1168 : * is an invalidation for that relation.
1169 : */
1170 74 : foreach(lc, entry->streamed_txns)
1171 : {
1172 48 : if (xid == (uint32) lfirst_int(lc))
1173 : {
1174 28 : if (is_commit)
1175 20 : entry->schema_sent = true;
1176 :
1177 28 : entry->streamed_txns =
1178 28 : foreach_delete_current(entry->streamed_txns, lc);
1179 28 : break;
1180 : }
1181 : }
1182 : }
1183 54 : }
1184 :
1185 : /*
1186 : * Relcache invalidation callback
1187 : */
1188 : static void
1189 1044 : rel_sync_cache_relation_cb(Datum arg, Oid relid)
1190 : {
1191 : RelationSyncEntry *entry;
1192 :
1193 : /*
1194 : * We can get here if the plugin was used in the SQL interface, as the
1195 : * RelationSyncCache is destroyed when decoding finishes, but there
1196 : * is no way to unregister the relcache invalidation callback.
1197 : */
1198 1044 : if (RelationSyncCache == NULL)
1199 1044 : return;
1200 :
1201 : /*
1202 : * Nobody keeps pointers to entries in this hash table around outside
1203 : * logical decoding callback calls - but invalidation events can come in
1204 : * *during* a callback if we access the relcache in the callback. Because
1205 : * of that we must mark the cache entry as invalid but not remove it from
1206 : * the hash while it could still be referenced, then prune it at a later
1207 : * safe point.
1208 : *
1209 : * Getting invalidations for relations that aren't in the table is
1210 : * entirely normal, since there's no way to unregister for an invalidation
1211 : * event. So we don't care if it's found or not.
1212 : */
1213 1044 : entry = (RelationSyncEntry *) hash_search(RelationSyncCache, &relid,
1214 : HASH_FIND, NULL);
1215 :
1216 : /*
1217 : * Reset schema sent status as the relation definition may have changed.
1218 : */
1219 1044 : if (entry != NULL)
1220 : {
1221 242 : entry->schema_sent = false;
1222 242 : list_free(entry->streamed_txns);
1223 242 : entry->streamed_txns = NIL;
1224 : }
1225 : }
1226 :
1227 : /*
1228 : * Publication relation map syscache invalidation callback
1229 : */
1230 : static void
1231 70 : rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
1232 : {
1233 : HASH_SEQ_STATUS status;
1234 : RelationSyncEntry *entry;
1235 :
1236 : /*
1237 : * We can get here if the plugin was used in the SQL interface, as the
1238 : * RelationSyncCache is destroyed when decoding finishes, but there
1239 : * is no way to unregister the relcache invalidation callback.
1240 : */
1241 70 : if (RelationSyncCache == NULL)
1242 70 : return;
1243 :
1244 : /*
1245 : * There is no way to find which entry in our cache the hash belongs to so
1246 : * mark the whole cache as invalid.
1247 : */
1248 70 : hash_seq_init(&status, RelationSyncCache);
1249 336 : while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
1250 196 : entry->replicate_valid = false;
1251 : }
|