From 0d0157cfec25d00c7e484199d544275a4814b453 Mon Sep 17 00:00:00 2001 From: Kirk Wolak Date: Tue, 16 Jun 2026 19:49:07 -0400 Subject: [PATCH] pg_dump: Add table-data placeholders for excluded data When dumping in directory format, the new --create-table-data-placeholders option makes --exclude-table-data and --exclude-table-data-and-children preserve TABLE DATA archive entries for excluded tables while writing an empty COPY data file containing only the COPY end marker. This supports workflows that generate or replace selected table data files outside pg_dump before running pg_restore. The option is limited to directory-format dumps using COPY data. Sequences and materialized views keep the existing exclusion behavior, because they are not restored from replaceable table data files. Documentation and TAP tests included. --- doc/src/sgml/ref/pg_dump.sgml | 50 ++++ src/bin/pg_dump/meson.build | 1 + src/bin/pg_dump/pg_backup.h | 1 + src/bin/pg_dump/pg_dump.c | 82 ++++++- src/bin/pg_dump/pg_dump.h | 1 + .../t/012_pg_dump_empty_excluded_data.pl | 228 ++++++++++++++++++ 6 files changed, 358 insertions(+), 5 deletions(-) create mode 100644 src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index ae1bc14d2f2..1a47bece49e 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -720,6 +720,43 @@ PostgreSQL documentation + + --create-table-data-placeholders + + + When used together with --exclude-table-data or + --exclude-table-data-and-children in directory output + format (-Fd or --format=directory), + still create a TABLE DATA archive entry (including + the usual COPY statement) for each excluded table, + but do not dump the table's rows. A data file named after the table's + dump ID (for example 3541.dat) is created + containing only the COPY end marker + (\.) as a placeholder. + + + This option is intended for workflows where excluded table data is + loaded separately after the dump is taken, for example by replacing + the placeholder data file with externally produced data before + restore. 
Excluded sequences and materialized views are not affected: + their data is still omitted from the dump, as with + --exclude-table-data alone. + + + --create-table-data-placeholders cannot be used + without --exclude-table-data or + --exclude-table-data-and-children. It is only + supported when directory output format is selected + (-Fd or --format=directory) and + data is being dumped as COPY (the default). + It cannot be used with --inserts, + --column-inserts, --rows-per-insert, + -s/--schema-only, or + --no-data. + + + + @@ -830,6 +867,13 @@ PostgreSQL documentation To exclude data for all tables in the database, see --schema-only or --statistics-only. + + By default, tables matched by this option are omitted from the + dump's TABLE DATA section entirely. To still + create a TABLE DATA entry with a placeholder data + file in directory format, see + --create-table-data-placeholders. + @@ -842,6 +886,12 @@ PostgreSQL documentation child tables of the table(s) matching the pattern. + + As with --exclude-table-data, matched tables are + omitted from the TABLE DATA section unless + --create-table-data-placeholders is also + specified. + diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index 79bd5036841..218fceb54e0 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -104,6 +104,7 @@ tests += { 't/005_pg_dump_filterfile.pl', 't/006_pg_dump_compress.pl', 't/010_dump_connstr.pl', + 't/012_pg_dump_empty_excluded_data.pl', ], }, } diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index c7bdda1deed..512189276f4 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -198,6 +198,7 @@ typedef struct _dumpOptions int use_setsessauth; int enable_row_security; int load_via_partition_root; + bool create_table_data_placeholders; /* default, if no "inclusion" switches appear, is to dump everything */ bool include_everything; diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index c56437d6057..7603f05313d 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -493,6 +493,7 @@ main(int argc, char **argv) {"attribute-inserts", no_argument, &dopt.column_inserts, 1}, {"binary-upgrade", no_argument, &dopt.binary_upgrade, 1}, {"column-inserts", no_argument, 
&dopt.column_inserts, 1}, + {"create-table-data-placeholders", no_argument, NULL, 26}, {"disable-dollar-quoting", no_argument, &dopt.disable_dollar_quoting, 1}, {"disable-triggers", no_argument, &dopt.disable_triggers, 1}, {"enable-row-security", no_argument, &dopt.enable_row_security, 1}, @@ -799,6 +800,10 @@ main(int argc, char **argv) dopt.restrict_key = pg_strdup(optarg); break; + case 26: + dopt.create_table_data_placeholders = true; + break; + default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -886,9 +891,34 @@ main(int argc, char **argv) "--on-conflict-do-nothing", "--inserts", "--rows-per-insert", "--column-inserts"); + if (dopt.create_table_data_placeholders && + tabledata_exclude_patterns.head == NULL && + tabledata_exclude_patterns_and_children.head == NULL) + pg_fatal("option %s requires option %s or %s", + "--create-table-data-placeholders", + "--exclude-table-data", "--exclude-table-data-and-children"); + + if (dopt.create_table_data_placeholders && + dopt.dump_inserts != 0) + pg_fatal("option %s cannot be used with %s, %s, or %s", + "--create-table-data-placeholders", + "--inserts", "--column-inserts", "--rows-per-insert"); + + check_mut_excl_opts(dopt.create_table_data_placeholders, + "--create-table-data-placeholders", + schema_only, "-s/--schema-only"); + check_mut_excl_opts(dopt.create_table_data_placeholders, + "--create-table-data-placeholders", + no_data, "--no-data"); + /* Identify archive format to emit */ archiveFormat = parseArchiveFormat(format, &archiveMode); + if (dopt.create_table_data_placeholders && + archiveFormat != archDirectory) + pg_fatal("option %s is only supported by the directory format", + "--create-table-data-placeholders"); + /* archiveFormat specific setup */ if (archiveFormat == archNull) { @@ -1329,6 +1359,10 @@ help(const char *progname) printf(_(" -x, --no-privileges do not dump privileges (grant/revoke)\n")); printf(_(" --binary-upgrade 
for use by upgrade utilities only\n")); printf(_(" --column-inserts dump data as INSERT commands with column names\n")); + printf(_(" --create-table-data-placeholders\n" + " create TABLE DATA placeholders for tables\n" + " excluded with --exclude-table-data\n" + " (directory format and COPY data only)\n")); printf(_(" --disable-dollar-quoting disable dollar quoting, use SQL standard quoting\n")); printf(_(" --disable-triggers disable triggers during data-only restore\n")); printf(_(" --enable-row-security enable row security (dump only content user has\n" @@ -2355,6 +2389,29 @@ selectDumpableObject(DumpableObject *dobj, Archive *fout) DUMP_COMPONENT_ALL : DUMP_COMPONENT_NONE; } +/* + * Dump an empty data file for a table whose data was excluded with + * --exclude-table-data but --create-table-data-placeholders was set. + */ +static int +dumpTableData_empty(Archive *fout, const void *dcontext) +{ + const TableDataInfo *tdinfo = dcontext; + const TableInfo *tbinfo = tdinfo->tdtable; + + pg_log_info("creating table data placeholder for excluded table \"%s.%s\"", + tbinfo->dobj.namespace->dobj.name, tbinfo->dobj.name); + + /* + * Emit the COPY end marker, as dumpTableData_copy() does for an empty + * table. Archive formats store raw COPY data in separate blobs/files. + */ + if (fout->dopt->dump_inserts == 0) + archprintf(fout, "\\.\n\n\n"); + + return 1; +} + /* * Dump a table's contents for loading using the COPY command * - this routine is called by the Archiver when it wants the table @@ -2895,7 +2952,8 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo) if (dopt->dump_inserts == 0) { /* Dump/restore using COPY */ - dumpFn = dumpTableData_copy; + dumpFn = tdinfo->tableDataPlaceholder ? 
+ dumpTableData_empty : dumpTableData_copy; /* must use 2 steps here 'cause fmtId is nonreentrant */ printfPQExpBuffer(copyBuf, "COPY %s ", copyFrom); @@ -2906,7 +2964,8 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo) else { /* Restore using INSERT */ - dumpFn = dumpTableData_insert; + dumpFn = tdinfo->tableDataPlaceholder ? + dumpTableData_empty : dumpTableData_insert; copyStmt = NULL; } @@ -3026,6 +3085,7 @@ static void makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) { TableDataInfo *tdinfo; + bool data_excluded; /* * Nothing to do if we already decided to dump the table. This will @@ -3056,9 +3116,20 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) return; /* Check that the data is not explicitly excluded */ - if (simple_oid_list_member(&tabledata_exclude_oids, - tbinfo->dobj.catId.oid)) - return; + data_excluded = simple_oid_list_member(&tabledata_exclude_oids, + tbinfo->dobj.catId.oid); + if (data_excluded) + { + /* + * This option only preserves COPY-backed table data archive entries. + * Sequences and materialized views do not have replaceable .dat files + * in this workflow, so preserve existing exclusion behavior for them. 
+ */ + if (!dopt->create_table_data_placeholders || + tbinfo->relkind == RELKIND_SEQUENCE || + tbinfo->relkind == RELKIND_MATVIEW) + return; + } /* OK, let's dump it */ tdinfo = pg_malloc_object(TableDataInfo); @@ -3081,6 +3152,7 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) tdinfo->dobj.namespace = tbinfo->dobj.namespace; tdinfo->tdtable = tbinfo; tdinfo->filtercond = NULL; /* might get set later */ + tdinfo->tableDataPlaceholder = data_excluded; addObjectDependency(&tdinfo->dobj, tbinfo->dobj.dumpId); /* A TableDataInfo contains data, of course */ diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 5a6726d8b12..12c859002a2 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -413,6 +413,7 @@ typedef struct _tableDataInfo DumpableObject dobj; TableInfo *tdtable; /* link to table to dump */ char *filtercond; /* WHERE condition to limit rows dumped */ + bool tableDataPlaceholder; /* excluded by --exclude-table-data */ } TableDataInfo; typedef struct _indxInfo diff --git a/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl b/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl new file mode 100644 index 00000000000..f0c13388eac --- /dev/null +++ b/src/bin/pg_dump/t/012_pg_dump_empty_excluded_data.pl @@ -0,0 +1,228 @@ + +# Copyright (c) 2026, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $tempdir = PostgreSQL::Test::Utils::tempdir; + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +$node->start; + +my $src_db = 'empty_excl_src'; +my $dst_db = 'empty_excl_dst'; +my $dumpdir = "$tempdir/empty_excl_dump"; + +$node->safe_psql( + 'postgres', + qq{CREATE DATABASE $src_db; + \\c $src_db + CREATE TABLE keep_data(id int); + CREATE TABLE skip_data(id int); + INSERT INTO keep_data VALUES (1), (2); + INSERT INTO skip_data VALUES (10), (20), (30);}); + +# Flag without 
--exclude-table-data must fail.
+# Match the error text rather than accepting any failure, so that an
+# unrelated problem (for instance a misspelled option) cannot make this
+# test pass.
+$node->command_fails_like(
+	[
+		'pg_dump',
+		'--no-sync',
+		'--format' => 'directory',
+		'--file' => "$tempdir/bad_dump",
+		'--create-table-data-placeholders',
+		$node->connstr($src_db),
+	],
+	qr/create-table-data-placeholders requires option --exclude-table-data/,
+	'create-table-data-placeholders requires exclude-table-data');
+
+# Flag requires directory output format.
+$node->command_fails_like(
+	[
+		'pg_dump',
+		'--no-sync',
+		'--format' => 'custom',
+		'--file' => "$tempdir/bad_custom.dump",
+		'--exclude-table-data' => 'skip_data',
+		'--create-table-data-placeholders',
+		$node->connstr($src_db),
+	],
+	qr/create-table-data-placeholders.*only supported by the directory format/,
+	'create-table-data-placeholders requires directory format');
+
+# Flag requires COPY-format data, not INSERT output.  Each case adds one of
+# the switches that select INSERT output to an otherwise valid command line.
+my @incompatible_opts = (
+	{ label => 'inserts', extra => [ '--inserts' ] },
+	{ label => 'column-inserts', extra => [ '--column-inserts' ] },
+	{ label => 'rows-per-insert', extra => [ '--rows-per-insert' => 10 ] },
+);
+for my $case (@incompatible_opts)
+{
+	$node->command_fails_like(
+		[
+			'pg_dump',
+			'--no-sync',
+			'--format' => 'directory',
+			'--file' => "$tempdir/bad_$case->{label}",
+			'--exclude-table-data' => 'skip_data',
+			'--create-table-data-placeholders',
+			@{ $case->{extra} },
+			$node->connstr($src_db),
+		],
+		qr/create-table-data-placeholders.*cannot be used with/,
+		"create-table-data-placeholders rejects $case->{label}");
+}
+
+# Flag is incompatible with schema-only and no-data dumps.
+$node->command_fails(
+	[
+		'pg_dump',
+		'--no-sync',
+		'--format' => 'directory',
+		'--file' => "$tempdir/bad_schema_only",
+		'--exclude-table-data' => 'skip_data',
+		'--create-table-data-placeholders',
+		'--schema-only',
+		$node->connstr($src_db),
+	],
+	'create-table-data-placeholders rejects schema-only');
+
+$node->command_fails(
+	[
+		'pg_dump',
+		'--no-sync',
+		'--format' => 'directory',
+		'--file' => "$tempdir/bad_no_data",
+		'--exclude-table-data' => 'skip_data',
+		'--create-table-data-placeholders',
+		'--no-data',
+		$node->connstr($src_db),
+	],
+	'create-table-data-placeholders rejects no-data');
+
+# Take an uncompressed directory dump so the per-table data files can be
+# inspected as plain text below.
+$node->command_ok(
+	[
+		'pg_dump',
+		'--no-sync',
+		'--format' => 'directory',
+		'--compress' => 'none',
+		'--file' => $dumpdir,
+		'--exclude-table-data' => 'skip_data',
+		'--create-table-data-placeholders',
+		$node->connstr($src_db),
+	],
+	'directory dump with table data placeholders for excluded tables');
+
+$node->command_like(
+	[ 'pg_restore', '--list', $dumpdir ],
+	qr/TABLE DATA public skip_data/,
+	'TOC lists TABLE DATA for excluded table');
+
+# Find the dump ID of the excluded table's TABLE DATA entry; the data file
+# in the archive directory is named after it.
+my ($toc_list) = run_command([ 'pg_restore', '--list', $dumpdir ]);
+my $skip_dumpid;
+foreach my $line (split /\n/, $toc_list)
+{
+	if ($line =~ /^(\d+);.*TABLE DATA public skip_data/)
+	{
+		$skip_dumpid = $1;
+		last;
+	}
+}
+ok(defined $skip_dumpid, 'found dump ID for excluded table');
+
+my @datfiles = grep { $_ !~ /\/toc\.dat$/ } glob("$dumpdir/*.dat");
+cmp_ok(scalar(@datfiles), '==', 2, 'two table data files in dump');
+
+# The remaining file checks need the data file name.  Skip them when the dump
+# ID is unknown: warnings are fatal in this script, so interpolating an
+# undefined value would abort the run instead of reporting a failure.
+SKIP:
+{
+	skip 'dump ID for excluded table not found', 2
+	  unless defined $skip_dumpid;
+
+	my $skip_dat = "$dumpdir/${skip_dumpid}.dat";
+
+	# Anchor both ends so that any row data after the end marker is caught.
+	like(
+		slurp_file($skip_dat),
+		qr/\A\\\.\n+\z/,
+		'excluded table data file contains only COPY end marker');
+
+	my ($keep_dat) = grep { $_ ne $skip_dat } @datfiles;
+	ok(defined $keep_dat && -s $keep_dat > 0,
+		'included table has a non-empty data file');
+}
+
+$node->safe_psql('postgres', "CREATE DATABASE $dst_db");
+
+$node->command_ok(
+	[
+		'pg_restore',
+		'--dbname' => $node->connstr($dst_db),
+		$dumpdir,
+	],
+	'restore dump with table data placeholder file');
+
+is(
+	$node->safe_psql($dst_db, 'SELECT count(*) FROM keep_data'),
+	'2',
+	'included table data restored');
+is(
+	$node->safe_psql($dst_db, 'SELECT count(*) FROM skip_data'),
+	'0',
+	'excluded table restored with no rows');
+
+# Sequences and materialized views keep stock exclude behavior with the flag.
+my $mixed_db = 'empty_excl_mixed';
+my $mixed_dumpdir = "$tempdir/empty_excl_mixed_dump";
+
+$node->safe_psql(
+	'postgres',
+	qq{CREATE DATABASE $mixed_db;
+	 \\c $mixed_db
+	 CREATE TABLE mv_base(id int);
+	 INSERT INTO mv_base VALUES (1), (2);
+	 CREATE SEQUENCE excluded_seq START 100;
+	 SELECT nextval('excluded_seq');
+	 CREATE MATERIALIZED VIEW excluded_mv AS SELECT * FROM mv_base;
+	 REFRESH MATERIALIZED VIEW excluded_mv;});
+
+$node->command_ok(
+	[
+		'pg_dump',
+		'--no-sync',
+		'--format' => 'directory',
+		'--compress' => 'none',
+		'--file' => $mixed_dumpdir,
+		'--exclude-table-data' => 'excluded_seq',
+		'--exclude-table-data' => 'excluded_mv',
+		'--create-table-data-placeholders',
+		$node->connstr($mixed_db),
+	],
+	'directory dump with placeholders does not affect seq or mat view exclude');
+
+my ($mixed_list) = run_command([ 'pg_restore', '--list', $mixed_dumpdir ]);
+
+# Prove the listing is usable first; otherwise the negative checks below
+# would also pass on empty output.
+like(
+	$mixed_list,
+	qr/TABLE DATA public mv_base/,
+	'TOC of mixed dump lists TABLE DATA for ordinary table');
+unlike(
+	$mixed_list,
+	qr/SEQUENCE SET public excluded_seq/,
+	'TOC omits SEQUENCE SET for excluded sequence with placeholders flag');
+unlike(
+	$mixed_list,
+	qr/MATERIALIZED VIEW DATA public excluded_mv/,
+	'TOC omits MATERIALIZED VIEW DATA for excluded mat view with placeholders flag');
+
+my $mixed_dst = 'empty_excl_mixed_dst';
+$node->safe_psql('postgres', "CREATE DATABASE $mixed_dst");
+
+$node->command_ok(
+	[
+		'pg_restore',
+		'--dbname' => $node->connstr($mixed_dst),
+		$mixed_dumpdir,
+	],
+	'restore mixed dump with excluded seq and mat view');
+
+# The source sequence was advanced once, so a restored SEQUENCE SET would make
+# the next value 101; a fresh sequence starts at 100.
+is(
+	$node->safe_psql($mixed_dst, q{SELECT nextval('excluded_seq')}),
+	'100',
+	'excluded sequence not restored from source SEQUENCE SET');
+is(
+	$node->safe_psql($mixed_dst,
+		q{SELECT relispopulated FROM pg_class WHERE relname = 'excluded_mv'}),
+	'f',
+	'excluded materialized view restored unpopulated');
+
+done_testing();
-- 
2.43.0