diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index 0f09d82..a8daa07 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -1090,6 +1090,22 @@ SELECT pg_stop_backup(); + The contents of the pg_dynshmem/, pg_stat_tmp/, + pg_notify/, pg_serial/, + pg_snapshots/, and pg_subtrans/ directories can + be omitted from the backup as they will be initialized on postmaster + startup. If stats_temp_directory is set and is + under the database cluster directory then the contents of the directory + specified by stats_temp_directory can also be omitted. + + + + Any file or directory beginning with pgsql_tmp can be + omitted from the backup. These files are removed on postmaster start and + the directories will be recreated as needed. + + + The backup label file includes the label string you gave to pg_start_backup, as well as the time at which pg_start_backup was run, and diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 68b0941..d65687f 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2059,17 +2059,26 @@ The commands accepted in walsender mode are: - postmaster.pid + postmaster.pid and postmaster.opts - postmaster.opts + postgresql.auto.conf.tmp - various temporary files created during the operation of the PostgreSQL server + backup_label and tablespace_map. If these + files exist they belong to an exclusive backup and are not applicable + to the base backup. + + + + + Various temporary files and directories created during the operation of + the PostgreSQL server, i.e. any file or directory beginning with + pgsql_tmp. @@ -2082,7 +2091,11 @@ The commands accepted in walsender mode are: - pg_replslot is copied as an empty directory. + pg_replslot, pg_dynshmem, + pg_stat_tmp, pg_notify, + pg_serial, pg_snapshots, and + pg_subtrans are copied as empty directories (even if they + are symbolic links).
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 9f1eae1..984ea5b 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -610,10 +610,8 @@ PostgreSQL documentation The backup will include all files in the data directory and tablespaces, including the configuration files and any additional files placed in the - directory by third parties. But only regular files and directories are - copied. Symbolic links (other than those used for tablespaces) and special - device files are skipped. (See the streaming replication protocol for - the precise details.) + directory by third parties, with certain exceptions. (See + the streaming replication protocol for the complete list of exceptions.) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index da9b7a6..8412472 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -30,6 +30,7 @@ #include "replication/basebackup.h" #include "replication/walsender.h" #include "replication/walsender_private.h" +#include "storage/dsm_impl.h" #include "storage/fd.h" #include "storage/ipc.h" #include "utils/builtins.h" @@ -55,8 +56,10 @@ static int64 sendDir(char *path, int basepathlen, bool sizeonly, static bool sendFile(char *readfilename, char *tarfilename, struct stat * statbuf, bool missing_ok); static void sendFileWithContent(const char *filename, const char *content); -static void _tarWriteHeader(const char *filename, const char *linktarget, - struct stat * statbuf); +static int64 _tarWriteHeader(const char *filename, const char *linktarget, + struct stat * statbuf, bool sizeonly); +static int64 _tarWriteDir(char *pathbuf, int basepathlen, struct stat *statbuf, + bool sizeonly); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); static void base_backup_cleanup(int code, Datum arg); @@ -95,6 +98,74 @@ static int64 elapsed_min_unit; static int64 throttled_last; /* + * The contents of 
these directories are removed or recreated during server + * start so they will not be included in the backup. The directory entry + * will be included to preserve permissions. + */ +static const char *const excludeDirContents[] = +{ + /* + * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even + * when stats_temp_directory is set because PGSS_TEXT_FILE is always created + * there. + */ + PG_STAT_TMP_DIR, + + /* + * It is generally not useful to back up the contents of this directory even + * if the intention is to restore to another master. See backup.sgml for a + * more detailed description. + */ + "pg_replslot", + + /* Contents removed on startup, see dsm_cleanup_for_mmap(). */ + PG_DYNSHMEM_DIR, + + /* Contents removed on startup, see AsyncShmemInit(). */ + "pg_notify", + + /* + * Old contents are loaded for possible debugging but are not required for + * normal operation, see OldSerXidInit(). + */ + "pg_serial", + + /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */ + "pg_snapshots", + + /* Contents zeroed on startup, see StartupSUBTRANS(). */ + "pg_subtrans", + + /* Terminate list. */ + NULL +}; + +/* + * Files that should not be included in the backup. + */ +static const char *const excludeFiles[] = +{ + /* Skip auto conf temporary file. */ + PG_AUTOCONF_FILENAME ".tmp", + + /* + * If there's a backup_label or tablespace_map file, it belongs to a backup + * started by the user with pg_start_backup(). It is *not* correct for this + * backup; our backup_label/tablespace_map is injected into the tar + * separately. + */ + BACKUP_LABEL_FILE, + TABLESPACE_MAP, + + /* Skip postmaster.pid and postmaster.opts. */ + "postmaster.pid", + "postmaster.opts", + + /* Terminate list. */ + NULL +}; + +/* * Called when ERROR or FATAL happens in perform_base_backup() after * we have started the backup - make sure we end it! 
*/ @@ -415,7 +486,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir) } /* send the WAL file itself */ - _tarWriteHeader(pathbuf, NULL, &statbuf); + _tarWriteHeader(pathbuf, NULL, &statbuf, false); while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0) { @@ -807,7 +878,7 @@ sendFileWithContent(const char *filename, const char *content) statbuf.st_mode = S_IRUSR | S_IWUSR; statbuf.st_size = len; - _tarWriteHeader(filename, NULL, &statbuf); + _tarWriteHeader(filename, NULL, &statbuf, false); /* Send the contents as a CopyData message */ pq_putmessage('d', content, len); @@ -858,9 +929,9 @@ sendTablespace(char *path, bool sizeonly) /* If the tablespace went away while scanning, it's no error. */ return 0; } - if (!sizeonly) - _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf); - size = 512; /* Size of the header just added */ + + size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf, + sizeonly); /* Send all the files in the tablespace version directory */ size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true); @@ -889,6 +960,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, char pathbuf[MAXPGPATH]; struct stat statbuf; int64 size = 0; + int excludeIdx; + bool excludeFound; dir = AllocateDir(path); while ((de = ReadDir(dir, path)) != NULL) @@ -903,22 +976,90 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, strlen(PG_TEMP_FILE_PREFIX)) == 0) continue; - /* skip auto conf temporary file */ - if (strncmp(de->d_name, - PG_AUTOCONF_FILENAME ".tmp", - sizeof(PG_AUTOCONF_FILENAME) + 4) == 0) + /* Stat the file */ + snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name); + + if (lstat(pathbuf, &statbuf) != 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file or directory \"%s\": %m", + pathbuf))); + + /* If the file went away while scanning, it's not an error. 
*/ continue; + } /* - * If there's a backup_label or tablespace_map file, it belongs to a - * backup started by the user with pg_start_backup(). It is *not* - * correct for this backup, our backup_label/tablespace_map is - * injected into the tar separately. + * Scan for files that should be excluded. See excludeFiles[] for info + * on exclusions. */ - if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0) - continue; + excludeFound = false; + excludeIdx = 0; + + while (excludeFiles[excludeIdx] != NULL) + { + if (strcmp(pathbuf + basepathlen + 1, + excludeFiles[excludeIdx]) == 0) + { + ereport(DEBUG1, + (errmsg("file excluded from backup: %s", + excludeFiles[excludeIdx]))); + + excludeFound = true; + break; + } + + excludeIdx++; + } + + /* + * Scan for directories whose contents should be excluded. See + * excludeDirContents[] for info on exclusions. + */ + if (!excludeFound) + { + excludeIdx = 0; + + while (excludeDirContents[excludeIdx] != NULL) + { + if (strcmp(pathbuf + basepathlen + 1, + excludeDirContents[excludeIdx]) == 0) + { + ereport(DEBUG1, + (errmsg("directory contents excluded from backup: %s", + excludeDirContents[excludeIdx]))); + + size += _tarWriteDir(pathbuf, basepathlen, &statbuf, + sizeonly); + + excludeFound = true; + break; + } + + excludeIdx++; + } + } + + /* + * Exclude contents of directory specified by statrelpath if not set + * to the default (pg_stat_tmp) which is caught in the loop above. + */ + if (!excludeFound && statrelpath != NULL && + strcmp(pathbuf, statrelpath) == 0) + { + ereport(DEBUG1, + (errmsg("stats_temp_directory contents excluded from backup: %s", + statrelpath))); + + size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly); - if (strcmp(de->d_name, TABLESPACE_MAP) == 0) + excludeFound = true; + } + + /* If file matched exclusion, continue. */ + if (excludeFound) continue; /* @@ -938,55 +1079,10 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, "and should not be used. 
" "Try taking another online backup."))); - snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name); - - /* Skip postmaster.pid and postmaster.opts in the data directory */ - if (strcmp(pathbuf, "./postmaster.pid") == 0 || - strcmp(pathbuf, "./postmaster.opts") == 0) - continue; - /* Skip pg_control here to back up it last */ if (strcmp(pathbuf, "./global/pg_control") == 0) continue; - if (lstat(pathbuf, &statbuf) != 0) - { - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file or directory \"%s\": %m", - pathbuf))); - - /* If the file went away while scanning, it's no error. */ - continue; - } - - /* - * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped - * even when stats_temp_directory is set because PGSS_TEXT_FILE is - * always created there. - */ - if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) || - strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0) - { - if (!sizeonly) - _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf); - size += 512; - continue; - } - - /* - * Skip pg_replslot, not useful to copy. But include it as an empty - * directory anyway, so we get permissions right. - */ - if (strcmp(de->d_name, "pg_replslot") == 0) - { - if (!sizeonly) - _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf); - size += 512; /* Size of the header just added */ - continue; - } - /* * We can skip pg_xlog, the WAL segments need to be fetched from the * WAL archive anyway. 
But include it as an empty directory anyway, so @@ -994,26 +1090,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, */ if (strcmp(pathbuf, "./pg_xlog") == 0) { - if (!sizeonly) - { - /* If pg_xlog is a symlink, write it as a directory anyway */ -#ifndef WIN32 - if (S_ISLNK(statbuf.st_mode)) -#else - if (pgwin32_is_junction(pathbuf)) -#endif - statbuf.st_mode = S_IFDIR | S_IRWXU; - _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf); - } - size += 512; /* Size of the header just added */ + /* If pg_xlog is a symlink, write it as a directory anyway */ + size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly); /* * Also send archive_status directory (by hackishly reusing * statbuf from above ...). */ - if (!sizeonly) - _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf); - size += 512; /* Size of the header just added */ + size += _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf, + sizeonly); continue; /* don't recurse into pg_xlog */ } @@ -1044,9 +1129,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, pathbuf))); linkpath[rllen] = '\0'; - if (!sizeonly) - _tarWriteHeader(pathbuf + basepathlen + 1, linkpath, &statbuf); - size += 512; /* Size of the header just added */ + size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath, + &statbuf, sizeonly); #else /* @@ -1069,9 +1153,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces, * Store a directory entry in the tar file so we can get the * permissions right. 
*/ - if (!sizeonly) - _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf); - size += 512; /* Size of the header just added */ + size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf, + sizeonly); /* * Call ourselves recursively for a directory, unless it happens @@ -1162,7 +1245,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf, errmsg("could not open file \"%s\": %m", readfilename))); } - _tarWriteHeader(tarfilename, NULL, statbuf); + _tarWriteHeader(tarfilename, NULL, statbuf, false); while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) { @@ -1215,36 +1298,65 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf, } -static void +/* + * Write a tar header for the given entry and return the header size in bytes. + * If sizeonly is true nothing is built or sent; only the size is returned. + */ +static int64 _tarWriteHeader(const char *filename, const char *linktarget, - struct stat * statbuf) + struct stat * statbuf, bool sizeonly) { char h[512]; enum tarError rc; - rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size, - statbuf->st_mode, statbuf->st_uid, statbuf->st_gid, - statbuf->st_mtime); - - switch (rc) + if (!sizeonly) { - case TAR_OK: - break; - case TAR_NAME_TOO_LONG: - ereport(ERROR, - (errmsg("file name too long for tar format: \"%s\"", - filename))); - break; - case TAR_SYMLINK_TOO_LONG: - ereport(ERROR, - (errmsg("symbolic link target too long for tar format: file name \"%s\", target \"%s\"", - filename, linktarget))); - break; - default: - elog(ERROR, "unrecognized tar error: %d", rc); + rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size, + statbuf->st_mode, statbuf->st_uid, statbuf->st_gid, + statbuf->st_mtime); + + switch (rc) + { + case TAR_OK: + break; + case TAR_NAME_TOO_LONG: + ereport(ERROR, + (errmsg("file name too long for tar format: \"%s\"", + filename))); + break; + case TAR_SYMLINK_TOO_LONG: + ereport(ERROR, + (errmsg("symbolic link target too long for tar format: " + "file name \"%s\", target \"%s\"", + filename, linktarget))); + break; + default: + elog(ERROR, "unrecognized tar error: %d", 
rc); + } + + pq_putmessage('d', h, sizeof(h)); } - pq_putmessage('d', h, 512); + return sizeof(h); +} + +/* + * Write tar header for a directory. If the entry in statbuf is a link then + * write it as a directory anyway. + */ +static int64 +_tarWriteDir(char *pathbuf, int basepathlen, struct stat *statbuf, + bool sizeonly) +{ + /* If symlink, write it as a directory anyway */ +#ifndef WIN32 + if (S_ISLNK(statbuf->st_mode)) +#else + if (pgwin32_is_junction(pathbuf)) +#endif + statbuf->st_mode = S_IFDIR | S_IRWXU; + + return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly); } /* diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index fd9857d..418fffa 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -4,7 +4,7 @@ use Cwd; use Config; use PostgresNode; use TestLib; -use Test::More tests => 54; +use Test::More tests => 68; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -55,15 +55,72 @@ print CONF "wal_level = replica\n"; close CONF; $node->restart; +# Write backup_label to be sure it is not copied. +open BACKUP_LABEL, ">>$pgdata/backup_label"; +print BACKUP_LABEL "DONOTCOPY"; +close BACKUP_LABEL; + +# Write tablespace_map to be sure it is not copied. +open BACKUP_LABEL, ">>$pgdata/tablespace_map"; +print BACKUP_LABEL "DONOTCOPY"; +close BACKUP_LABEL; + +# Write postgresql.auto.conf.tmp to be sure it is not copied. +open BACKUP_LABEL, ">>$pgdata/postgresql.auto.conf.tmp"; +print BACKUP_LABEL "DONOTCOPY"; +close BACKUP_LABEL; + $node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup" ], 'pg_basebackup runs'); ok(-f "$tempdir/backup/PG_VERSION", 'backup was created'); +# Only archive_status directory should be copied in pg_xlog/. is_deeply( [ sort(slurp_dir("$tempdir/backup/pg_xlog/")) ], [ sort qw(. .. archive_status) ], 'no WAL files copied'); +# Contents of these directories should not be copied. 
+is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_stat_tmp/")) ], + [ sort qw(. ..) ], + 'contents of pg_stat_tmp/ not copied'); +is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_replslot/")) ], + [ sort qw(. ..) ], + 'contents of pg_replslot/ not copied'); +is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_dynshmem/")) ], + [ sort qw(. ..) ], + 'contents of pg_dynshmem/ not copied'); +is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_notify/")) ], + [ sort qw(. ..) ], + 'contents of pg_notify/ not copied'); +is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_serial/")) ], + [ sort qw(. ..) ], + 'contents of pg_serial/ not copied'); +is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_snapshots/")) ], + [ sort qw(. ..) ], + 'contents of pg_snapshots/ not copied'); +is_deeply( + [ sort(slurp_dir("$tempdir/backup/pg_subtrans/")) ], + [ sort qw(. ..) ], + 'contents of pg_subtrans/ not copied'); + +# These files should not be copied. +ok(!-f "$tempdir/backup/postmaster.pid", 'postmaster.pid not copied'); +ok(!-f "$tempdir/backup/postmaster.opts", 'postmaster.opts not copied'); +ok(!-f "$tempdir/backup/postgresql.auto.conf.tmp", + 'postgresql.auto.conf.tmp not copied'); +ok(!-f "$tempdir/backup/tablespace_map", 'tablespace_map not copied'); + +# Make sure existing backup_label was ignored. +ok(slurp_file("$tempdir/backup/backup_label") ne 'DONOTCOPY', + 'existing backup_label not copied'); + $node->command_ok( [ 'pg_basebackup', '-D', "$tempdir/backup2", '--xlogdir', "$tempdir/xlog2" ], @@ -110,7 +167,16 @@ unlink "$pgdata/$superlongname"; # skip on Windows. SKIP: { - skip "symlinks not supported on Windows", 10 if ($windows_os); + skip "symlinks not supported on Windows", 12 if ($windows_os); + + # Move pg_replslot out of $pgdata and create a symlink to it. 
+ $node->stop; + + rename("$pgdata/pg_replslot", "$tempdir/pg_replslot") + or die "unable to move $pgdata/pg_replslot"; + symlink("$tempdir/pg_replslot", "$pgdata/pg_replslot") or die "unable to create symlink $pgdata/pg_replslot"; + + $node->start; # Create a temporary directory in the system location and symlink it # to our physical temp location. That way we can use shorter names @@ -148,6 +214,11 @@ SKIP: "tablespace symlink was updated"); closedir $dh; + # The pg_replslot symlink should have been copied as a real directory, not a link (-d alone follows symlinks). + ok(-l "$pgdata/pg_replslot", 'pg_replslot is a symlink in pgdata'); + ok(-d "$tempdir/backup1/pg_replslot" && !-l "$tempdir/backup1/pg_replslot", + 'pg_replslot symlink copied as directory'); + mkdir "$tempdir/tbl=spc2"; $node->safe_psql('postgres', "DROP TABLE test1;"); $node->safe_psql('postgres', "DROP TABLESPACE tblspc1;");