diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 87586a7b06..dd4a668eea 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -376,6 +376,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_progress_basebackuppg_stat_progress_basebackup + One row for each WAL sender process streaming a base backup, + showing current progress. + See Base Backup Progress Reporting. + + + @@ -3535,7 +3543,10 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, certain commands during command execution. Currently, the only commands which support progress reporting are ANALYZE, CLUSTER, - CREATE INDEX, and VACUUM. + CREATE INDEX, VACUUM, + and BASE_BACKUP (i.e., replication + command that pg_basebackup issues to take + a base backup). This may be expanded in the future. @@ -4336,6 +4347,156 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, + + + + Base Backup Progress Reporting + + + Whenever an application like pg_basebackup + is taking a base backup, the + pg_stat_progress_basebackup + view will contain a row for each WAL sender process that is currently + running the BASE_BACKUP replication command + and streaming the backup. The tables below describe the information + that will be reported and provide information about how to interpret it. + + + + <structname>pg_stat_progress_basebackup</structname> View + + + + Column + Type + Description + + + + + + pid + integer + Process ID of a WAL sender process. + + + phase + text + Current processing phase. See Base backup phases. + + + backup_total + bigint + + Total amount of data that will be streamed. If progress reporting + is not enabled in pg_basebackup + (i.e., --progress option is not specified), + this is 0. Otherwise, this is estimated and + reported as of the beginning of + streaming database files phase. Note that + this is only an approximation since the database + may change during streaming database files phase + and WAL log may be included in the backup later. 
This is always + the same value as backup_streamed + once the amount of data streamed exceeds the estimated + total size. + + + + backup_streamed + bigint + + Amount of data streamed. This counter only advances + when the phase is streaming database files or + transferring wal files. + + + + tablespaces_total + bigint + + Total number of tablespaces that will be streamed. + + + + tablespaces_streamed + bigint + + Number of tablespaces streamed. This counter only + advances when the phase is streaming database files. + + + + +
+ + + Base backup phases + + + + Phase + Description + + + + + initializing + + The WAL sender process is preparing to begin the backup. + This phase is expected to be very brief. + + + + waiting for checkpoint to finish + + The WAL sender process is currently performing + pg_start_backup to set up for + taking a base backup, and waiting for backup start + checkpoint to finish. + + + + estimating backup size + + The WAL sender process is currently estimating the total amount + of database files that will be streamed as a base backup. + + + + streaming database files + + The WAL sender process is currently streaming database files + as a base backup. + + + + waiting for wal archiving to finish + + The WAL sender process is currently performing + pg_stop_backup to finish the backup, + and waiting for all the WAL files required for the base backup + to be successfully archived. + If either --wal-method=none or + --wal-method=stream is specified in + pg_basebackup, the backup will end + when this phase is completed. + + + + transferring wal files + + The WAL sender process is currently transferring all WAL logs + generated during the backup. This phase occurs after + waiting for wal archiving to finish phase if + --wal-method=fetch is specified in + pg_basebackup. The backup will end + when this phase is completed. + + + + +
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 80275215e0..f139ba0231 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2465,7 +2465,7 @@ The commands accepted in replication mode are: - + BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] BASE_BACKUP diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index fc9e222f8d..fc9ba2e8b0 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -104,6 +104,13 @@ PostgreSQL documentation + + + Whenever pg_basebackup is taking a base + backup, the pg_stat_progress_basebackup + view will report the progress of the backup. + See Base Backup Progress Reporting for details. + @@ -459,6 +466,15 @@ PostgreSQL documentation This may make the backup take slightly longer, and in particular it will take longer before the first data is sent. + + Whether this is enabled or not, the + pg_stat_progress_basebackup view + reports the progress of the backup on the server side. But note + that the total amount of data that will be streamed is estimated + and reported only when this option is enabled. In other words, + the backup_total column in the view always + indicates 0 if this option is disabled. + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index d19408b3be..4361568882 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -39,6 +39,7 @@ #include "catalog/catversion.h" #include "catalog/pg_control.h" #include "catalog/pg_database.h" +#include "commands/progress.h" #include "commands/tablespace.h" #include "common/controldata_utils.h" #include "miscadmin.h" @@ -10228,6 +10229,10 @@ issue_xlog_fsync(int fd, XLogSegNo segno) * active at the same time, and they don't conflict with an exclusive backup * either. 
* + * tablespaces is required only when this function is called while + * the streaming base backup requested by pg_basebackup is running. + * NULL should be specified otherwise. + * * tblspcmapfile is required mainly for tar format in windows as native windows * utilities are not able to create symlinks while extracting files from tar. * However for consistency, the same is used for all platforms. @@ -10470,6 +10475,14 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, datadirpathlen = strlen(DataDir); + /* + * Report that we are now estimating the total backup size + * if we're streaming base backup as requested by pg_basebackup (callers pass a non-NULL tablespaces only in that case). + */ + if (tablespaces) + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE); + /* Collect information about all tablespaces */ tblspcdir = AllocateDir("pg_tblspc"); while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index f681aafcf9..b8a3f46912 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1060,6 +1060,22 @@ CREATE VIEW pg_stat_progress_create_index AS FROM pg_stat_get_progress_info('CREATE INDEX') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_basebackup AS /* progress of BASEBACKUP commands; param1..param5 are progress slots 0..4 (PROGRESS_BASEBACKUP_* in commands/progress.h), param1 being the phase code decoded below */ + SELECT + S.pid AS pid, + CASE S.param1 WHEN 0 THEN 'initializing' + WHEN 1 THEN 'waiting for checkpoint to finish' + WHEN 2 THEN 'estimating backup size' + WHEN 3 THEN 'streaming database files' + WHEN 4 THEN 'waiting for wal archiving to finish' + WHEN 5 THEN 'transferring wal files' + END AS phase, + S.param2 AS backup_total, + S.param3 AS backup_streamed, + S.param4 AS tablespaces_total, + S.param5 AS tablespaces_streamed + FROM pg_stat_get_progress_info('BASEBACKUP') AS S; + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 
ca8bebf432..fa576d5cae 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -19,6 +19,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" #include "common/file_perm.h" +#include "commands/progress.h" #include "lib/stringinfo.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -70,6 +71,7 @@ static void parse_basebackup_options(List *options, basebackup_options *opt); static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); static int compareWalFileNames(const ListCell *a, const ListCell *b); static void throttle(size_t increment); +static void update_basebackup_progress(int64 delta); static bool is_checksummed_file(const char *fullpath, const char *filename); /* Was the backup currently in-progress initiated in recovery mode? */ @@ -121,6 +123,12 @@ static long long int total_checksum_failures; /* Do not verify checksums. */ static bool noverify_checksums = false; +/* Total amount of backup data that will be streamed */ +static int64 backup_total = 0; + +/* Amount of backup data already streamed */ +static int64 backup_streamed = 0; + /* * Definition of one element part of an exclusion list, used for paths part * of checksum validation or base backups. 
"name" is the name of the file @@ -246,6 +254,10 @@ perform_base_backup(basebackup_options *opt) int datadirpathlen; List *tablespaces = NIL; + backup_total = 0; + backup_streamed = 0; + pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid); + datadirpathlen = strlen(DataDir); backup_started_in_recovery = RecoveryInProgress(); @@ -255,6 +267,8 @@ perform_base_backup(basebackup_options *opt) total_checksum_failures = 0; + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_WAIT_CHECKPOINT); startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, labelfile, &tablespaces, tblspc_map_file, @@ -271,8 +285,7 @@ perform_base_backup(basebackup_options *opt) { ListCell *lc; tablespaceinfo *ti; - - SendXlogRecPtrResult(startptr, starttli); + int tblspc_streamed = 0; /* * Calculate the relative path of temporary statistics directory in @@ -291,6 +304,37 @@ perform_base_backup(basebackup_options *opt) ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1; tablespaces = lappend(tablespaces, ti); + /* + * Calculate the total backup size by summing up the size + * of each tablespace + */ + if (opt->progress) + { + foreach(lc, tablespaces) + { + tablespaceinfo *tmp = (tablespaceinfo *) lfirst(lc); + backup_total += tmp->size; + } + } + + /* Report that we are now streaming database files as a base backup */ + { + const int index[] = { + PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_BACKUP_TOTAL, + PROGRESS_BASEBACKUP_TBLSPC_TOTAL + }; + const int64 val[] = { + PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP, + backup_total, list_length(tablespaces) + }; + + pgstat_progress_update_multi_param(3, index, val); + } + + /* Send the starting position of the backup */ + SendXlogRecPtrResult(startptr, starttli); + /* Send tablespace header */ SendBackupHeader(tablespaces); @@ -372,8 +416,14 @@ perform_base_backup(basebackup_options *opt) } else pq_putemptymessage('c'); /* CopyDone */ + + tblspc_streamed++; + 
pgstat_progress_update_param(PROGRESS_BASEBACKUP_TBLSPC_STREAMED, + tblspc_streamed); } + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_WAIT_WAL_ARCHIVE); endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli); } PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false)); @@ -399,6 +449,9 @@ perform_base_backup(basebackup_options *opt) ListCell *lc; TimeLineID tli; + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL); + /* * I'd rather not worry about timelines here, so scan pg_wal and * include all WAL files in the range between 'startptr' and 'endptr', @@ -548,6 +601,7 @@ perform_base_backup(basebackup_options *opt) if (pq_putmessage('d', buf, cnt)) ereport(ERROR, (errmsg("base backup could not send data, aborting backup"))); + update_basebackup_progress(cnt); len += cnt; throttle(cnt); @@ -623,6 +677,7 @@ perform_base_backup(basebackup_options *opt) errmsg("checksum verification failure during base backup"))); } + pgstat_progress_end_command(); } /* @@ -949,6 +1004,7 @@ sendFileWithContent(const char *filename, const char *content) _tarWriteHeader(filename, NULL, &statbuf, false); /* Send the contents as a CopyData message */ pq_putmessage('d', content, len); + update_basebackup_progress(len); /* Pad to 512 byte boundary, per tar format requirements */ pad = ((len + 511) & ~511) - len; @@ -958,6 +1014,7 @@ sendFileWithContent(const char *filename, const char *content) MemSet(buf, 0, pad); pq_putmessage('d', buf, pad); + update_basebackup_progress(pad); } } @@ -1565,6 +1622,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf if (pq_putmessage('d', buf, cnt)) ereport(ERROR, (errmsg("base backup could not send data, aborting backup"))); + update_basebackup_progress(cnt); len += cnt; throttle(cnt); @@ -1590,6 +1648,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { cnt = Min(sizeof(buf), 
statbuf->st_size - len); pq_putmessage('d', buf, cnt); + update_basebackup_progress(cnt); len += cnt; throttle(cnt); } @@ -1604,6 +1663,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { MemSet(buf, 0, pad); pq_putmessage('d', buf, pad); + update_basebackup_progress(pad); } FreeFile(fp); @@ -1658,6 +1718,7 @@ _tarWriteHeader(const char *filename, const char *linktarget, } pq_putmessage('d', h, sizeof(h)); + update_basebackup_progress(sizeof(h)); } return sizeof(h); @@ -1755,3 +1816,36 @@ throttle(size_t increment) */ throttled_last = GetCurrentTimestamp(); } + +/* + * Increment the counter for the amount of data already streamed + * by the given number of bytes (delta), and update the progress report for + * pg_stat_progress_basebackup. backup_streamed is reported on every call; backup_total is re-reported only when it is raised below. + */ +static void +update_basebackup_progress(int64 delta) +{ + const int index[] = { + PROGRESS_BASEBACKUP_BACKUP_STREAMED, + PROGRESS_BASEBACKUP_BACKUP_TOTAL + }; + int64 val[2]; + int nparam = 0; + + backup_streamed += delta; + val[nparam++] = backup_streamed; + + /* + * Avoid overflowing past 100% or the full size. This may make the total + * size number change as we approach the end of the backup (the estimate + * will always be wrong if WAL is included), but that's better than having + * the done column be bigger than the total. Nothing is adjusted while backup_total is 0, the value it keeps when no size estimate was requested.
+ */ + if (backup_total > 0 && backup_streamed > backup_total) + { + backup_total = backup_streamed; + val[nparam++] = backup_total; + } + + pgstat_progress_update_multi_param(nparam, index, val); +} diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 7e6a3c1774..54d2673254 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -474,6 +474,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS) cmdtype = PROGRESS_COMMAND_CLUSTER; else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0) cmdtype = PROGRESS_COMMAND_CREATE_INDEX; + else if (pg_strcasecmp(cmd, "BASEBACKUP") == 0) + cmdtype = PROGRESS_COMMAND_BASEBACKUP; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 12e9d3d42f..a302a1e9b2 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -119,4 +119,18 @@ #define PROGRESS_SCAN_BLOCKS_TOTAL 15 #define PROGRESS_SCAN_BLOCKS_DONE 16 +/* Progress parameters for pg_basebackup */ +#define PROGRESS_BASEBACKUP_PHASE 0 +#define PROGRESS_BASEBACKUP_BACKUP_TOTAL 1 +#define PROGRESS_BASEBACKUP_BACKUP_STREAMED 2 +#define PROGRESS_BASEBACKUP_TBLSPC_TOTAL 3 +#define PROGRESS_BASEBACKUP_TBLSPC_STREAMED 4 + +/* Phases of pg_basebackup (as advertised via PROGRESS_BASEBACKUP_PHASE); the pg_stat_progress_basebackup view labels phase value 0 as 'initializing' */ +#define PROGRESS_BASEBACKUP_PHASE_WAIT_CHECKPOINT 1 +#define PROGRESS_BASEBACKUP_PHASE_ESTIMATE_BACKUP_SIZE 2 +#define PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP 3 +#define PROGRESS_BASEBACKUP_PHASE_WAIT_WAL_ARCHIVE 4 +#define PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL 5 + #endif diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 3a65a51696..7bc36c6583 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -958,7 +958,8 @@ typedef enum ProgressCommandType PROGRESS_COMMAND_VACUUM, PROGRESS_COMMAND_ANALYZE, PROGRESS_COMMAND_CLUSTER, - PROGRESS_COMMAND_CREATE_INDEX + PROGRESS_COMMAND_CREATE_INDEX, 
+ PROGRESS_COMMAND_BASEBACKUP } ProgressCommandType; #define PGSTAT_NUM_PROGRESS_PARAM 20 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 634f8256f7..c7304611c3 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1876,6 +1876,21 @@ pg_stat_progress_analyze| SELECT s.pid, (s.param8)::oid AS current_child_table_relid FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); +pg_stat_progress_basebackup| SELECT s.pid, + CASE s.param1 + WHEN 0 THEN 'initializing'::text + WHEN 1 THEN 'waiting for checkpoint to finish'::text + WHEN 2 THEN 'estimating backup size'::text + WHEN 3 THEN 'streaming database files'::text + WHEN 4 THEN 'waiting for wal archiving to finish'::text + WHEN 5 THEN 'transferring wal files'::text + ELSE NULL::text + END AS phase, + s.param2 AS backup_total, + s.param3 AS backup_streamed, + s.param4 AS tablespaces_total, + s.param5 AS tablespaces_streamed + FROM pg_stat_get_progress_info('BASEBACKUP'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20); pg_stat_progress_cluster| SELECT s.pid, s.datid, d.datname,