diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 8839699079..136fcbc2af 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -376,6 +376,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_progress_basebackuppg_stat_progress_basebackup + One row for each WAL sender process streaming a base backup, + showing current progress. + See . + + + @@ -3515,7 +3523,10 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, certain commands during command execution. Currently, the only commands which support progress reporting are ANALYZE, CLUSTER, - CREATE INDEX, and VACUUM. + CREATE INDEX, VACUUM, + and (i.e., replication + command that issues to take + a base backup). This may be expanded in the future. @@ -4316,6 +4327,143 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, + + + + Base Backup Progress Reporting + + + Whenever pg_basebackup is taking a base + backup, the pg_stat_progress_basebackup + view will contain a row for each WAL sender process that is currently + running BASE_BACKUP replication command + and streaming the backup. The tables below describe the information + that will be reported and provide information about how to interpret it. + + + + <structname>pg_stat_progress_basebackup</structname> View + + + + Column + Type + Description + + + + + + pid + integer + Process ID of a WAL sender process. + + + phase + text + Current processing phase. See . + + + backup_total + bigint + + Total amount of data that will be streamed. If progress reporting + is not enabled in pg_basebackup + (i.e., --progress option is not specified), + this is 0. Otherwise, this is estimated and + reported as of the beginning of streaming backup + phase. Note that this is only an approximation since the database + may change during streaming backup phase + and WAL log may be included in the backup later. This is always + the same value as backup_streamed + once the amount of data already streamed exceeds the estimated + total size. + + + + backup_streamed + bigint + + Amount of data already streamed. This counter only advances + when the phase is streaming backup or + transfering wal. + + + + tablespace_total + bigint + + Total number of tablespaces that will be streamed. + + + + tablespace_streamed + bigint + + Number of tablespaces already streamed. This counter only + advances when the phase is streaming backup. + + + + +
+ + + Base backup phases + + + + Phase + Description + + + + + initializing + + The WAL sender process is preparing to begin the backup. + This phase is expected to be very brief. + + + + starting backup + + The WAL sender process is currently performing + pg_start_backup and setting up for + making a base backup. + + + + streaming backup + + The WAL sender process is currently streaming a base backup. + + + + stopping backup + + The WAL sender process is currently performing + pg_stop_backup and finishing the backup. + If either --wal-method=none or + --wal-method=stream is specified in + pg_basebackup, the backup will end + when this phase is completed. + + + + transferring wal + + The WAL sender process is currently transferring all WAL logs + generated during the backup. This phase occurs after + stopping backup phase if + --wal-method=fetch is specified in + pg_basebackup. The backup will end + when this phase is completed. + + + + +
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 80275215e0..f139ba0231 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2465,7 +2465,7 @@ The commands accepted in replication mode are: - + BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] BASE_BACKUP diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index c9e75f4370..702d9d8002 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1058,6 +1058,21 @@ CREATE VIEW pg_stat_progress_create_index AS FROM pg_stat_get_progress_info('CREATE INDEX') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_basebackup AS + SELECT + S.pid AS pid, + CASE S.param1 WHEN 0 THEN 'initializing' + WHEN 1 THEN 'starting backup' + WHEN 2 THEN 'streaming backup' + WHEN 3 THEN 'stopping backup' + WHEN 4 THEN 'transferring wal' + END AS phase, + S.param2 AS backup_total, + S.param3 AS backup_streamed, + S.param4 AS tablespace_total, + S.param5 AS tablespace_streamed + FROM pg_stat_get_progress_info('BASEBACKUP') AS S; + CREATE VIEW pg_user_mappings AS SELECT U.oid AS umid, diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index dea8aab45e..2fc3ae7fd0 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -19,6 +19,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" #include "common/file_perm.h" +#include "commands/progress.h" #include "lib/stringinfo.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -70,6 +71,7 @@ static void parse_basebackup_options(List *options, basebackup_options *opt); static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); static int compareWalFileNames(const ListCell *a, const ListCell *b); static void throttle(size_t increment); +static void update_basebackup_progress(int64 delta); static bool is_checksummed_file(const char *fullpath, const char *filename); /* Was the backup currently in-progress initiated in recovery mode? */ @@ -121,6 +123,12 @@ static long long int total_checksum_failures; /* Do not verify checksums. */ static bool noverify_checksums = false; +/* Total amount of backup data that will be streamed */ +static int64 backup_total = 0; + +/* Amount of backup data already streamed */ +static int64 backup_streamed = 0; + /* * The contents of these directories are removed or recreated during server * start so they are not included in backups. The directories themselves are @@ -232,6 +240,8 @@ perform_base_backup(basebackup_options *opt) int datadirpathlen; List *tablespaces = NIL; + pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid); + datadirpathlen = strlen(DataDir); backup_started_in_recovery = RecoveryInProgress(); @@ -241,6 +251,8 @@ perform_base_backup(basebackup_options *opt) total_checksum_failures = 0; + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_START_BACKUP); startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, labelfile, &tablespaces, tblspc_map_file, @@ -257,6 +269,10 @@ perform_base_backup(basebackup_options *opt) { ListCell *lc; tablespaceinfo *ti; + int tblspc_streamed = 0; + + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP); SendXlogRecPtrResult(startptr, starttli); @@ -358,8 +374,14 @@ perform_base_backup(basebackup_options *opt) } else pq_putemptymessage('c'); /* CopyDone */ + + tblspc_streamed++; + pgstat_progress_update_param(PROGRESS_BASEBACKUP_TBLSPC_STREAMED, + tblspc_streamed); } + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_STOP_BACKUP); endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli); } PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(false)); @@ -385,6 +407,9 @@ perform_base_backup(basebackup_options *opt) ListCell *lc; TimeLineID tli; + pgstat_progress_update_param(PROGRESS_BASEBACKUP_PHASE, + PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL); + /* * I'd rather not worry about timelines here, so scan pg_wal and * include all WAL files in the range between 'startptr' and 'endptr', @@ -534,6 +559,7 @@ perform_base_backup(basebackup_options *opt) if (pq_putmessage('d', buf, cnt)) ereport(ERROR, (errmsg("base backup could not send data, aborting backup"))); + update_basebackup_progress(cnt); len += cnt; throttle(cnt); @@ -609,6 +635,7 @@ perform_base_backup(basebackup_options *opt) errmsg("checksum verification failure during base backup"))); } + pgstat_progress_end_command(); } /* @@ -837,7 +864,10 @@ SendBackupHeader(List *tablespaces) pq_sendbytes(&buf, ti->path, len); } if (ti->size >= 0) + { send_int8_string(&buf, ti->size / 1024); + backup_total += ti->size; + } else pq_sendint32(&buf, -1); /* NULL */ @@ -846,6 +876,11 @@ SendBackupHeader(List *tablespaces) /* Send a CommandComplete message */ pq_puttextmessage('C', "SELECT"); + + pgstat_progress_update_param(PROGRESS_BASEBACKUP_BACKUP_TOTAL, + backup_total); + pgstat_progress_update_param(PROGRESS_BASEBACKUP_TBLSPC_TOTAL, + list_length(tablespaces)); } /* @@ -935,6 +970,7 @@ sendFileWithContent(const char *filename, const char *content) _tarWriteHeader(filename, NULL, &statbuf, false); /* Send the contents as a CopyData message */ pq_putmessage('d', content, len); + update_basebackup_progress(len); /* Pad to 512 byte boundary, per tar format requirements */ pad = ((len + 511) & ~511) - len; @@ -944,6 +980,7 @@ sendFileWithContent(const char *filename, const char *content) MemSet(buf, 0, pad); pq_putmessage('d', buf, pad); + update_basebackup_progress(pad); } } @@ -1540,6 +1577,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf if (pq_putmessage('d', buf, cnt)) ereport(ERROR, (errmsg("base backup could not send data, aborting backup"))); + update_basebackup_progress(cnt); len += cnt; throttle(cnt); @@ -1565,6 +1603,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { cnt = Min(sizeof(buf), statbuf->st_size - len); pq_putmessage('d', buf, cnt); + update_basebackup_progress(cnt); len += cnt; throttle(cnt); } @@ -1579,6 +1618,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { MemSet(buf, 0, pad); pq_putmessage('d', buf, pad); + update_basebackup_progress(pad); } FreeFile(fp); @@ -1633,6 +1673,7 @@ _tarWriteHeader(const char *filename, const char *linktarget, } pq_putmessage('d', h, sizeof(h)); + update_basebackup_progress(sizeof(h)); } return sizeof(h); @@ -1730,3 +1771,30 @@ throttle(size_t increment) */ throttled_last = GetCurrentTimestamp(); } + +/* + * Increment the counter for the amount of data already streamed + * by the given number of bytes, and update the progress report for + * pg_stat_progress_basebackup. + */ +static void +update_basebackup_progress(int64 delta) +{ + backup_streamed += delta; + + /* + * Avoid overflowing past 100% or the full size. This may make the total + * size number change as we approach the end of the backup (the estimate + * will always be wrong if WAL is included), but that's better than having + * the done column be bigger than the total. + */ + if (backup_total > 0 && backup_streamed > backup_total) + { + backup_total = backup_streamed; + pgstat_progress_update_param(PROGRESS_BASEBACKUP_BACKUP_TOTAL, + backup_total); + } + + pgstat_progress_update_param(PROGRESS_BASEBACKUP_BACKUP_STREAMED, + backup_streamed); +} diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 74f899f24d..76dffa6602 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -474,6 +474,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS) cmdtype = PROGRESS_COMMAND_CLUSTER; else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0) cmdtype = PROGRESS_COMMAND_CREATE_INDEX; + else if (pg_strcasecmp(cmd, "BASEBACKUP") == 0) + cmdtype = PROGRESS_COMMAND_BASEBACKUP; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 12e9d3d42f..15693f2da0 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -119,4 +119,17 @@ #define PROGRESS_SCAN_BLOCKS_TOTAL 15 #define PROGRESS_SCAN_BLOCKS_DONE 16 +/* Progress parameters for pg_basebackup */ +#define PROGRESS_BASEBACKUP_PHASE 0 +#define PROGRESS_BASEBACKUP_BACKUP_TOTAL 1 +#define PROGRESS_BASEBACKUP_BACKUP_STREAMED 2 +#define PROGRESS_BASEBACKUP_TBLSPC_TOTAL 3 +#define PROGRESS_BASEBACKUP_TBLSPC_STREAMED 4 + +/* Phases of pg_basebackup (as advertised via PROGRESS_BASEBACKUP_PHASE) */ +#define PROGRESS_BASEBACKUP_PHASE_START_BACKUP 1 +#define PROGRESS_BASEBACKUP_PHASE_STREAM_BACKUP 2 +#define PROGRESS_BASEBACKUP_PHASE_STOP_BACKUP 3 +#define PROGRESS_BASEBACKUP_PHASE_TRANSFER_WAL 4 + #endif diff --git a/src/include/pgstat.h b/src/include/pgstat.h index aecb6013f0..62e547aa24 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -958,7 +958,8 @@ typedef enum ProgressCommandType PROGRESS_COMMAND_VACUUM, PROGRESS_COMMAND_ANALYZE, PROGRESS_COMMAND_CLUSTER, - PROGRESS_COMMAND_CREATE_INDEX + PROGRESS_COMMAND_CREATE_INDEX, + PROGRESS_COMMAND_BASEBACKUP } ProgressCommandType; #define PGSTAT_NUM_PROGRESS_PARAM 20 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 70e1e2f78d..a47016904e 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1874,6 +1874,20 @@ pg_stat_progress_analyze| SELECT s.pid, (s.param8)::oid AS current_child_table_relid FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); +pg_stat_progress_basebackup| SELECT s.pid, + CASE s.param1 + WHEN 0 THEN 'initializing'::text + WHEN 1 THEN 'starting backup'::text + WHEN 2 THEN 'sending backup'::text + WHEN 3 THEN 'stopping backup'::text + WHEN 4 THEN 'sending wal'::text + ELSE NULL::text + END AS phase, + s.param2 AS backup_total, + s.param3 AS backup_streamed, + s.param4 AS tablespace_total, + s.param5 AS tablespace_streamed + FROM pg_stat_get_progress_info('BASEBACKUP'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20); pg_stat_progress_cluster| SELECT s.pid, s.datid, d.datname,