diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index e6c49eebad..c84d9a0775 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -360,6 +360,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_progress_analyzepg_stat_progress_analyze + One row for each backend (including autovacuum worker processes) running + ANALYZE, showing current progress. + See . + + + pg_stat_progress_clusterpg_stat_progress_cluster One row for each backend running @@ -3481,7 +3489,7 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, PostgreSQL has the ability to report the progress of certain commands during command execution. Currently, the only commands which support progress reporting are CREATE INDEX, - VACUUM and + VACUUM, ANALYZE and CLUSTER. This may be expanded in the future. @@ -3927,6 +3935,140 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid, + + ANALYZE Progress Reporting + + + Whenever ANALYZE is running, the + pg_stat_progress_analyze view will contain a + row for each backend that is currently running that command. The tables + below describe the information that will be reported and provide + information about how to interpret it. + + + + <structname>pg_stat_progress_analyze</structname> View + + + + Column + Type + Description + + + + + + pid + integer + Process ID of backend. + + + datid + oid + OID of the database to which this backend is connected. + + + datname + name + Name of the database to which this backend is connected. + + + relid + oid + OID of the table being analyzed. + + + include_children + boolean + Whether scanning through child tables. + + + current_relid + oid + OID of the table currently being scanned. + It might be different from relid when analyzing tables that have child tables. + + + + phase + text + Current processing phase. See + + + sample_blks_total + bigint + + Total number of heap blocks that will be sampled. 
+ + + + heap_blks_scanned + bigint + + Number of heap blocks scanned. + + + + ext_compute_count + bigint + + Number of computed extended stats. This counter only advances when the phase + is computing extended stats. + + + + +
+ + + ANALYZE phases + + + + Phase + Description + + + + + initializing + + The command is preparing to begin scanning the heap. This phase is + expected to be very brief. + + + + acquiring sample rows + + The command is currently scanning the table identified by current_relid + to obtain sample rows. + + + + computing stats + + The command is computing stats from the samples obtained during the table scan. + + + + computing extended stats + + The command is computing extended stats from the samples obtained during the table scan. + + + + finalizing analyze + + The command is updating pg_class. When this phase is completed, + ANALYZE will end. + + + + +
+
+ CLUSTER Progress Reporting diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 9fe4a4794a..f28bd6fb1e 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -964,6 +964,23 @@ CREATE VIEW pg_stat_progress_vacuum AS FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; +CREATE VIEW pg_stat_progress_analyze AS /* param1..param6 are the PROGRESS_ANALYZE_* slots 0..5 of commands/progress.h; phase codes 1-4 match the PROGRESS_ANALYZE_PHASE_* macros there */ + SELECT + S.pid AS pid, S.datid AS datid, D.datname AS datname, + CAST(S.relid AS oid) AS relid, + CASE S.param1 WHEN 0 THEN 'initializing' + WHEN 1 THEN 'acquiring sample rows' + WHEN 2 THEN 'computing stats' + WHEN 3 THEN 'computing extended stats' + WHEN 4 THEN 'finalizing analyze' + END AS phase, + CAST(CAST(S.param2 AS int) AS boolean) AS include_children, + CAST(S.param3 AS oid) AS current_relid, + S.param4 AS sample_blks_total, S.param5 AS heap_blks_scanned, + S.param6 AS ext_compute_count + FROM pg_stat_get_progress_info('ANALYZE') AS S + LEFT JOIN pg_database D ON S.datid = D.oid; + CREATE VIEW pg_stat_progress_cluster AS SELECT S.pid AS pid, diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 7accb950eb..f0992cc605 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -35,6 +35,7 @@ #include "catalog/pg_namespace.h" #include "catalog/pg_statistic_ext.h" #include "commands/dbcommands.h" +#include "commands/progress.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" #include "executor/executor.h" @@ -251,6 +252,8 @@ analyze_rel(Oid relid, RangeVar *relation, LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyPgXact->vacuumFlags |= PROC_IN_ANALYZE; LWLockRelease(ProcArrayLock); + pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE, + RelationGetRelid(onerel)); /* * Do the normal non-recursive ANALYZE. 
We can skip this for partitioned @@ -275,6 +278,8 @@ analyze_rel(Oid relid, RangeVar *relation, */ relation_close(onerel, NoLock); + pgstat_progress_end_command(); + /* * Reset my PGXACT flag. Note: we need this here, and not in vacuum_rel, * because the vacuum flag is cleared by the end-of-xact code. @@ -318,6 +323,11 @@ do_analyze_rel(Relation onerel, VacuumParams *params, Oid save_userid; int save_sec_context; int save_nestlevel; + const int st_index[] = { + PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_INCLUDE_CHILDREN + }; + int64 st_val[2]; if (inh) ereport(elevel, @@ -505,6 +515,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params, /* * Acquire the sample rows */ + st_val[0] = PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS; + st_val[1] = inh; + pgstat_progress_update_multi_param(2, st_index, st_val); + rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); if (inh) numrows = acquire_inherited_sample_rows(onerel, elevel, @@ -524,7 +538,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params, if (numrows > 0) { MemoryContext col_context, - old_context; + old_context; + + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_STATS); col_context = AllocSetContextCreate(anl_context, "Analyze Column", @@ -592,10 +609,17 @@ do_analyze_rel(Relation onerel, VacuumParams *params, * not for relations representing inheritance trees. */ if (!inh) + { + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS); BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt, vacattrstats); + } } + pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, + PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE); + /* * Update pages/tuples stats in pg_class ... but not if we're doing * inherited stats. 
@@ -1034,6 +1058,13 @@ acquire_sample_rows(Relation onerel, int elevel, ReservoirStateData rstate; TupleTableSlot *slot; TableScanDesc scan; + BlockNumber nblocks; + BlockNumber blksdone = 0; + const int sb_index[] = { + PROGRESS_ANALYZE_TOTAL_BLOCKS, + PROGRESS_ANALYZE_SCANREL + }; + int64 sb_val[2]; Assert(targrows > 0); @@ -1043,7 +1074,13 @@ acquire_sample_rows(Relation onerel, int elevel, OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM); /* Prepare for sampling block numbers */ - BlockSampler_Init(&bs, totalblocks, targrows, random()); + nblocks = BlockSampler_Init(&bs, totalblocks, targrows, random()); + + /* Report sampling block numbers */ + sb_val[0] = nblocks; + sb_val[1] = RelationGetRelid(onerel); + pgstat_progress_update_multi_param(2, sb_index, sb_val); + /* Prepare for sampling rows */ reservoir_init_selection_state(&rstate, targrows); @@ -1104,6 +1141,9 @@ acquire_sample_rows(Relation onerel, int elevel, samplerows += 1; } + + pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE, + ++blksdone); } ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 207ee3160e..352920b147 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -24,10 +24,12 @@ #include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "commands/progress.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/optimizer.h" +#include "pgstat.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics.h" @@ -92,6 +94,7 @@ BuildRelationExtStatistics(Relation onerel, double totalrows, List *stats; MemoryContext cxt; MemoryContext oldcxt; + int64 ext_cnt; cxt = AllocSetContextCreate(CurrentMemoryContext, "BuildRelationExtStatistics", @@ -100,6 +103,7 @@ 
BuildRelationExtStatistics(Relation onerel, double totalrows, pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock); stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + ext_cnt = 0; foreach(lc, stats) { @@ -161,6 +165,10 @@ BuildRelationExtStatistics(Relation onerel, double totalrows, else if (t == STATS_EXT_MCV) mcv = statext_mcv_build(numrows, rows, stat->columns, stats, totalrows, stattarget); + + /* for reporting progress */ + pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_COMPUTE_COUNT, + ++ext_cnt); } /* store the statistics in the catalog */ diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 05240bfd14..db2cc5c316 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -469,6 +469,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS) /* Translate command name into command type code. */ if (pg_strcasecmp(cmd, "VACUUM") == 0) cmdtype = PROGRESS_COMMAND_VACUUM; + else if (pg_strcasecmp(cmd, "ANALYZE") == 0) + cmdtype = PROGRESS_COMMAND_ANALYZE; else if (pg_strcasecmp(cmd, "CLUSTER") == 0) cmdtype = PROGRESS_COMMAND_CLUSTER; else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0) diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index d2a1537979..f7daece5ee 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -32,8 +32,10 @@ * Since we know the total number of blocks in advance, we can use the * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's * algorithm. + * + * Returns the number of blocks that BlockSampler_Next will return. 
*/ -void +BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, long randseed) { @@ -48,6 +50,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, bs->m = 0; /* blocks selected so far */ sampler_random_init_state(randseed, bs->randstate); + + return Min(bs->n, bs->N); } bool diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index acd1313cb3..00468cc474 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -34,6 +34,20 @@ #define PROGRESS_VACUUM_PHASE_TRUNCATE 5 #define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP 6 +/* Progress parameters for analyze (0-based slots; the pg_stat_progress_analyze view reads slot N as param(N+1)) */ +#define PROGRESS_ANALYZE_PHASE 0 +#define PROGRESS_ANALYZE_INCLUDE_CHILDREN 1 +#define PROGRESS_ANALYZE_SCANREL 2 +#define PROGRESS_ANALYZE_TOTAL_BLOCKS 3 +#define PROGRESS_ANALYZE_BLOCKS_DONE 4 +#define PROGRESS_ANALYZE_EXT_COMPUTE_COUNT 5 + +/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE); value 0 has no macro in this group and is shown as 'initializing' by the pg_stat_progress_analyze view */ +#define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1 +#define PROGRESS_ANALYZE_PHASE_COMPUTE_STATS 2 +#define PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS 3 +#define PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE 4 + /* Progress parameters for cluster */ #define PROGRESS_CLUSTER_COMMAND 0 #define PROGRESS_CLUSTER_PHASE 1 diff --git a/src/include/pgstat.h b/src/include/pgstat.h index fe076d823d..df418af4e7 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -955,6 +955,7 @@ typedef enum ProgressCommandType { PROGRESS_COMMAND_INVALID, PROGRESS_COMMAND_VACUUM, + PROGRESS_COMMAND_ANALYZE, PROGRESS_COMMAND_CLUSTER, PROGRESS_COMMAND_CREATE_INDEX } ProgressCommandType; diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h index 541b507fb5..76d31dc126 100644 --- a/src/include/utils/sampling.h +++ b/src/include/utils/sampling.h @@ -37,7 +37,7 @@ typedef struct typedef BlockSamplerData *BlockSampler; -extern void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, +extern BlockNumber 
BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, long randseed); extern bool BlockSampler_HasMore(BlockSampler bs); extern BlockNumber BlockSampler_Next(BlockSampler bs); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 210e9cd146..bd2f90a916 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1846,6 +1846,25 @@ pg_stat_gssapi| SELECT s.pid, s.gss_princ AS principal, s.gss_enc AS encrypted FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, sslcompression, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc); +pg_stat_progress_analyze| SELECT s.pid, + s.datid, + d.datname, + s.relid, + CASE s.param1 + WHEN 0 THEN 'initializing'::text + WHEN 1 THEN 'acquiring sample rows'::text + WHEN 2 THEN 'computing stats'::text + WHEN 3 THEN 'computing extended stats'::text + WHEN 4 THEN 'finalizing analyze'::text + ELSE NULL::text + END AS phase, + ((s.param2)::integer)::boolean AS include_children, + (s.param3)::oid AS current_relid, + s.param4 AS sample_blks_total, + s.param5 AS heap_blks_scanned, + s.param6 AS ext_compute_count + FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) + LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_cluster| SELECT s.pid, s.datid, d.datname,