From b009b1f8f6be47ae61b5e4538e2730d721ee60db Mon Sep 17 00:00:00 2001 From: "imai.yoshikazu" Date: Wed, 15 Jan 2020 09:13:19 +0000 Subject: [PATCH v4 1/2] Add pg_stat_waitaccum view. pg_stat_waitaccum shows counts and duration of each wait events. Each backend/backgrounds counts and measures the time of wait event in every pgstat_report_wait_start and pgstat_report_wait_end. They store those info into their local variables and send to Statistics Collector. We can get those info via Statistics Collector. For reducing overhead, I implemented statistic hash instead of dynamic hash. I also implemented track_wait_timing which determines wait event duration is collected or not. On windows, this function might be not worked correctly, because now it initializes local variables in pg_stat_init which is not passed to fork processes on windows. --- src/backend/catalog/system_views.sql | 8 + src/backend/postmaster/pgstat.c | 344 ++++++++++++++++++++++++++ src/backend/storage/lmgr/lwlock.c | 19 ++ src/backend/utils/adt/pgstatfuncs.c | 80 ++++++ src/backend/utils/misc/guc.c | 9 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/catalog/pg_proc.dat | 9 + src/include/pgstat.h | 123 ++++++++- src/include/storage/lwlock.h | 1 + src/include/storage/proc.h | 1 + src/test/regress/expected/rules.out | 5 + 11 files changed, 598 insertions(+), 2 deletions(-) diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 773edf8..80f2caa 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -957,6 +957,14 @@ CREATE VIEW pg_stat_bgwriter AS pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +CREATE VIEW pg_stat_waitaccum AS + SELECT + S.wait_event_type AS wait_event_type, + S.wait_event AS wait_event, + S.calls AS calls, + S.times AS times + FROM pg_stat_get_waitaccum(NULL) AS S; + CREATE VIEW pg_stat_progress_vacuum AS SELECT S.pid AS pid, S.datid AS datid, D.datname AS datname, diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 51c486b..08e10ad 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -123,6 +123,7 @@ */ bool pgstat_track_activities = false; bool pgstat_track_counts = false; +bool pgstat_track_wait_timing = false; int pgstat_track_functions = TRACK_FUNC_OFF; int pgstat_track_activity_query_size = 1024; @@ -153,6 +154,10 @@ static time_t last_pgstat_start_time; static bool pgStatRunningInCollector = false; +WAHash *wa_hash; + +instr_time waitStart; + /* * Structures in which backends store per-table info that's waiting to be * sent to the collector. @@ -255,6 +260,7 @@ static int localNumBackends = 0; */ static PgStat_ArchiverStats archiverStats; static PgStat_GlobalStats globalStats; +static PgStat_WaitAccumStats waitAccumStats; /* * List of OIDs of databases we need to write out. If an entry is InvalidOid, @@ -280,6 +286,8 @@ static pid_t pgstat_forkexec(void); #endif NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn(); +static void pgstat_init_waitaccum_hash(WAHash **hash); +static PgStat_WaitAccumEntry *pgstat_add_wa_entry(WAHash *hash, uint32 key); static void pgstat_beshutdown_hook(int code, Datum arg); static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create); @@ -287,8 +295,11 @@ static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create); static void pgstat_write_statsfiles(bool permanent, bool allDbs); static void pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent); +static void pgstat_write_waitaccum_statsfile(FILE *fpout); static HTAB *pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep); static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool permanent); +static bool pgstat_read_waitaccum_statsfile(PgStat_WaitAccumStats *stats, + FILE *fpin, const char *statfile); static void backend_read_statsfile(void); static void pgstat_read_current_status(void); @@ -324,6 +335,7 @@ static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len); static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len); static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); +static void pgstat_recv_waitaccum(PgStat_MsgWaitAccum *msg, int len); static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len); @@ -331,6 +343,27 @@ static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len); static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len); static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len); + +PgStat_WaitAccumEntry * +pgstat_get_wa_entry(WAHash *hash, uint32 key) +{ + WAEntry *current; + int bucket = key % WA_BUCKET_SIZE; + + current = hash->buckets[bucket]; + + while (current != NULL) + { + if (current->key == key) + return current->entry; + + current = current->next; + } + + return NULL; +} + + /* ------------------------------------------------------------ * Public functions called from postmaster follow * ------------------------------------------------------------ @@ -602,6 +635,8 @@ retry2: pg_freeaddrinfo_all(hints.ai_family, addrs); + pgstat_init_waitaccum_hash(&wa_hash); + return; startup_failed: @@ -624,6 +659,75 @@ startup_failed: SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE); } +static PgStat_WaitAccumEntry * +pgstat_add_wa_entry(WAHash *hash, uint32 key) +{ + WAEntry *prev; + WAEntry *new; + int bucket = key % WA_BUCKET_SIZE; + + prev = hash->buckets[bucket]; + + while (prev != NULL && prev->next != NULL) + prev = prev->next; + + new = &hash->entries[hash->entry_num++]; + new->key = key; + new->entry = MemoryContextAllocZero(TopMemoryContext, (sizeof(PgStat_WaitAccumEntry))); + + if (prev != NULL) + prev->next = new; + else + hash->buckets[bucket] = new; + + return new->entry; +} + +static void +pgstat_init_waitaccum_entry(WAHash *hash, uint32 wait_event_info) +{ + PgStat_WaitAccumEntry *entry; + + entry = pgstat_add_wa_entry(hash, wait_event_info); + entry->wait_event_info = wait_event_info; +} + +static void +pgstat_init_waitaccum_hash(WAHash **hash) +{ + uint32 i; + int last_tranche_id; + + *hash = MemoryContextAllocZero(TopMemoryContext, sizeof(WAHash)); + + last_tranche_id = LWLockGetLastTrancheId(); + for (i = PG_WAIT_LWLOCK + 1; i <= (PG_WAIT_LWLOCK | last_tranche_id); i++) + pgstat_init_waitaccum_entry(*hash, i); + + for (i = (PG_WAIT_LOCK | LOCKTAG_RELATION); i <= (PG_WAIT_LOCK | LOCKTAG_LAST_TYPE); i++) + pgstat_init_waitaccum_entry(*hash, i); + + for (i = PG_WAIT_BUFFER_PIN; i <= PG_WAIT_BUFFER_PIN; i++) + pgstat_init_waitaccum_entry(*hash, i); + + for (i = PG_WAIT_ACTIVITY; i <= PG_WAIT_ACTIVITY_LAST_TYPE; i++) + pgstat_init_waitaccum_entry(*hash, i); + + for (i = PG_WAIT_CLIENT; i <= PG_WAIT_CLIENT_LAST_TYPE; i++) + pgstat_init_waitaccum_entry(*hash, i); + + //do extension stuff + + for (i = PG_WAIT_IPC; i <= PG_WAIT_IPC_LAST_TYPE; i++) + pgstat_init_waitaccum_entry(*hash, i); + + for (i = PG_WAIT_TIMEOUT; i <= PG_WAIT_TIMEOUT_LAST_TYPE; i++) + pgstat_init_waitaccum_entry(*hash, i); + + for (i = PG_WAIT_IO; i <= PG_WAIT_IO_LAST_TYPE; i++) + pgstat_init_waitaccum_entry(*hash, i); +} + /* * subroutine for pgstat_reset_all */ @@ -904,6 +1008,9 @@ pgstat_report_stat(bool force) /* Now, send function statistics */ pgstat_send_funcstats(); + + /* Send wait accumulative statistics */ + pgstat_send_waitaccum(); } /* @@ -1334,6 +1441,8 @@ pgstat_reset_shared_counters(const char *target) msg.m_resettarget = RESET_ARCHIVER; else if (strcmp(target, "bgwriter") == 0) msg.m_resettarget = RESET_BGWRITER; + else if (strcmp(target, "waitaccum") == 0) + msg.m_resettarget = RESET_WAITACCUM; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -2618,6 +2727,22 @@ pgstat_fetch_global(void) return &globalStats; } +/* + * --------- + * pgstat_fetch_stat_waitaccum() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the wait accum statistics struct. + * --------- + */ +PgStat_WaitAccumStats * +pgstat_fetch_stat_waitaccum(void) +{ + backend_read_statsfile(); + + return &waitAccumStats; +} + /* ------------------------------------------------------------ * Functions for management of the shared-memory PgBackendStatus array @@ -4410,6 +4535,53 @@ pgstat_send_bgwriter(void) MemSet(&BgWriterStats, 0, sizeof(BgWriterStats)); } +/* ---------- + * pgstat_send_waitaccum() - + * + * ---------- + */ +void +pgstat_send_waitaccum() +{ + PgStat_MsgWaitAccum msg; + PgStat_WaitAccumEntry *entry; + int i; + + if (wa_hash == NULL) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_WAITACCUM); + msg.m_nentries = 0; + + for (i = 0; i < wa_hash->entry_num; i++) + { + entry = wa_hash->entries[i].entry; + + /* Send only wait events that have occurred. */ + if (entry->calls == 0) + continue; + + /* + * Prepare and send the message + */ + memcpy(&msg.m_entry[msg.m_nentries], entry, sizeof(PgStat_WaitAccumEntry)); + if (++msg.m_nentries >= PGSTAT_NUM_WAITACCUMENTRIES) + { + pgstat_send(&msg, offsetof(PgStat_MsgWaitAccum, m_entry[0]) + + msg.m_nentries * sizeof(PgStat_WaitAccumEntry)); + msg.m_nentries = 0; + } + + /* Clear wait events information. */ + entry->calls = 0; + INSTR_TIME_SET_ZERO(entry->times); + } + + if (msg.m_nentries > 0) + pgstat_send(&msg, offsetof(PgStat_MsgWaitAccum, m_entry[0]) + + msg.m_nentries * sizeof(PgStat_WaitAccumEntry)); +} + /* ---------- * PgstatCollectorMain() - @@ -4602,6 +4774,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_bgwriter(&msg.msg_bgwriter, len); break; + case PGSTAT_MTYPE_WAITACCUM: + pgstat_recv_waitaccum(&msg.msg_waitaccum, len); + break; + case PGSTAT_MTYPE_FUNCSTAT: pgstat_recv_funcstat(&msg.msg_funcstat, len); break; @@ -4872,6 +5048,8 @@ pgstat_write_statsfiles(bool permanent, bool allDbs) rc = fwrite(&archiverStats, sizeof(archiverStats), 1, fpout); (void) rc; /* we'll check for error with ferror */ + pgstat_write_waitaccum_statsfile(fpout); + /* * Walk through the database table. */ @@ -5077,6 +5255,43 @@ pgstat_write_db_statsfile(PgStat_StatDBEntry *dbentry, bool permanent) } /* ---------- + * pgstat_write_waitaccum_statsfile() - + * Write the waitAccumStats to the stat file. + * + * ---------- + */ +static void +pgstat_write_waitaccum_statsfile(FILE *fpout) +{ + PgStat_WaitAccumEntry *entry; + WAHash *hash = waitAccumStats.hash; + int rc; + int i; + + /* + * Walk through the waitaccum hash. + */ + for (i = 0; i < hash->entry_num; i++) + { + entry = hash->entries[i].entry; + + /* Write only wait events that have occurred. */ + if (entry->calls == 0) + continue; + + /* + * Write out the DB entry. We don't write the tables or functions + * pointers, since they're of no use to any other process. + */ + fputc('D', fpout); + rc = fwrite(entry, sizeof(PgStat_WaitAccumEntry), 1, fpout); + (void) rc; /* we'll check for error with ferror */ + } + + fputc('E', fpout); +} + +/* ---------- * pgstat_read_statsfiles() - * * Reads in some existing statistics collector files and returns the @@ -5129,6 +5344,7 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) */ memset(&globalStats, 0, sizeof(globalStats)); memset(&archiverStats, 0, sizeof(archiverStats)); + waitAccumStats.hash = MemoryContextAllocZero(pgStatLocalContext, sizeof(WAHash)); /* * Set the current timestamp (will be kept only in case we can't load an @@ -5199,6 +5415,9 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) goto done; } + if(!pgstat_read_waitaccum_statsfile(&waitAccumStats, fpin, statfile)) + goto done; + /* * We found an existing collector stats file. Read it and put all the * hashtable entries into place. @@ -5497,10 +5716,13 @@ pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, PgStat_StatDBEntry dbentry; PgStat_GlobalStats myGlobalStats; PgStat_ArchiverStats myArchiverStats; + PgStat_WaitAccumStats myWaitAccumStats; FILE *fpin; int32 format_id; const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : pgstat_stat_filename; + myWaitAccumStats.hash = MemoryContextAllocZero(CurrentMemoryContext, sizeof(WAHash)); + /* * Try to open the stats file. As above, anything but ENOENT is worthy of * complaining about. @@ -5551,6 +5773,9 @@ pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, return false; } + if(!pgstat_read_waitaccum_statsfile(&myWaitAccumStats, fpin, statfile)) + return false; + /* By default, we're going to return the timestamp of the global file. */ *ts = myGlobalStats.stats_timestamp; @@ -5604,6 +5829,75 @@ done: return true; } +/* ---------- + * pgstat_read_statsfiles() - + * + * Reads the waitaccum stats from the file. + * If an error happens when reading file, return false. Otherwise return true. + * + * ---------- + */ +static bool +pgstat_read_waitaccum_statsfile(PgStat_WaitAccumStats *stats, + FILE *fpin, const char *statfile) +{ + PgStat_WaitAccumEntry *entry; + PgStat_WaitAccumEntry buf; + WAHash *hash = stats->hash; + + /* + * Read and put all the hashtable entries into place. + */ + for (;;) + { + switch (fgetc(fpin)) + { + /* + * 'D' A PgStat_WaitAccumEntry struct describing a database + * follows. + */ + case 'D': + if (fread(&buf, 1, sizeof(PgStat_WaitAccumEntry), fpin) + != sizeof(PgStat_WaitAccumEntry)) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", + statfile))); + return false; + } + + entry = pgstat_get_wa_entry(hash, buf.wait_event_info); + + if (entry) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", + statfile))); + return false; + } + + /* + * Add to the DB hash + */ + entry = pgstat_add_wa_entry(hash, buf.wait_event_info); + memcpy(entry, &buf, sizeof(PgStat_WaitAccumEntry)); + + break; + + case 'E': + return true; + + default: + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted statistics file \"%s\"", + statfile))); + return false; + } + } + + return 0; +} + /* * If not already done, read the statistics collector stats file into * some hash tables. The results will be kept until pgstat_clear_snapshot() @@ -6113,7 +6407,20 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len) memset(&archiverStats, 0, sizeof(archiverStats)); archiverStats.stat_reset_timestamp = GetCurrentTimestamp(); } + else if (msg->m_resettarget == RESET_WAITACCUM) + { + PgStat_WaitAccumEntry *entry; + WAHash *hash = waitAccumStats.hash; + int i; + + for (i = 0; i < hash->entry_num; i++) + { + entry = hash->entries[i].entry; + entry->calls = 0; + INSTR_TIME_SET_ZERO(entry->times); + } + } /* * Presumably the sender of this message validated the target, don't * complain here if it's not valid @@ -6293,6 +6600,43 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len) } /* ---------- + * pgstat_recv_waitaccum() - + * + * Process a WAITACCUM message. + * ---------- + */ +static void +pgstat_recv_waitaccum(PgStat_MsgWaitAccum *msg, int len) +{ + PgStat_WaitAccumEntry *m_entry = &(msg->m_entry[0]); + PgStat_WaitAccumEntry *entry; + WAHash *hash = waitAccumStats.hash; + int i; + + /* + * Process all function entries in the message. + */ + for (i = 0; i < msg->m_nentries; i++, m_entry++) + { + entry = pgstat_get_wa_entry(hash, m_entry->wait_event_info); + + if (!entry) + { + entry = pgstat_add_wa_entry(hash, m_entry->wait_event_info); + memcpy(entry, m_entry, sizeof(PgStat_WaitAccumEntry)); + } + else + { + /* + * Otherwise add the values to the existing entry. + */ + entry->calls += m_entry->calls; + INSTR_TIME_ADD(entry->times, m_entry->times); + } + } +} + +/* ---------- * pgstat_recv_recoveryconflict() - * * Process a RECOVERYCONFLICT message. diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index d07ce60..6f4eb19 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -594,6 +594,25 @@ LWLockNewTrancheId(void) } /* + * Get a last tranche ID. + */ +int +LWLockGetLastTrancheId(void) +{ + int result; + int *LWLockCounter; + + Assert(!lock_named_request_allowed); + + LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int)); + SpinLockAcquire(ShmemLock); + result = *LWLockCounter; + SpinLockRelease(ShmemLock); + + return result; +} + +/* * Register a tranche ID in the lookup table for the current process. This * routine will save a pointer to the tranche name passed as an argument, * so the name should be allocated in a backend-lifetime context diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 3dbf604..bed7d01 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1974,3 +1974,83 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS) PG_RETURN_DATUM(HeapTupleGetDatum( heap_form_tuple(tupdesc, values, nulls))); } + +Datum +pg_stat_get_waitaccum(PG_FUNCTION_ARGS) +{ +#define PG_STAT_GET_WAITACCUM_COLS 4 + PgStat_WaitAccumStats *waitaccum_stats; + PgStat_WaitAccumEntry *entry; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + int i; + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + /* Get statistics about the waitaccum process */ + waitaccum_stats = pgstat_fetch_stat_waitaccum(); + + for (i = 0; i < waitaccum_stats->hash->entry_num; i++) + { + Datum values[PG_STAT_GET_WAITACCUM_COLS]; + bool nulls[PG_STAT_GET_WAITACCUM_COLS]; + const char *wait_event_type = NULL; + const char *wait_event = NULL; + + /* Initialise values and NULL flags arrays */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + entry = waitaccum_stats->hash->entries[i].entry; + + /* Fill values and NULLs */ + { + uint32 raw_wait_event; + + raw_wait_event = UINT32_ACCESS_ONCE(entry->wait_event_info); + wait_event_type = pgstat_get_wait_event_type(raw_wait_event); + wait_event = pgstat_get_wait_event(raw_wait_event); + } + + values[0] = CStringGetTextDatum(wait_event_type); + + values[1] = CStringGetTextDatum(wait_event); + + values[2] = Int64GetDatum(entry->calls); + + values[3] = UInt64GetDatum(INSTR_TIME_GET_MICROSEC(entry->times)); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + return (Datum) 0; +} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index e5f8a13..2924472 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1424,6 +1424,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"track_wait_timing", PGC_SUSET, STATS_COLLECTOR, + gettext_noop("Collects timing statistics for wait events."), + NULL + }, + &pgstat_track_wait_timing, + false, + NULL, NULL, NULL + }, { {"update_process_title", PGC_SUSET, PROCESS_TITLE, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e1048c0..3a99182 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -570,6 +570,7 @@ #track_activities = on #track_counts = on #track_io_timing = off +#track_wait_timing = off #track_functions = none # none, pl, all #track_activity_query_size = 1024 # (change requires restart) #stats_temp_directory = 'pg_stat_tmp' diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 427faa3..4e5a502 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5159,6 +5159,15 @@ proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', proargnames => '{pid,datid,pid,usesysid,application_name,state,query,wait_event_type,wait_event,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port,backend_xid,backend_xmin,backend_type,ssl,sslversion,sslcipher,sslbits,sslcompression,ssl_client_dn,ssl_client_serial,ssl_issuer_dn,gss_auth,gss_princ,gss_enc}', prosrc => 'pg_stat_get_activity' }, +{ oid => '2228', + descr => 'statistics: information about accumulative data of wait event', + proname => 'pg_stat_get_waitaccum', prorows => '200', proisstrict => 'f', + proretset => 't', provolatile => 's', proparallel => 'r', + prorettype => 'record', proargtypes => 'int4', + proallargtypes => '{int4,text,text,int8,int8}', + proargmodes => '{i,o,o,o,o}', + proargnames => '{pid,wait_event_type,wait_event,calls,times}', + prosrc => 'pg_stat_get_waitaccum' }, { oid => '3318', descr => 'statistics: information about progress of backends running maintenance command', proname => 'pg_stat_get_progress_info', prorows => '100', proretset => 't', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index e5a5d02..f90bb44 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -59,6 +59,7 @@ typedef enum StatMsgType PGSTAT_MTYPE_ANALYZE, PGSTAT_MTYPE_ARCHIVER, PGSTAT_MTYPE_BGWRITER, + PGSTAT_MTYPE_WAITACCUM, PGSTAT_MTYPE_FUNCSTAT, PGSTAT_MTYPE_FUNCPURGE, PGSTAT_MTYPE_RECOVERYCONFLICT, @@ -119,7 +120,8 @@ typedef struct PgStat_TableCounts typedef enum PgStat_Shared_Reset_Target { RESET_ARCHIVER, - RESET_BGWRITER + RESET_BGWRITER, + RESET_WAITACCUM } PgStat_Shared_Reset_Target; /* Possible object types for resetting single counters */ @@ -423,6 +425,33 @@ typedef struct PgStat_MsgBgWriter } PgStat_MsgBgWriter; /* ---------- + * PgStat_WaitAccumEntry Entry in backend/background's per-wait_event_info hash table + * ---------- + */ +typedef struct PgStat_WaitAccumEntry +{ + uint32 wait_event_info; + PgStat_Counter calls; + instr_time times; +} PgStat_WaitAccumEntry; + +/* ---------- + * PgStat_MsgWaitAccum Sent by backend/background's process to update statistics. + * ---------- + */ +#define PGSTAT_NUM_WAITACCUMENTRIES \ + ((PGSTAT_MSG_PAYLOAD - sizeof(int)) \ + / sizeof(PgStat_WaitAccumEntry)) + +typedef struct PgStat_MsgWaitAccum +{ + PgStat_MsgHdr m_hdr; + + int m_nentries; + PgStat_WaitAccumEntry m_entry[PGSTAT_NUM_WAITACCUMENTRIES]; +} PgStat_MsgWaitAccum; + +/* ---------- * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict * ---------- */ @@ -564,6 +593,7 @@ typedef union PgStat_Msg PgStat_MsgAnalyze msg_analyze; PgStat_MsgArchiver msg_archiver; PgStat_MsgBgWriter msg_bgwriter; + PgStat_MsgWaitAccum msg_waitaccum; PgStat_MsgFuncstat msg_funcstat; PgStat_MsgFuncpurge msg_funcpurge; PgStat_MsgRecoveryConflict msg_recoveryconflict; @@ -581,7 +611,7 @@ typedef union PgStat_Msg * ------------------------------------------------------------ */ -#define PGSTAT_FILE_FORMAT_ID 0x01A5BC9D +#define PGSTAT_FILE_FORMAT_ID 0x01A5BC9E /* ---------- * PgStat_StatDBEntry The collector's data per database @@ -711,6 +741,30 @@ typedef struct PgStat_GlobalStats TimestampTz stat_reset_timestamp; } PgStat_GlobalStats; +typedef struct WAEntry +{ + int key; + PgStat_WaitAccumEntry *entry; + struct WAEntry *next; +} WAEntry; + +#define WA_BUCKET_SIZE 461 + +typedef struct WAHash +{ + WAEntry entries[WA_BUCKET_SIZE]; + WAEntry *buckets[WA_BUCKET_SIZE]; + int entry_num; +} WAHash; + +/* + * WaitAccum statistics kept in the stats collector + */ +typedef struct PgStat_WaitAccumStats +{ + WAHash *hash; +} PgStat_WaitAccumStats; + /* ---------- * Backend types @@ -787,6 +841,8 @@ typedef enum WAIT_EVENT_WAL_WRITER_MAIN } WaitEventActivity; +#define PG_WAIT_ACTIVITY_LAST_TYPE WAIT_EVENT_WAL_WRITER_MAIN + /* ---------- * Wait Events - Client * @@ -808,6 +864,8 @@ typedef enum WAIT_EVENT_GSS_OPEN_SERVER, } WaitEventClient; +#define PG_WAIT_CLIENT_LAST_TYPE WAIT_EVENT_GSS_OPEN_SERVER + /* ---------- * Wait Events - IPC * @@ -856,6 +914,8 @@ typedef enum WAIT_EVENT_SYNC_REP } WaitEventIPC; +#define PG_WAIT_IPC_LAST_TYPE WAIT_EVENT_SYNC_REP + /* ---------- * Wait Events - Timeout * @@ -869,6 +929,8 @@ typedef enum WAIT_EVENT_RECOVERY_APPLY_DELAY } WaitEventTimeout; +#define PG_WAIT_TIMEOUT_LAST_TYPE WAIT_EVENT_RECOVERY_APPLY_DELAY + /* ---------- * Wait Events - IO * @@ -948,6 +1010,8 @@ typedef enum WAIT_EVENT_WAL_WRITE } WaitEventIO; +#define PG_WAIT_IO_LAST_TYPE WAIT_EVENT_WAL_WRITE + /* ---------- * Command type for progress reporting purposes * ---------- @@ -1204,6 +1268,8 @@ typedef struct PgStat_FunctionCallUsage instr_time f_start; } PgStat_FunctionCallUsage; +extern WAHash *wa_hash; +extern instr_time waitStart; /* ---------- * GUC parameters @@ -1211,6 +1277,7 @@ typedef struct PgStat_FunctionCallUsage */ extern bool pgstat_track_activities; extern bool pgstat_track_counts; +extern bool pgstat_track_wait_timing; extern int pgstat_track_functions; extern PGDLLIMPORT int pgstat_track_activity_query_size; extern char *pgstat_stat_directory; @@ -1228,6 +1295,7 @@ extern PgStat_MsgBgWriter BgWriterStats; extern PgStat_Counter pgStatBlockReadTime; extern PgStat_Counter pgStatBlockWriteTime; +extern PgStat_WaitAccumEntry *pgstat_get_wa_entry(WAHash *hash, uint32 key); /* ---------- * Functions called from postmaster * ---------- @@ -1315,6 +1383,50 @@ extern char *pgstat_clip_activity(const char *raw_activity); * initialized. * ---------- */ + +static inline void +pgstat_report_waitaccum_start() +{ + if (wa_hash == NULL) + return; + + if (pgstat_track_wait_timing) + { + INSTR_TIME_SET_CURRENT(waitStart); + } +} + +static inline void +pgstat_report_waitaccum_end(uint32 wait_event_info) +{ + PgStat_WaitAccumEntry *entry; + instr_time diff; + + if (wa_hash == NULL) + return; + + if (pgstat_track_wait_timing) + { + INSTR_TIME_SET_CURRENT(diff); + INSTR_TIME_SUBTRACT(diff, waitStart); + } + + entry = pgstat_get_wa_entry(wa_hash, wait_event_info); + + if (!entry) + { + printf("wait_event_info: %u.\n", wait_event_info); + fflush(stdout); + return; + } + + entry->calls++; + if (pgstat_track_wait_timing) + { + INSTR_TIME_ADD(entry->times, diff); + } +} + static inline void pgstat_report_wait_start(uint32 wait_event_info) { @@ -1328,6 +1440,8 @@ pgstat_report_wait_start(uint32 wait_event_info) * four-bytes, updates are atomic. */ proc->wait_event_info = wait_event_info; + + pgstat_report_waitaccum_start(); } /* ---------- @@ -1347,6 +1461,8 @@ pgstat_report_wait_end(void) if (!pgstat_track_activities || !proc) return; + pgstat_report_waitaccum_end(proc->wait_event_info); + /* * Since this is a four-byte field which is always read and written as * four-bytes, updates are atomic. @@ -1354,6 +1470,7 @@ pgstat_report_wait_end(void) proc->wait_event_info = 0; } + /* nontransactional event counts are simple enough to inline */ #define pgstat_count_heap_scan(rel) \ @@ -1421,6 +1538,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, extern void pgstat_send_archiver(const char *xlog, bool failed); extern void pgstat_send_bgwriter(void); +extern void pgstat_send_waitaccum(void); /* ---------- * Support functions for the SQL-callable functions to @@ -1435,5 +1553,6 @@ extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid); extern int pgstat_fetch_stat_numbackends(void); extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void); extern PgStat_GlobalStats *pgstat_fetch_global(void); +extern PgStat_WaitAccumStats *pgstat_fetch_stat_waitaccum(void); #endif /* PGSTAT_H */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 8fda8e4..2149c96 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -184,6 +184,7 @@ extern LWLockPadded *GetNamedLWLockTranche(const char *tranche_name); * registration in the main shared memory segment wouldn't work for that case. */ extern int LWLockNewTrancheId(void); +extern int LWLockGetLastTrancheId(void); extern void LWLockRegisterTranche(int tranche_id, const char *tranche_name); extern void LWLockInitialize(LWLock *lock, int tranche_id); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 5b407e6..bd47ccb 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -21,6 +21,7 @@ #include "storage/lock.h" #include "storage/pg_sema.h" #include "storage/proclist_types.h" +#include "portability/instr_time.h" /* * Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 62eaf90..82566d0 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2063,6 +2063,11 @@ pg_stat_user_tables| SELECT pg_stat_all_tables.relid, pg_stat_all_tables.autoanalyze_count FROM pg_stat_all_tables WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text)); +pg_stat_waitaccum| SELECT s.wait_event_type, + s.wait_event, + s.calls, + s.times + FROM pg_stat_get_waitaccum(NULL::integer) s(wait_event_type, wait_event, calls, times); pg_stat_wal_receiver| SELECT s.pid, s.status, s.receive_start_lsn, -- 1.8.3.1