diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 607a72f..2f84940 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -671,6 +671,17 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_lwlocks AS
+    SELECT
+            S.lwlockid,
+            S.local_calls,
+            S.local_waits,
+            S.local_time_ms,
+            S.shared_calls,
+            S.shared_waits,
+            S.shared_time_ms
+    FROM pg_stat_get_lwlocks() AS S;
+
 CREATE VIEW pg_user_mappings AS
     SELECT
         U.oid       AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 8389d5c..970e8bd 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -282,6 +282,7 @@ static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
+static void pgstat_recv_lwlockstat(PgStat_MsgLWLockstat *msg, int len);
 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
 
@@ -1188,6 +1189,8 @@ pgstat_reset_shared_counters(const char *target)
 
     if (strcmp(target, "bgwriter") == 0)
         msg.m_resettarget = RESET_BGWRITER;
+    else if (strcmp(target, "lwlocks") == 0)
+        msg.m_resettarget = RESET_LWLOCKSTAT;
     else
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1344,6 +1347,72 @@ pgstat_report_recovery_conflict(int reason)
 }
 
 /* --------
+ * pgstat_report_lwlockstat() -
+ *
+ *    Tell the collector about lwlock statistics.
+ * --------
+ */
+void
+pgstat_report_lwlockstat(void)
+{
+    PgStat_MsgLWLockstat msg;
+
+    int32        lockid = 0;
+    int          need_continue = 0;
+
+ report_continue:
+    memset(&msg, 0, sizeof(PgStat_MsgLWLockstat));
+
+    for ( ; lockid<NumFixedLWLocks+1 ; lockid++)
+    {
+        uint64        calls = lwlock_get_stat_calls_global(lockid);
+        uint64        waits = lwlock_get_stat_waits_global(lockid);
+        uint64        time_ms = lwlock_get_stat_time_ms_global(lockid);
+
+        if ( calls>0 || waits>0 || time_ms>0 )
+        {
+            msg.m_entry[msg.m_nentries].lockid = lockid;
+            msg.m_entry[msg.m_nentries].calls = calls;
+            msg.m_entry[msg.m_nentries].waits = waits;
+            msg.m_entry[msg.m_nentries].waited_time = time_ms;
+
+            msg.m_nentries++;
+
+            lwlock_reset_stat_global(lockid);
+
+            /*
+             * Need to keep each message packet smaller than
+             * PGSTAT_MSG_PAYLOAD, so a large report is split into
+             * multiple messages.
+             */
+            if ( msg.m_nentries>=MAX_LWLOCKSTAT_ENTRIES )
+            {
+                need_continue = 1;
+                break;
+            }
+        }
+    }
+
+    if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+        return;
+
+    pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_LWLOCKSTAT);
+    pgstat_send(&msg, sizeof(msg));
+
+    /*
+     * Need to continue because of the larger report?
+     */
+    if ( need_continue )
+    {
+        need_continue = 0;
+        goto report_continue;
+    }
+}
+
+/* --------
  * pgstat_report_deadlock() -
  *
  *    Tell the collector about a deadlock detected.
@@ -3219,6 +3288,10 @@ PgstatCollectorMain(int argc, char *argv[])
                     pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len);
                     break;
 
+                case PGSTAT_MTYPE_LWLOCKSTAT:
+                    pgstat_recv_lwlockstat((PgStat_MsgLWLockstat *) &msg, len);
+                    break;
+
                 case PGSTAT_MTYPE_DEADLOCK:
                     pgstat_recv_deadlock((PgStat_MsgDeadlock *) &msg, len);
                     break;
@@ -4379,8 +4452,15 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
     if (msg->m_resettarget == RESET_BGWRITER)
     {
         /* Reset the global background writer statistics for the cluster. */
-        memset(&globalStats, 0, sizeof(globalStats));
-        globalStats.stat_reset_timestamp = GetCurrentTimestamp();
+        memset(&globalStats.bgwriterstats, 0, sizeof(globalStats.bgwriterstats));
+        globalStats.bgwriterstats.reset_timestamp = GetCurrentTimestamp();
+    }
+
+    if (msg->m_resettarget == RESET_LWLOCKSTAT)
+    {
+        /* Reset the global lwlock statistics for the cluster. */
+        memset(&globalStats.lwlockstats, 0, sizeof(globalStats.lwlockstats));
+        globalStats.lwlockstats.reset_timestamp = GetCurrentTimestamp();
     }
 
     /*
@@ -4521,16 +4601,16 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
 static void
 pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 {
-    globalStats.timed_checkpoints += msg->m_timed_checkpoints;
-    globalStats.requested_checkpoints += msg->m_requested_checkpoints;
-    globalStats.checkpoint_write_time += msg->m_checkpoint_write_time;
-    globalStats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
-    globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
-    globalStats.buf_written_clean += msg->m_buf_written_clean;
-    globalStats.maxwritten_clean += msg->m_maxwritten_clean;
-    globalStats.buf_written_backend += msg->m_buf_written_backend;
-    globalStats.buf_fsync_backend += msg->m_buf_fsync_backend;
-    globalStats.buf_alloc += msg->m_buf_alloc;
+    globalStats.bgwriterstats.timed_checkpoints += msg->m_timed_checkpoints;
+    globalStats.bgwriterstats.requested_checkpoints += msg->m_requested_checkpoints;
+    globalStats.bgwriterstats.checkpoint_write_time += msg->m_checkpoint_write_time;
+    globalStats.bgwriterstats.checkpoint_sync_time += msg->m_checkpoint_sync_time;
+    globalStats.bgwriterstats.buf_written_checkpoints += msg->m_buf_written_checkpoints;
+    globalStats.bgwriterstats.buf_written_clean += msg->m_buf_written_clean;
+    globalStats.bgwriterstats.maxwritten_clean += msg->m_maxwritten_clean;
+    globalStats.bgwriterstats.buf_written_backend += msg->m_buf_written_backend;
+    globalStats.bgwriterstats.buf_fsync_backend += msg->m_buf_fsync_backend;
+    globalStats.bgwriterstats.buf_alloc += msg->m_buf_alloc;
 }
 
 /* ----------
@@ -4574,6 +4654,27 @@ pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
 }
 
 /* ----------
+ * pgstat_recv_lwlockstat() -
+ *
+ *    Process a LWLockstat message.
+ * ----------
+ */
+static void
+pgstat_recv_lwlockstat(PgStat_MsgLWLockstat *msg, int len)
+{
+    int            i;
+
+    for (i=0 ; i<msg->m_nentries ; i++)
+    {
+        int32        lockid = msg->m_entry[i].lockid;
+
+        globalStats.lwlockstats.lwlock_stat[lockid].calls += msg->m_entry[i].calls;
+        globalStats.lwlockstats.lwlock_stat[lockid].waits += msg->m_entry[i].waits;
+        globalStats.lwlockstats.lwlock_stat[lockid].waited_time += msg->m_entry[i].waited_time;
+    }
+}
+
+/* ----------
  * pgstat_recv_deadlock() -
  *
  *    Process a DEADLOCK message.
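The splitting logic in pgstat_report_lwlockstat() above is the one subtle part of the pgstat.c changes: only non-zero counters are packed into the outgoing message, and once MAX_LWLOCKSTAT_ENTRIES entries are queued the packet is sent and the scan resumes where it left off. The standalone sketch below shows the same pattern with made-up names (demo_msg, demo_flush, DEMO_MAX_ENTRIES); it is an illustration only, not code from the patch, and it uses a flush helper instead of the patch's goto-based continuation, which is an equivalent but arguably simpler shape for the same loop.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define DEMO_MAX_ENTRIES 4          /* stands in for MAX_LWLOCKSTAT_ENTRIES */
#define DEMO_NUM_LOCKS   10

typedef struct demo_entry { int lockid; uint64_t calls; } demo_entry;

typedef struct demo_msg
{
    int        n_entries;
    demo_entry entry[DEMO_MAX_ENTRIES];
} demo_msg;

static uint64_t demo_counters[DEMO_NUM_LOCKS];   /* per-backend counters */

/* stands in for pgstat_send(): here we just print the packet */
static void
demo_flush(demo_msg *msg)
{
    int i;

    if (msg->n_entries == 0)
        return;
    printf("message with %d entries:", msg->n_entries);
    for (i = 0; i < msg->n_entries; i++)
        printf(" (%d,%llu)", msg->entry[i].lockid,
               (unsigned long long) msg->entry[i].calls);
    printf("\n");
    memset(msg, 0, sizeof(*msg));
}

static void
demo_report(void)
{
    demo_msg msg;
    int      lockid;

    memset(&msg, 0, sizeof(msg));
    for (lockid = 0; lockid < DEMO_NUM_LOCKS; lockid++)
    {
        if (demo_counters[lockid] == 0)
            continue;                      /* skip idle locks */
        msg.entry[msg.n_entries].lockid = lockid;
        msg.entry[msg.n_entries].calls = demo_counters[lockid];
        msg.n_entries++;
        demo_counters[lockid] = 0;         /* reset after reporting */
        if (msg.n_entries >= DEMO_MAX_ENTRIES)
            demo_flush(&msg);              /* keep each packet bounded */
    }
    demo_flush(&msg);                      /* flush the remainder */
}

int
main(void)
{
    int i;

    for (i = 0; i < DEMO_NUM_LOCKS; i++)
        demo_counters[i] = (i % 3) ? (uint64_t) i : 0;
    demo_report();
    return 0;
}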
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 5e1ce17..402799d 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -32,6 +32,7 @@
 #include "storage/proc.h"
 #include "storage/spin.h"
 
+#include <sys/time.h>
 
 /* We use the ShmemLock spinlock to protect LWLockAssign */
 extern slock_t *ShmemLock;
@@ -48,6 +49,25 @@ typedef struct LWLock
     /* tail is undefined when head is NULL */
 } LWLock;
 
+typedef struct LWLockCounter2
+{
+    /* statistics stuff */
+    uint64        calls;
+    uint64        waits;
+    uint64        time_ms;
+} LWLockCounter2;
+
+/*
+ * LWLockCounterLocal holds one counter per fixed LWLock, plus one
+ * additional counter for dynamic LWLocks, to keep lwlock statistics
+ * in the local session.
+ */
+LWLockCounter2 LWLockCounterLocal[NumFixedLWLocks+1];
+
+LWLockCounter2 LWLockCounterGlobal[NumFixedLWLocks+1];
+
+#define LWLockCounterId(X) ((X) < (NumFixedLWLocks+1) ? (X) : (NumFixedLWLocks))
+
 /*
  * All the LWLock structs are allocated as an array in shared memory.
  * (LWLockIds are indexes into the array.)  We force the array stride to
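The LWLockCounterLocal and LWLockCounterGlobal arrays above have one slot per fixed LWLock plus a single trailing slot, and LWLockCounterId() folds every dynamic lock into that last slot so the arrays stay fixed-size. A minimal standalone sketch of that bucketing follows, assuming the trailing slot is intended as the shared bucket for dynamic locks; NUM_FIXED and counter_id() are illustrative stand-ins, not PostgreSQL symbols.

#include <stdio.h>
#include <stdint.h>

#define NUM_FIXED 4

static uint64_t calls[NUM_FIXED + 1];   /* one slot per fixed lock + one shared slot */

static int
counter_id(int lockid)
{
    return (lockid < NUM_FIXED) ? lockid : NUM_FIXED;
}

int
main(void)
{
    int lockid;

    /* locks 0..3 stand in for fixed locks; 4, 5, 6 for dynamic locks */
    for (lockid = 0; lockid < 7; lockid++)
        calls[counter_id(lockid)]++;

    /* prints one count per fixed slot, and 3 for the shared dynamic slot */
    for (lockid = 0; lockid <= NUM_FIXED; lockid++)
        printf("slot %d: %llu\n", lockid, (unsigned long long) calls[lockid]);
    return 0;
}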
@@ -90,6 +110,8 @@ static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
 static int    lock_addin_request = 0;
 static bool lock_addin_request_allowed = true;
 
+static void InitLWLockCounter(void);
+
 #ifdef LWLOCK_STATS
 static int    counts_for_pid = 0;
 static int *sh_acquire_counts;
@@ -253,6 +275,26 @@ LWLockShmemSize(void)
     return size;
 }
 
+/*
+ * Initialize local and global counters for lwlock statistics.
+ */
+static void
+InitLWLockCounter(void)
+{
+    int            i;
+
+    for (i=0 ; i<NumFixedLWLocks+1 ; i++)
+    {
+        memset(&LWLockCounterLocal[i], 0, sizeof(LWLockCounter2));
+        memset(&LWLockCounterGlobal[i], 0, sizeof(LWLockCounter2));
+    }
+}
+
[...]

+        if ( wait_done.tv_usec >= wait_start.tv_usec )
+        {
+            waited = ( wait_done.tv_usec - wait_start.tv_usec ) / 1000 ;
+            waited += ( wait_done.tv_sec - wait_start.tv_sec ) * 1000 ;
+        }
+        else
+        {
+            waited = ( wait_done.tv_usec + 1000*1000 - wait_start.tv_usec ) / 1000 ;
+            waited += ( wait_done.tv_sec - 1 - wait_start.tv_sec ) * 1000 ;
+        }
+        LWLockCounterLocal[ LWLockCounterId(lockid) ].time_ms += waited;
+        LWLockCounterGlobal[ LWLockCounterId(lockid) ].time_ms += waited;
 
         LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");
@@ -885,3 +951,55 @@ LWLockHeldByMe(LWLockId lockid)
     }
     return false;
 }
+
+uint64
+lwlock_get_stat_calls_local(LWLockId lockid)
+{
+    return LWLockCounterLocal[ LWLockCounterId(lockid) ].calls;
+}
+
+uint64
+lwlock_get_stat_waits_local(LWLockId lockid)
+{
+    return LWLockCounterLocal[ LWLockCounterId(lockid) ].waits;
+}
+
+uint64
+lwlock_get_stat_time_ms_local(LWLockId lockid)
+{
+    return LWLockCounterLocal[ LWLockCounterId(lockid) ].time_ms;
+}
+
+void
+lwlock_reset_stat_local(LWLockId lockid)
+{
+    LWLockCounterLocal[ LWLockCounterId(lockid) ].calls = 0;
+    LWLockCounterLocal[ LWLockCounterId(lockid) ].waits = 0;
+    LWLockCounterLocal[ LWLockCounterId(lockid) ].time_ms = 0;
+}
+
+uint64
+lwlock_get_stat_calls_global(LWLockId lockid)
+{
+    return LWLockCounterGlobal[ LWLockCounterId(lockid) ].calls;
+}
+
+uint64
+lwlock_get_stat_waits_global(LWLockId lockid)
+{
+    return LWLockCounterGlobal[ LWLockCounterId(lockid) ].waits;
+}
+
+uint64
+lwlock_get_stat_time_ms_global(LWLockId lockid)
+{
+    return LWLockCounterGlobal[ LWLockCounterId(lockid) ].time_ms;
+}
+
+void
+lwlock_reset_stat_global(LWLockId lockid)
+{
+    LWLockCounterGlobal[ LWLockCounterId(lockid) ].calls = 0;
+    LWLockCounterGlobal[ LWLockCounterId(lockid) ].waits = 0;
+    LWLockCounterGlobal[ LWLockCounterId(lockid) ].time_ms = 0;
+}
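The wait-time accounting in the lwlock.c hunks above takes two gettimeofday() samples around the semaphore wait and converts the difference to milliseconds, borrowing a second whenever the end microseconds are smaller than the start's. A self-contained sketch of that arithmetic follows; elapsed_ms() is an illustrative helper, not part of the patch, and both branches produce the same value for a given pair of timestamps.

#include <stdio.h>
#include <stdint.h>
#include <sys/time.h>

static uint64_t
elapsed_ms(const struct timeval *start, const struct timeval *done)
{
    uint64_t ms;

    if (done->tv_usec >= start->tv_usec)
    {
        ms  = (uint64_t) (done->tv_usec - start->tv_usec) / 1000;
        ms += (uint64_t) (done->tv_sec - start->tv_sec) * 1000;
    }
    else
    {
        /* borrow one second from the seconds part */
        ms  = (uint64_t) (done->tv_usec + 1000 * 1000 - start->tv_usec) / 1000;
        ms += (uint64_t) (done->tv_sec - 1 - start->tv_sec) * 1000;
    }
    return ms;
}

int
main(void)
{
    struct timeval a = {10, 900000};    /* 10.9 s */
    struct timeval b = {12, 100000};    /* 12.1 s */

    /* prints 1200: the borrow branch is taken because 100000 < 900000 */
    printf("%llu\n", (unsigned long long) elapsed_ms(&a, &b));
    return 0;
}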
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 585db1a..5ca2c6f 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3919,6 +3919,8 @@ PostgresMain(int argc, char *argv[], const char *username)
                 pgstat_report_activity(STATE_IDLE, NULL);
             }
 
+            pgstat_report_lwlockstat();
+
             ReadyForQuery(whereToSendOutput);
             send_ready_for_query = false;
         }
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 7d4059f..e74042d 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -118,6 +118,8 @@ extern Datum pg_stat_reset_shared(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS);
 extern Datum pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS);
 
+extern Datum pg_stat_get_lwlocks(PG_FUNCTION_ARGS);
+
 /* Global bgwriter statistics, from bgwriter.c */
 extern PgStat_MsgBgWriter bgwriterStats;
 
@@ -1399,69 +1401,69 @@ pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.timed_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->requested_checkpoints);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.requested_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->buf_written_checkpoints);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.buf_written_checkpoints);
 }
 
 Datum
 pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->buf_written_clean);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.buf_written_clean);
 }
 
 Datum
 pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_clean);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.maxwritten_clean);
 }
 
 Datum
 pg_stat_get_checkpoint_write_time(PG_FUNCTION_ARGS)
 {
     /* time is already in msec, just convert to double for presentation */
-    PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_write_time);
+    PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgwriterstats.checkpoint_write_time);
 }
 
 Datum
 pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
 {
     /* time is already in msec, just convert to double for presentation */
-    PG_RETURN_FLOAT8((double) pgstat_fetch_global()->checkpoint_sync_time);
+    PG_RETURN_FLOAT8((double) pgstat_fetch_global()->bgwriterstats.checkpoint_sync_time);
 }
 
 Datum
 pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->stat_reset_timestamp);
+    PG_RETURN_TIMESTAMPTZ(pgstat_fetch_global()->bgwriterstats.reset_timestamp);
 }
 
 Datum
 pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->buf_written_backend);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.buf_written_backend);
 }
 
 Datum
 pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->buf_fsync_backend);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.buf_fsync_backend);
 }
 
 Datum
 pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 {
-    PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc);
+    PG_RETURN_INT64(pgstat_fetch_global()->bgwriterstats.buf_alloc);
 }
 
 Datum
@@ -1701,3 +1703,162 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
 
     PG_RETURN_VOID();
 }
+
+Datum
+pg_stat_get_lwlocks(PG_FUNCTION_ARGS)
+{
+    FuncCallContext *funcctx;
+
+    /* stuff done only on the first call of the function */
+    if (SRF_IS_FIRSTCALL())
+    {
+        MemoryContext oldcontext;
+        TupleDesc    tupdesc;
+
+        /* create a function context for cross-call persistence */
+        funcctx = SRF_FIRSTCALL_INIT();
+
+        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+        tupdesc = CreateTemplateTupleDesc(7, false);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lockid",
+                           INT8OID, -1, 0);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 2, "local_calls",
+                           INT8OID, -1, 0);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 3, "local_waits",
+                           INT8OID, -1, 0);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 4, "local_time_ms",
+                           INT8OID, -1, 0);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 5, "shared_calls",
+                           INT8OID, -1, 0);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 6, "shared_waits",
+                           INT8OID, -1, 0);
+        TupleDescInitEntry(tupdesc, (AttrNumber) 7, "shared_time_ms",
+                           INT8OID, -1, 0);
+
+        funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+        funcctx->max_calls = NumFixedLWLocks + 1;
+
+        MemoryContextSwitchTo(oldcontext);
+    }
+
+    /* stuff done on every call of the function */
+    funcctx = SRF_PERCALL_SETUP();
+
+    if (funcctx->call_cntr < funcctx->max_calls)
+    {
+        Datum        values[7];
+        bool         nulls[7];
+        HeapTuple    tuple;
+        LWLockId     lockid;
+        uint64       local_calls, local_waits, local_time_ms;
+        uint64       shared_calls, shared_waits, shared_time_ms;
+        int          i;
+        PgStat_LWLockEntry *lwlock_stat = pgstat_fetch_global()->lwlockstats.lwlock_stat;
+
+        MemSet(values, 0, sizeof(values));
+        MemSet(nulls, 0, sizeof(nulls));
+
+        lockid = funcctx->call_cntr;
+
+        local_calls = local_waits = local_time_ms = 0;
+        shared_calls = shared_waits = shared_time_ms = 0;
+
+        /*
+         * Partitioned locks need to be summed up by the lock group.
+         */
+        if ( FirstBufMappingLock <= lockid && lockid < FirstLockMgrLock )
+        {
+            for (i=0 ; i<NUM_BUFFER_PARTITIONS ; i++)
+            {
+                local_calls += lwlock_get_stat_calls_local(lockid + i);
+                local_waits += lwlock_get_stat_waits_local(lockid + i);
+                local_time_ms += lwlock_get_stat_time_ms_local(lockid + i);
+
+                shared_calls += lwlock_stat[lockid + i].calls;
+                shared_waits += lwlock_stat[lockid + i].waits;
+                shared_time_ms += lwlock_stat[lockid + i].waited_time;
+            }
+            funcctx->call_cntr += NUM_BUFFER_PARTITIONS - 1;
+        }
+        else if ( FirstLockMgrLock <= lockid && lockid < FirstPredicateLockMgrLock )
+        {
+            for (i=0 ; i<NUM_LOCK_PARTITIONS ; i++)
+            {
+                local_calls += lwlock_get_stat_calls_local(lockid + i);
+                local_waits += lwlock_get_stat_waits_local(lockid + i);
+                local_time_ms += lwlock_get_stat_time_ms_local(lockid + i);
+
+                shared_calls += lwlock_stat[lockid + i].calls;
+                shared_waits += lwlock_stat[lockid + i].waits;
+                shared_time_ms += lwlock_stat[lockid + i].waited_time;
+            }
+            funcctx->call_cntr += NUM_LOCK_PARTITIONS - 1;
+        }
+        else if ( FirstPredicateLockMgrLock <= lockid && lockid < NumFixedLWLocks )
+        {
+            for (i=0 ; i<NUM_PREDICATELOCK_PARTITIONS ; i++)
+            {
+                local_calls += lwlock_get_stat_calls_local(lockid + i);
+                local_waits += lwlock_get_stat_waits_local(lockid + i);
+                local_time_ms += lwlock_get_stat_time_ms_local(lockid + i);
+
+                shared_calls += lwlock_stat[lockid + i].calls;
+                shared_waits += lwlock_stat[lockid + i].waits;
+                shared_time_ms += lwlock_stat[lockid + i].waited_time;
+            }
+            funcctx->call_cntr += NUM_PREDICATELOCK_PARTITIONS - 1;
+        }
+        else
+        {
+            /* local statistics */
+            local_calls = lwlock_get_stat_calls_local(lockid);
+            local_waits = lwlock_get_stat_waits_local(lockid);
+            local_time_ms = lwlock_get_stat_time_ms_local(lockid);
+
+            /* global statistics */
+            shared_calls = lwlock_stat[lockid].calls;
+            shared_waits = lwlock_stat[lockid].waits;
+            shared_time_ms = lwlock_stat[lockid].waited_time;
+        }
+
+        values[0] = Int64GetDatum(lockid);
+        values[1] = Int64GetDatum(local_calls);
+        values[2] = Int64GetDatum(local_waits);
+        values[3] = Int64GetDatum(local_time_ms);
+        values[4] = Int64GetDatum(shared_calls);
+        values[5] = Int64GetDatum(shared_waits);
+        values[6] = Int64GetDatum(shared_time_ms);
+
+        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+        SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+    }
+    else
+    {
+        SRF_RETURN_DONE(funcctx);
+    }
+}
+
+Datum
+pg_stat_reset_lwlocks(PG_FUNCTION_ARGS)
+{
+    LWLockId    lockid;
+
+    for (lockid=0 ; lockid
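pg_stat_get_lwlocks() above reports each partitioned lock group (buffer mapping, lock manager, predicate lock manager) as a single row by summing the per-partition counters and advancing the scan position past the whole group. The compact sketch below shows that aggregation on a flat counter array; FIRST_PART, NUM_PART, and counters[] are illustrative stand-ins with made-up sizes, not PostgreSQL symbols.

#include <stdio.h>
#include <stdint.h>

#define NUM_LOCKS  12
#define FIRST_PART 4        /* first lock id of the partitioned group */
#define NUM_PART   4        /* number of partitions in the group */

static uint64_t counters[NUM_LOCKS] = {5, 1, 0, 2, 7, 7, 7, 7, 3, 0, 1, 9};

int
main(void)
{
    int lockid = 0;

    while (lockid < NUM_LOCKS)
    {
        uint64_t sum = 0;
        int      i;

        if (lockid == FIRST_PART)
        {
            /* report the whole group as one row under its first lock id */
            for (i = 0; i < NUM_PART; i++)
                sum += counters[lockid + i];
            printf("lock %d (group of %d): %llu\n",
                   lockid, NUM_PART, (unsigned long long) sum);
            lockid += NUM_PART;     /* skip past the group */
        }
        else
        {
            printf("lock %d: %llu\n", lockid,
                   (unsigned long long) counters[lockid]);
            lockid++;
        }
    }
    return 0;
}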