From d99d1d4d1f39c0068be34fa5c9d0f6efa6c70d2c Mon Sep 17 00:00:00 2001 From: Mircea Cadariu Date: Mon, 30 Jun 2025 08:28:37 +0100 Subject: [PATCH v2] Add metadata (non-leaf) index block statistics to pg_stat functions and system views. This commit contains the changes for counting and exposing when index metadata block reads (either hits or from disk) are issued from the index handling code. --- doc/src/sgml/monitoring.sgml | 28 ++++++++++++++++++++ src/backend/access/brin/brin.c | 1 + src/backend/access/brin/brin_revmap.c | 5 ++++ src/backend/access/gin/ginbtree.c | 7 +++++ src/backend/access/gin/ginfast.c | 8 ++++++ src/backend/access/gin/ginget.c | 7 ++++- src/backend/access/gin/ginutil.c | 3 +++ src/backend/access/gist/gist.c | 11 ++++++++ src/backend/access/gist/gistbuild.c | 5 ++++ src/backend/access/gist/gistget.c | 2 ++ src/backend/access/hash/hashpage.c | 7 +++++ src/backend/access/nbtree/nbtinsert.c | 7 +++++ src/backend/access/nbtree/nbtpage.c | 12 +++++++++ src/backend/access/nbtree/nbtsearch.c | 13 +++++++++ src/backend/access/nbtree/nbtutils.c | 1 + src/backend/access/spgist/spgdoinsert.c | 3 +++ src/backend/access/spgist/spgscan.c | 2 ++ src/backend/access/spgist/spgutils.c | 6 +++++ src/backend/catalog/system_views.sql | 9 +++++-- src/backend/utils/activity/pgstat_database.c | 1 + src/backend/utils/activity/pgstat_relation.c | 2 ++ src/backend/utils/adt/pgstatfuncs.c | 6 +++++ src/include/catalog/pg_proc.dat | 8 ++++++ src/include/pgstat.h | 16 +++++++++++ src/test/regress/expected/rules.out | 16 ++++++++--- src/test/regress/expected/stats.out | 27 +++++++++++++++++++ src/test/regress/sql/stats.sql | 17 ++++++++++++ 27 files changed, 223 insertions(+), 7 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 4265a22d4d..beb1dded0b 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3420,6 +3420,15 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + 
idx_metadata_blks bigint + + + Number of metadata (non-leaf) index blocks found in the buffer cache or read from disk in this database + + + tup_returned bigint @@ -4384,6 +4393,15 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + idx_metadata_blks bigint + + + Number of metadata (non-leaf) index blocks found in the buffer cache or read from disk, summed over all indexes on this table + + + toast_blks_read bigint @@ -4519,6 +4537,16 @@ description | Waiting for a newly initialized WAL file to reach durable storage Number of buffer hits in this index + + + + idx_metadata_blks bigint + + + Number of metadata (non-leaf) index blocks found in the buffer cache or read from disk in this index + + + diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 4204088fa0..0e7b6f3db3 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -1652,6 +1652,7 @@ brinGetStats(Relation index, BrinStatsData *stats) BrinMetaPageData *metadata; metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, BUFFER_LOCK_SHARE); metapage = BufferGetPage(metabuffer); metadata = (BrinMetaPageData *) PageGetContents(metapage); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 4e380ecc71..0172ad83d6 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -27,6 +27,7 @@ #include "access/brin_xlog.h" #include "access/rmgr.h" #include "access/xloginsert.h" +#include "pgstat.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "utils/rel.h" @@ -75,6 +76,7 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange) Page page; meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(idxrel); LockBuffer(meta, BUFFER_LOCK_SHARE); page = BufferGetPage(meta); metadata = (BrinMetaPageData *) PageGetContents(page); @@ -232,6 +234,7 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Assert(mapBlk != 
InvalidBlockNumber); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + pgstat_count_metadata_buffer(idxRel); } LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE); @@ -486,6 +489,7 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) ReleaseBuffer(revmap->rm_currBuf); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + pgstat_count_metadata_buffer(revmap->rm_irel); } return revmap->rm_currBuf; @@ -554,6 +558,7 @@ revmap_physical_extend(BrinRevmap *revmap) if (mapBlk < nblocks) { buf = ReadBuffer(irel, mapBlk); + pgstat_count_metadata_buffer(irel); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); } diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 644d484ea5..0d8778f589 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -18,6 +18,7 @@ #include "access/ginxlog.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/predicate.h" #include "utils/injection_point.h" #include "utils/memutils.h" @@ -104,6 +105,8 @@ ginFindLeafPage(GinBtree btree, bool searchMode, page = BufferGetPage(stack->buffer); + pgstat_count_metadata_buffer_if(!GinPageIsLeaf(page), btree->index); + access = ginTraverseLock(stack->buffer, searchMode); /* @@ -191,6 +194,8 @@ ginStepRight(Buffer buffer, Relation index, int lockmode) if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page)) elog(ERROR, "right sibling of GIN page is of different type"); + pgstat_count_metadata_buffer_if(!GinPageIsLeaf(page), index); + return nextbuffer; } @@ -254,6 +259,8 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack) page = BufferGetPage(buffer); if (GinPageIsLeaf(page)) elog(ERROR, "Lost path"); + else + pgstat_count_metadata_buffer(btree->index); if (GinPageIsIncompleteSplit(page)) { diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index a6d88572cc..328c3c19e5 100644 --- a/src/backend/access/gin/ginfast.c +++ 
b/src/backend/access/gin/ginfast.c @@ -25,6 +25,7 @@ #include "catalog/pg_am.h" #include "commands/vacuum.h" #include "miscadmin.h" +#include "pgstat.h" #include "port/pg_bitutils.h" #include "postmaster/autovacuum.h" #include "storage/indexfsm.h" @@ -240,6 +241,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) data.newRightlink = data.prevTail = InvalidBlockNumber; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); metapage = BufferGetPage(metabuffer); /* @@ -320,6 +322,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) data.newRightlink = sublist.head; buffer = ReadBuffer(index, metadata->tail); + pgstat_count_metadata_buffer(index); LockBuffer(buffer, GIN_EXCLUSIVE); page = BufferGetPage(buffer); @@ -359,6 +362,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) CheckForSerializableConflictIn(index, NULL, GIN_METAPAGE_BLKNO); buffer = ReadBuffer(index, metadata->tail); + pgstat_count_metadata_buffer(index); LockBuffer(buffer, GIN_EXCLUSIVE); page = BufferGetPage(buffer); @@ -576,6 +580,7 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, { freespace[data.ndeleted] = blknoToDelete; buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete); + pgstat_count_metadata_buffer(index); LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE); page = BufferGetPage(buffers[data.ndeleted]); @@ -828,6 +833,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean, } metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_SHARE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); @@ -853,6 +859,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean, buffer = ReadBuffer(index, blkno); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_UNLOCK); @@ -1004,6 +1011,7 @@ 
ginInsertCleanup(GinState *ginstate, bool full_clean, */ vacuum_delay_point(false); buffer = ReadBuffer(index, blkno); + pgstat_count_metadata_buffer(index); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); } diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index f29ccd3c2d..86747beb39 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -18,6 +18,7 @@ #include "access/relscan.h" #include "common/pg_prng.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/predicate.h" #include "utils/datum.h" #include "utils/memutils.h" @@ -1491,9 +1492,10 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) * Here we must prevent deletion of next page by insertcleanup * process, which may be trying to obtain exclusive lock on * current page. So, we lock next page before releasing the - * current one + * current one. */ Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno); + pgstat_count_metadata_buffer(scan->indexRelation); LockBuffer(tmpbuf, GIN_SHARE); UnlockReleaseBuffer(pos->pendingBuffer); @@ -1844,6 +1846,8 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) Page page; BlockNumber blkno; + pgstat_count_metadata_buffer(scan->indexRelation); + *ntids = 0; /* @@ -1868,6 +1872,7 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) } pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno); + pgstat_count_metadata_buffer(scan->indexRelation); LockBuffer(pos.pendingBuffer, GIN_SHARE); pos.firstOffset = FirstOffsetNumber; UnlockReleaseBuffer(metabuffer); diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 78f7b7a249..9cc6e6d6c3 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -22,6 +22,7 @@ #include "catalog/pg_type.h" #include "commands/progress.h" #include "commands/vacuum.h" +#include "pgstat.h" #include "miscadmin.h" #include "storage/indexfsm.h" #include 
"utils/builtins.h" @@ -632,6 +633,7 @@ ginGetStats(Relation index, GinStatsData *stats) GinMetaPageData *metadata; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_SHARE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); @@ -659,6 +661,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build) GinMetaPageData *metadata; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_EXCLUSIVE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 7b24380c97..2e5cc91296 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -20,6 +20,7 @@ #include "catalog/pg_collation.h" #include "commands/vacuum.h" #include "miscadmin.h" +#include "pgstat.h" #include "nodes/execnodes.h" #include "storage/predicate.h" #include "utils/fmgrprotos.h" @@ -684,7 +685,10 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, } if (XLogRecPtrIsInvalid(stack->lsn)) + { stack->buffer = ReadBuffer(state.r, stack->blkno); + pgstat_count_metadata_buffer(state.r); + } /* * Be optimistic and grab shared lock first. 
Swap it for an exclusive @@ -949,6 +953,8 @@ gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum) UnlockReleaseBuffer(buffer); break; } + else + pgstat_count_metadata_buffer(r); /* currently, internal pages are never deleted */ Assert(!GistPageIsDeleted(page)); @@ -1096,6 +1102,9 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) break; } parent->buffer = ReadBuffer(r, parent->blkno); + + pgstat_count_metadata_buffer(r); + LockBuffer(parent->buffer, GIST_EXCLUSIVE); gistcheckpage(r, parent->buffer); parent->page = (Page) BufferGetPage(parent->buffer); @@ -1122,6 +1131,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) { ptr->buffer = ReadBuffer(r, ptr->blkno); ptr->page = (Page) BufferGetPage(ptr->buffer); + pgstat_count_metadata_buffer_if(!GistPageIsLeaf(ptr->page), r); ptr = ptr->parent; } @@ -1236,6 +1246,7 @@ gistfixsplit(GISTInsertState *state, GISTSTATE *giststate) { /* lock next page */ buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink); + pgstat_count_metadata_buffer_if(!GistPageIsLeaf((Page) page), state->r); LockBuffer(buf, GIST_EXCLUSIVE); } else diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 9e707167d9..8b2abc30ec 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -39,6 +39,7 @@ #include "access/tableam.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "pgstat.h" #include "nodes/execnodes.h" #include "optimizer/optimizer.h" #include "storage/bufmgr.h" @@ -967,6 +968,7 @@ gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, */ buffer = ReadBuffer(indexrel, blkno); + pgstat_count_metadata_buffer(indexrel); LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); @@ -1248,6 +1250,7 @@ gistBufferingFindCorrectParent(GISTBuildState *buildstate, buffer = ReadBuffer(buildstate->indexrel, parent); page = BufferGetPage(buffer); + 
pgstat_count_metadata_buffer(buildstate->indexrel); LockBuffer(buffer, GIST_EXCLUSIVE); gistcheckpage(buildstate->indexrel, buffer); maxoff = PageGetMaxOffsetNumber(page); @@ -1457,6 +1460,8 @@ gistGetMaxLevel(Relation index) break; } + pgstat_count_metadata_buffer(index); + /* * Pick the first downlink on the page, and follow it. It doesn't * matter which downlink we choose, the tree has the same depth diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 387d997234..7f31331eed 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -346,6 +346,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, gistcheckpage(scan->indexRelation, buffer); page = BufferGetPage(buffer); opaque = GistPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!GistPageIsLeaf((Page) page), + scan->indexRelation); /* * Check if we need to follow the rightlink. We need to follow it if the diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index b8e5bd005e..ac282ec11e 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -32,6 +32,7 @@ #include "access/hash_xlog.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "pgstat.h" #include "port/pg_bitutils.h" #include "storage/predicate.h" #include "storage/smgr.h" @@ -76,6 +77,8 @@ _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags) buf = ReadBuffer(rel, blkno); + pgstat_count_metadata_buffer_if(flags == LH_META_PAGE, rel); + if (access != HASH_NOLOCK) LockBuffer(buf, access); @@ -102,6 +105,8 @@ _hash_getbuf_with_condlock_cleanup(Relation rel, BlockNumber blkno, int flags) buf = ReadBuffer(rel, blkno); + pgstat_count_metadata_buffer_if(flags == LH_META_PAGE, rel); + if (!ConditionalLockBufferForCleanup(buf)) { ReleaseBuffer(buf); @@ -247,6 +252,8 @@ _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, 
RBM_NORMAL, bstrategy); + pgstat_count_metadata_buffer_if(flags == LH_META_PAGE, rel); + if (access != HASH_NOLOCK) LockBuffer(buf, access); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index aa82cede30..7802be2d9e 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -21,6 +21,7 @@ #include "access/xloginsert.h" #include "common/int.h" #include "common/pg_prng.h" +#include "pgstat.h" #include "lib/qunique.h" #include "miscadmin.h" #include "storage/lmgr.h" @@ -1260,6 +1261,7 @@ _bt_insertonpg(Relation rel, Assert(BufferIsValid(cbuf)); metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); @@ -2256,6 +2258,8 @@ _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf, BTStack stack) rpage = BufferGetPage(rbuf); rpageop = BTPageGetOpaque(rpage); + pgstat_count_metadata_buffer_if(!P_ISLEAF(rpageop), rel); + /* Could this be a root split? 
*/ if (!stack) { @@ -2265,6 +2269,7 @@ _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf, BTStack stack) /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); @@ -2333,6 +2338,7 @@ _bt_getstackbuf(Relation rel, Relation heaprel, BTStack stack, BlockNumber child buf = _bt_getbuf(rel, blkno, BT_WRITE); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer(rel); Assert(heaprel != NULL); if (P_INCOMPLETE_SPLIT(opaque)) @@ -2473,6 +2479,7 @@ _bt_newlevel(Relation rel, Relation heaprel, Buffer lbuf, Buffer rbuf) /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c79dd38ee1..ed812c6f9e 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -30,6 +30,7 @@ #include "access/xloginsert.h" #include "common/int.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/indexfsm.h" #include "storage/predicate.h" #include "storage/procarray.h" @@ -190,6 +191,7 @@ _bt_vacuum_needs_cleanup(Relation rel) * Note that we deliberately avoid using cached version of metapage here. */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); btm_version = metad->btm_version; @@ -254,6 +256,7 @@ _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages) * to be consistent. 
*/ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); @@ -374,6 +377,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) rootlevel = metad->btm_fastlevel; rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + pgstat_count_metadata_buffer(rel); rootpage = BufferGetPage(rootbuf); rootopaque = BTPageGetOpaque(rootpage); @@ -400,6 +404,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) } metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metad = _bt_getmeta(rel, metabuf); /* if no root page initialized yet, do it */ @@ -536,6 +541,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) for (;;) { rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ); + pgstat_count_metadata_buffer(rel); rootpage = BufferGetPage(rootbuf); rootopaque = BTPageGetOpaque(rootpage); @@ -600,6 +606,7 @@ _bt_gettrueroot(Relation rel) rel->rd_amcache = NULL; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metaopaque = BTPageGetOpaque(metapg); metad = BTPageGetMeta(metapg); @@ -639,6 +646,7 @@ _bt_gettrueroot(Relation rel) for (;;) { rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ); + pgstat_count_metadata_buffer(rel); rootpage = BufferGetPage(rootbuf); rootopaque = BTPageGetOpaque(rootpage); @@ -681,6 +689,7 @@ _bt_getrootheight(Relation rel) Buffer metabuf; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metad = _bt_getmeta(rel, metabuf); /* @@ -745,6 +754,7 @@ _bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage) Buffer metabuf; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metad = _bt_getmeta(rel, metabuf); /* @@ -2375,6 +2385,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, /* Fetch the block number of the 
target's left sibling */ buf = _bt_getbuf(rel, target, BT_READ); + pgstat_count_metadata_buffer(rel); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); leftsib = opaque->btpo_prev; @@ -2570,6 +2581,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, { /* rightsib will be the only one left on the level */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 4af1ff1e9e..a0bb9b5485 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -185,6 +185,8 @@ _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP, /* drop the read lock on the page, then acquire one on its child */ *bufP = _bt_relandgetbuf(rel, *bufP, child, page_access); + pgstat_count_metadata_buffer_if(opaque->btpo_level != 1, rel); + /* okay, all set to move down a level */ stack_in = new_stack; } @@ -305,6 +307,9 @@ _bt_moveright(Relation rel, /* re-acquire the lock in the right mode, and re-check */ buf = _bt_getbuf(rel, blkno, access); + + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); + continue; } @@ -312,6 +317,7 @@ _bt_moveright(Relation rel, { /* step right one page */ buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); continue; } else @@ -2512,6 +2518,7 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_getbuf(rel, *blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); /* * If this isn't the page we want, walk right till we find what we @@ -2539,6 +2546,7 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_relandgetbuf(rel, buf, *blkno, BT_READ); page = BufferGetPage(buf); opaque = 
BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); } /* @@ -2549,6 +2557,8 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); + if (P_ISDELETED(opaque)) { /* @@ -2566,6 +2576,7 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); if (!P_ISDELETED(opaque)) break; } @@ -2655,6 +2666,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); } /* Done? */ @@ -2678,6 +2690,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); } return buf; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 9aed207995..54a8e8e603 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -21,6 +21,7 @@ #include "access/reloptions.h" #include "commands/progress.h" #include "miscadmin.h" +#include "pgstat.h" #include "utils/datum.h" #include "utils/lsyscache.h" diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index af6b27b213..424639fbb5 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -22,6 +22,7 @@ #include "common/int.h" #include "common/pg_prng.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "utils/rel.h" @@ -2160,6 +2161,8 
@@ spgdoinsert(Relation index, SpGistState *state, spgChooseIn in; spgChooseOut out; + pgstat_count_metadata_buffer(index); + /* * spgAddNode and spgSplitTuple cases will loop back to here to * complete the insertion operation. Just in case the choose diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 25893050c5..d0be58fb78 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -897,6 +897,8 @@ redirect: SpGistInnerTuple innerTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, offset)); + pgstat_count_metadata_buffer(index); + if (innerTuple->tupstate != SPGIST_LIVE) { if (innerTuple->tupstate == SPGIST_REDIRECT) diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 95fea74e29..3e1b705a20 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -26,6 +26,7 @@ #include "commands/vacuum.h" #include "nodes/nodeFuncs.h" #include "parser/parse_coerce.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "storage/indexfsm.h" #include "utils/catcache.h" @@ -271,6 +272,7 @@ spgGetCache(Relation index) SpGistMetaPageData *metadata; metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, BUFFER_LOCK_SHARE); metadata = SpGistPageGetMeta(BufferGetPage(metabuffer)); @@ -456,11 +458,13 @@ SpGistUpdateMetaPage(Relation index) Buffer metabuffer; metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); if (ConditionalLockBuffer(metabuffer)) { Page metapage = BufferGetPage(metabuffer); SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage); + pgstat_count_metadata_buffer(index); metadata->lastUsedPages = cache->lastUsedPages; @@ -650,6 +654,8 @@ SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew) return buffer; } } + else + pgstat_count_metadata_buffer(index); /* * fallback to 
allocation of new buffer diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index e5dbbe61b8..3e13252a53 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -766,6 +766,7 @@ CREATE VIEW pg_statio_all_tables AS pg_stat_get_blocks_hit(C.oid) AS heap_blks_hit, I.idx_blks_read AS idx_blks_read, I.idx_blks_hit AS idx_blks_hit, + I.idx_metadata_blks AS idx_metadata_blks, pg_stat_get_blocks_fetched(T.oid) - pg_stat_get_blocks_hit(T.oid) AS toast_blks_read, pg_stat_get_blocks_hit(T.oid) AS toast_blks_hit, @@ -779,7 +780,9 @@ CREATE VIEW pg_statio_all_tables AS pg_stat_get_blocks_hit(indexrelid))::bigint AS idx_blks_read, sum(pg_stat_get_blocks_hit(indexrelid))::bigint - AS idx_blks_hit + AS idx_blks_hit, + sum(pg_stat_get_idx_metadata_blocks(indexrelid))::bigint + AS idx_metadata_blks FROM pg_index WHERE indrelid = C.oid ) I ON true LEFT JOIN LATERAL ( SELECT sum(pg_stat_get_blocks_fetched(indexrelid) - @@ -836,7 +839,8 @@ CREATE VIEW pg_statio_all_indexes AS I.relname AS indexrelname, pg_stat_get_blocks_fetched(I.oid) - pg_stat_get_blocks_hit(I.oid) AS idx_blks_read, - pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit + pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit, + pg_stat_get_idx_metadata_blocks(I.oid) AS idx_metadata_blks FROM pg_class C JOIN pg_index X ON C.oid = X.indrelid JOIN pg_class I ON I.oid = X.indexrelid @@ -1071,6 +1075,7 @@ CREATE VIEW pg_stat_database AS pg_stat_get_db_blocks_fetched(D.oid) - pg_stat_get_db_blocks_hit(D.oid) AS blks_read, pg_stat_get_db_blocks_hit(D.oid) AS blks_hit, + pg_stat_get_db_idx_metadata_blocks(D.oid) AS idx_metadata_blks, pg_stat_get_db_tuples_returned(D.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted, diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c index b31f20d41b..2f4a065af9 100644 --- 
a/src/backend/utils/activity/pgstat_database.c +++ b/src/backend/utils/activity/pgstat_database.c @@ -443,6 +443,7 @@ pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) PGSTAT_ACCUM_DBCOUNT(xact_rollback); PGSTAT_ACCUM_DBCOUNT(blocks_fetched); PGSTAT_ACCUM_DBCOUNT(blocks_hit); + PGSTAT_ACCUM_DBCOUNT(idx_metadata_blocks); PGSTAT_ACCUM_DBCOUNT(tuples_returned); PGSTAT_ACCUM_DBCOUNT(tuples_fetched); diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 28587e2916..3de47c0b76 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -880,6 +880,7 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) tabentry->blocks_fetched += lstats->counts.blocks_fetched; tabentry->blocks_hit += lstats->counts.blocks_hit; + tabentry->idx_metadata_blocks += lstats->counts.idx_metadata_blocks; /* Clamp live_tuples in case of negative delta_live_tuples */ tabentry->live_tuples = Max(tabentry->live_tuples, 0); @@ -897,6 +898,7 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) dbentry->tuples_deleted += lstats->counts.tuples_deleted; dbentry->blocks_fetched += lstats->counts.blocks_fetched; dbentry->blocks_hit += lstats->counts.blocks_hit; + dbentry->idx_metadata_blocks += lstats->counts.idx_metadata_blocks; return true; } diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 1c12ddbae4..1bc93e6fe1 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -67,6 +67,9 @@ PG_STAT_GET_RELENTRY_INT64(blocks_fetched) /* pg_stat_get_blocks_hit */ PG_STAT_GET_RELENTRY_INT64(blocks_hit) +/* pg_stat_get_idx_metadata_blocks */ +PG_STAT_GET_RELENTRY_INT64(idx_metadata_blocks) + /* pg_stat_get_dead_tuples */ PG_STAT_GET_RELENTRY_INT64(dead_tuples) @@ -1034,6 +1037,9 @@ PG_STAT_GET_DBENTRY_INT64(blocks_fetched) /* pg_stat_get_db_blocks_hit */ 
PG_STAT_GET_DBENTRY_INT64(blocks_hit) +/* pg_stat_get_db_idx_metadata_blocks */ +PG_STAT_GET_DBENTRY_INT64(idx_metadata_blocks) + /* pg_stat_get_db_conflict_bufferpin */ PG_STAT_GET_DBENTRY_INT64(conflict_bufferpin) diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d4650947c6..a8b60d5ae6 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5580,6 +5580,10 @@ proname => 'pg_stat_get_blocks_hit', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_blocks_hit' }, +{ oid => '8888', descr => 'statistics: number of metadata blocks', + proname => 'pg_stat_get_idx_metadata_blocks', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_idx_metadata_blocks' }, { oid => '2781', descr => 'statistics: last manual vacuum time for a table', proname => 'pg_stat_get_last_vacuum_time', provolatile => 's', proparallel => 'r', prorettype => 'timestamptz', proargtypes => 'oid', @@ -5792,6 +5796,10 @@ proname => 'pg_stat_get_db_tuples_inserted', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_tuples_inserted' }, +{ oid => '8892', descr => 'statistics: number of db metadata blocks', + proname => 'pg_stat_get_db_idx_metadata_blocks', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_idx_metadata_blocks' }, { oid => '2761', descr => 'statistics: tuples updated in database', proname => 'pg_stat_get_db_tuples_updated', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 378f2f2c2b..5870195f2a 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -153,6 +153,8 @@ typedef struct PgStat_TableCounts PgStat_Counter blocks_fetched; PgStat_Counter blocks_hit; + + PgStat_Counter idx_metadata_blocks; 
} PgStat_TableCounts; /* ---------- @@ -345,6 +347,7 @@ typedef struct PgStat_StatDBEntry PgStat_Counter xact_rollback; PgStat_Counter blocks_fetched; PgStat_Counter blocks_hit; + PgStat_Counter idx_metadata_blocks; PgStat_Counter tuples_returned; PgStat_Counter tuples_fetched; PgStat_Counter tuples_inserted; @@ -439,6 +442,7 @@ typedef struct PgStat_StatTabEntry PgStat_Counter blocks_fetched; PgStat_Counter blocks_hit; + PgStat_Counter idx_metadata_blocks; TimestampTz last_vacuum_time; /* user initiated vacuum */ PgStat_Counter vacuum_count; @@ -711,6 +715,18 @@ extern void pgstat_report_analyze(Relation rel, if (pgstat_should_count_relation(rel)) \ (rel)->pgstat_info->counts.blocks_hit++; \ } while (0) +#define pgstat_count_metadata_buffer(rel) \ + do { \ + if (pgstat_should_count_relation(rel)) { \ + (rel)->pgstat_info->counts.idx_metadata_blocks++; \ + } \ + } while (0) +#define pgstat_count_metadata_buffer_if(is_metadata, rel) \ + do { \ + if (pgstat_should_count_relation(rel) && (is_metadata)) { \ + (rel)->pgstat_info->counts.idx_metadata_blocks++; \ + } \ + } while (0) extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n); extern void pgstat_count_heap_update(Relation rel, bool hot, bool newpage); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6cf828ca8d..16c2b39b81 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1868,6 +1868,7 @@ pg_stat_database| SELECT oid AS datid, pg_stat_get_db_xact_rollback(oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(oid) - pg_stat_get_db_blocks_hit(oid)) AS blks_read, pg_stat_get_db_blocks_hit(oid) AS blks_hit, + pg_stat_get_db_idx_metadata_blocks(oid) AS idx_metadata_blks, pg_stat_get_db_tuples_returned(oid) AS tup_returned, pg_stat_get_db_tuples_fetched(oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(oid) AS tup_inserted, @@ -2360,7 +2361,8 @@ pg_statio_all_indexes| SELECT c.oid AS relid, c.relname, i.relname AS 
indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, - pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit + pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit, + pg_stat_get_idx_metadata_blocks(i.oid) AS idx_metadata_blks FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) @@ -2381,6 +2383,7 @@ pg_statio_all_tables| SELECT c.oid AS relid, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, i.idx_blks_read, i.idx_blks_hit, + i.idx_metadata_blks, (pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, x.idx_blks_read AS tidx_blks_read, @@ -2389,7 +2392,8 @@ pg_statio_all_tables| SELECT c.oid AS relid, LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN LATERAL ( SELECT (sum((pg_stat_get_blocks_fetched(pg_index.indexrelid) - pg_stat_get_blocks_hit(pg_index.indexrelid))))::bigint AS idx_blks_read, - (sum(pg_stat_get_blocks_hit(pg_index.indexrelid)))::bigint AS idx_blks_hit + (sum(pg_stat_get_blocks_hit(pg_index.indexrelid)))::bigint AS idx_blks_hit, + (sum(pg_stat_get_idx_metadata_blocks(pg_index.indexrelid)))::bigint AS idx_metadata_blks FROM pg_index WHERE (pg_index.indrelid = c.oid)) i ON (true)) LEFT JOIN LATERAL ( SELECT (sum((pg_stat_get_blocks_fetched(pg_index.indexrelid) - pg_stat_get_blocks_hit(pg_index.indexrelid))))::bigint AS idx_blks_read, @@ -2403,7 +2407,8 @@ pg_statio_sys_indexes| SELECT relid, relname, indexrelname, idx_blks_read, - idx_blks_hit + idx_blks_hit, + idx_metadata_blks FROM pg_statio_all_indexes WHERE ((schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (schemaname ~ '^pg_toast'::text)); pg_statio_sys_sequences| SELECT relid, @@ -2420,6 +2425,7 @@ pg_statio_sys_tables| SELECT relid, heap_blks_hit, idx_blks_read, idx_blks_hit, + idx_metadata_blks, toast_blks_read, toast_blks_hit, 
tidx_blks_read, @@ -2432,7 +2438,8 @@ pg_statio_user_indexes| SELECT relid, relname, indexrelname, idx_blks_read, - idx_blks_hit + idx_blks_hit, + idx_metadata_blks FROM pg_statio_all_indexes WHERE ((schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (schemaname !~ '^pg_toast'::text)); pg_statio_user_sequences| SELECT relid, @@ -2449,6 +2456,7 @@ pg_statio_user_tables| SELECT relid, heap_blks_hit, idx_blks_read, idx_blks_hit, + idx_metadata_blks, toast_blks_read, toast_blks_hit, tidx_blks_read, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 776f1ad0e5..eabf251653 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1868,4 +1868,31 @@ SELECT * FROM check_estimated_rows('SELECT * FROM table_fillfactor'); (1 row) DROP TABLE table_fillfactor; +-- b-tree indexes: test stats collection for metadata index blocks +select count(*) from tenk2 where unique1 = '1504'; + count +------- + 1 +(1 row) + +-- ensure pending stats are flushed +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +-- check effects +BEGIN; +SET LOCAL stats_fetch_consistency = snapshot; +SELECT idx_metadata_blks < idx_blks_hit + idx_blks_read, + idx_metadata_blks > 0 + FROM pg_statio_all_indexes + WHERE indexrelname='tenk2_unique1'; + ?column? | ?column? 
+----------+---------- + t | t +(1 row) + +COMMIT; -- End of Stats Test diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 232ab8db8f..ab9179c224 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -925,4 +925,21 @@ SELECT * FROM check_estimated_rows('SELECT * FROM table_fillfactor'); DROP TABLE table_fillfactor; +-- b-tree indexes: test stats collection for metadata index blocks +select count(*) from tenk2 where unique1 = '1504'; + +-- ensure pending stats are flushed +SELECT pg_stat_force_next_flush(); + +-- check effects +BEGIN; +SET LOCAL stats_fetch_consistency = snapshot; + +SELECT idx_metadata_blks < idx_blks_hit + idx_blks_read, + idx_metadata_blks > 0 + FROM pg_statio_all_indexes + WHERE indexrelname='tenk2_unique1'; + +COMMIT; + -- End of Stats Test -- 2.39.5 (Apple Git-154)