From 443dbc15033112f6ec18ef869c7476774600f635 Mon Sep 17 00:00:00 2001 From: Mircea Cadariu Date: Thu, 20 Nov 2025 11:41:51 +0000 Subject: [PATCH v1] Add metadata (non-leaf) index block statistics to pg_stat functions and system views. This commit contains the changes for counting and exposing when index metadata block reads (either hits or from disk) are issued from the index handling code. --- doc/src/sgml/monitoring.sgml | 27 ++++++++++++++++++++ src/backend/access/brin/brin.c | 1 + src/backend/access/brin/brin_revmap.c | 5 ++++ src/backend/access/gin/ginbtree.c | 7 +++++ src/backend/access/gin/ginfast.c | 3 +++ src/backend/access/gin/ginget.c | 5 +++- src/backend/access/gin/ginutil.c | 3 +++ src/backend/access/gist/gist.c | 3 +++ src/backend/access/gist/gistbuild.c | 5 ++++ src/backend/access/gist/gistget.c | 2 ++ src/backend/access/hash/hashpage.c | 7 +++++ src/backend/access/nbtree/nbtinsert.c | 7 +++++ src/backend/access/nbtree/nbtpage.c | 12 +++++++++ src/backend/access/nbtree/nbtsearch.c | 13 ++++++++++ src/backend/access/nbtree/nbtutils.c | 1 + src/backend/access/spgist/spgdoinsert.c | 3 +++ src/backend/access/spgist/spgscan.c | 2 ++ src/backend/access/spgist/spgutils.c | 5 ++++ src/backend/catalog/system_views.sql | 7 ++++- src/backend/utils/activity/pgstat_database.c | 1 + src/backend/utils/activity/pgstat_relation.c | 2 ++ src/backend/utils/adt/pgstatfuncs.c | 6 +++++ src/include/catalog/pg_proc.dat | 8 ++++++ src/include/pgstat.h | 16 ++++++++++++ src/test/regress/expected/rules.out | 10 +++++++- src/test/regress/expected/stats.out | 27 ++++++++++++++++++++ src/test/regress/sql/stats.sql | 17 ++++++++++++ 27 files changed, 202 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 436ef0e8bd..fc51ab9693 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3467,6 +3467,15 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + 
idx_metadata_blks bigint + + + Number of metadata (non-leaf) index disk blocks hit or read in this database + + + tup_returned bigint @@ -4451,6 +4460,15 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + idx_metadata_blks bigint + + + Number of metadata (non-leaf) index disk blocks hit or read from all indexes in this table + + + toast_blks_read bigint @@ -4596,6 +4614,15 @@ description | Waiting for a newly initialized WAL file to reach durable storage + + + idx_metadata_blks bigint + + + Number of metadata (non-leaf) index disk blocks read or hit in this index + + + stats_reset timestamp with time zone diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index cb3331921c..ee8a2315e3 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -1652,6 +1652,7 @@ brinGetStats(Relation index, BrinStatsData *stats) BrinMetaPageData *metadata; metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, BUFFER_LOCK_SHARE); metapage = BufferGetPage(metabuffer); metadata = (BrinMetaPageData *) PageGetContents(metapage); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 4e380ecc71..0172ad83d6 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -27,6 +27,7 @@ #include "access/brin_xlog.h" #include "access/rmgr.h" #include "access/xloginsert.h" +#include "pgstat.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "utils/rel.h" @@ -75,6 +76,7 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange) Page page; meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(idxrel); LockBuffer(meta, BUFFER_LOCK_SHARE); page = BufferGetPage(meta); metadata = (BrinMetaPageData *) PageGetContents(page); @@ -232,6 +234,7 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Assert(mapBlk != 
InvalidBlockNumber); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + pgstat_count_metadata_buffer(idxRel); } LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE); @@ -486,6 +489,7 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) ReleaseBuffer(revmap->rm_currBuf); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + pgstat_count_metadata_buffer(revmap->rm_irel); } return revmap->rm_currBuf; @@ -554,6 +558,7 @@ revmap_physical_extend(BrinRevmap *revmap) if (mapBlk < nblocks) { buf = ReadBuffer(irel, mapBlk); + pgstat_count_metadata_buffer(irel); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buf); } diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index 644d484ea5..0d8778f589 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -18,6 +18,7 @@ #include "access/ginxlog.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/predicate.h" #include "utils/injection_point.h" #include "utils/memutils.h" @@ -104,6 +105,8 @@ ginFindLeafPage(GinBtree btree, bool searchMode, page = BufferGetPage(stack->buffer); + pgstat_count_metadata_buffer_if(!GinPageIsLeaf(page), btree->index); + access = ginTraverseLock(stack->buffer, searchMode); /* @@ -191,6 +194,8 @@ ginStepRight(Buffer buffer, Relation index, int lockmode) if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page)) elog(ERROR, "right sibling of GIN page is of different type"); + pgstat_count_metadata_buffer_if(!GinPageIsLeaf(page), index); + return nextbuffer; } @@ -254,6 +259,8 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack) page = BufferGetPage(buffer); if (GinPageIsLeaf(page)) elog(ERROR, "Lost path"); + else + pgstat_count_metadata_buffer(btree->index); if (GinPageIsIncompleteSplit(page)) { diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 33816f8551..7b5b872586 100644 --- a/src/backend/access/gin/ginfast.c +++ 
b/src/backend/access/gin/ginfast.c @@ -25,6 +25,7 @@ #include "catalog/pg_am.h" #include "commands/vacuum.h" #include "miscadmin.h" +#include "pgstat.h" #include "port/pg_bitutils.h" #include "postmaster/autovacuum.h" #include "storage/indexfsm.h" @@ -240,6 +241,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector) data.newRightlink = data.prevTail = InvalidBlockNumber; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); metapage = BufferGetPage(metabuffer); /* @@ -828,6 +830,7 @@ ginInsertCleanup(GinState *ginstate, bool full_clean, } metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_SHARE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 0d4108d05a..c62e068a68 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -18,6 +18,7 @@ #include "access/relscan.h" #include "common/pg_prng.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/predicate.h" #include "utils/datum.h" #include "utils/memutils.h" @@ -1493,7 +1494,7 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos) * Here we must prevent deletion of next page by insertcleanup * process, which may be trying to obtain exclusive lock on * current page. So, we lock next page before releasing the - * current one + * current one. 
*/ Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno); @@ -1846,6 +1847,8 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) Page page; BlockNumber blkno; + pgstat_count_metadata_buffer(scan->indexRelation); + *ntids = 0; /* diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 78f7b7a249..9cc6e6d6c3 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -22,6 +22,7 @@ #include "catalog/pg_type.h" #include "commands/progress.h" #include "commands/vacuum.h" +#include "pgstat.h" #include "miscadmin.h" #include "storage/indexfsm.h" #include "utils/builtins.h" @@ -632,6 +633,7 @@ ginGetStats(Relation index, GinStatsData *stats) GinMetaPageData *metadata; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_SHARE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); @@ -659,6 +661,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build) GinMetaPageData *metadata; metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, GIN_EXCLUSIVE); metapage = BufferGetPage(metabuffer); metadata = GinPageGetMeta(metapage); diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 3fb1a1285c..54986b4f44 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -21,6 +21,7 @@ #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/execnodes.h" +#include "pgstat.h" #include "storage/predicate.h" #include "utils/fmgrprotos.h" #include "utils/index_selfuncs.h" @@ -696,6 +697,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, } stack->page = BufferGetPage(stack->buffer); + pgstat_count_metadata_buffer_if(!GistPageIsLeaf(stack->page), state.r); stack->lsn = xlocked ? 
PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer); Assert(!RelationNeedsWAL(state.r) || XLogRecPtrIsValid(stack->lsn)); @@ -1121,6 +1123,7 @@ gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build) { ptr->buffer = ReadBuffer(r, ptr->blkno); ptr->page = BufferGetPage(ptr->buffer); + pgstat_count_metadata_buffer_if(!GistPageIsLeaf(ptr->page), r); ptr = ptr->parent; } diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index be0fd5b753..8e21751797 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -39,6 +39,7 @@ #include "access/tableam.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "pgstat.h" #include "nodes/execnodes.h" #include "optimizer/optimizer.h" #include "storage/bufmgr.h" @@ -967,6 +968,7 @@ gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, */ buffer = ReadBuffer(indexrel, blkno); + pgstat_count_metadata_buffer(indexrel); LockBuffer(buffer, GIST_EXCLUSIVE); page = BufferGetPage(buffer); @@ -1248,6 +1250,7 @@ gistBufferingFindCorrectParent(GISTBuildState *buildstate, buffer = ReadBuffer(buildstate->indexrel, parent); page = BufferGetPage(buffer); + pgstat_count_metadata_buffer(buildstate->indexrel); LockBuffer(buffer, GIST_EXCLUSIVE); gistcheckpage(buildstate->indexrel, buffer); maxoff = PageGetMaxOffsetNumber(page); @@ -1457,6 +1460,8 @@ gistGetMaxLevel(Relation index) break; } + pgstat_count_metadata_buffer(index); + /* * Pick the first downlink on the page, and follow it. 
It doesn't * matter which downlink we choose, the tree has the same depth diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 9ba45acfff..57651cfd27 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -346,6 +346,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, gistcheckpage(scan->indexRelation, buffer); page = BufferGetPage(buffer); opaque = GistPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!GistPageIsLeaf((Page) page), + scan->indexRelation); /* * Check if we need to follow the rightlink. We need to follow it if the diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index b8e5bd005e..2e94856929 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -32,6 +32,7 @@ #include "access/hash_xlog.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "pgstat.h" #include "port/pg_bitutils.h" #include "storage/predicate.h" #include "storage/smgr.h" @@ -76,6 +77,8 @@ _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags) buf = ReadBuffer(rel, blkno); + pgstat_count_metadata_buffer_if(flags == LH_META_PAGE || flags == LH_BITMAP_PAGE, rel); + if (access != HASH_NOLOCK) LockBuffer(buf, access); @@ -102,6 +105,8 @@ _hash_getbuf_with_condlock_cleanup(Relation rel, BlockNumber blkno, int flags) buf = ReadBuffer(rel, blkno); + pgstat_count_metadata_buffer_if(flags == LH_META_PAGE || flags == LH_BITMAP_PAGE, rel); + if (!ConditionalLockBufferForCleanup(buf)) { ReleaseBuffer(buf); @@ -247,6 +252,8 @@ _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); + pgstat_count_metadata_buffer_if(flags == LH_META_PAGE || flags == LH_BITMAP_PAGE, rel); + if (access != HASH_NOLOCK) LockBuffer(buf, access); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 
7c113c007e..48c9dcf9a6 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -22,6 +22,7 @@ #include "access/xloginsert.h" #include "common/int.h" #include "common/pg_prng.h" +#include "pgstat.h" #include "lib/qunique.h" #include "miscadmin.h" #include "storage/lmgr.h" @@ -1261,6 +1262,7 @@ _bt_insertonpg(Relation rel, Assert(BufferIsValid(cbuf)); metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); @@ -2263,6 +2265,8 @@ _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf, BTStack stack) rpage = BufferGetPage(rbuf); rpageop = BTPageGetOpaque(rpage); + pgstat_count_metadata_buffer_if(!P_ISLEAF(rpageop), rel); + /* Could this be a root split? */ if (!stack) { @@ -2272,6 +2276,7 @@ _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf, BTStack stack) /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); @@ -2340,6 +2345,7 @@ _bt_getstackbuf(Relation rel, Relation heaprel, BTStack stack, BlockNumber child buf = _bt_getbuf(rel, blkno, BT_WRITE); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer(rel); Assert(heaprel != NULL); if (P_INCOMPLETE_SPLIT(opaque)) @@ -2480,6 +2486,7 @@ _bt_newlevel(Relation rel, Relation heaprel, Buffer lbuf, Buffer rbuf) /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 30b43a4dd1..5ccb0a5501 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -30,6 +30,7 @@ #include "access/xloginsert.h" #include "common/int.h" #include "miscadmin.h" +#include 
"pgstat.h" #include "storage/indexfsm.h" #include "storage/predicate.h" #include "storage/procarray.h" @@ -190,6 +191,7 @@ _bt_vacuum_needs_cleanup(Relation rel) * Note that we deliberately avoid using cached version of metapage here. */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); btm_version = metad->btm_version; @@ -254,6 +256,7 @@ _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages) * to be consistent. */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); @@ -374,6 +377,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) rootlevel = metad->btm_fastlevel; rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + pgstat_count_metadata_buffer_if(rootlevel > 0, rel); rootpage = BufferGetPage(rootbuf); rootopaque = BTPageGetOpaque(rootpage); @@ -400,6 +404,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) } metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metad = _bt_getmeta(rel, metabuf); /* if no root page initialized yet, do it */ @@ -536,6 +541,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) for (;;) { rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ); + pgstat_count_metadata_buffer_if(rootlevel > 0, rel); rootpage = BufferGetPage(rootbuf); rootopaque = BTPageGetOpaque(rootpage); @@ -600,6 +606,7 @@ _bt_gettrueroot(Relation rel) rel->rd_amcache = NULL; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metaopaque = BTPageGetOpaque(metapg); metad = BTPageGetMeta(metapg); @@ -639,6 +646,7 @@ _bt_gettrueroot(Relation rel) for (;;) { rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ); + pgstat_count_metadata_buffer_if(rootlevel > 0, rel); rootpage = BufferGetPage(rootbuf); rootopaque = 
BTPageGetOpaque(rootpage); @@ -681,6 +689,7 @@ _bt_getrootheight(Relation rel) Buffer metabuf; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metad = _bt_getmeta(rel, metabuf); /* @@ -745,6 +754,7 @@ _bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage) Buffer metabuf; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + pgstat_count_metadata_buffer(rel); metad = _bt_getmeta(rel, metabuf); /* @@ -2372,6 +2382,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, /* Fetch the block number of the target's left sibling */ buf = _bt_getbuf(rel, target, BT_READ); + pgstat_count_metadata_buffer(rel); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); leftsib = opaque->btpo_prev; @@ -2567,6 +2578,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, { /* rightsib will be the only one left on the level */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); + pgstat_count_metadata_buffer(rel); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 0605356ec9..00705a8a82 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -185,6 +185,8 @@ _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP, /* drop the read lock on the page, then acquire one on its child */ *bufP = _bt_relandgetbuf(rel, *bufP, child, page_access); + pgstat_count_metadata_buffer_if(opaque->btpo_level != 1, rel); + /* okay, all set to move down a level */ stack_in = new_stack; } @@ -305,6 +307,9 @@ _bt_moveright(Relation rel, /* re-acquire the lock in the right mode, and re-check */ buf = _bt_getbuf(rel, blkno, access); + + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); + continue; } @@ -312,6 +317,7 @@ _bt_moveright(Relation rel, { /* step right one page */ buf = _bt_relandgetbuf(rel, buf, 
opaque->btpo_next, access); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); continue; } else @@ -2509,6 +2515,7 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_getbuf(rel, *blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); /* * If this isn't the page we want, walk right till we find what we @@ -2536,6 +2543,7 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_relandgetbuf(rel, buf, *blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); } /* @@ -2546,6 +2554,8 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); + if (P_ISDELETED(opaque)) { /* @@ -2563,6 +2573,7 @@ _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno, buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); if (!P_ISDELETED(opaque)) break; } @@ -2652,6 +2663,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); } /* Done? 
*/ @@ -2675,6 +2687,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost) buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); + pgstat_count_metadata_buffer_if(!P_ISLEAF(opaque), rel); } return buf; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index ab0f98b028..912edf03dd 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -22,6 +22,7 @@ #include "access/relscan.h" #include "commands/progress.h" #include "miscadmin.h" +#include "pgstat.h" #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/rel.h" diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index 4eadb51877..f777b3dec5 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -22,6 +22,7 @@ #include "common/int.h" #include "common/pg_prng.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "utils/rel.h" @@ -2156,6 +2157,8 @@ spgdoinsert(Relation index, SpGistState *state, spgChooseIn in; spgChooseOut out; + pgstat_count_metadata_buffer(index); + /* * spgAddNode and spgSplitTuple cases will loop back to here to * complete the insertion operation. 
Just in case the choose diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 25893050c5..d0be58fb78 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -897,6 +897,8 @@ redirect: SpGistInnerTuple innerTuple = (SpGistInnerTuple) PageGetItem(page, PageGetItemId(page, offset)); + pgstat_count_metadata_buffer(index); + if (innerTuple->tupstate != SPGIST_LIVE) { if (innerTuple->tupstate == SPGIST_REDIRECT) diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 87c31da71a..267c3f25b7 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -26,6 +26,7 @@ #include "commands/vacuum.h" #include "nodes/nodeFuncs.h" #include "parser/parse_coerce.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "storage/indexfsm.h" #include "utils/catcache.h" @@ -271,6 +272,7 @@ spgGetCache(Relation index) SpGistMetaPageData *metadata; metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); LockBuffer(metabuffer, BUFFER_LOCK_SHARE); metadata = SpGistPageGetMeta(BufferGetPage(metabuffer)); @@ -456,6 +458,7 @@ SpGistUpdateMetaPage(Relation index) Buffer metabuffer; metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + pgstat_count_metadata_buffer(index); if (ConditionalLockBuffer(metabuffer)) { @@ -650,6 +653,8 @@ SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew) return buffer; } } + else if (!SpGistPageIsLeaf(page)) + pgstat_count_metadata_buffer(index); /* * fallback to allocation of new buffer diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 95ad29a64b..d140ec46fe 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -785,6 +785,7 @@ CREATE VIEW pg_statio_all_tables AS pg_stat_get_blocks_hit(C.oid) AS heap_blks_hit, I.idx_blks_read AS idx_blks_read, I.idx_blks_hit AS 
idx_blks_hit, + I.idx_metadata_blks AS idx_metadata_blks, pg_stat_get_blocks_fetched(T.oid) - pg_stat_get_blocks_hit(T.oid) AS toast_blks_read, pg_stat_get_blocks_hit(T.oid) AS toast_blks_hit, @@ -799,7 +800,9 @@ CREATE VIEW pg_statio_all_tables AS pg_stat_get_blocks_hit(indexrelid))::bigint AS idx_blks_read, sum(pg_stat_get_blocks_hit(indexrelid))::bigint - AS idx_blks_hit + AS idx_blks_hit, + sum(pg_stat_get_idx_metadata_blocks(indexrelid))::bigint + AS idx_metadata_blks FROM pg_index WHERE indrelid = C.oid ) I ON true LEFT JOIN LATERAL ( SELECT sum(pg_stat_get_blocks_fetched(indexrelid) - @@ -858,6 +861,7 @@ CREATE VIEW pg_statio_all_indexes AS pg_stat_get_blocks_fetched(I.oid) - pg_stat_get_blocks_hit(I.oid) AS idx_blks_read, pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit, + pg_stat_get_idx_metadata_blocks(I.oid) AS idx_metadata_blks, pg_stat_get_stat_reset_time(I.oid) AS stats_reset FROM pg_class C JOIN pg_index X ON C.oid = X.indrelid JOIN @@ -1094,6 +1098,7 @@ CREATE VIEW pg_stat_database AS pg_stat_get_db_blocks_fetched(D.oid) - pg_stat_get_db_blocks_hit(D.oid) AS blks_read, pg_stat_get_db_blocks_hit(D.oid) AS blks_hit, + pg_stat_get_db_idx_metadata_blocks(D.oid) AS idx_metadata_blks, pg_stat_get_db_tuples_returned(D.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted, diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c index b31f20d41b..2f4a065af9 100644 --- a/src/backend/utils/activity/pgstat_database.c +++ b/src/backend/utils/activity/pgstat_database.c @@ -443,6 +443,7 @@ pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) PGSTAT_ACCUM_DBCOUNT(xact_rollback); PGSTAT_ACCUM_DBCOUNT(blocks_fetched); PGSTAT_ACCUM_DBCOUNT(blocks_hit); + PGSTAT_ACCUM_DBCOUNT(idx_metadata_blocks); PGSTAT_ACCUM_DBCOUNT(tuples_returned); PGSTAT_ACCUM_DBCOUNT(tuples_fetched); diff --git a/src/backend/utils/activity/pgstat_relation.c 
b/src/backend/utils/activity/pgstat_relation.c index 1de477cbee..48b2b28e62 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -880,6 +880,7 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) tabentry->blocks_fetched += lstats->counts.blocks_fetched; tabentry->blocks_hit += lstats->counts.blocks_hit; + tabentry->idx_metadata_blocks += lstats->counts.idx_metadata_blocks; /* Clamp live_tuples in case of negative delta_live_tuples */ tabentry->live_tuples = Max(tabentry->live_tuples, 0); @@ -897,6 +898,7 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) dbentry->tuples_deleted += lstats->counts.tuples_deleted; dbentry->blocks_fetched += lstats->counts.blocks_fetched; dbentry->blocks_hit += lstats->counts.blocks_hit; + dbentry->idx_metadata_blocks += lstats->counts.idx_metadata_blocks; return true; } diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 3d98d064a9..84e3e5bc45 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -67,6 +67,9 @@ PG_STAT_GET_RELENTRY_INT64(blocks_fetched) /* pg_stat_get_blocks_hit */ PG_STAT_GET_RELENTRY_INT64(blocks_hit) +/* pg_stat_get_idx_metadata_blocks */ +PG_STAT_GET_RELENTRY_INT64(idx_metadata_blocks) + /* pg_stat_get_dead_tuples */ PG_STAT_GET_RELENTRY_INT64(dead_tuples) @@ -1055,6 +1058,9 @@ PG_STAT_GET_DBENTRY_INT64(blocks_fetched) /* pg_stat_get_db_blocks_hit */ PG_STAT_GET_DBENTRY_INT64(blocks_hit) +/* pg_stat_get_db_idx_metadata_blocks */ +PG_STAT_GET_DBENTRY_INT64(idx_metadata_blocks) + /* pg_stat_get_db_conflict_bufferpin */ PG_STAT_GET_DBENTRY_INT64(conflict_bufferpin) diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index aaadfd8c74..abdd1e421c 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5596,6 +5596,10 @@ proname => 'pg_stat_get_blocks_hit', provolatile => 's', proparallel => 'r', 
prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_blocks_hit' }, +{ oid => '8888', descr => 'statistics: number of metadata blocks', + proname => 'pg_stat_get_idx_metadata_blocks', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_idx_metadata_blocks' }, { oid => '2781', descr => 'statistics: last manual vacuum time for a table', proname => 'pg_stat_get_last_vacuum_time', provolatile => 's', proparallel => 'r', prorettype => 'timestamptz', proargtypes => 'oid', @@ -5808,6 +5812,10 @@ proname => 'pg_stat_get_db_tuples_inserted', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_tuples_inserted' }, +{ oid => '8892', descr => 'statistics: number of db metadata blocks', + proname => 'pg_stat_get_db_idx_metadata_blocks', provolatile => 's', + proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', + prosrc => 'pg_stat_get_db_idx_metadata_blocks' }, { oid => '2761', descr => 'statistics: tuples updated in database', proname => 'pg_stat_get_db_tuples_updated', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index a68e725259..db40efedc8 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -156,6 +156,8 @@ typedef struct PgStat_TableCounts PgStat_Counter blocks_fetched; PgStat_Counter blocks_hit; + + PgStat_Counter idx_metadata_blocks; } PgStat_TableCounts; /* ---------- @@ -348,6 +350,7 @@ typedef struct PgStat_StatDBEntry PgStat_Counter xact_rollback; PgStat_Counter blocks_fetched; PgStat_Counter blocks_hit; + PgStat_Counter idx_metadata_blocks; PgStat_Counter tuples_returned; PgStat_Counter tuples_fetched; PgStat_Counter tuples_inserted; @@ -445,6 +448,7 @@ typedef struct PgStat_StatTabEntry PgStat_Counter blocks_fetched; PgStat_Counter blocks_hit; + PgStat_Counter idx_metadata_blocks; TimestampTz last_vacuum_time; /* user 
initiated vacuum */ PgStat_Counter vacuum_count; @@ -720,6 +724,18 @@ extern void pgstat_report_analyze(Relation rel, if (pgstat_should_count_relation(rel)) \ (rel)->pgstat_info->counts.blocks_hit++; \ } while (0) +#define pgstat_count_metadata_buffer(rel) \ + do { \ + if (pgstat_should_count_relation(rel)) { \ + (rel)->pgstat_info->counts.idx_metadata_blocks++; \ + } \ + } while (0) +#define pgstat_count_metadata_buffer_if(is_metadata, rel) \ + do { \ + if (pgstat_should_count_relation(rel) && (is_metadata)) { \ + (rel)->pgstat_info->counts.idx_metadata_blocks++; \ + } \ + } while (0) extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n); extern void pgstat_count_heap_update(Relation rel, bool hot, bool newpage); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 372a2188c2..37cd0969a6 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1882,6 +1882,7 @@ pg_stat_database| SELECT oid AS datid, pg_stat_get_db_xact_rollback(oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(oid) - pg_stat_get_db_blocks_hit(oid)) AS blks_read, pg_stat_get_db_blocks_hit(oid) AS blks_hit, + pg_stat_get_db_idx_metadata_blocks(oid) AS idx_metadata_blks, pg_stat_get_db_tuples_returned(oid) AS tup_returned, pg_stat_get_db_tuples_fetched(oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(oid) AS tup_inserted, @@ -2389,6 +2390,7 @@ pg_statio_all_indexes| SELECT c.oid AS relid, i.relname AS indexrelname, (pg_stat_get_blocks_fetched(i.oid) - pg_stat_get_blocks_hit(i.oid)) AS idx_blks_read, pg_stat_get_blocks_hit(i.oid) AS idx_blks_hit, + pg_stat_get_idx_metadata_blocks(i.oid) AS idx_metadata_blks, pg_stat_get_stat_reset_time(i.oid) AS stats_reset FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) @@ -2410,6 +2412,7 @@ pg_statio_all_tables| SELECT c.oid AS relid, pg_stat_get_blocks_hit(c.oid) AS heap_blks_hit, i.idx_blks_read, i.idx_blks_hit, + i.idx_metadata_blks, 
(pg_stat_get_blocks_fetched(t.oid) - pg_stat_get_blocks_hit(t.oid)) AS toast_blks_read, pg_stat_get_blocks_hit(t.oid) AS toast_blks_hit, x.idx_blks_read AS tidx_blks_read, @@ -2419,7 +2422,8 @@ pg_statio_all_tables| SELECT c.oid AS relid, LEFT JOIN pg_class t ON ((c.reltoastrelid = t.oid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN LATERAL ( SELECT (sum((pg_stat_get_blocks_fetched(pg_index.indexrelid) - pg_stat_get_blocks_hit(pg_index.indexrelid))))::bigint AS idx_blks_read, - (sum(pg_stat_get_blocks_hit(pg_index.indexrelid)))::bigint AS idx_blks_hit + (sum(pg_stat_get_blocks_hit(pg_index.indexrelid)))::bigint AS idx_blks_hit, + (sum(pg_stat_get_idx_metadata_blocks(pg_index.indexrelid)))::bigint AS idx_metadata_blks FROM pg_index WHERE (pg_index.indrelid = c.oid)) i ON (true)) LEFT JOIN LATERAL ( SELECT (sum((pg_stat_get_blocks_fetched(pg_index.indexrelid) - pg_stat_get_blocks_hit(pg_index.indexrelid))))::bigint AS idx_blks_read, @@ -2434,6 +2438,7 @@ pg_statio_sys_indexes| SELECT relid, indexrelname, idx_blks_read, idx_blks_hit, + idx_metadata_blks, stats_reset FROM pg_statio_all_indexes WHERE ((schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (schemaname ~ '^pg_toast'::text)); @@ -2451,6 +2456,7 @@ pg_statio_sys_tables| SELECT relid, heap_blks_hit, idx_blks_read, idx_blks_hit, + idx_metadata_blks, toast_blks_read, toast_blks_hit, tidx_blks_read, @@ -2465,6 +2471,7 @@ pg_statio_user_indexes| SELECT relid, indexrelname, idx_blks_read, idx_blks_hit, + idx_metadata_blks, stats_reset FROM pg_statio_all_indexes WHERE ((schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (schemaname !~ '^pg_toast'::text)); @@ -2482,6 +2489,7 @@ pg_statio_user_tables| SELECT relid, heap_blks_hit, idx_blks_read, idx_blks_hit, + idx_metadata_blks, toast_blks_read, toast_blks_hit, tidx_blks_read, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 67e1860e98..c78ea27c04 
100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1910,4 +1910,31 @@ SELECT * FROM check_estimated_rows('SELECT * FROM table_fillfactor'); (1 row) DROP TABLE table_fillfactor; +-- b-tree indexes: test stats collection for metadata index blocks +select count(*) from tenk2 where unique1 = '1504'; + count +------- + 1 +(1 row) + +-- ensure pending stats are flushed +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +-- check effects +BEGIN; +SET LOCAL stats_fetch_consistency = snapshot; +SELECT idx_metadata_blks < idx_blks_hit + idx_blks_read, + idx_metadata_blks > 0 + FROM pg_statio_all_indexes + WHERE indexrelname='tenk2_unique1'; + ?column? | ?column? +----------+---------- + t | t +(1 row) + +COMMIT; -- End of Stats Test diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 8768e0f27f..3c61cab7cc 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -944,4 +944,21 @@ SELECT * FROM check_estimated_rows('SELECT * FROM table_fillfactor'); DROP TABLE table_fillfactor; +-- b-tree indexes: test stats collection for metadata index blocks +select count(*) from tenk2 where unique1 = '1504'; + +-- ensure pending stats are flushed +SELECT pg_stat_force_next_flush(); + +-- check effects +BEGIN; +SET LOCAL stats_fetch_consistency = snapshot; + +SELECT idx_metadata_blks < idx_blks_hit + idx_blks_read, + idx_metadata_blks > 0 + FROM pg_statio_all_indexes + WHERE indexrelname='tenk2_unique1'; + +COMMIT; + -- End of Stats Test -- 2.39.5 (Apple Git-154)