From 7b4b492372a667b18dc598dd47f05b69281549e3 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sat, 20 Jun 2026 11:18:59 -0400 Subject: [PATCH v28 03/11] Limit get_actual_variable_range leaf page reads. get_actual_variable_range scans an index to find actual min/max values for planner selectivity estimation. Since this happens during planning, we can't afford to spend too much time on it. Commit 9c6ad5eaa9 added VISITED_PAGES_LIMIT (a limit of 100 heap page visits) to bound the amount of work performed, giving up and falling back to the pg_statistic extremal value when the limit is exceeded. But that isn't effective in cases with more extreme concentrations of dead index tuples. Benchmark results from Mark Callaghan show that VISITED_PAGES_LIMIT stops being effective once the dead index tuple problem gets out of hand (which is expected with queue-like tables that continually delete older records and insert newer ones). VISITED_PAGES_LIMIT counts heap page visits, but when many index tuples are marked LP_DEAD, _bt_readpage traverses arbitrarily many index pages without returning any tuples. The heap page counter never gets a chance to increment, so VISITED_PAGES_LIMIT never triggers. The more LP_DEAD bits we set, the less effective the limit becomes at bailing out early. Add a complementary mechanism that limits get_actual_variable_range to scanning only three index leaf pages (INDEX_PAGES_LIMIT-many pages) that have no matching items (that is, pages that won't return a batch). When the limit is exceeded, the scan ends without returning any matches, forcing get_actual_variable_range to give up. INDEX_PAGES_LIMIT provides a backstop against reading an excessive number of leaf pages, without fundamentally altering the existing VISITED_PAGES_LIMIT design. Leaf page reads that return a batch with at least one matching item aren't tallied against the new limit. 
This balances the need for get_actual_variable_range to locate a min/max value when that's feasible against the need to bound the amount of work it must perform to do so. Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-Wzkt1WkKp4VRJu3qHfmKXc8W+XYv1RXg5d2d3fSvAeO=rg@mail.gmail.com --- src/include/access/nbtree.h | 6 ++++++ src/include/access/relscan.h | 3 ++- src/backend/access/index/genam.c | 1 + src/backend/access/nbtree/nbtree.c | 1 + src/backend/access/nbtree/nbtsearch.c | 13 +++++++++++-- src/backend/utils/adt/selfuncs.c | 10 ++++++++++ 6 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 0254a223e..e7064813a 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -998,6 +998,12 @@ typedef struct BTScanOpaqueData BTArrayKeyInfo *arrayKeys; /* info about each equality-type array key */ FmgrInfo *orderProcs; /* ORDER procs for required equality keys */ MemoryContext arrayContext; /* scan-lifespan context for array data */ + + /* + * Running count of leaf pages read without finding a match, compared + * against scan->xs_index_pages_limit to bound planner scans + */ + int numNoMatchPages; /* no-batch-returned leaf page count */ } BTScanOpaqueData; typedef BTScanOpaqueData *BTScanOpaque; diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 2f2314843..f2f66e367 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -404,12 +404,13 @@ typedef struct IndexScanDescData int xs_name_cstring_count; /* - * An approximate limit on the amount of work, measured in pages touched, + * Approximate limits on the amount of work, measured in pages touched, * imposed on the index scan. The default, 0, means no limit. Only * honored during index-only scans. Used by selfuncs.c to bound the cost * of get_actual_variable_endpoint(). 
*/ uint8 xs_visited_pages_limit; + uint8 xs_index_pages_limit; /* parallel index scan information, in shared memory */ struct ParallelIndexScanDescData *parallel_scan; diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 54042f6f5..ca9bae803 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -139,6 +139,7 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) scan->xs_name_cstring_count = 0; scan->xs_visited_pages_limit = 0; + scan->xs_index_pages_limit = 0; return scan; } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 5939e728f..b83926f9f 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -344,6 +344,7 @@ btbeginscan(Relation rel, int nkeys, int norderbys) so->arrayKeys = NULL; so->orderProcs = NULL; so->arrayContext = NULL; + so->numNoMatchPages = 0; scan->opaque = so; scan->xs_recheck = false; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 4c94b9e59..88c87781c 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1750,6 +1750,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, BlockNumber lastcurrblkno, ScanDirection dir, bool firstpage) { Relation rel = scan->indexRelation; + BTScanOpaque so = (BTScanOpaque) scan->opaque; IndexScanBatch newbatch; BTBatchData *btnewbatch; @@ -1832,10 +1833,18 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, /* no matching tuples on this page */ _bt_relbuf(rel, btnewbatch->buf); - /* Continue the scan in this direction? */ + /* + * Continue the scan in this direction? + * + * Also give up if an opted-in planner scan (selfuncs.c) has now read + * too many leaf pages without a match. This bounds planning time + * when the scanned end of the index is full of LP_DEAD-marked items. + */ if (blkno == P_NONE || (ScanDirectionIsForward(dir) ? 
- !btnewbatch->moreRight : !btnewbatch->moreLeft)) + !btnewbatch->moreRight : !btnewbatch->moreLeft) || + (unlikely(scan->xs_index_pages_limit > 0) && + ++so->numNoMatchPages > scan->xs_index_pages_limit)) { /* * blkno _bt_readpage call ended scan in this direction (though if diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index fb978f0cf..1366f4988 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -7185,8 +7185,17 @@ get_actual_variable_endpoint(Relation heapRel, * We set xs_visited_pages_limit to tell the table AM to count distinct * heap pages visited for non-visible tuples and give up after the limit * is exceeded. + * + * We also set xs_index_pages_limit to independently tell the index AM to + * give up when this many leaf pages that lack even one matching index + * tuple have been read. This acts as a backstop against pages entirely + * full of index entries that were already marked killed (typically by + * prior calls here). That way we avoid hopelessly searching through an + * unbounded number of index leaf pages that don't contain even a single + * still-live entry (which can't trigger xs_visited_pages_limit). */ #define VISITED_PAGES_LIMIT 100 +#define INDEX_PAGES_LIMIT 3 InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(heapRel)); @@ -7196,6 +7205,7 @@ get_actual_variable_endpoint(Relation heapRel, SO_NONE); Assert(index_scan->xs_want_itup); index_scan->xs_visited_pages_limit = VISITED_PAGES_LIMIT; + index_scan->xs_index_pages_limit = INDEX_PAGES_LIMIT; index_rescan(index_scan, scankeys, 1, NULL, 0); /* Fetch first/next tuple in specified direction */ -- 2.53.0