From b4148fc01e789700309cb144ca5e68bbcd9c2aa6 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 12 Feb 2024 20:15:05 -0500
Subject: [PATCH v5 03/14] Push BitmapHeapScan skip fetch optimization into
 table AM

7c70996ebf0949b142 introduced an optimization to allow bitmap table
scans to skip fetching a block from the heap if none of the underlying
data was needed and the block is marked all visible in the visibility
map. With the addition of table AMs, a FIXME was added to this code
indicating that it should be pushed into table AM specific code, as not
all table AMs may use a visibility map in the same way.

Resolve this FIXME for the current block and implement it for the heap
table AM by moving the vmbuffer and other fields needed for the
optimization from the BitmapHeapScanState into the HeapScanDescData.
heapam_scan_bitmap_next_block() now decides whether or not to skip
fetching the block before reading it in and
heapam_scan_bitmap_next_tuple() returns NULL-filled tuples for skipped
blocks.

The layering violation is still present in BitmapHeapScans's prefetching
code. However, this will be eliminated when prefetching is implemented
using the upcoming streaming read API discussed in [1].

[1] https://www.postgresql.org/message-id/flat/CA%2BhUKGJkOiOCa%2Bmag4BF%2BzHo7qo%3Do9CFheB8%3Dg6uT5TUm2gkvA%40mail.gmail.com
---
 src/backend/access/heap/heapam.c          |  14 +++
 src/backend/access/heap/heapam_handler.c  |  29 ++++++
 src/backend/executor/nodeBitmapHeapscan.c | 118 ++++++----------------
 src/include/access/heapam.h               |  10 ++
 src/include/access/tableam.h              |   7 ++
 src/include/nodes/execnodes.h             |   8 +-
 6 files changed, 94 insertions(+), 92 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 707460a5364..b93f243c282 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -955,6 +955,8 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
 	scan->rs_strategy = NULL;	/* set in initscan */
+	scan->rs_vmbuffer = InvalidBuffer;
+	scan->rs_empty_tuples_pending = 0;
 
 	/*
 	 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
@@ -1043,6 +1045,12 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+	{
+		ReleaseBuffer(scan->rs_vmbuffer);
+		scan->rs_vmbuffer = InvalidBuffer;
+	}
+
 	/*
 	 * reinitialize scan descriptor
 	 */
@@ -1062,6 +1070,12 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	if (BufferIsValid(scan->rs_vmbuffer))
+	{
+		ReleaseBuffer(scan->rs_vmbuffer);
+		scan->rs_vmbuffer = InvalidBuffer;
+	}
+
 	/*
 	 * decrement relation reference count and free scan descriptor storage
 	 */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 680a50bf8b1..c9b9b4c00f1 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -27,6 +27,7 @@
 #include "access/syncscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/index.h"
@@ -2122,6 +2123,24 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
 	hscan->rs_cindex = 0;
 	hscan->rs_ntuples = 0;
 
+	/*
+	 * We can skip fetching the heap page if we don't need any fields from the
+	 * heap, and the bitmap entries don't need rechecking, and all tuples on
+	 * the page are visible to our transaction.
+	 */
+	if (scan->rs_flags & SO_CAN_SKIP_FETCH &&
+		!tbmres->recheck &&
+		VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer))
+	{
+		/* can't be lossy in the skip_fetch case */
+		Assert(tbmres->ntuples >= 0);
+		Assert(hscan->rs_empty_tuples_pending >= 0);
+
+		hscan->rs_empty_tuples_pending += tbmres->ntuples;
+
+		return true;
+	}
+
 	/*
 	 * Ignore any claimed entries past what we think is the end of the
 	 * relation. It may have been extended after the start of our scan (we
@@ -2234,6 +2253,16 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
 	Page		page;
 	ItemId		lp;
 
+	if (hscan->rs_empty_tuples_pending > 0)
+	{
+		/*
+		 * If we don't have to fetch the tuple, just return nulls.
+		 */
+		ExecStoreAllNullTuple(slot);
+		hscan->rs_empty_tuples_pending--;
+		return true;
+	}
+
 	/*
 	 * Out of range?  If so, nothing more to look at on this page
 	 */
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index a9ba2bdfb88..2e4f87ea3a3 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -108,16 +108,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 	 */
 	if (!node->initialized)
 	{
-		/*
-		 * We can potentially skip fetching heap pages if we do not need any
-		 * columns of the table, either for checking non-indexable quals or
-		 * for returning data.  This test is a bit simplistic, as it checks
-		 * the stronger condition that there's no qual or return tlist at all.
-		 * But in most cases it's probably not worth working harder than that.
-		 */
-		node->can_skip_fetch = (node->ss.ps.plan->qual == NIL &&
-								node->ss.ps.plan->targetlist == NIL);
-
 		if (!pstate)
 		{
 			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
@@ -211,6 +201,17 @@ BitmapHeapNext(BitmapHeapScanState *node)
 				extra_flags |= SO_TEMP_SNAPSHOT;
 			}
 
+			/*
+			 * We can potentially skip fetching heap pages if we do not need
+			 * any columns of the table, either for checking non-indexable
+			 * quals or for returning data.  This test is a bit simplistic, as
+			 * it checks the stronger condition that there's no qual or return
+			 * tlist at all. But in most cases it's probably not worth working
+			 * harder than that.
+			 */
+			if (node->ss.ps.plan->qual == NIL && node->ss.ps.plan->targetlist == NIL)
+				extra_flags |= SO_CAN_SKIP_FETCH;
+
 			scan = node->ss.ss_currentScanDesc = table_beginscan_bm(
 																	node->ss.ss_currentRelation,
 																	snapshot,
@@ -224,8 +225,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
 	for (;;)
 	{
-		bool		skip_fetch;
-
 		CHECK_FOR_INTERRUPTS();
 
 		/*
@@ -245,32 +244,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
 			BitmapAdjustPrefetchIterator(node, tbmres);
 
-			/*
-			 * We can skip fetching the heap page if we don't need any fields
-			 * from the heap, and the bitmap entries don't need rechecking,
-			 * and all tuples on the page are visible to our transaction.
-			 *
-			 * XXX: It's a layering violation that we do these checks above
-			 * tableam, they should probably moved below it at some point.
-			 */
-			skip_fetch = (node->can_skip_fetch &&
-						  !tbmres->recheck &&
-						  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-										 tbmres->blockno,
-										 &node->vmbuffer));
-
-			if (skip_fetch)
-			{
-				/* can't be lossy in the skip_fetch case */
-				Assert(tbmres->ntuples >= 0);
-
-				/*
-				 * The number of tuples on this page is put into
-				 * node->return_empty_tuples.
-				 */
-				node->return_empty_tuples = tbmres->ntuples;
-			}
-			else if (!table_scan_bitmap_next_block(scan, tbmres))
+			if (!table_scan_bitmap_next_block(scan, tbmres))
 			{
 				/* AM doesn't think this block is valid, skip */
 				continue;
@@ -318,52 +292,33 @@ BitmapHeapNext(BitmapHeapScanState *node)
 		 * should happen only when we have determined there is still something
 		 * to do on the current page, else we may uselessly prefetch the same
 		 * page we are just about to request for real.
-		 *
-		 * XXX: It's a layering violation that we do these checks above
-		 * tableam, they should probably moved below it at some point.
 		 */
 		BitmapPrefetch(node, scan);
 
-		if (node->return_empty_tuples > 0)
+		/*
+		 * Attempt to fetch tuple from AM.
+		 */
+		if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
 		{
-			/*
-			 * If we don't have to fetch the tuple, just return nulls.
-			 */
-			ExecStoreAllNullTuple(slot);
-
-			if (--node->return_empty_tuples == 0)
-			{
-				/* no more tuples to return in the next round */
-				node->tbmres = tbmres = NULL;
-			}
+			/* nothing more to look at on this page */
+			node->tbmres = tbmres = NULL;
+			continue;
 		}
-		else
+
+		/*
+		 * If we are using lossy info, we have to recheck the qual conditions
+		 * at every tuple.
+		 */
+		if (tbmres->recheck)
 		{
-			/*
-			 * Attempt to fetch tuple from AM.
-			 */
-			if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
+			econtext->ecxt_scantuple = slot;
+			if (!ExecQualAndReset(node->bitmapqualorig, econtext))
 			{
-				/* nothing more to look at on this page */
-				node->tbmres = tbmres = NULL;
+				/* Fails recheck, so drop it and loop back for another */
+				InstrCountFiltered2(node, 1);
+				ExecClearTuple(slot);
 				continue;
 			}
-
-			/*
-			 * If we are using lossy info, we have to recheck the qual
-			 * conditions at every tuple.
-			 */
-			if (tbmres->recheck)
-			{
-				econtext->ecxt_scantuple = slot;
-				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-				{
-					/* Fails recheck, so drop it and loop back for another */
-					InstrCountFiltered2(node, 1);
-					ExecClearTuple(slot);
-					continue;
-				}
-			}
 		}
 
 		/* OK to return this tuple */
@@ -535,7 +490,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 				 * it did for the current heap page; which is not a certainty
 				 * but is true in many cases.
 				 */
-				skip_fetch = (node->can_skip_fetch &&
+
+				skip_fetch = (scan->rs_flags & SO_CAN_SKIP_FETCH &&
 							  (node->tbmres ? !node->tbmres->recheck : false) &&
 							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
 											 tbmpre->blockno,
@@ -586,7 +542,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
 				}
 
 				/* As above, skip prefetch if we expect not to need page */
-				skip_fetch = (node->can_skip_fetch &&
+				skip_fetch = (scan->rs_flags & SO_CAN_SKIP_FETCH &&
 							  (node->tbmres ? !node->tbmres->recheck : false) &&
 							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
 											 tbmpre->blockno,
@@ -656,8 +612,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->vmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->vmbuffer);
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
@@ -667,7 +621,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 	node->initialized = false;
 	node->shared_tbmiterator = NULL;
 	node->shared_prefetch_iterator = NULL;
-	node->vmbuffer = InvalidBuffer;
 	node->pvmbuffer = InvalidBuffer;
 
 	ExecScanReScan(&node->ss);
@@ -712,8 +665,6 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		tbm_end_shared_iterate(node->shared_tbmiterator);
 	if (node->shared_prefetch_iterator)
 		tbm_end_shared_iterate(node->shared_prefetch_iterator);
-	if (node->vmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->vmbuffer);
 	if (node->pvmbuffer != InvalidBuffer)
 		ReleaseBuffer(node->pvmbuffer);
 
@@ -757,8 +708,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->tbm = NULL;
 	scanstate->tbmiterator = NULL;
 	scanstate->tbmres = NULL;
-	scanstate->return_empty_tuples = 0;
-	scanstate->vmbuffer = InvalidBuffer;
 	scanstate->pvmbuffer = InvalidBuffer;
 	scanstate->exact_pages = 0;
 	scanstate->lossy_pages = 0;
@@ -770,7 +719,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->shared_tbmiterator = NULL;
 	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
-	scanstate->can_skip_fetch = false;
 	scanstate->worker_snapshot = NULL;
 
 	/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4b133f68593..3dfb19ec7d5 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -72,6 +72,16 @@ typedef struct HeapScanDescData
 	 */
 	ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
+	/*
+	 * These fields are only used for bitmap scans for the "skip fetch"
+	 * optimization. Bitmap scans needing no fields from the heap may skip
+	 * fetching an all visible block, instead using the number of tuples per
+	 * block reported by the bitmap to determine how many NULL-filled tuples
+	 * to return.
+	 */
+	Buffer		rs_vmbuffer;
+	int			rs_empty_tuples_pending;
+
 	/* these fields only used in page-at-a-time mode and for bitmap scans */
 	int			rs_cindex;		/* current tuple's index in vistuples */
 	int			rs_ntuples;		/* number of visible tuples on page */
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 5375dd7150f..c193ea5db43 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -62,6 +62,13 @@ typedef enum ScanOptions
 
 	/* unregister snapshot at scan end? */
 	SO_TEMP_SNAPSHOT = 1 << 9,
+
+	/*
+	 * At the discretion of the table AM, bitmap table scans may be able to
+	 * skip fetching a block from the table if none of the table data is
+	 * needed.
+	 */
+	SO_CAN_SKIP_FETCH = 1 << 10,
 }			ScanOptions;
 
 /*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 00c75fb10e2..6fb4ec07c5f 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1711,10 +1711,7 @@ typedef struct ParallelBitmapHeapState
  *		tbm				   bitmap obtained from child index scan(s)
  *		tbmiterator		   iterator for scanning current pages
  *		tbmres			   current-page data
- *		can_skip_fetch	   can we potentially skip tuple fetches in this scan?
- *		return_empty_tuples number of empty tuples to return
- *		vmbuffer		   buffer for visibility-map lookups
- *		pvmbuffer		   ditto, for prefetched pages
+ *		pvmbuffer		   buffer for visibility-map lookups of prefetched pages
  *		exact_pages		   total number of exact pages retrieved
  *		lossy_pages		   total number of lossy pages retrieved
  *		prefetch_iterator  iterator for prefetching ahead of current page
@@ -1736,9 +1733,6 @@ typedef struct BitmapHeapScanState
 	TIDBitmap  *tbm;
 	TBMIterator *tbmiterator;
 	TBMIterateResult *tbmres;
-	bool		can_skip_fetch;
-	int			return_empty_tuples;
-	Buffer		vmbuffer;
 	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
-- 
2.37.2

