From 6709a066fad0d75cf8d2daec97f27878f8cd9ce4 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sun, 20 Jan 2019 00:08:58 -0800
Subject: [PATCH v18 14/18] tableam: Sample Scan Support.

Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
 contrib/tsm_system_rows/tsm_system_rows.c |  86 ++------
 contrib/tsm_system_time/tsm_system_time.c |  13 +-
 src/backend/access/heap/heapam_handler.c  | 218 +++++++++++++++++++
 src/backend/access/tablesample/system.c   |  11 +-
 src/backend/executor/nodeSamplescan.c     | 249 +++-------------------
 src/include/access/tableam.h              |  31 +++
 src/include/access/tsmapi.h               |   2 +-
 src/include/nodes/execnodes.h             |   4 +
 src/include/nodes/tidbitmap.h             |   2 +-
 9 files changed, 308 insertions(+), 308 deletions(-)

diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c
index 1d35ea3c53a..3611d058331 100644
--- a/contrib/tsm_system_rows/tsm_system_rows.c
+++ b/contrib/tsm_system_rows/tsm_system_rows.c
@@ -28,7 +28,6 @@
 
 #include "postgres.h"
 
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
@@ -46,7 +45,6 @@ typedef struct
 {
 	uint32		seed;			/* random seed */
 	int64		ntuples;		/* number of tuples to return */
-	int64		donetuples;		/* number of tuples already returned */
 	OffsetNumber lt;			/* last tuple returned from current block */
 	BlockNumber doneblocks;		/* number of already-scanned blocks */
 	BlockNumber lb;				/* last block visited */
@@ -67,11 +65,10 @@ static void system_rows_beginsamplescan(SampleScanState *node,
 							Datum *params,
 							int nparams,
 							uint32 seed);
-static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
 							BlockNumber blockno,
 							OffsetNumber maxoffset);
-static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
 static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
 
 
@@ -187,7 +184,6 @@ system_rows_beginsamplescan(SampleScanState *node,
 
 	sampler->seed = seed;
 	sampler->ntuples = ntuples;
-	sampler->donetuples = 0;
 	sampler->lt = InvalidOffsetNumber;
 	sampler->doneblocks = 0;
 	/* lb will be initialized during first NextSampleBlock call */
@@ -206,11 +202,9 @@ system_rows_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_rows_nextsampleblock(SampleScanState *node)
+system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
 	SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-	TableScanDesc scan = node->ss.ss_currentScanDesc;
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 
 	/* First call within scan? */
 	if (sampler->doneblocks == 0)
@@ -222,14 +216,14 @@ system_rows_nextsampleblock(SampleScanState *node)
 			SamplerRandomState randstate;
 
 			/* If relation is empty, there's nothing to scan */
-			if (hscan->rs_nblocks == 0)
+			if (nblocks == 0)
 				return InvalidBlockNumber;
 
 			/* We only need an RNG during this setup step */
 			sampler_random_init_state(sampler->seed, randstate);
 
 			/* Compute nblocks/firstblock/step only once per query */
-			sampler->nblocks = hscan->rs_nblocks;
+			sampler->nblocks = nblocks;
 
 			/* Choose random starting block within the relation */
 			/* (Actually this is the predecessor of the first block visited) */
@@ -246,7 +240,7 @@ system_rows_nextsampleblock(SampleScanState *node)
 
 	/* If we've read all blocks or returned all needed tuples, we're done */
 	if (++sampler->doneblocks > sampler->nblocks ||
-		sampler->donetuples >= sampler->ntuples)
+		node->donetuples >= sampler->ntuples)
 		return InvalidBlockNumber;
 
 	/*
@@ -259,7 +253,7 @@ system_rows_nextsampleblock(SampleScanState *node)
 	{
 		/* Advance lb, using uint64 arithmetic to forestall overflow */
 		sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-	} while (sampler->lb >= hscan->rs_nblocks);
+	} while (sampler->lb >= nblocks);
 
 	return sampler->lb;
 }
@@ -279,77 +273,27 @@ system_rows_nextsampletuple(SampleScanState *node,
 							OffsetNumber maxoffset)
 {
 	SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-	TableScanDesc scan = node->ss.ss_currentScanDesc;
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 	OffsetNumber tupoffset = sampler->lt;
 
 	/* Quit if we've returned all needed tuples */
-	if (sampler->donetuples >= sampler->ntuples)
+	if (node->donetuples >= sampler->ntuples)
 		return InvalidOffsetNumber;
 
-	/*
-	 * Because we should only count visible tuples as being returned, we need
-	 * to search for a visible tuple rather than just let the core code do it.
-	 */
+	/* Advance to next possible offset on page */
+	if (tupoffset == InvalidOffsetNumber)
+		tupoffset = FirstOffsetNumber;
+	else
+		tupoffset++;
 
-	/* We rely on the data accumulated in pagemode access */
-	Assert(scan->rs_pageatatime);
-	for (;;)
-	{
-		/* Advance to next possible offset on page */
-		if (tupoffset == InvalidOffsetNumber)
-			tupoffset = FirstOffsetNumber;
-		else
-			tupoffset++;
-
-		/* Done? */
-		if (tupoffset > maxoffset)
-		{
-			tupoffset = InvalidOffsetNumber;
-			break;
-		}
-
-		/* Found a candidate? */
-		if (SampleOffsetVisible(tupoffset, hscan))
-		{
-			sampler->donetuples++;
-			break;
-		}
-	}
+	/* Done? */
+	if (tupoffset > maxoffset)
+		tupoffset = InvalidOffsetNumber;
 
 	sampler->lt = tupoffset;
 
 	return tupoffset;
 }
 
-/*
- * Check if tuple offset is visible
- *
- * In pageatatime mode, heapgetpage() already did visibility checks,
- * so just look at the info it left in rs_vistuples[].
- */
-static bool
-SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
-{
-	int			start = 0,
-				end = scan->rs_ntuples - 1;
-
-	while (start <= end)
-	{
-		int			mid = (start + end) / 2;
-		OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-		if (tupoffset == curoffset)
-			return true;
-		else if (tupoffset < curoffset)
-			end = mid - 1;
-		else
-			start = mid + 1;
-	}
-
-	return false;
-}
-
 /*
  * Compute greatest common divisor of two uint32's.
  */
diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c
index 1cc7264e084..ab9f685f0af 100644
--- a/contrib/tsm_system_time/tsm_system_time.c
+++ b/contrib/tsm_system_time/tsm_system_time.c
@@ -26,7 +26,6 @@
 
 #include <math.h>
 
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
@@ -66,7 +65,7 @@ static void system_time_beginsamplescan(SampleScanState *node,
 							Datum *params,
 							int nparams,
 							uint32 seed);
-static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
 							BlockNumber blockno,
 							OffsetNumber maxoffset);
@@ -213,11 +212,9 @@ system_time_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_time_nextsampleblock(SampleScanState *node)
+system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
 	SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
-	TableScanDesc scan = node->ss.ss_currentScanDesc;
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 	instr_time	cur_time;
 
 	/* First call within scan? */
@@ -230,14 +227,14 @@ system_time_nextsampleblock(SampleScanState *node)
 			SamplerRandomState randstate;
 
 			/* If relation is empty, there's nothing to scan */
-			if (hscan->rs_nblocks == 0)
+			if (nblocks == 0)
 				return InvalidBlockNumber;
 
 			/* We only need an RNG during this setup step */
 			sampler_random_init_state(sampler->seed, randstate);
 
 			/* Compute nblocks/firstblock/step only once per query */
-			sampler->nblocks = hscan->rs_nblocks;
+			sampler->nblocks = nblocks;
 
 			/* Choose random starting block within the relation */
 			/* (Actually this is the predecessor of the first block visited) */
@@ -273,7 +270,7 @@ system_time_nextsampleblock(SampleScanState *node)
 	{
 		/* Advance lb, using uint64 arithmetic to forestall overflow */
 		sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-	} while (sampler->lb >= hscan->rs_nblocks);
+	} while (sampler->lb >= nblocks);
 
 	return sampler->lb;
 }
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index d13f934833a..f71b9b2a062 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -27,6 +27,7 @@
 #include "access/heapam.h"
 #include "access/multixact.h"
 #include "access/tableam.h"
+#include "access/tsmapi.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/index.h"
@@ -34,6 +35,7 @@
 #include "catalog/storage_xlog.h"
 #include "executor/executor.h"
 #include "optimizer/plancat.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/bufpage.h"
 #include "storage/bufmgr.h"
@@ -1804,6 +1806,219 @@ heapam_index_validate_scan(Relation heapRelation,
 	indexInfo->ii_PredicateState = NULL;
 }
 
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleHeapTupleVisible(HeapScanDesc scan, Buffer buffer,
+					   HeapTuple tuple,
+					   OffsetNumber tupoffset)
+{
+	if (scan->rs_base.rs_pageatatime)
+	{
+		/*
+		 * In pageatatime mode, heapgetpage() already did visibility checks,
+		 * so just look at the info it left in rs_vistuples[].
+		 *
+		 * We use a binary search over the known-sorted array.  Note: we could
+		 * save some effort if we insisted that NextSampleTuple select tuples
+		 * in increasing order, but it's not clear that there would be enough
+		 * gain to justify the restriction.
+		 */
+		int			start = 0,
+					end = scan->rs_ntuples - 1;
+
+		while (start <= end)
+		{
+			int			mid = (start + end) / 2;
+			OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+			if (tupoffset == curoffset)
+				return true;
+			else if (tupoffset < curoffset)
+				end = mid - 1;
+			else
+				start = mid + 1;
+		}
+
+		return false;
+	}
+	else
+	{
+		/* Otherwise, we have to check the tuple individually. */
+		return HeapTupleSatisfiesVisibility(tuple, scan->rs_base.rs_snapshot,
+											buffer);
+	}
+}
+
+static bool
+heapam_scan_sample_next_block(TableScanDesc sscan, struct SampleScanState *scanstate)
+{
+	HeapScanDesc scan = (HeapScanDesc) sscan;
+	TsmRoutine *tsm = scanstate->tsmroutine;
+	BlockNumber blockno;
+
+	/* return false immediately if relation is empty */
+	if (scan->rs_nblocks == 0)
+		return false;
+
+	if (tsm->NextSampleBlock)
+	{
+		blockno = tsm->NextSampleBlock(scanstate, scan->rs_nblocks);
+		scan->rs_cblock = blockno;
+	}
+	else
+	{
+		/* scanning table sequentially */
+
+		if (scan->rs_cblock == InvalidBlockNumber)
+		{
+			Assert(!scan->rs_inited);
+			blockno = scan->rs_startblock;
+		}
+		else
+		{
+			Assert(scan->rs_inited);
+
+			blockno = scan->rs_cblock + 1;
+
+			if (blockno >= scan->rs_nblocks)
+			{
+				/* wrap to begining of rel, might not have started at 0 */
+				blockno = 0;
+			}
+
+			/*
+			 * Report our new scan position for synchronization purposes.
+			 *
+			 * Note: we do this before checking for end of scan so that the
+			 * final state of the position hint is back at the start of the
+			 * rel.  That's not strictly necessary, but otherwise when you run
+			 * the same query multiple times the starting position would shift
+			 * a little bit backwards on every invocation, which is confusing.
+			 * We don't guarantee any specific ordering in general, though.
+			 */
+			if (scan->rs_base.rs_syncscan)
+				ss_report_location(scan->rs_base.rs_rd, blockno);
+
+			if (blockno == scan->rs_startblock)
+			{
+				blockno = InvalidBlockNumber;
+			}
+		}
+	}
+
+	if (!BlockNumberIsValid(blockno))
+	{
+		if (BufferIsValid(scan->rs_cbuf))
+			ReleaseBuffer(scan->rs_cbuf);
+		scan->rs_cbuf = InvalidBuffer;
+		scan->rs_cblock = InvalidBlockNumber;
+		scan->rs_inited = false;
+
+		return false;
+	}
+
+	heapgetpage(sscan, blockno);
+	scan->rs_inited = true;
+
+	return true;
+}
+
+static bool
+heapam_scan_sample_next_tuple(TableScanDesc sscan, struct SampleScanState *scanstate, TupleTableSlot *slot)
+{
+	HeapScanDesc scan = (HeapScanDesc) sscan;
+	TsmRoutine *tsm = scanstate->tsmroutine;
+	BlockNumber blockno = scan->rs_cblock;
+	bool		pagemode = scan->rs_base.rs_pageatatime;
+
+	Page		page;
+	bool		all_visible;
+	OffsetNumber maxoffset;
+
+	ExecClearTuple(slot);
+
+	/*
+	 * When not using pagemode, we must lock the buffer during tuple
+	 * visibility checks.
+	 */
+	if (!pagemode)
+		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+	page = (Page) BufferGetPage(scan->rs_cbuf);
+	all_visible = PageIsAllVisible(page) &&
+		!scan->rs_base.rs_snapshot->takenDuringRecovery;
+	maxoffset = PageGetMaxOffsetNumber(page);
+
+	for (;;)
+	{
+		OffsetNumber tupoffset;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* Ask the tablesample method which tuples to check on this page. */
+		tupoffset = tsm->NextSampleTuple(scanstate,
+										 blockno,
+										 maxoffset);
+
+		if (OffsetNumberIsValid(tupoffset))
+		{
+			ItemId		itemid;
+			bool		visible;
+			HeapTuple	tuple = &(scan->rs_ctup);
+
+			/* Skip invalid tuple pointers. */
+			itemid = PageGetItemId(page, tupoffset);
+			if (!ItemIdIsNormal(itemid))
+				continue;
+
+			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple->t_len = ItemIdGetLength(itemid);
+			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+
+			if (all_visible)
+				visible = true;
+			else
+				visible = SampleHeapTupleVisible(scan, scan->rs_cbuf, tuple, tupoffset);
+
+			/* in pagemode, heapgetpage did this for us */
+			if (!pagemode)
+				CheckForSerializableConflictOut(visible, scan->rs_base.rs_rd, tuple,
+												scan->rs_cbuf, scan->rs_base.rs_snapshot);
+
+			/* Try next tuple from same page. */
+			if (!visible)
+				continue;
+
+			/* Found visible tuple, return it. */
+			if (!pagemode)
+				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+			ExecStoreBufferHeapTuple(tuple, slot, scan->rs_cbuf);
+
+			/* Count successfully-fetched tuples as heap fetches */
+			pgstat_count_heap_getnext(scan->rs_base.rs_rd);
+
+			return true;
+		}
+		else
+		{
+			/*
+			 * If we get here, it means we've exhausted the items on this page
+			 * and it's time to move to the next.
+			 */
+			if (!pagemode)
+				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+			break;
+		}
+	}
+
+	return false;
+}
+
 /*
  * Reconstruct and rewrite the given tuple
  *
@@ -1992,6 +2207,9 @@ static const TableAmRoutine heapam_methods = {
 	.index_validate_scan = heapam_index_validate_scan,
 
 	.relation_estimate_size = heapam_estimate_rel_size,
+
+	.scan_sample_next_block = heapam_scan_sample_next_block,
+	.scan_sample_next_tuple = heapam_scan_sample_next_tuple
 };
 
 
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
index fe62a73341e..476411caf1a 100644
--- a/src/backend/access/tablesample/system.c
+++ b/src/backend/access/tablesample/system.c
@@ -27,7 +27,6 @@
 #include <math.h>
 
 #include "access/hash.h"
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
@@ -56,7 +55,7 @@ static void system_beginsamplescan(SampleScanState *node,
 					   Datum *params,
 					   int nparams,
 					   uint32 seed);
-static BlockNumber system_nextsampleblock(SampleScanState *node);
+static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_nextsampletuple(SampleScanState *node,
 					   BlockNumber blockno,
 					   OffsetNumber maxoffset);
@@ -177,11 +176,9 @@ system_beginsamplescan(SampleScanState *node,
  * Select next block to sample.
  */
 static BlockNumber
-system_nextsampleblock(SampleScanState *node)
+system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
 	SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
-	TableScanDesc scan = node->ss.ss_currentScanDesc;
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 	BlockNumber nextblock = sampler->nextblock;
 	uint32		hashinput[2];
 
@@ -200,7 +197,7 @@ system_nextsampleblock(SampleScanState *node)
 	 * Loop over block numbers until finding suitable block or reaching end of
 	 * relation.
 	 */
-	for (; nextblock < hscan->rs_nblocks; nextblock++)
+	for (; nextblock < nblocks; nextblock++)
 	{
 		uint32		hash;
 
@@ -212,7 +209,7 @@ system_nextsampleblock(SampleScanState *node)
 			break;
 	}
 
-	if (nextblock < hscan->rs_nblocks)
+	if (nextblock < nblocks)
 	{
 		/* Found a suitable block; remember where we should start next time */
 		sampler->nextblock = nextblock + 1;
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 3a00d648099..ce0d3bfa572 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -15,7 +15,6 @@
 #include "postgres.h"
 
 #include "access/hash.h"
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
@@ -29,9 +28,7 @@
 
 static TupleTableSlot *SampleNext(SampleScanState *node);
 static void tablesample_init(SampleScanState *scanstate);
-static HeapTuple tablesample_getnext(SampleScanState *scanstate);
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
-				   HeapScanDesc scan);
+static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
 
 /* ----------------------------------------------------------------
  *						Scan Support
@@ -47,10 +44,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-	HeapTuple	tuple;
-	TupleTableSlot *slot;
-	HeapScanDesc hscan;
-
 	/*
 	 * if this is first call within a scan, initialize
 	 */
@@ -60,19 +53,7 @@ SampleNext(SampleScanState *node)
 	/*
 	 * get the next tuple, and store it in our result slot
 	 */
-	tuple = tablesample_getnext(node);
-
-	slot = node->ss.ss_ScanTupleSlot;
-	hscan = (HeapScanDesc) node->ss.ss_currentScanDesc;
-
-	if (tuple)
-		ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-								 slot,	/* slot to store in */
-								 hscan->rs_cbuf); /* tuple's buffer */
-	else
-		ExecClearTuple(slot);
-
-	return slot;
+	return tablesample_getnext(node);
 }
 
 /*
@@ -237,6 +218,9 @@ ExecReScanSampleScan(SampleScanState *node)
 {
 	/* Remember we need to do BeginSampleScan again (if we did it at all) */
 	node->begun = false;
+	node->done = false;
+	node->haveblock = false;
+	node->donetuples = 0;
 
 	ExecScanReScan(&node->ss);
 }
@@ -258,6 +242,7 @@ tablesample_init(SampleScanState *scanstate)
 	int			i;
 	ListCell   *arg;
 
+	scanstate->donetuples = 0;
 	params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
 
 	i = 0;
@@ -345,225 +330,49 @@ tablesample_init(SampleScanState *scanstate)
 
 /*
  * Get next tuple from TABLESAMPLE method.
- *
- * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
- * perhaps be better to refactor to share more code.
  */
-static HeapTuple
+static TupleTableSlot *
 tablesample_getnext(SampleScanState *scanstate)
 {
-	TsmRoutine *tsm = scanstate->tsmroutine;
 	TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
-	HeapScanDesc hscan = (HeapScanDesc) scan;
-	HeapTuple	tuple = &(hscan->rs_ctup);
-	Snapshot	snapshot = scan->rs_snapshot;
-	bool		pagemode = scan->rs_pageatatime;
-	BlockNumber blockno;
-	Page		page;
-	bool		all_visible;
-	OffsetNumber maxoffset;
+	TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
 
-	if (!hscan->rs_inited)
-	{
-		/*
-		 * return null immediately if relation is empty
-		 */
-		if (hscan->rs_nblocks == 0)
-		{
-			Assert(!BufferIsValid(hscan->rs_cbuf));
-			tuple->t_data = NULL;
-			return NULL;
-		}
-		if (tsm->NextSampleBlock)
-		{
-			blockno = tsm->NextSampleBlock(scanstate);
-			if (!BlockNumberIsValid(blockno))
-			{
-				tuple->t_data = NULL;
-				return NULL;
-			}
-		}
-		else
-			blockno = hscan->rs_startblock;
-		Assert(blockno < hscan->rs_nblocks);
-		heapgetpage(scan, blockno);
-		hscan->rs_inited = true;
-	}
-	else
-	{
-		/* continue from previously returned page/tuple */
-		blockno = hscan->rs_cblock;	/* current page */
-	}
+	ExecClearTuple(slot);
 
-	/*
-	 * When not using pagemode, we must lock the buffer during tuple
-	 * visibility checks.
-	 */
-	if (!pagemode)
-		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-	page = (Page) BufferGetPage(hscan->rs_cbuf);
-	all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-	maxoffset = PageGetMaxOffsetNumber(page);
+	if (scanstate->done)
+		return NULL;
 
 	for (;;)
 	{
-		OffsetNumber tupoffset;
-		bool		finished;
-
-		CHECK_FOR_INTERRUPTS();
-
-		/* Ask the tablesample method which tuples to check on this page. */
-		tupoffset = tsm->NextSampleTuple(scanstate,
-										 blockno,
-										 maxoffset);
-
-		if (OffsetNumberIsValid(tupoffset))
+		if (!scanstate->haveblock)
 		{
-			ItemId		itemid;
-			bool		visible;
-
-			/* Skip invalid tuple pointers. */
-			itemid = PageGetItemId(page, tupoffset);
-			if (!ItemIdIsNormal(itemid))
-				continue;
-
-			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-			tuple->t_len = ItemIdGetLength(itemid);
-			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-			if (all_visible)
-				visible = true;
-			else
-				visible = SampleTupleVisible(tuple, tupoffset, hscan);
-
-			/* in pagemode, heapgetpage did this for us */
-			if (!pagemode)
-				CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
-												hscan->rs_cbuf, snapshot);
-
-			if (visible)
+			if (!table_scan_sample_next_block(scan, scanstate))
 			{
-				/* Found visible tuple, return it. */
-				if (!pagemode)
-					LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-				break;
-			}
-			else
-			{
-				/* Try next tuple from same page. */
-				continue;
+				scanstate->haveblock = false;
+				scanstate->done = true;
+
+				/* exhausted relation */
+				return NULL;
 			}
+
+			scanstate->haveblock = true;
 		}
 
-		/*
-		 * if we get here, it means we've exhausted the items on this page and
-		 * it's time to move to the next.
-		 */
-		if (!pagemode)
-			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-
-		if (tsm->NextSampleBlock)
+		if (!table_scan_sample_next_tuple(scan, scanstate, slot))
 		{
-			blockno = tsm->NextSampleBlock(scanstate);
-			Assert(!scan->rs_syncscan);
-			finished = !BlockNumberIsValid(blockno);
-		}
-		else
-		{
-			/* Without NextSampleBlock, just do a plain forward seqscan. */
-			blockno++;
-			if (blockno >= hscan->rs_nblocks)
-				blockno = 0;
-
 			/*
-			 * Report our new scan position for synchronization purposes.
-			 *
-			 * Note: we do this before checking for end of scan so that the
-			 * final state of the position hint is back at the start of the
-			 * rel.  That's not strictly necessary, but otherwise when you run
-			 * the same query multiple times the starting position would shift
-			 * a little bit backwards on every invocation, which is confusing.
-			 * We don't guarantee any specific ordering in general, though.
+			 * If we get here, it means we've exhausted the items on this page
+			 * and it's time to move to the next.
 			 */
-			if (scan->rs_syncscan)
-				ss_report_location(scan->rs_rd, blockno);
-
-			finished = (blockno == hscan->rs_startblock);
+			scanstate->haveblock = false;
+			continue;
 		}
 
-		/*
-		 * Reached end of scan?
-		 */
-		if (finished)
-		{
-			if (BufferIsValid(hscan->rs_cbuf))
-				ReleaseBuffer(hscan->rs_cbuf);
-			hscan->rs_cbuf = InvalidBuffer;
-			hscan->rs_cblock = InvalidBlockNumber;
-			tuple->t_data = NULL;
-			hscan->rs_inited = false;
-			return NULL;
-		}
-
-		Assert(blockno < hscan->rs_nblocks);
-		heapgetpage(scan, blockno);
-
-		/* Re-establish state for new page */
-		if (!pagemode)
-			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-		page = (Page) BufferGetPage(hscan->rs_cbuf);
-		all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-		maxoffset = PageGetMaxOffsetNumber(page);
+		/* Found visible tuple, return it. */
+		break;
 	}
 
-	/* Count successfully-fetched tuples as heap fetches */
-	pgstat_count_heap_getnext(scan->rs_rd);
+	scanstate->donetuples++;
 
-	return &(hscan->rs_ctup);
-}
-
-/*
- * Check visibility of the tuple.
- */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
-{
-	if (scan->rs_base.rs_pageatatime)
-	{
-		/*
-		 * In pageatatime mode, heapgetpage() already did visibility checks,
-		 * so just look at the info it left in rs_vistuples[].
-		 *
-		 * We use a binary search over the known-sorted array.  Note: we could
-		 * save some effort if we insisted that NextSampleTuple select tuples
-		 * in increasing order, but it's not clear that there would be enough
-		 * gain to justify the restriction.
-		 */
-		int			start = 0,
-					end = scan->rs_ntuples - 1;
-
-		while (start <= end)
-		{
-			int			mid = (start + end) / 2;
-			OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-			if (tupoffset == curoffset)
-				return true;
-			else if (tupoffset < curoffset)
-				end = mid - 1;
-			else
-				start = mid + 1;
-		}
-
-		return false;
-	}
-	else
-	{
-		/* Otherwise, we have to check the tuple individually. */
-		return HeapTupleSatisfiesVisibility(tuple,
-											scan->rs_base.rs_snapshot,
-											scan->rs_cbuf);
-	}
+	return slot;
 }
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 2fc144c3665..0c9339c676e 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -30,6 +30,7 @@ extern bool synchronize_seqscans;
 
 struct VacuumParams;
 struct ValidateIndexState;
+struct SampleScanState;
 struct BulkInsertStateData;
 
 /*
@@ -344,6 +345,18 @@ typedef struct TableAmRoutine
 	void		(*relation_estimate_size) (Relation rel, int32 *attr_widths,
 										   BlockNumber *pages, double *tuples, double *allvisfrac);
 
+
+	/* ------------------------------------------------------------------------
+	 * Executor related functions.
+	 * ------------------------------------------------------------------------
+	 */
+
+	bool		(*scan_sample_next_block) (TableScanDesc scan,
+										   struct SampleScanState *scanstate);
+	bool		(*scan_sample_next_tuple) (TableScanDesc scan,
+										   struct SampleScanState *scanstate,
+										   TupleTableSlot *slot);
+
 } TableAmRoutine;
 
 
@@ -887,6 +900,24 @@ table_estimate_size(Relation rel, int32 *attr_widths,
 }
 
 
+/* ----------------------------------------------------------------------------
+ * Executor related functionality
+ * ----------------------------------------------------------------------------
+ */
+
+static inline bool
+table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
+{
+	return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
+}
+
+static inline bool
+table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
+{
+	return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate, slot);
+}
+
+
 /* ----------------------------------------------------------------------------
  * Functions to make modifications a bit simpler.
  * ----------------------------------------------------------------------------
diff --git a/src/include/access/tsmapi.h b/src/include/access/tsmapi.h
index a5c0b4cafec..ccef65aedb4 100644
--- a/src/include/access/tsmapi.h
+++ b/src/include/access/tsmapi.h
@@ -34,7 +34,7 @@ typedef void (*BeginSampleScan_function) (SampleScanState *node,
 										  int nparams,
 										  uint32 seed);
 
-typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, BlockNumber nblocks);
 
 typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
 												  BlockNumber blockno,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 62eb1a06eef..22ac3b17518 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1300,6 +1300,9 @@ typedef struct SampleScanState
 	bool		use_pagemode;	/* use page-at-a-time visibility checking? */
 	bool		begun;			/* false means need to call BeginSampleScan */
 	uint32		seed;			/* random seed */
+	int64		donetuples;		/* number of tuples already returned */
+	bool		haveblock;		/* has a block for sampling been determined */
+	bool		done;			/* exhausted all tuples? */
 } SampleScanState;
 
 /*
@@ -1528,6 +1531,7 @@ typedef struct BitmapHeapScanState
 	Buffer		pvmbuffer;
 	long		exact_pages;
 	long		lossy_pages;
+	int			return_empty_tuples;
 	TBMIterator *prefetch_iterator;
 	int			prefetch_pages;
 	int			prefetch_target;
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index 2645085b344..6a7f3054a41 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -37,7 +37,7 @@ typedef struct TBMIterator TBMIterator;
 typedef struct TBMSharedIterator TBMSharedIterator;
 
 /* Result structure for tbm_iterate */
-typedef struct
+typedef struct TBMIterateResult
 {
 	BlockNumber blockno;		/* page number containing tuples */
 	int			ntuples;		/* -1 indicates lossy result */
-- 
2.21.0.dirty

