From a5d981a5ecd867bb46f51a8fb1b153d203df03ac Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Thu, 12 Dec 2019 20:54:37 -0600
Subject: [PATCH v24 1/4] vacuum errcontext to show block being processed

Discussion:
https://www.postgresql.org/message-id/20191120210600.GC30362@telsasoft.com
---
 src/backend/access/heap/vacuumlazy.c | 192 +++++++++++++++++++++++----
 1 file changed, 167 insertions(+), 25 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 03c43efc32..084b43c178 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -268,8 +268,19 @@ typedef struct LVParallelState
 	int			nindexes_parallel_condcleanup;
 } LVParallelState;
 
+typedef enum {
+	VACUUM_ERRCB_PHASE_UNKNOWN,
+	VACUUM_ERRCB_PHASE_SCAN_HEAP,
+	VACUUM_ERRCB_PHASE_VACUUM_INDEX,
+	VACUUM_ERRCB_PHASE_VACUUM_HEAP,
+	VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
+	VACUUM_ERRCB_PHASE_VACUUM_FSM,
+} errcb_phase;
+
 typedef struct LVRelStats
 {
+	char 		*relnamespace;
+	char		*relname;
 	/* useindex = true means two-pass strategy; false means one-pass */
 	bool		useindex;
 	/* Overall statistics about rel */
@@ -290,8 +301,12 @@ typedef struct LVRelStats
 	int			num_index_scans;
 	TransactionId latestRemovedXid;
 	bool		lock_waiter_detected;
-} LVRelStats;
 
+	/* Used for error callback: */
+	char 		*indname;
+	BlockNumber blkno;	/* used only for heap operations */
+	errcb_phase	phase;
+} LVRelStats;
 
 /* A few variables that don't seem worth passing around as parameters */
 static int	elevel = -1;
@@ -314,10 +329,10 @@ static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
 									LVRelStats *vacrelstats, LVParallelState *lps,
 									int nindexes);
 static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
-							  LVDeadTuples *dead_tuples, double reltuples);
+							  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats);
 static void lazy_cleanup_index(Relation indrel,
 							   IndexBulkDeleteResult **stats,
-							   double reltuples, bool estimated_count);
+							   double reltuples, bool estimated_count, LVRelStats *vacrelstats);
 static int	lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 							 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
 static bool should_attempt_truncation(VacuumParams *params,
@@ -337,13 +352,13 @@ static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult *
 										 int nindexes);
 static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
 								  LVShared *lvshared, LVDeadTuples *dead_tuples,
-								  int nindexes);
+								  int nindexes, LVRelStats *vacrelstats);
 static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
 								  LVRelStats *vacrelstats, LVParallelState *lps,
 								  int nindexes);
 static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
 							 LVShared *lvshared, LVSharedIndStats *shared_indstats,
-							 LVDeadTuples *dead_tuples);
+							 LVDeadTuples *dead_tuples, LVRelStats *vacrelstats);
 static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
 									 LVRelStats *vacrelstats, LVParallelState *lps,
 									 int nindexes);
@@ -361,6 +376,9 @@ static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats,
 								LVParallelState *lps, int nindexes);
 static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
 static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
+static void vacuum_error_callback(void *arg);
+static void update_vacuum_error_cbarg(LVRelStats *errcbarg, int phase,
+		BlockNumber blkno, Relation rel);
 
 
 /*
@@ -460,6 +478,9 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 
 	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
 
+	vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel));
+	vacrelstats->relname = pstrdup(RelationGetRelationName(onerel));
+	vacrelstats->indname = NULL;
 	vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
 	vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
 	vacrelstats->num_index_scans = 0;
@@ -699,7 +720,6 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	BlockNumber nblocks,
 				blkno;
 	HeapTupleData tuple;
-	char	   *relname;
 	TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
 	TransactionId relminmxid = onerel->rd_rel->relminmxid;
 	BlockNumber empty_pages,
@@ -724,20 +744,20 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		PROGRESS_VACUUM_MAX_DEAD_TUPLES
 	};
 	int64		initprog_val[3];
+	ErrorContextCallback errcallback;
 
 	pg_rusage_init(&ru0);
 
-	relname = RelationGetRelationName(onerel);
 	if (aggressive)
 		ereport(elevel,
 				(errmsg("aggressively vacuuming \"%s.%s\"",
-						get_namespace_name(RelationGetNamespace(onerel)),
-						relname)));
+						vacrelstats->relnamespace,
+						vacrelstats->relname)));
 	else
 		ereport(elevel,
 				(errmsg("vacuuming \"%s.%s\"",
-						get_namespace_name(RelationGetNamespace(onerel)),
-						relname)));
+						vacrelstats->relnamespace,
+						vacrelstats->relname)));
 
 	empty_pages = vacuumed_pages = 0;
 	next_fsm_block_to_vacuum = (BlockNumber) 0;
@@ -870,6 +890,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	else
 		skipping_blocks = false;
 
+	/* Setup error traceback support for ereport() */
+	update_vacuum_error_cbarg(vacrelstats, VACUUM_ERRCB_PHASE_SCAN_HEAP,
+			InvalidBlockNumber, NULL);
+	errcallback.callback = vacuum_error_callback;
+	errcallback.arg = vacrelstats;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
 	for (blkno = 0; blkno < nblocks; blkno++)
 	{
 		Buffer		buf;
@@ -891,6 +919,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 #define FORCE_CHECK_PAGE() \
 		(blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
 
+		vacrelstats->blkno = blkno;
+
 		pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
 
 		if (blkno == next_unskippable_block)
@@ -1005,12 +1035,18 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 * Vacuum the Free Space Map to make newly-freed space visible on
 			 * upper-level FSM pages.  Note we have not yet processed blkno.
 			 */
+			update_vacuum_error_cbarg(vacrelstats,
+					VACUUM_ERRCB_PHASE_VACUUM_FSM, InvalidBlockNumber, NULL);
 			FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
 			next_fsm_block_to_vacuum = blkno;
 
 			/* Report that we are once again scanning the heap */
 			pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 										 PROGRESS_VACUUM_PHASE_SCAN_HEAP);
+
+			/* Set the error context while continuing heap scan */
+			update_vacuum_error_cbarg(vacrelstats,
+					VACUUM_ERRCB_PHASE_SCAN_HEAP, blkno, NULL);
 		}
 
 		/*
@@ -1488,9 +1524,15 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 */
 			if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
 			{
+				update_vacuum_error_cbarg(vacrelstats,
+						VACUUM_ERRCB_PHASE_VACUUM_FSM, InvalidBlockNumber,
+						NULL);
 				FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
 										blkno);
 				next_fsm_block_to_vacuum = blkno;
+				/* Set the error context while continuing heap scan */
+				update_vacuum_error_cbarg(vacrelstats,
+					VACUUM_ERRCB_PHASE_SCAN_HEAP, blkno, NULL);
 			}
 		}
 
@@ -1534,7 +1576,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 				 && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
 		{
 			elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
-				 relname, blkno);
+				 vacrelstats->relname, blkno);
 			visibilitymap_clear(onerel, blkno, vmbuffer,
 								VISIBILITYMAP_VALID_BITS);
 		}
@@ -1555,7 +1597,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		else if (PageIsAllVisible(page) && has_dead_tuples)
 		{
 			elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
-				 relname, blkno);
+				 vacrelstats->relname, blkno);
 			PageClearAllVisible(page);
 			MarkBufferDirty(buf);
 			visibilitymap_clear(onerel, blkno, vmbuffer,
@@ -1642,7 +1684,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	 * not there were indexes.
 	 */
 	if (blkno > next_fsm_block_to_vacuum)
+	{
+		update_vacuum_error_cbarg(vacrelstats,
+				VACUUM_ERRCB_PHASE_VACUUM_FSM, InvalidBlockNumber, NULL);
 		FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
+	}
 
 	/* report all blocks vacuumed */
 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
@@ -1651,6 +1697,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	if (vacrelstats->useindex)
 		lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes);
 
+	/* Pop the error context stack */
+	error_context_stack = errcallback.previous;
+
 	/*
 	 * End parallel mode before updating index statistics as we cannot write
 	 * during parallel mode.
@@ -1744,7 +1793,7 @@ lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
 
 		for (idx = 0; idx < nindexes; idx++)
 			lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
-							  vacrelstats->old_live_tuples);
+							  vacrelstats->old_live_tuples, vacrelstats);
 	}
 
 	/* Increase and report the number of index scans */
@@ -1847,6 +1896,10 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 
 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
 
+	/* Setup error traceback support for ereport() */
+	update_vacuum_error_cbarg(vacrelstats, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
+			blkno, NULL);
+
 	START_CRIT_SECTION();
 
 	for (; tupindex < dead_tuples->num_tuples; tupindex++)
@@ -2083,7 +2136,7 @@ lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
 	 * indexes in the case where no workers are launched.
 	 */
 	parallel_vacuum_index(Irel, stats, lps->lvshared,
-						  vacrelstats->dead_tuples, nindexes);
+						  vacrelstats->dead_tuples, nindexes, vacrelstats);
 
 	/* Wait for all vacuum workers to finish */
 	WaitForParallelWorkersToFinish(lps->pcxt);
@@ -2106,7 +2159,7 @@ lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
 static void
 parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
 					  LVShared *lvshared, LVDeadTuples *dead_tuples,
-					  int nindexes)
+					  int nindexes, LVRelStats *vacrelstats)
 {
 	/*
 	 * Increment the active worker count if we are able to launch any worker.
@@ -2140,7 +2193,7 @@ parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
 
 		/* Do vacuum or cleanup of the index */
 		vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
-						 dead_tuples);
+						 dead_tuples, vacrelstats);
 	}
 
 	/*
@@ -2180,7 +2233,8 @@ vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
 		if (shared_indstats == NULL ||
 			skip_parallel_vacuum_index(Irel[i], lps->lvshared))
 			vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
-							 shared_indstats, vacrelstats->dead_tuples);
+							 shared_indstats, vacrelstats->dead_tuples,
+							 vacrelstats);
 	}
 
 	/*
@@ -2200,7 +2254,7 @@ vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
 static void
 vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
 				 LVShared *lvshared, LVSharedIndStats *shared_indstats,
-				 LVDeadTuples *dead_tuples)
+				 LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
 {
 	IndexBulkDeleteResult *bulkdelete_res = NULL;
 
@@ -2220,10 +2274,10 @@ vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
 	/* Do vacuum or cleanup of the index */
 	if (lvshared->for_cleanup)
 		lazy_cleanup_index(indrel, stats, lvshared->reltuples,
-						   lvshared->estimated_count);
+						   lvshared->estimated_count, vacrelstats);
 	else
 		lazy_vacuum_index(indrel, stats, dead_tuples,
-						  lvshared->reltuples);
+						  lvshared->reltuples, vacrelstats);
 
 	/*
 	 * Copy the index bulk-deletion result returned from ambulkdelete and
@@ -2298,7 +2352,8 @@ lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
 		for (idx = 0; idx < nindexes; idx++)
 			lazy_cleanup_index(Irel[idx], &stats[idx],
 							   vacrelstats->new_rel_tuples,
-							   vacrelstats->tupcount_pages < vacrelstats->rel_pages);
+							   vacrelstats->tupcount_pages < vacrelstats->rel_pages,
+							   vacrelstats);
 	}
 }
 
@@ -2313,7 +2368,7 @@ lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
  */
 static void
 lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
-				  LVDeadTuples *dead_tuples, double reltuples)
+				  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
 {
 	IndexVacuumInfo ivinfo;
 	const char *msg;
@@ -2329,6 +2384,11 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
 	ivinfo.num_heap_tuples = reltuples;
 	ivinfo.strategy = vac_strategy;
 
+	/* Setup error traceback support for ereport() */
+	update_vacuum_error_cbarg(vacrelstats,
+			VACUUM_ERRCB_PHASE_VACUUM_INDEX, InvalidBlockNumber,
+			indrel);
+
 	/* Do bulk deletion */
 	*stats = index_bulk_delete(&ivinfo, *stats,
 							   lazy_tid_reaped, (void *) dead_tuples);
@@ -2354,7 +2414,7 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
 static void
 lazy_cleanup_index(Relation indrel,
 				   IndexBulkDeleteResult **stats,
-				   double reltuples, bool estimated_count)
+				   double reltuples, bool estimated_count, LVRelStats *vacrelstats)
 {
 	IndexVacuumInfo ivinfo;
 	const char *msg;
@@ -2371,6 +2431,10 @@ lazy_cleanup_index(Relation indrel,
 	ivinfo.num_heap_tuples = reltuples;
 	ivinfo.strategy = vac_strategy;
 
+	/* Setup error traceback support for ereport() */
+	update_vacuum_error_cbarg(vacrelstats,
+			VACUUM_ERRCB_PHASE_INDEX_CLEANUP, InvalidBlockNumber, indrel);
+
 	*stats = index_vacuum_cleanup(&ivinfo, *stats);
 
 	if (!(*stats))
@@ -3320,6 +3384,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	int			nindexes;
 	char	   *sharedquery;
 	IndexBulkDeleteResult **stats;
+	LVRelStats vacrelstats;
+	ErrorContextCallback errcallback;
 
 	lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED,
 										   false);
@@ -3341,6 +3407,18 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	 */
 	onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
 
+	/* Init vacrelstats for use as error callback by parallel worker: */
+	vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(onerel));
+	vacrelstats.relname = pstrdup(RelationGetRelationName(onerel));
+	vacrelstats.indname = NULL;
+	vacrelstats.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
+
+	/* Setup error traceback support for ereport() */
+	errcallback.callback = vacuum_error_callback;
+	errcallback.arg = &vacrelstats;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
 	/*
 	 * Open all indexes. indrels are sorted in order by OID, which should be
 	 * matched to the leader's one.
@@ -3370,9 +3448,73 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 		maintenance_work_mem = lvshared->maintenance_work_mem_worker;
 
 	/* Process indexes to perform vacuum/cleanup */
-	parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes);
+	parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
+			&vacrelstats);
 
 	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
 	table_close(onerel, ShareUpdateExclusiveLock);
 	pfree(stats);
 }
+
+/*
+ * Error context callback for errors occurring during vacuum.
+ */
+static void
+vacuum_error_callback(void *arg)
+{
+	LVRelStats *cbarg = arg;
+
+	switch (cbarg->phase) {
+		case VACUUM_ERRCB_PHASE_SCAN_HEAP:
+			if (BlockNumberIsValid(cbarg->blkno))
+				errcontext("while scanning block %u of relation \"%s.%s\"",
+						cbarg->blkno, cbarg->relnamespace, cbarg->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
+			if (BlockNumberIsValid(cbarg->blkno))
+				errcontext("while vacuuming block %u of relation \"%s.%s\"",
+						cbarg->blkno, cbarg->relnamespace, cbarg->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
+			errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
+					cbarg->indname, cbarg->relnamespace, cbarg->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
+			errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
+					cbarg->indname, cbarg->relnamespace, cbarg->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_VACUUM_FSM:
+			errcontext("while vacuuming free space map of relation \"%s.%s\"",
+					cbarg->relnamespace, cbarg->relname);
+			break;
+
+		case VACUUM_ERRCB_PHASE_UNKNOWN:
+		default:
+			return; /* do nothing; the cbarg maybe isn't yet initialized */
+	}
+}
+
+/* Update vacuum error callback for current phase, block and index */
+static void
+update_vacuum_error_cbarg(LVRelStats *errcbarg, int phase, BlockNumber blkno,
+		Relation indrel)
+{
+	errcbarg->blkno = blkno;
+	errcbarg->phase = phase;
+
+	/* Free index name from any previous phase */
+	if (errcbarg->indname) {
+		pfree(errcbarg->indname);
+		errcbarg->indname = NULL;
+	}
+
+	/* For index phases, save the name of the current index for the callback */
+	if (indrel) {
+		Assert(indrel->rd_rel->relkind == RELKIND_INDEX);
+		errcbarg->indname = pstrdup(RelationGetRelationName(indrel));
+	}
+}
-- 
2.17.0

