From 87c6cbd43685b2f158fd68cc5fd8e15a0cd1b44a Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio <postgres@jeltef.nl>
Date: Sat, 4 Jul 2026 11:12:15 +0200
Subject: [PATCH v2 2/2] pg_dump: Remove TerminateThread() call

When pg_dump or pg_restore --jobs N is interrupted with ^C on Windows,
we cancel all queries, but we don't want the cancellations to be
reported as errors to the user in the short time before the whole
process exits.

That was previously achieved by calling TerminateThread() on each worker
thread before sending the cancel message, but that doesn't appear to be
100% safe: the implementations of write() and the socket calls inside
PQcancel() might acquire user space locks that were held by the
terminated threads.  (write() certainly does that.)

Instead of doing it that sketchy way this now atomically sets a flag
(before sending any cancel requests) that tells the threads to not log
errors anymore.
---
 src/bin/pg_dump/parallel.c           | 49 +++++++++++++---------------
 src/bin/pg_dump/pg_backup_archiver.c | 14 ++++++--
 src/bin/pg_dump/pg_backup_db.c       | 18 +++++++---
 src/bin/pg_dump/pg_backup_utils.c    | 24 ++++++++++++++
 src/bin/pg_dump/pg_backup_utils.h    | 21 ++++++++++++
 5 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c
index 7e2e9f958ea..e11f7ca9363 100644
--- a/src/bin/pg_dump/parallel.c
+++ b/src/bin/pg_dump/parallel.c
@@ -531,11 +531,12 @@ WaitForTerminatingWorkers(ParallelState *pstate)
  * might be that only the leader gets signaled.
  *
  * On Windows, the cancel handler runs in a separate thread, because that's
- * how SetConsoleCtrlHandler works.  We make it stop worker threads, send
- * cancels on all active connections, and then return FALSE, which will allow
- * the process to die.  For safety's sake, we use a critical section to
- * protect the PGcancel structures against being changed while the signal
- * thread runs.
+ * how SetConsoleCtrlHandler works.  Because the workers are threads in this
+ * same process, we set a flag (is_cancel_in_progress()) so they stay quiet about
+ * the query cancellations instead of cluttering the screen, then send cancels
+ * on all active connections and return FALSE, which will allow the process to
+ * die.  For safety's sake, we use a critical section to protect the PGcancel
+ * structures against being changed while the signal thread runs.
  */
 
 #ifndef WIN32
@@ -641,34 +642,30 @@ consoleHandler(DWORD dwCtrlType)
 	if (dwCtrlType == CTRL_C_EVENT ||
 		dwCtrlType == CTRL_BREAK_EVENT)
 	{
+		/*
+		 * Tell worker threads to stay quiet about the query cancellations
+		 * we're about to send them; otherwise they'd report them as errors
+		 * and clutter the user's screen.  This must be set before we send any
+		 * cancel, so that a worker is guaranteed to see it by the time its
+		 * query fails as a result.
+		 */
+		set_cancel_in_progress();
+
 		/* Critical section prevents changing data we look at here */
 		EnterCriticalSection(&signal_info_lock);
 
 		/*
-		 * If in parallel mode, stop worker threads and send QueryCancel to
-		 * their connected backends.  The main point of stopping the worker
-		 * threads is to keep them from reporting the query cancels as errors,
-		 * which would clutter the user's screen.  We needn't stop the leader
-		 * thread since it won't be doing much anyway.  Do this before
-		 * canceling the main transaction, else we might get invalid-snapshot
-		 * errors reported before we can stop the workers.  Ignore errors,
-		 * there's not much we can do about them anyway.
+		 * If in parallel mode, send QueryCancel to each worker's connected
+		 * backend.  Do this before canceling the main transaction, else we
+		 * might get invalid-snapshot errors reported before we can stop the
+		 * workers.  Ignore errors, there's not much we can do about them
+		 * anyway.
 		 */
 		if (signal_info.pstate != NULL)
 		{
 			for (i = 0; i < signal_info.pstate->numWorkers; i++)
 			{
-				ParallelSlot *slot = &(signal_info.pstate->parallelSlot[i]);
-				ArchiveHandle *AH = slot->AH;
-				HANDLE		hThread = (HANDLE) slot->hThread;
-
-				/*
-				 * Using TerminateThread here may leave some resources leaked,
-				 * but it doesn't matter since we're about to end the whole
-				 * process.
-				 */
-				if (hThread != INVALID_HANDLE_VALUE)
-					TerminateThread(hThread, 0);
+				ArchiveHandle *AH = signal_info.pstate->parallelSlot[i].AH;
 
 				if (AH != NULL && AH->connCancel != NULL)
 					(void) PQcancel(AH->connCancel, errbuf, sizeof(errbuf));
@@ -687,9 +684,7 @@ consoleHandler(DWORD dwCtrlType)
 
 		/*
 		 * Report we're quitting, using nothing more complicated than
-		 * write(2).  (We might be able to get away with using pg_log_*()
-		 * here, but since we terminated other threads uncleanly above, it
-		 * seems better to assume as little as possible.)
+		 * write(2).
 		 */
 		if (progname)
 		{
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 46f4c518347..5c0983fd11a 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -1933,9 +1933,17 @@ warn_or_exit_horribly(ArchiveHandle *AH, const char *fmt, ...)
 	AH->lastErrorStage = AH->stage;
 	AH->lastErrorTE = AH->currentTE;
 
-	va_start(ap, fmt);
-	pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, fmt, ap);
-	va_end(ap);
+	/*
+	 * Don't report the failure if we're cancelling this query ourselves as
+	 * part of tearing down the process; the error is expected.  (Only
+	 * possible on Windows, where the workers are threads.)
+	 */
+	if (!is_cancel_in_progress())
+	{
+		va_start(ap, fmt);
+		pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, fmt, ap);
+		va_end(ap);
+	}
 
 	if (AH->public.exit_on_error)
 		exit_nicely(1);
diff --git a/src/bin/pg_dump/pg_backup_db.c b/src/bin/pg_dump/pg_backup_db.c
index f6bca7dc508..c960ca57243 100644
--- a/src/bin/pg_dump/pg_backup_db.c
+++ b/src/bin/pg_dump/pg_backup_db.c
@@ -402,8 +402,13 @@ ExecuteSqlCommandBuf(Archive *AHX, const char *buf, size_t bufLen)
 		 */
 		if (AH->pgCopyIn &&
 			PQputCopyData(AH->connection, buf, bufLen) <= 0)
-			pg_fatal("error returned by PQputCopyData: %s",
-					 PQerrorMessage(AH->connection));
+		{
+			/* Stay quiet if this is a result of our own cancellation. */
+			if (!is_cancel_in_progress())
+				pg_log_error("error returned by PQputCopyData: %s",
+							 PQerrorMessage(AH->connection));
+			exit_nicely(1);
+		}
 	}
 	else if (AH->outputKind == OUTPUT_OTHERDATA)
 	{
@@ -451,8 +456,13 @@ EndDBCopyMode(Archive *AHX, const char *tocEntryTag)
 		PGresult   *res;
 
 		if (PQputCopyEnd(AH->connection, NULL) <= 0)
-			pg_fatal("error returned by PQputCopyEnd: %s",
-					 PQerrorMessage(AH->connection));
+		{
+			/* Stay quiet if this is a result of our own cancellation. */
+			if (!is_cancel_in_progress())
+				pg_log_error("error returned by PQputCopyEnd: %s",
+							 PQerrorMessage(AH->connection));
+			exit_nicely(1);
+		}
 
 		/* Check command status and return to normal libpq state */
 		res = PQgetResult(AH->connection);
diff --git a/src/bin/pg_dump/pg_backup_utils.c b/src/bin/pg_dump/pg_backup_utils.c
index 0368f7623a7..c529ec078fe 100644
--- a/src/bin/pg_dump/pg_backup_utils.c
+++ b/src/bin/pg_dump/pg_backup_utils.c
@@ -21,6 +21,30 @@
 /* Globals exported by this file */
 const char *progname = NULL;
 
+#ifdef WIN32
+
+/*
+ * Flag telling worker threads to stay quiet about query failures because we're
+ * cancelling their queries as part of tearing down the process.  See the
+ * comment in pg_backup_utils.h.  Accessed with interlocked primitives because
+ * the cancel thread writes it while worker threads read it.
+ */
+static volatile LONG cancelInProgress = 0;
+
+void
+set_cancel_in_progress(void)
+{
+	InterlockedExchange(&cancelInProgress, 1);
+}
+
+bool
+is_cancel_in_progress(void)
+{
+	return InterlockedCompareExchange(&cancelInProgress, 0, 0) != 0;
+}
+
+#endif							/* WIN32 */
+
 #define MAX_ON_EXIT_NICELY				20
 
 static struct
diff --git a/src/bin/pg_dump/pg_backup_utils.h b/src/bin/pg_dump/pg_backup_utils.h
index 9e98ed11619..779b10d8af5 100644
--- a/src/bin/pg_dump/pg_backup_utils.h
+++ b/src/bin/pg_dump/pg_backup_utils.h
@@ -31,6 +31,27 @@ extern void set_dump_section(const char *arg, int *dumpSections);
 extern void on_exit_nicely(on_exit_nicely_callback function, void *arg);
 pg_noreturn extern void exit_nicely(int code);
 
+/*
+ * On Windows the parallel workers are threads inside the leader process.  When
+ * a cancel is processed there, the leader sends cancels to the workers'
+ * in-flight queries; without this flag each worker would then report the
+ * resulting "canceling statement due to user request" error and clutter the
+ * screen in the brief window before the whole process exits.  The cancel
+ * thread sets this flag before sending any cancel, and worker threads check it
+ * before reporting a query failure.  It's written and read from different
+ * threads, so it's accessed with the Windows interlocked (atomic) primitives.
+ *
+ * On other platforms the workers are separate processes that just _exit() when
+ * cancelled, so they never reach the error-reporting code; there the check is
+ * compiled out to a constant false and the flag doesn't exist.
+ */
+#ifdef WIN32
+extern void set_cancel_in_progress(void);
+extern bool is_cancel_in_progress(void);
+#else
+#define is_cancel_in_progress() false
+#endif
+
 /* In pg_dump, we modify pg_fatal to call exit_nicely instead of exit */
 #undef pg_fatal
 #define pg_fatal(...) do { \
-- 
2.54.0

