From b28034bae725549cceacb92b7ed631072d4a5665 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 21 Mar 2026 15:15:16 -0400
Subject: [PATCH v4 3/3] Fix init_archive_reader() to not depend on cur_file.

Relying on privateInfo->cur_file is a mistake: it can only work
if read_archive_file stops at a point where some WAL segment has
been partially read.  That might not happen, notably if we reach
the end of the archive before satisfying the loop.  This appears
to explain not-very-reproducible "could not find WAL in archive"
failures we're seeing in the buildfarm.

Instead, after calling read_archive_file, scan the archive_wal_htab
to see if there is any cached WAL segment that has enough data.

While at it, fix a minor thinko: we don't have to insist on having
collected XLOG_BLCKSZ worth of WAL from that first WAL segment.
It's enough if we have the first page header.
---
 src/bin/pg_waldump/archive_waldump.c | 20 +++++++++++++++-----
 src/bin/pg_waldump/pg_waldump.h      |  9 ++++++++-
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_waldump/archive_waldump.c b/src/bin/pg_waldump/archive_waldump.c
index 16e5cd58e41..20ca4547ba7 100644
--- a/src/bin/pg_waldump/archive_waldump.c
+++ b/src/bin/pg_waldump/archive_waldump.c
@@ -173,17 +173,27 @@ init_archive_reader(XLogDumpPrivate *privateInfo,
 	privateInfo->archive_wal_htab = ArchivedWAL_create(8, NULL);
 
 	/*
-	 * Read until we have at least one full WAL page (XLOG_BLCKSZ bytes) from
-	 * the first WAL segment in the archive so we can extract the WAL segment
-	 * size from the long page header.
+	 * Read the archive until we've found at least one WAL segment and
+	 * obtained enough bytes from it to let us extract the WAL segment size
+	 * from the long page header.
 	 */
-	while (entry == NULL || entry->buf->len < XLOG_BLCKSZ)
+	while (entry == NULL)
 	{
+		ArchivedWAL_iterator iter;
+
+		/* Read more data, fail if there is no more. */
 		if (!read_archive_file(privateInfo, XLOG_BLCKSZ))
 			pg_fatal("could not find WAL in archive \"%s\"",
 					 privateInfo->archive_name);
 
-		entry = privateInfo->cur_file;
+		/* Search the hash table for a WAL segment with enough data. */
+		ArchivedWAL_start_iterate(privateInfo->archive_wal_htab, &iter);
+		while ((entry = ArchivedWAL_iterate(privateInfo->archive_wal_htab,
+											&iter)) != NULL)
+		{
+			if (entry->read_len >= sizeof(XLogLongPageHeaderData))
+				break;
+		}
 	}
 
 	/* Extract the WAL segment size from the long page header */
diff --git a/src/bin/pg_waldump/pg_waldump.h b/src/bin/pg_waldump/pg_waldump.h
index cde7c6ca3f2..ca0dfd97168 100644
--- a/src/bin/pg_waldump/pg_waldump.h
+++ b/src/bin/pg_waldump/pg_waldump.h
@@ -44,7 +44,14 @@ typedef struct XLogDumpPrivate
 	Size		archive_read_buf_size;
 #endif
 
-	/* What the archive streamer is currently reading */
+	/*
+	 * The buffer for the WAL file the archive streamer is currently reading,
+	 * or NULL if none.  It is quite risky to examine this anywhere except in
+	 * astreamer_waldump_content(), since it can change multiple times during
+	 * a single read_archive_file() call.  However, it is safe to assume that
+	 * if cur_file is different from a particular ArchivedWALFile of interest,
+	 * then the archive streamer has finished reading that file.
+	 */
 	struct ArchivedWALFile *cur_file;
 
 	/*
-- 
2.43.7

