From 6077acc68ac944d0fee83fe20b6abd85ebaab686 Mon Sep 17 00:00:00 2001
From: Mats Kindahl <mats@kindahl.net>
Date: Sat, 23 May 2026 16:09:44 +0200
Subject: pg_rewind: use UUIDs to detect independent same-TLI promotions

Two PostgreSQL standbys can independently promote to the same timeline
ID if their primary stopped before either had a chance to promote.  In
that situation both clusters share a timeline history prefix that looks
identical to pg_rewind: same TLI numbers and same begin/end LSNs.  The
existing same-TLI shortcut therefore treated the source as a valid
rewind target and skipped the rewind entirely, leaving the target's
diverged WAL intact.

Fix this by embedding a UUIDv7 value in every timeline history file
entry at promotion time.  Each promotion generates a fresh UUID, so two
independent promotions to the same TLI will carry different UUIDs even
though the TLI number and begin LSN are identical.

When loading the timeline history, pg_rewind uses these UUIDs in two
places:

1. findCommonAncestorTimeline checks that the TLI and UUID in each entry
   match.  A mismatch signals independent promotions and the search
   continues to earlier entries to find the true common ancestor.

2. The same-TLI shortcut (source and target on the same current TLI)
   compares the UUID stored in the last completed history entry and a
   mismatch forces a full rewind instead of a no-op.

UUIDs are zero for clusters that predate this change, and the comparison
function treats a zero UUID on either side as different from a UUID
since that promotion has to be from a different server (it had a
pre-change version server that was promoted, so it cannot be the same as
a post-change version server that was promoted).

Two new tests in t/005_same_timeline.pl cover both detection paths.

The first covers the same-TLI shortcut: two standbys independently
promote to TLI2 and TLI2', each with a distinct UUID.

The second covers the ancestor search: the target goes through TLI1 ->
TLI2 -> TLI3 while the source independently promoted so that it has a
timeline with TLI1 -> TLI2' -> TLI3'. The test ensures that
findCommonAncestorTimeline backs up to TLI1 as the true common ancestor
rather than accepting the numerically matching TLI2 entry.
---
 src/backend/access/transam/timeline.c    |  79 ++++-
 src/backend/access/transam/xlog.c        |  15 +
 src/backend/utils/adt/uuid.c             |  15 +-
 src/bin/pg_rewind/pg_rewind.c            | 112 +++++++-
 src/bin/pg_rewind/t/005_same_timeline.pl | 352 +++++++++++++++++++++++
 src/bin/pg_rewind/timeline.c             |  47 ++-
 src/include/access/timeline.h            |   5 +-
 src/include/access/xlog_internal.h       |   1 +
 src/include/utils/uuid.h                 |  10 +-
 9 files changed, 613 insertions(+), 23 deletions(-)

diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index 68e5f692d26..df161dcc0d5 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -42,6 +42,8 @@
 #include "pgstat.h"
 #include "storage/fd.h"
 #include "utils/wait_event.h"
+#include "utils/fmgrprotos.h"
+#include "utils/uuid.h"
 
 /*
  * Copies all timeline history files with id's between 'begin' and 'end'
@@ -110,8 +112,12 @@ readTimeLineHistory(TimeLineID targetTLI)
 			ereport(FATAL,
 					(errcode_for_file_access(),
 					 errmsg("could not open file \"%s\": %m", path)));
-		/* Not there, so assume no parents */
-		entry = palloc_object(TimeLineHistoryEntry);
+
+		/*
+		 * Not there, so assume no parents. We use palloc0_object to ensure
+		 * that tluuid is all-zero.
+		 */
+		entry = palloc0_object(TimeLineHistoryEntry);
 		entry->tli = targetTLI;
 		entry->begin = entry->end = InvalidXLogRecPtr;
 		return list_make1(entry);
@@ -125,6 +131,7 @@ readTimeLineHistory(TimeLineID targetTLI)
 	prevend = InvalidXLogRecPtr;
 	for (;;)
 	{
+		char		uuid_str[UUID_STR_LEN + 1] = {0};
 		char		fline[MAXPGPATH];
 		char	   *res;
 		char	   *ptr;
@@ -155,7 +162,8 @@ readTimeLineHistory(TimeLineID targetTLI)
 		if (*ptr == '\0' || *ptr == '#')
 			continue;
 
-		nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo);
+		nfields =
+			sscanf(fline, "%u\t%X/%08X\t%36s", &tli, &switchpoint_hi, &switchpoint_lo, uuid_str);
 
 		if (nfields < 1)
 		{
@@ -164,7 +172,7 @@ readTimeLineHistory(TimeLineID targetTLI)
 					(errmsg("syntax error in history file: %s", fline),
 					 errhint("Expected a numeric timeline ID.")));
 		}
-		if (nfields != 3)
+		if (nfields < 3)
 			ereport(FATAL,
 					(errmsg("syntax error in history file: %s", fline),
 					 errhint("Expected a write-ahead log switchpoint location.")));
@@ -176,12 +184,45 @@ readTimeLineHistory(TimeLineID targetTLI)
 
 		lasttli = tli;
 
-		entry = palloc_object(TimeLineHistoryEntry);
+		/*
+		 * We use palloc0_object to ensure that tluuid is all-zero, which is
+		 * important for pg_rewind to detect whether the history file is
+		 * missing or not.
+		 */
+		entry = palloc0_object(TimeLineHistoryEntry);
 		entry->tli = tli;
 		entry->begin = prevend;
 		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
 		prevend = entry->end;
 
+		/*
+		 * Parse the optional UUID field. Old history files have the reason
+		 * string in field 4. It is in theory possible that the reason string
+		 * starts with a UUID, but the current usage do not store a UUID. This
+		 * allows us to support both old and new formats of history files
+		 * without breaking compatibility by checking if the field contains a
+		 * valid UUID.
+		 */
+		if (nfields == 4 && strlen(uuid_str) == UUID_STR_LEN)
+		{
+			PG_TRY();
+			{
+				Datum		datum = DirectFunctionCall1(uuid_in, CStringGetDatum(uuid_str));
+
+				memcpy(&entry->tluuid, DatumGetUUIDP(datum), sizeof(pg_uuid_t));
+			}
+			PG_CATCH();
+			{
+				ErrorData  *edata = CopyErrorData();
+
+				FlushErrorState();
+				ereport(FATAL,
+						errmsg("invalid UUID in history file \"%s\"", path),
+						errdetail("%s", edata->message));
+			}
+			PG_END_TRY();
+		}
+
 		/* Build list with newest item first */
 		result = lcons(entry, result);
 
@@ -197,9 +238,11 @@ readTimeLineHistory(TimeLineID targetTLI)
 
 	/*
 	 * Create one more entry for the "tip" of the timeline, which has no entry
-	 * in the history file.
+	 * in the history file. We use palloc0_object to ensure that tluuid is
+	 * all-zero, which is important for pg_rewind to detect whether the
+	 * history file is missing or not.
 	 */
-	entry = palloc_object(TimeLineHistoryEntry);
+	entry = palloc0_object(TimeLineHistoryEntry);
 	entry->tli = targetTLI;
 	entry->begin = prevend;
 	entry->end = InvalidXLogRecPtr;
@@ -294,21 +337,33 @@ findNewestTimeLine(TimeLineID startTLI)
  *
  *	newTLI: ID of the new timeline
  *	parentTLI: ID of its immediate parent
+ *	newTLUUID: UUID uniquely identifying this promotion instance
  *	switchpoint: WAL location where the system switched to the new timeline
  *	reason: human-readable explanation of why the timeline was switched
  *
- * Currently this is only used at the end recovery, and so there are no locking
+ * The output file is named <newTLI>.history (e.g. 00000003.history).  If two
+ * servers independently promote to the same timeline ID, their history files
+ * share the same name. In a shared WAL archive the second file to arrive
+ * silently overwrites the first.  The newTLUUID written into the file content
+ * lets pg_rewind detect this collision: it fetches each server's history file
+ * directly from that server, compares the UUIDs for every shared TLI, and
+ * treats a UUID mismatch as evidence of independent promotion even when the
+ * TLI numbers agree.
+ *
+ * Currently this is only used at end of recovery, and so there are no locking
  * considerations.  But we should be just as tense as XLogFileInit to avoid
  * emplacing a bogus file.
  */
 void
 writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+					 const pg_uuid_t *newTLUUID,
 					 XLogRecPtr switchpoint, char *reason)
 {
 	char		path[MAXPGPATH];
 	char		tmppath[MAXPGPATH];
 	char		histfname[MAXFNAMELEN];
 	char		buffer[BLCKSZ];
+	char	   *uuid_str;
 	int			srcfd;
 	int			fd;
 	int			nbytes;
@@ -398,13 +453,19 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
 	 *
 	 * If we did have a parent file, insert an extra newline just in case the
 	 * parent file failed to end with one.
+	 *
+	 * Format: <parentTLI>\t<switchpoint>\t<ThisTimeLineUUID>\t<reason>\n
 	 */
+	uuid_str = DatumGetCString(DirectFunctionCall1(uuid_out, UUIDPGetDatum(newTLUUID)));
+
 	snprintf(buffer, sizeof(buffer),
-			 "%s%u\t%X/%08X\t%s\n",
+			 "%s%u\t%X/%08X\t%s\t%s\n",
 			 (srcfd < 0) ? "" : "\n",
 			 parentTLI,
 			 LSN_FORMAT_ARGS(switchpoint),
+			 uuid_str,
 			 reason);
+	pfree(uuid_str);
 
 	nbytes = strlen(buffer);
 	errno = 0;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e39af79c03b..f0cf9f7b435 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -99,6 +99,7 @@
 #include "storage/subsystems.h"
 #include "storage/sync.h"
 #include "utils/guc_hooks.h"
+#include "utils/uuid.h"
 #include "utils/guc_tables.h"
 #include "utils/injection_point.h"
 #include "utils/pgstat_internal.h"
@@ -6377,6 +6378,9 @@ StartupXLOG(void)
 	newTLI = endOfRecoveryInfo->lastRecTLI;
 	if (ArchiveRecoveryRequested)
 	{
+		struct timeval tv;
+		pg_uuid_t	uuid_buf;
+
 		newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
 		ereport(LOG,
 				(errmsg("selected new timeline ID: %u", newTLI)));
@@ -6407,8 +6411,19 @@ StartupXLOG(void)
 		 * to the new timeline, and will try to connect to the new timeline.
 		 * To minimize the window for that, try to do as little as possible
 		 * between here and writing the end-of-recovery record.
+		 *
+		 * Generate a UUIDv7 that uniquely identifies this promotion.  The
+		 * same UUID is written into the history file so that pg_rewind can
+		 * distinguish two servers that independently promoted to the same
+		 * timeline ID.  Use gettimeofday() since we are not on a hot path;
+		 * generate_uuidv7 wants milliseconds and we pass 0 for sub-ms since
+		 * the random bits already distinguish UUIDs generated within the same
+		 * millisecond.
 		 */
+		gettimeofday(&tv, NULL);
+		generate_uuidv7_r(&uuid_buf, tv.tv_sec * 1000 + tv.tv_usec / 1000, 0);
 		writeTimeLineHistory(newTLI, recoveryTargetTLI,
+							 &uuid_buf,
 							 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
 
 		ereport(LOG,
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index 6ee3752ac78..f1dc0196cd8 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -72,7 +72,7 @@ static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
 static inline void uuid_set_version(pg_uuid_t *uuid, unsigned char version);
 static inline int64 get_real_time_ns_ascending(void);
-static pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
+pg_uuid_t  *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
 
 Datum
 uuid_in(PG_FUNCTION_ARGS)
@@ -581,6 +581,14 @@ get_real_time_ns_ascending(void)
 	return ns;
 }
 
+pg_uuid_t *
+generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms)
+{
+	pg_uuid_t  *uuid = palloc(UUID_LEN);
+
+	return generate_uuidv7_r(uuid, unix_ts_ms, sub_ms);
+}
+
 /*
  * Generate UUID version 7 per RFC 9562, with the given timestamp.
  *
@@ -597,10 +605,9 @@ get_real_time_ns_ascending(void)
  *
  * NB: all numbers here are unsigned, unix_ts_ms cannot be negative per RFC.
  */
-static pg_uuid_t *
-generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms)
+pg_uuid_t *
+generate_uuidv7_r(pg_uuid_t *uuid, uint64 unix_ts_ms, uint32 sub_ms)
 {
-	pg_uuid_t  *uuid = palloc(UUID_LEN);
 	uint32		increased_clock_precision;
 
 	/* Fill in time part */
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 9d745d4b25b..ddb1a6b5001 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -32,6 +32,19 @@
 #include "rewind_source.h"
 #include "storage/bufpage.h"
 
+/*
+ * Timeline histories for both clusters, populated by timelines_match().
+ */
+typedef struct TimelineHistoriesData
+{
+	TimeLineHistoryEntry *source,
+			   *target;
+	int			sourceNentries,
+				targetNentries;
+}			TimelineHistoriesData;
+
+typedef TimelineHistoriesData * TimelineHistories;
+
 static void usage(const char *progname);
 
 static void perform_rewind(filemap_t *filemap, rewind_source *source,
@@ -53,6 +66,9 @@ static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history,
 									   TimeLineHistoryEntry *b_history,
 									   int b_nentries,
 									   XLogRecPtr *recptr, int *tliIndex);
+static inline bool matchingTimelineUUID(TimeLineHistoryEntry *a, TimeLineHistoryEntry *b);
+static bool matchAndFetchTimelines(TimeLineID source_tli, TimeLineID target_tli,
+								   TimelineHistories timelineHistories);
 static void ensureCleanShutdown(const char *argv0);
 static void disconnect_atexit(void);
 
@@ -141,6 +157,7 @@ main(int argc, char **argv)
 	int			c;
 	XLogRecPtr	divergerec;
 	int			lastcommontliIndex;
+	TimelineHistoriesData timelineHistories;
 	XLogRecPtr	chkptrec;
 	TimeLineID	chkpttli;
 	XLogRecPtr	chkptredo;
@@ -372,10 +389,20 @@ main(int argc, char **argv)
 	 *
 	 * If both clusters are already on the same timeline, there's nothing to
 	 * do.
+	 *
+	 * This also handles the case when two servers independently promoted to
+	 * the same timeline ID: one crashed after writing the history file but
+	 * before its EOR WAL record was distributed, so a second standby promoted
+	 * independently.  The history files produced by those two promotions
+	 * carry different UUIDs.
+	 *
+	 * When the clusters are on different timelines we locate the fork point
+	 * via findCommonAncestorTimeline.
 	 */
-	if (target_tli == source_tli)
+	if (matchAndFetchTimelines(source_tli, target_tli, &timelineHistories))
 	{
 		pg_log_info("source and target cluster are on the same timeline");
+		pfree(timelineHistories.source);
 		rewind_needed = false;
 		target_wal_endrec = InvalidXLogRecPtr;
 	}
@@ -389,8 +416,10 @@ main(int argc, char **argv)
 		 * Retrieve timelines for both source and target, and find the point
 		 * where they diverged.
 		 */
-		sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries);
-		targetHistory = getTimelineHistory(target_tli, false, &targetNentries);
+		targetHistory = timelineHistories.target;
+		targetNentries = timelineHistories.targetNentries;
+		sourceHistory = timelineHistories.source;
+		sourceNentries = timelineHistories.sourceNentries;
 
 		findCommonAncestorTimeline(sourceHistory, sourceNentries,
 								   targetHistory, targetNentries,
@@ -874,7 +903,7 @@ getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
 	 */
 	if (tli == 1)
 	{
-		history = pg_malloc_object(TimeLineHistoryEntry);
+		history = pg_malloc0_object(TimeLineHistoryEntry);
 		history->tli = tli;
 		history->begin = history->end = InvalidXLogRecPtr;
 		*nentries = 1;
@@ -920,6 +949,65 @@ getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
 	return history;
 }
 
+/*
+ * Return true if two per-entry promotion UUIDs are compatible.
+ *
+ * A zero UUID means the history file predates this fix (or the entry is
+ * synthetic).  If both sides are zero we have no UUID information and fall
+ * back to TLI-number-only matching (backward compatibility with old servers).
+ * If one side carries a UUID and the other does not, they cannot originate
+ * from the same promotion and are treated as incompatible.
+ */
+static inline bool
+matchingTimelineUUID(TimeLineHistoryEntry *a, TimeLineHistoryEntry *b)
+{
+	static const pg_uuid_t zero = {{0}};
+
+	if (memcmp(&a->tluuid, &zero, UUID_LEN) == 0 && memcmp(&b->tluuid, &zero, UUID_LEN) == 0)
+		return true;
+	return memcmp(&a->tluuid, &b->tluuid, UUID_LEN) == 0;
+}
+
+/*
+ * Fetch the timeline history for both clusters, store them in tlh, and return
+ * true if the clusters are on the same timeline (no rewind needed).
+ *
+ * tlh is always fully populated on return regardless of the result, so the
+ * caller can pass tlh->source / tlh->target directly to
+ * findCommonAncestorTimeline() when the return value is false.
+ *
+ * TLI 1 always returns true: it is the original timeline and has no promotion
+ * UUID.  For TLI >= 2, the UUID in entry[Nentries - 2] identifies the
+ * promotion that created the current TLI.  Both-zero UUIDs (old history files)
+ * are treated as compatible; zero-vs-nonzero is treated as a mismatch because
+ * one side carries a promotion UUID and they cannot be the same promotion.
+ */
+static bool
+matchAndFetchTimelines(TimeLineID source_tli, TimeLineID target_tli, TimelineHistories tlh)
+{
+	static const pg_uuid_t zero = {0};
+	pg_uuid_t  *a,
+			   *b;
+
+	tlh->source = getTimelineHistory(source_tli, true, &tlh->sourceNentries);
+	tlh->target = getTimelineHistory(target_tli, false, &tlh->targetNentries);
+
+	if (source_tli != target_tli)
+		return false;
+
+	/* TLI 1 has no promotion UUID; always treat as the same timeline. */
+	if (tlh->sourceNentries < 2 || tlh->targetNentries < 2)
+		return true;
+
+	a = &tlh->source[tlh->sourceNentries - 2].tluuid;
+	b = &tlh->target[tlh->targetNentries - 2].tluuid;
+
+	if (memcmp(a, &zero, UUID_LEN) == 0 && memcmp(b, &zero, UUID_LEN) == 0)
+		return true;
+
+	return memcmp(a, b, UUID_LEN) == 0;
+}
+
 /*
  * Determine the TLI of the last common timeline in the timeline history of
  * two clusters. *tliIndex is set to the index of last common timeline in
@@ -941,12 +1029,26 @@ findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries,
 	 * depending on the history files that each node has fetched in previous
 	 * recovery processes. Hence check the start position of the new timeline
 	 * as well and move down by one extra timeline entry if they do not match.
+	 *
+	 * We also compare timeline UUIDs when both sides carry one.  Two servers
+	 * that independently promoted to the same timeline ID produce history
+	 * files with the same name (e.g. 00000003.history); in a shared WAL
+	 * archive the second file silently overwrites the first.  pg_rewind
+	 * fetches each server's history file directly from that server, so it
+	 * sees both UUIDs.
+	 *
+	 * The timeline UUID stored in history entry[i] is the UUID of the
+	 * promotion that created entry[i+1], i.e. the UUID of TLI entry[i+1].tli.
+	 * So to check whether entry[i] itself represents the same timeline on
+	 * both sides we look at entry[i-1].tluuid (for i > 0).  TLI 1 (i == 0) is
+	 * always the same: it is the original timeline and has no promotion UUID.
 	 */
 	n = Min(a_nentries, b_nentries);
 	for (i = 0; i < n; i++)
 	{
 		if (a_history[i].tli != b_history[i].tli ||
-			a_history[i].begin != b_history[i].begin)
+			a_history[i].begin != b_history[i].begin ||
+			(i > 0 && !matchingTimelineUUID(&a_history[i - 1], &b_history[i - 1])))
 			break;
 	}
 
diff --git a/src/bin/pg_rewind/t/005_same_timeline.pl b/src/bin/pg_rewind/t/005_same_timeline.pl
index 95a40c3b270..7d2dadfd171 100644
--- a/src/bin/pg_rewind/t/005_same_timeline.pl
+++ b/src/bin/pg_rewind/t/005_same_timeline.pl
@@ -7,6 +7,8 @@
 #
 use strict;
 use warnings FATAL => 'all';
+use File::Copy;
+use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
 use Test::More;
 
@@ -21,4 +23,354 @@ RewindTest::create_standby();
 RewindTest::run_pg_rewind('local');
 RewindTest::clean_rewind_test();
 
+# Helper function to run pg_rewind in local mode with the given source and
+# target nodes and extra arguments.
+#
+# The target and source nodes are stopped before the call and the target is
+# restarted afterward.  The target's postgresql.conf is copied to a temporary
+# location and passed to pg_rewind with --config-file, so that pg_rewind can
+# update the target's config file in place without worrying about file
+# permissions.  The temporary config file is moved back to the target's data
+# directory and permissions fixed after pg_rewind finishes.
+sub rewind_node
+{
+	my ($target, $source, $label, @extra_args) = @_;
+	$source->stop;
+	$target->stop;
+
+	my $tpgdata = $target->data_dir;
+	my $tmp = PostgreSQL::Test::Utils::tempdir;
+	copy("$tpgdata/postgresql.conf", "$tmp/target-postgresql.conf.tmp");
+
+	command_ok(
+		[
+			'pg_rewind',
+			'--debug',
+			'--source-pgdata' => $source->data_dir,
+			'--target-pgdata' => $target->data_dir,
+			'--no-sync',
+			'--config-file' => "$tmp/target-postgresql.conf.tmp",
+			@extra_args,
+		],
+		$label);
+
+	move("$tmp/target-postgresql.conf.tmp", "$tpgdata/postgresql.conf");
+	chmod($target->group_access() ? 0640 : 0600, "$tpgdata/postgresql.conf")
+	  or BAIL_OUT("unable to set permissions for $tpgdata/postgresql.conf");
+
+	$target->start;
+}
+
+# Rewrite a node's TLI history file in the old 3-field format (no UUID), so
+# that pg_rewind sees a zero UUID for that side, as if the node had been
+# promoted by a server that predates the UUID feature.
+sub strip_tli_uuid
+{
+	my ($node, $tli) = @_;
+	my $histfile = sprintf("%s/pg_wal/%08X.history", $node->data_dir, $tli);
+	open(my $fh, '<', $histfile) or die "cannot open $histfile: $!";
+	my @lines = <$fh>;
+	close $fh;
+	open($fh, '>', $histfile) or die "cannot write $histfile: $!";
+	for my $line (@lines)
+	{
+		chomp $line;
+		my @f = split(/\t/, $line, 4);
+		if (@f == 4)
+		{
+			# Drop the UUID field (index 2); keep parentTLI, switchpoint, reason.
+			print $fh join("\t", $f[0], $f[1], $f[3]) . "\n";
+		}
+		else
+		{
+			print $fh "$line\n";
+		}
+	}
+	close $fh;
+}
+
+# Helper function to create an origin node with a test table and a row containing
+# the given label.  The node is started and ready for use as a source for
+# standbys.
+sub setup_origin
+{
+	my ($label) = @_;
+	my $node = PostgreSQL::Test::Cluster->new($label);
+	$node->init(allows_streaming => 1);
+	$node->append_conf('postgresql.conf', "wal_keep_size = 320MB\n");
+	$node->start;
+	$node->safe_psql('postgres', "CREATE TABLE tbl (val text)");
+	$node->safe_psql('postgres', "INSERT INTO tbl VALUES ('$label')");
+	$node->safe_psql('postgres', 'CHECKPOINT');
+	return $node;
+}
+
+# Helper function to create multiple standby nodes from the same origin node.
+# Each standby gets its own backup and data directory, so that they will
+# generate independent UUIDs on promotion even though they share the same
+# timeline history up to the point of promotion.
+sub setup_standbys_from_origin
+{
+	my ($origin, @names) = @_;
+	my @standbys;
+	for my $name (@names)
+	{
+		my $standby = PostgreSQL::Test::Cluster->new($name);
+		$origin->backup($standby->name);
+		$standby->init_from_backup($origin, $standby->name,
+			has_streaming => 1);
+		$standby->append_conf('postgresql.conf', "wal_keep_size = 320MB\n");
+		$standby->set_standby_mode();
+		$standby->start;
+		push @standbys, $standby;
+	}
+	return @standbys;
+}
+
+# Helper function to wait for multiple standby nodes to catch up to the origin.
+sub sync_standbys_with_origin
+{
+	my ($origin, @standbys) = @_;
+	$origin->wait_for_catchup($_) for @standbys;
+}
+
+# Helper function to insert a row with the given label into a node's test table.
+sub write_record
+{
+	my ($node, $label) = @_;
+	$node->safe_psql('postgres', "INSERT INTO tbl VALUES ('$label')");
+	$node->safe_psql('postgres', 'CHECKPOINT');
+}
+
+# Test that pg_rewind detects and handles two standbys that independently
+# promoted to the same timeline ID.  Before the UUID-based divergence check,
+# pg_rewind's same-TLI shortcut would incorrectly skip the rewind in this
+# case, leaving the target's diverged WAL intact.
+#
+#   origin (TLI 1)
+#       |
+#       +--- node_a (TLI 1) --promote--> TLI 2, UUID-A  (target)
+#       |
+#       +--- node_b (TLI 1) --promote--> TLI 2, UUID-B  (source)
+#
+# pg_rewind must detect the UUID mismatch and rewind node_a to match node_b.
+
+my $node_origin = setup_origin('origin');
+
+# Create node_a and node_b from separate backups of origin so that each
+# has its own data directory and will generate an independent UUID on promotion.
+my ($node_a, $node_b) =
+  setup_standbys_from_origin($node_origin, 'node_a', 'node_b');
+
+# Wait for both standbys to catch up to origin, then stop origin.  After
+# this point the two standbys are isolated and will promote independently.
+sync_standbys_with_origin($node_origin, $node_a, $node_b);
+$node_origin->stop;
+
+# Promote both standbys.  Each lands on TLI 2 but generates a distinct UUID,
+# so the resulting clusters are diverged even though they share a timeline ID.
+$node_a->promote;
+$node_b->promote;
+
+# Insert a divergent row on each so the rewind has visible work to do.
+write_record($node_a, 'in A');
+write_record($node_b, 'in B');
+
+rewind_node($node_a, $node_b,
+	'pg_rewind detects independent same-TLI promotions');
+
+my $result =
+  $node_a->safe_psql('postgres', "SELECT val FROM tbl ORDER BY val");
+is($result, "in B\norigin",
+	'rewound node has source data, not its own divergent data');
+
+$node_a->teardown_node;
+$node_b->teardown_node;
+$node_origin->teardown_node;
+
+# Test that pg_rewind correctly rewinds across a TLI mismatch buried in a shared
+# prefix of the timeline history.  The target has gone through three timelines
+# (TLI 1 -> TLI 2 -> TLI 3) while the source independently promoted from TLI 1
+# to what is numerically TLI 2 but with a different UUID (TLI 2').  The deepest
+# common ancestor is therefore TLI 1, and pg_rewind must rewind the target all
+# the way back to the end of TLI 1.
+#
+#   origin (TLI 1) --+-- node_x --promote--> TLI 2 -- node_a --promote--> TLI 3
+#                    |                                  (target: TLI 1->TLI 2->TLI 3)
+#                    +-- node_b --promote--> TLI 2'
+#                                            (source: TLI 1->TLI 2')
+#
+# findCommonAncestorTimeline walks forward: TLI 1 entries match (UUID=0 on
+# both sides), then TLI 2 vs TLI 2' match on tli and begin but differ on
+# UUID, signalling independent promotions.  The algorithm therefore backs up
+# to TLI 1 as the common ancestor and sets the divergence point to the end
+# of TLI 1.
+
+my $node_origin2 = setup_origin('origin2');
+
+# node_x and node_b both start from the same TLI 1 baseline.
+my ($node_x, $node_b2) =
+  setup_standbys_from_origin($node_origin2, 'node_x', 'node_b2');
+
+# Both standbys must be caught up to the same LSN before origin stops, so
+# that TLI 2 and TLI 2' both begin at the same WAL position.
+sync_standbys_with_origin($node_origin2, $node_x, $node_b2);
+$node_origin2->stop;
+
+# Promote node_x to TLI 2 (UUID-X) and insert a row.  node_b2 is still on
+# TLI 1 and has not yet seen any TLI 2 WAL.
+$node_x->promote;
+write_record($node_x, 'x');
+
+# Build node_a2 as a standby of node_x, then promote it to TLI 3.
+my ($node_a2) = setup_standbys_from_origin($node_x, 'node_a2');
+
+sync_standbys_with_origin($node_x, $node_a2);
+$node_x->stop;
+
+$node_a2->promote;
+
+# Now promote node_b2 independently from TLI 1 to TLI 2' (UUID-B, != UUID-X).
+$node_b2->promote;
+write_record($node_b2, 'b');
+
+# Rewind node_a2 (TLI 1->TLI 2->TLI 3) from node_b2 (TLI 1->TLI 2') in
+# local mode.  The rewind must reach back to the end of TLI 1.
+#
+# node_a2 was initialised from a streaming backup of node_x taken after
+# node_x had already completed segment 4 of TLI 2; that segment therefore
+# does not appear in node_a2's pg_wal.  pg_rewind's backward scan for the
+# last checkpoint before the divergence point needs that segment, so we
+# point restore_command at node_x's pg_wal and use --restore-target-wal.
+#
+# Note: no row is inserted on TLI 3.  This is intentional: the only
+# post-divergence table modification in the target's WAL is the 'x' INSERT
+# on TLI 2.  On unpatched code the WAL scan would start from the TLI 2
+# shutdown checkpoint (just before TLI 3), miss that earlier insert, and
+# leave 'x' in place instead of replacing it with 'b'.
+my $node_x_waldir = $node_x->data_dir . "/pg_wal";
+$node_a2->append_conf('postgresql.conf',
+	"restore_command = 'cp \"$node_x_waldir/%f\" \"%p\"'\n");
+
+rewind_node($node_a2, $node_b2,
+	'pg_rewind rewinds across mismatched TLI 2 / TLI 2-prime to TLI 1',
+	'--restore-target-wal');
+my $result2 =
+  $node_a2->safe_psql('postgres', "SELECT val FROM tbl ORDER BY val");
+is($result2, "b\norigin2",
+	'rewound node reflects source history, not target TLI 2/TLI 3 data');
+
+$node_a2->teardown_node;
+$node_b2->teardown_node;
+$node_x->teardown_node;
+$node_origin2->teardown_node;
+
+# Test that pg_rewind correctly detects a mismatch when one cluster's TLI 2
+# history entry carries a zero UUID (old-format history file) while the other
+# carries a real UUID.  The two clusters must have promoted independently, so
+# pg_rewind must rewind to TLI 1 rather than accepting the same-TLI shortcut.
+#
+# Run both orientations:
+#   (a) target has zero UUID, source has real UUID
+#   (b) target has real UUID, source has zero UUID
+#
+# In both cases the setup is:
+#
+#   origin (TLI 1) --+-- node_p --promote--> TLI 2, UUID-P  (target)
+#                    |
+#                    +-- node_q --promote--> TLI 2, UUID-Q  (source)
+#
+# One side then has its history file rewritten to the old 3-field format so
+# that its UUID reads as zero.  pg_rewind must treat zero-vs-nonzero as
+# incompatible (they cannot be the same promotion) and rewind to TLI 1.
+
+for my $strip_target (1, 0)
+{
+	my $side = $strip_target ? 'target' : 'source';
+	my $sfx = $strip_target ? 'zt' : 'zs';
+	my $label =
+	  "pg_rewind rewinds when target has zero UUID and $side has real UUID";
+
+	my $node_origin3 = setup_origin("origin3_$sfx");
+	my ($node_p, $node_q) =
+	  setup_standbys_from_origin($node_origin3, "node_p_$sfx", "node_q_$sfx");
+
+	sync_standbys_with_origin($node_origin3, $node_p, $node_q);
+	$node_origin3->stop;
+
+	$node_p->promote;
+	$node_q->promote;
+
+	write_record($node_p, 'in P');
+	write_record($node_q, 'in Q');
+
+	# Strip UUID from the chosen side to simulate a pre-UUID server.
+	strip_tli_uuid($strip_target ? $node_p : $node_q, 2);
+
+	rewind_node($node_p, $node_q, $label);
+	my $result3 =
+	  $node_p->safe_psql('postgres', "SELECT val FROM tbl ORDER BY val");
+	is( $result3,
+		"in Q\norigin3_$sfx",
+		'rewound node has source data, not its own divergent row');
+
+	$node_p->teardown_node;
+	$node_q->teardown_node;
+	$node_origin3->teardown_node;
+}
+
+# Test that pg_rewind detects independent promotions to TLI 3 when both
+# clusters share a common TLI 1 -> TLI 2 history (same UUID) but independently
+# promoted from TLI 2 to TLI 3, producing different TLI 3 UUIDs.
+#
+#   origin (TLI 1) --- node_mid --promote--> TLI 2, UUID-M
+#                                                  |
+#                                                  +-- node_c --promote--> TLI 3, UUID-C  (target)
+#                                                  |
+#                                                  +-- node_d --promote--> TLI 3', UUID-D  (source)
+#
+# The same-TLI shortcut compares entry[Nentries-2].tluuid on each side; that
+# is the UUID of the TLI 3 promotion, which differs.  The full rewind path
+# then walks the history forward: TLI 1 matches (same tli/begin/UUID-M at
+# entry[0]), TLI 2 also matches (same tli/begin; UUID-M is the same on both
+# sides at entry[0]), but TLI 3 vs TLI 3' differ at entry[1] (UUID-C != UUID-D),
+# so the divergence point is set to the end of TLI 2.
+
+my $node_origin4 = setup_origin('origin4');
+my ($node_mid) = setup_standbys_from_origin($node_origin4, 'node_mid');
+
+$node_origin4->wait_for_catchup($node_mid);
+$node_origin4->stop;
+
+# Promote node_mid to TLI 2 and insert a row that both TLI 3 nodes will share.
+$node_mid->promote;
+write_record($node_mid, 'mid');
+
+# node_c and node_d both start as standbys of node_mid so they share the same
+# TLI 2 promotion UUID (UUID-M).
+my ($node_c, $node_d) =
+  setup_standbys_from_origin($node_mid, 'node_c', 'node_d');
+sync_standbys_with_origin($node_mid, $node_c, $node_d);
+$node_mid->stop;
+
+# Promote both independently; each generates a distinct TLI 3 UUID.
+$node_c->promote;
+$node_d->promote;
+
+write_record($node_c, 'c');
+write_record($node_d, 'd');
+
+rewind_node($node_c, $node_d,
+	'pg_rewind detects independent TLI 3 / TLI 3-prime promotions sharing TLI 2'
+);
+my $result4 =
+  $node_c->safe_psql('postgres', "SELECT val FROM tbl ORDER BY val");
+is($result4, "d\nmid\norigin4",
+	'rewound node has source TLI 3-prime data, not its own TLI 3 data');
+
+$node_c->teardown_node;
+$node_d->teardown_node;
+$node_mid->teardown_node;
+$node_origin4->teardown_node;
+
 done_testing();
diff --git a/src/bin/pg_rewind/timeline.c b/src/bin/pg_rewind/timeline.c
index dda06eaa0bc..b6500606b27 100644
--- a/src/bin/pg_rewind/timeline.c
+++ b/src/bin/pg_rewind/timeline.c
@@ -9,9 +9,40 @@
  */
 #include "postgres_fe.h"
 
+#include <ctype.h>
+#include <string.h>
+
 #include "access/timeline.h"
 #include "pg_rewind.h"
 
+/*
+ * Parse a UUID string in standard dashed form into a pg_uuid_t.
+ * Returns true on success, false if str is not a valid UUID string.
+ */
+static bool
+rewind_parse_uuid(const char *str, pg_uuid_t *uuid)
+{
+	const char *src = str;
+
+	for (int i = 0; i < UUID_LEN; i++)
+	{
+		char		buf[3];
+
+		if (!isxdigit((unsigned char) src[0]) ||
+			!isxdigit((unsigned char) src[1]))
+			return false;
+		buf[0] = src[0];
+		buf[1] = src[1];
+		buf[2] = '\0';
+		uuid->data[i] = (unsigned char) strtoul(buf, NULL, 16);
+		src += 2;
+		/* skip dash at positions after bytes 3, 5, 7, 9 (i == 3,5,7,9) */
+		if (src[0] == '-' && (i == 3 || i == 5 || i == 7 || i == 9))
+			src++;
+	}
+	return (*src == '\0');
+}
+
 /*
  * This is copy-pasted from the backend readTimeLineHistory, modified to
  * return a malloc'd array and to work without backend functions.
@@ -48,6 +79,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
 		uint32		switchpoint_hi;
 		uint32		switchpoint_lo;
 		int			nfields;
+		char		uuid_str[UUID_STR_LEN + 1] = {0};
 
 		fline = bufptr;
 		while (*bufptr && *bufptr != '\n')
@@ -66,7 +98,8 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
 		if (*ptr == '\0' || *ptr == '#')
 			continue;
 
-		nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo);
+		nfields = sscanf(fline, "%u\t%X/%08X\t%36s", &tli, &switchpoint_hi,
+						 &switchpoint_lo, uuid_str);
 
 		if (nfields < 1)
 		{
@@ -75,7 +108,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
 			pg_log_error_detail("Expected a numeric timeline ID.");
 			exit(1);
 		}
-		if (nfields != 3)
+		if (nfields < 3)
 		{
 			pg_log_error("syntax error in history file: %s", fline);
 			pg_log_error_detail("Expected a write-ahead log switchpoint location.");
@@ -99,7 +132,14 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
 		entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo;
 		prevend = entry->end;
 
-		/* we ignore the remainder of each line */
+		/*
+		 * Parse the optional UUID field.  Old history files have the reason
+		 * string in field 4; its first word is much shorter than UUID_STR_LEN
+		 * so the length check safely distinguishes old from new format.
+		 */
+		memset(&entry->tluuid, 0, sizeof(pg_uuid_t));
+		if (nfields == 4 && strlen(uuid_str) == UUID_STR_LEN)
+			rewind_parse_uuid(uuid_str, &entry->tluuid);
 	}
 
 	if (entries && targetTLI <= lasttli)
@@ -123,6 +163,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
 	entry->tli = targetTLI;
 	entry->begin = prevend;
 	entry->end = InvalidXLogRecPtr;
+	memset(&entry->tluuid, 0, sizeof(pg_uuid_t));
 
 	*nentries = nlines;
 	return entries;
diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h
index 97f1d619c35..cdd642c94f0 100644
--- a/src/include/access/timeline.h
+++ b/src/include/access/timeline.h
@@ -13,6 +13,7 @@
 
 #include "access/xlogdefs.h"
 #include "nodes/pg_list.h"
+#include "utils/uuid.h"
 
 /*
  * A list of these structs describes the timeline history of the server. Each
@@ -22,9 +23,10 @@
  * pointers of all the entries form a contiguous line from beginning of time
  * to infinity.
  */
-typedef struct
+typedef struct TimeLineHistoryEntry
 {
 	TimeLineID	tli;
+	pg_uuid_t	tluuid;			/* from history file; zero if unknown */
 	XLogRecPtr	begin;			/* inclusive */
 	XLogRecPtr	end;			/* exclusive, InvalidXLogRecPtr means infinity */
 } TimeLineHistoryEntry;
@@ -33,6 +35,7 @@ extern List *readTimeLineHistory(TimeLineID targetTLI);
 extern bool existsTimeLineHistory(TimeLineID probeTLI);
 extern TimeLineID findNewestTimeLine(TimeLineID startTLI);
 extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+								 const pg_uuid_t *newTLUUID,
 								 XLogRecPtr switchpoint, char *reason);
 extern void writeTimeLineHistoryFile(TimeLineID tli, char *content, int size);
 extern void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 13ae3ad4fbb..8d5e374dfad 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -22,6 +22,7 @@
 #include "access/xlogdefs.h"
 #include "access/xlogreader.h"
 #include "datatype/timestamp.h"
+#include "utils/uuid.h"
 #include "lib/stringinfo.h"
 #include "pgtime.h"
 #include "storage/block.h"
diff --git a/src/include/utils/uuid.h b/src/include/utils/uuid.h
index 572d8cf4c36..6839de2e0b2 100644
--- a/src/include/utils/uuid.h
+++ b/src/include/utils/uuid.h
@@ -17,12 +17,16 @@
 /* uuid size in bytes */
 #define UUID_LEN 16
 
+/* length of a UUID string (without null terminator): xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx */
+#define UUID_STR_LEN 36
+
 typedef struct pg_uuid_t
 {
 	unsigned char data[UUID_LEN];
 } pg_uuid_t;
 
-/* fmgr interface macros */
+/* fmgr interface macros (backend only) */
+#ifndef FRONTEND
 static inline Datum
 UUIDPGetDatum(const pg_uuid_t *X)
 {
@@ -38,5 +42,9 @@ DatumGetUUIDP(Datum X)
 }
 
 #define PG_GETARG_UUID_P(X)		DatumGetUUIDP(PG_GETARG_DATUM(X))
+#endif							/* !FRONTEND */
+
+extern pg_uuid_t *generate_uuidv7(uint64 unix_ts_ms, uint32 sub_ms);
+extern pg_uuid_t *generate_uuidv7_r(pg_uuid_t *uuid, uint64 unix_ts_ms, uint32 sub_ms);
 
 #endif							/* UUID_H */
-- 
2.43.0

