From 762580bf48f2d41f3ec20359d31b80014e8bf8b3 Mon Sep 17 00:00:00 2001 From: ChangAo Chen Date: Thu, 11 Jun 2026 10:34:01 +0800 Subject: [PATCH v2] pg_rewind: use flush lsn to set minRecoveryPoint. Currently pg_rewind uses pg_current_wal_insert_lsn() to set minRecoveryPoint. The minRecoveryPoint is expected to be a lsn points to a xlog record end lsn + 1, but the return value of pg_current_wal_insert_lsn() is the current wal insert lsn. They are different when at a xlog page boundary, e.g., when the former points to the begin of a xlog page, the latter will skip the xlog page header. This can lead to a standby cannot reach consistent when the primary is idle. To fix it, use pg_current_wal_flush_lsn() instead. It's safe because we query flush lsn at the end of rewind, so all copied pages should have lsn <= flush lsn. --- src/bin/pg_rewind/libpq_source.c | 12 ++++++------ src/bin/pg_rewind/local_source.c | 2 +- src/bin/pg_rewind/pg_rewind.c | 4 ++-- src/bin/pg_rewind/rewind_source.h | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c index 6955bc575ea..97435427803 100644 --- a/src/bin/pg_rewind/libpq_source.c +++ b/src/bin/pg_rewind/libpq_source.c @@ -68,7 +68,7 @@ static void libpq_queue_fetch_range(rewind_source *source, const char *path, static void libpq_finish_fetch(rewind_source *source); static char *libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize); -static XLogRecPtr libpq_get_current_wal_insert_lsn(rewind_source *source); +static XLogRecPtr libpq_get_current_wal_flush_lsn(rewind_source *source); static void libpq_destroy(rewind_source *source); /* @@ -91,7 +91,7 @@ init_libpq_source(PGconn *conn) src->common.queue_fetch_file = libpq_queue_fetch_file; src->common.queue_fetch_range = libpq_queue_fetch_range; src->common.finish_fetch = libpq_finish_fetch; - src->common.get_current_wal_insert_lsn = libpq_get_current_wal_insert_lsn; + src->common.get_current_wal_flush_lsn = libpq_get_current_wal_flush_lsn; src->common.destroy = libpq_destroy; src->conn = conn; @@ -202,10 +202,10 @@ run_simple_command(PGconn *conn, const char *sql) } /* - * Call the pg_current_wal_insert_lsn() function in the remote system. + * Call the pg_current_wal_flush_lsn() function in the remote system. */ static XLogRecPtr -libpq_get_current_wal_insert_lsn(rewind_source *source) +libpq_get_current_wal_flush_lsn(rewind_source *source) { PGconn *conn = ((libpq_source *) source)->conn; XLogRecPtr result; @@ -213,10 +213,10 @@ libpq_get_current_wal_insert_lsn(rewind_source *source) uint32 lo; char *val; - val = run_simple_query(conn, "SELECT pg_current_wal_insert_lsn()"); + val = run_simple_query(conn, "SELECT pg_current_wal_flush_lsn()"); if (sscanf(val, "%X/%08X", &hi, &lo) != 2) - pg_fatal("unrecognized result \"%s\" for current WAL insert location", val); + pg_fatal("unrecognized result \"%s\" for current WAL flush location", val); result = ((uint64) hi) << 32 | lo; diff --git a/src/bin/pg_rewind/local_source.c b/src/bin/pg_rewind/local_source.c index 4841cf01fb7..029f13a7396 100644 --- a/src/bin/pg_rewind/local_source.c +++ b/src/bin/pg_rewind/local_source.c @@ -46,7 +46,7 @@ init_local_source(const char *datadir) src->common.queue_fetch_file = local_queue_fetch_file; src->common.queue_fetch_range = local_queue_fetch_range; src->common.finish_fetch = local_finish_fetch; - src->common.get_current_wal_insert_lsn = NULL; + src->common.get_current_wal_flush_lsn = NULL; src->common.destroy = local_destroy; src->datadir = datadir; diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 9d745d4b25b..4221a47a3a1 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -712,12 +712,12 @@ perform_rewind(filemap_t *filemap, rewind_source *source, { /* * Source is a production, non-standby, server. We must replay to - * the last WAL insert location. + * the last WAL flush location. */ if (ControlFile_source_after.state != DB_IN_PRODUCTION) pg_fatal("source system was in unexpected state at end of rewind"); - endrec = source->get_current_wal_insert_lsn(source); + endrec = source->get_current_wal_flush_lsn(source); endtli = Max(ControlFile_source_after.checkPointCopy.ThisTimeLineID, ControlFile_source_after.minRecoveryPointTLI); } diff --git a/src/bin/pg_rewind/rewind_source.h b/src/bin/pg_rewind/rewind_source.h index f2e9b84a732..093c2ca90d0 100644 --- a/src/bin/pg_rewind/rewind_source.h +++ b/src/bin/pg_rewind/rewind_source.h @@ -66,9 +66,9 @@ typedef struct rewind_source void (*finish_fetch) (struct rewind_source *); /* - * Get the current WAL insert position in the source system. + * Get the current WAL flush position in the source system. */ - XLogRecPtr (*get_current_wal_insert_lsn) (struct rewind_source *); + XLogRecPtr (*get_current_wal_flush_lsn) (struct rewind_source *); /* * Free this rewind_source object. -- 2.34.1