diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 6cb288f099..2a407da1e4 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -309,9 +309,11 @@ sync_target_dir(void)
  * buffer is actually *filesize + 1. That's handy when reading a text file.
  * This function can be used to read binary files as well, you can just
  * ignore the zero-terminator in that case.
+ *
+ * If noerror is true, returns NULL when the file is not found.
  */
 char *
-slurpFile(const char *datadir, const char *path, size_t *filesize)
+slurpFile(const char *datadir, const char *path, size_t *filesize, bool noerror)
 {
 	int			fd;
 	char	   *buffer;
@@ -323,8 +325,13 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
 	snprintf(fullpath, sizeof(fullpath), "%s/%s", datadir, path);
 
 	if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) == -1)
+	{
+		if (noerror && errno == ENOENT)
+			return NULL;
+
 		pg_fatal("could not open file \"%s\" for reading: %m",
 				 fullpath);
+	}
 
 	if (fstat(fd, &statbuf) < 0)
 		pg_fatal("could not open file \"%s\" for reading: %m",
diff --git a/src/bin/pg_rewind/file_ops.h b/src/bin/pg_rewind/file_ops.h
index 54a853bd42..92e19042cb 100644
--- a/src/bin/pg_rewind/file_ops.h
+++ b/src/bin/pg_rewind/file_ops.h
@@ -21,7 +21,8 @@ extern void create_target(file_entry_t *t);
 extern void remove_target(file_entry_t *t);
 extern void sync_target_dir(void);
 
-extern char *slurpFile(const char *datadir, const char *path, size_t *filesize);
+extern char *slurpFile(const char *datadir, const char *path, size_t *filesize,
+					   bool noerror);
 
 typedef void (*process_file_callback_t) (const char *path, file_type_t type, size_t size, const char *link_target);
 extern void traverse_datadir(const char *datadir, process_file_callback_t callback);
diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c
index 011c9cce6e..92067d4f2c 100644
--- a/src/bin/pg_rewind/libpq_source.c
+++ b/src/bin/pg_rewind/libpq_source.c
@@ -68,7 +68,7 @@ static void libpq_queue_fetch_range(rewind_source *source, const char *path,
 									off_t off, size_t len);
 static void libpq_finish_fetch(rewind_source *source);
 static char *libpq_fetch_file(rewind_source *source, const char *path,
-							  size_t *filesize);
+							  size_t *filesize, bool noerror);
 static XLogRecPtr libpq_get_current_wal_insert_lsn(rewind_source *source);
 static void libpq_destroy(rewind_source *source);
 
@@ -620,9 +620,12 @@ appendArrayEscapedString(StringInfo buf, const char *str)
 
 /*
  * Fetch a single file as a malloc'd buffer.
+ *
+ * If noerror is true, returns NULL when the file does not exist on the source.
  */
 static char *
-libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize)
+libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize,
+				 bool noerror)
 {
 	PGconn	   *conn = ((libpq_source *) source)->conn;
 	PGresult   *res;
@@ -631,6 +634,37 @@ libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize)
 	const char *paramValues[1];
 
 	paramValues[0] = path;
+
+	/*
+	 * Check the existence of the file. We do this separately from
+	 * pg_read_binary_file so that the server doesn't emit an error.
+	 */
+	if (noerror)
+	{
+		res = PQexecParams(conn, "SELECT pg_stat_file($1, true)",
+						   1, NULL, paramValues, NULL, NULL, 1);
+
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+		{
+			pg_fatal("could not stat remote file \"%s\": %s",
+					 path, PQresultErrorMessage(res));
+		}
+
+		/* sanity check the result set */
+		if (PQntuples(res) != 1)
+			pg_fatal("unexpected result set while stating remote file \"%s\"",
+					 path);
+
+		/* Return NULL if the file was not found; release the result first */
+		if (PQgetisnull(res, 0, 0))
+		{
+			PQclear(res);
+			return NULL;
+		}
+
+		PQclear(res);
+	}
+
 	res = PQexecParams(conn, "SELECT pg_read_binary_file($1)",
 					   1, NULL, paramValues, NULL, NULL, 1);
 
diff --git a/src/bin/pg_rewind/local_source.c b/src/bin/pg_rewind/local_source.c
index 2e50485c39..fc2e1e9f11 100644
--- a/src/bin/pg_rewind/local_source.c
+++ b/src/bin/pg_rewind/local_source.c
@@ -28,7 +28,7 @@ typedef struct
 static void local_traverse_files(rewind_source *source,
 								 process_file_callback_t callback);
 static char *local_fetch_file(rewind_source *source, const char *path,
-							  size_t *filesize);
+							  size_t *filesize, bool noerror);
 static void local_queue_fetch_file(rewind_source *source, const char *path,
 								   size_t len);
 static void local_queue_fetch_range(rewind_source *source, const char *path,
@@ -63,9 +63,11 @@ local_traverse_files(rewind_source *source, process_file_callback_t callback)
 }
 
 static char *
-local_fetch_file(rewind_source *source, const char *path, size_t *filesize)
+local_fetch_file(rewind_source *source, const char *path, size_t *filesize,
+				 bool noerror)
 {
-	return slurpFile(((local_source *) source)->datadir, path, filesize);
+	return slurpFile(((local_source *) source)->datadir, path, filesize,
+					 noerror);
 }
 
 /*
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 1ff8da1676..f9c7853f08 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -43,6 +43,8 @@ static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
 
 static void digestControlFile(ControlFileData *ControlFile,
 							  const char *content, size_t size);
+static TimeLineHistoryEntry *getTimelineHistory(ControlFileData *controlFile,
+												int *nentries);
 static void getRestoreCommand(const char *argv0);
 static void sanityChecks(void);
 static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
@@ -141,6 +143,7 @@ main(int argc, char **argv)
 	bool		rewind_needed;
 	bool		writerecoveryconf = false;
 	filemap_t  *filemap;
+	TimeLineID	source_tli;
 
 	pg_logging_init(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
@@ -311,7 +314,7 @@ main(int argc, char **argv)
 	 * need to make sure by themselves that the target cluster is in a clean
 	 * state.
 	 */
-	buffer = slurpFile(datadir_target, "global/pg_control", &size);
+	buffer = slurpFile(datadir_target, "global/pg_control", &size, false);
 	digestControlFile(&ControlFile_target, buffer, size);
 	pg_free(buffer);
 
@@ -321,25 +324,48 @@ main(int argc, char **argv)
 	{
 		ensureCleanShutdown(argv[0]);
 
-		buffer = slurpFile(datadir_target, "global/pg_control", &size);
+		buffer = slurpFile(datadir_target, "global/pg_control", &size, false);
 		digestControlFile(&ControlFile_target, buffer, size);
 		pg_free(buffer);
 	}
 
-	buffer = source->fetch_file(source, "global/pg_control", &size);
+	buffer = source->fetch_file(source, "global/pg_control", &size, false);
 	digestControlFile(&ControlFile_source, buffer, size);
 	pg_free(buffer);
 
 	sanityChecks();
 
+	/*
+	 * There may be a case where the source has been promoted but the
+	 * end-of-recovery checkpoint has not completed. In that case the source
+	 * control file still carries the old timeline ID. Look into the timeline
+	 * history files, which are already up to date at that point.
+	 */
+	source_tli = ControlFile_source.checkPointCopy.ThisTimeLineID;
+	if (ControlFile_target.checkPointCopy.ThisTimeLineID == source_tli)
+	{
+		int			nentries;
+		TimeLineHistoryEntry *hist;
+
+		hist = getTimelineHistory(&ControlFile_source, &nentries);
+
+		/* last line of history file is the newest timeline */
+		if (nentries > 0 && hist[nentries - 1].tli > source_tli)
+		{
+			pg_log_info("source's actual timeline ID (%u) is newer than control file (%u)",
+						hist[nentries - 1].tli, source_tli);
+			source_tli = hist[nentries - 1].tli;
+		}
+		pg_free(hist);
+	}
+
 	/*
 	 * Find the common ancestor timeline between the clusters.
 	 *
 	 * If both clusters are already on the same timeline, there's nothing to
 	 * do.
 	 */
-	if (ControlFile_target.checkPointCopy.ThisTimeLineID ==
-		ControlFile_source.checkPointCopy.ThisTimeLineID)
+	if (ControlFile_target.checkPointCopy.ThisTimeLineID == source_tli)
 	{
 		pg_log_info("source and target cluster are on the same timeline");
 		rewind_needed = false;
@@ -581,7 +607,7 @@ perform_rewind(filemap_t *filemap, rewind_source *source,
 	 * Fetch the control file from the source last. This ensures that the
 	 * minRecoveryPoint is up-to-date.
 	 */
-	buffer = source->fetch_file(source, "global/pg_control", &size);
+	buffer = source->fetch_file(source, "global/pg_control", &size, false);
 	digestControlFile(&ControlFile_source_after, buffer, size);
 	pg_free(buffer);
 
@@ -630,6 +656,10 @@ perform_rewind(filemap_t *filemap, rewind_source *source,
 	 */
 	if (connstr_source)
 	{
+		int			nentries;
+		TimeLineHistoryEntry *hist;
+		int			i;
+
 		/*
 		 * The source is a live server. Like in an online backup, it's
 		 * important that we recover all the WAL that was generated while we
@@ -655,6 +685,30 @@ perform_rewind(filemap_t *filemap, rewind_source *source,
 
 			endrec = source->get_current_wal_insert_lsn(source);
 			endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
+
+			/*
+			 * Find the timeline ID corresponding to endrec on the source.
+			 *
+			 * In most cases we can rely on the control file, but that is not
+			 * the case after promotion until the end-of-recovery checkpoint
+			 * completes. Identify the timeline ID the hard way since we don't
+			 * have an easier way to detect that case. If no history entry
+			 * covers endrec, fall back to the control file's value.
+			 */
+			hist = getTimelineHistory(&ControlFile_source, &nentries);
+			if (hist[nentries - 1].tli != endtli)
+			{
+				for (i = 0; i < nentries; i++)
+				{
+					if ((hist[i].begin == 0 || hist[i].begin <= endrec) &&
+						(hist[i].end == 0 || endrec < hist[i].end))
+					{
+						endtli = hist[i].tli;
+						break;
+					}
+				}
+			}
+			pg_free(hist);
 		}
 	}
 	else
@@ -804,9 +858,38 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries)
 {
 	TimeLineHistoryEntry *history;
 	TimeLineID	tli;
+	TimeLineID	probe_tli;
 
 	tli = controlFile->checkPointCopy.ThisTimeLineID;
 
+	/*
+	 * The control file may not reflect a recent promotion yet, so probe for
+	 * history files of later timelines and adopt the newest one that exists.
+	 */
+	Assert(tli > 0);
+	for (probe_tli = tli + 1;; probe_tli++)
+	{
+		char		path[MAXPGPATH];
+		char	   *histfile;
+
+		TLHistoryFilePath(path, probe_tli);
+
+		/* Get history file from appropriate source */
+		if (controlFile == &ControlFile_source)
+			histfile = source->fetch_file(source, path, NULL, true);
+		else if (controlFile == &ControlFile_target)
+			histfile = slurpFile(datadir_target, path, NULL, true);
+		else
+			pg_fatal("invalid control file");
+
+		if (!histfile)
+			break;
+
+		pg_free(histfile);
+	}
+
+	tli = probe_tli - 1;
+
 	/*
 	 * Timeline 1 does not have a history file, so there is no need to check
 	 * and fake an entry with infinite start and end positions.
@@ -827,9 +910,9 @@ getTimelineHistory(ControlFileData *controlFile, int *nentries)
 
 		/* Get history file from appropriate source */
 		if (controlFile == &ControlFile_source)
-			histfile = source->fetch_file(source, path, NULL);
+			histfile = source->fetch_file(source, path, NULL, false);
 		else if (controlFile == &ControlFile_target)
-			histfile = slurpFile(datadir_target, path, NULL);
+			histfile = slurpFile(datadir_target, path, NULL, false);
 		else
 			pg_fatal("invalid control file");
 
diff --git a/src/bin/pg_rewind/rewind_source.h b/src/bin/pg_rewind/rewind_source.h
index 1310e86e75..6975848668 100644
--- a/src/bin/pg_rewind/rewind_source.h
+++ b/src/bin/pg_rewind/rewind_source.h
@@ -35,7 +35,7 @@ typedef struct rewind_source
 	 * handy for text files.
 	 */
 	char	   *(*fetch_file) (struct rewind_source *, const char *path,
-							   size_t *filesize);
+							   size_t *filesize, bool noerror);
 
 	/*
 	 * Request to fetch (part of) a file in the source system, specified by an