diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 5e66961..7409471 100644 *** a/src/backend/commands/dbcommands.c --- b/src/backend/commands/dbcommands.c *************** static bool have_createdb_privilege(void *** 89,94 **** --- 89,98 ---- static void remove_dbtablespaces(Oid db_id); static bool check_db_file_conflict(Oid db_id); static int errdetail_busy_db(int notherbackends, int npreparedxacts); + static void copydir_set_lsn(char *fromdir, char *todir, bool recurse, + XLogRecPtr recptr); + static void copy_file_set_lsn(char *fromfile, char *tofile, + XLogRecPtr recptr); /* *************** createdb(const CreatedbStmt *stmt) *** 586,591 **** --- 590,596 ---- Oid dsttablespace; char *srcpath; char *dstpath; + XLogRecPtr recptr; struct stat st; /* No need to copy global tablespace */ *************** createdb(const CreatedbStmt *stmt) *** 609,621 **** dstpath = GetDatabasePath(dboid, dsttablespace); - /* - * Copy this subdirectory to the new location - * - * We don't need to copy subdirectories - */ - copydir(srcpath, dstpath, false); - /* Record the filesystem change in XLOG */ { xl_dbase_create_rec xlrec; --- 614,619 ---- *************** createdb(const CreatedbStmt *stmt) *** 628,636 **** XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); ! (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } } heap_endscan(scan); heap_close(rel, AccessShareLock); --- 626,641 ---- XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); ! 
recptr = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } + + /* + * Copy this subdirectory to the new location + * + * We don't need to copy subdirectories + */ + copydir_set_lsn(srcpath, dstpath, false, recptr); } heap_endscan(scan); heap_close(rel, AccessShareLock); *************** movedb(const char *dbname, const char *t *** 1214,1223 **** PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback, PointerGetDatum(&fparms)); { ! /* ! * Copy files from the old tablespace to the new one ! */ ! copydir(src_dbpath, dst_dbpath, false); /* * Record the filesystem change in XLOG --- 1219,1225 ---- PG_ENSURE_ERROR_CLEANUP(movedb_failure_callback, PointerGetDatum(&fparms)); { ! XLogRecPtr recptr; /* * Record the filesystem change in XLOG *************** movedb(const char *dbname, const char *t *** 1233,1243 **** XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); ! (void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } /* * Update the database's pg_database tuple */ ScanKeyInit(&scankey, --- 1235,1250 ---- XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec)); ! recptr = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE); } /* + * Copy files from the old tablespace to the new one + */ + copydir_set_lsn(src_dbpath, dst_dbpath, false, recptr); + + /* * Update the database's pg_database tuple */ ScanKeyInit(&scankey, *************** dbase_redo(XLogReaderState *record) *** 2045,2050 **** --- 2052,2058 ---- if (info == XLOG_DBASE_CREATE) { xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record); + XLogRecPtr lsn = record->EndRecPtr; char *src_path; char *dst_path; struct stat st; *************** dbase_redo(XLogReaderState *record) *** 2077,2083 **** * * We don't need to copy subdirectories */ ! copydir(src_path, dst_path, false); } else if (info == XLOG_DBASE_DROP) { --- 2085,2091 ---- * * We don't need to copy subdirectories */ ! 
copydir_set_lsn(src_path, dst_path, false, lsn); } else if (info == XLOG_DBASE_DROP) { *************** dbase_redo(XLogReaderState *record) *** 2128,2130 **** --- 2136,2377 ---- else elog(PANIC, "dbase_redo: unknown op code %u", info); } + + /* + * copydir_set_lsn: copy a directory, optionally stamping page LSNs + * + * If recurse is false, subdirectories are ignored. Anything that's not + * a directory or a regular file is ignored. + * + * If recptr is different from InvalidXLogRecPtr, valid pages of main-fork + * relation files copied to the destination get their LSN set to recptr. + */ + void + copydir_set_lsn(char *fromdir, char *todir, bool recurse, XLogRecPtr recptr) + { + DIR *xldir; + struct dirent *xlde; + char fromfile[MAXPGPATH]; + char tofile[MAXPGPATH]; + + if (mkdir(todir, S_IRWXU) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", todir))); + + xldir = AllocateDir(fromdir); + if (xldir == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", fromdir))); + + while ((xlde = ReadDir(xldir, fromdir)) != NULL) + { + struct stat fst; + + /* If we got a cancel signal during the copy of the directory, quit */ + CHECK_FOR_INTERRUPTS(); + + if (strcmp(xlde->d_name, ".") == 0 || + strcmp(xlde->d_name, "..") == 0) + continue; + + snprintf(fromfile, MAXPGPATH, "%s/%s", fromdir, xlde->d_name); + snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name); + + if (lstat(fromfile, &fst) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", fromfile))); + + if (S_ISDIR(fst.st_mode)) + { + /* descend into subdirectories only when the caller asked for it */ + if (recurse) + copydir_set_lsn(fromfile, tofile, true, recptr); + } + else if (S_ISREG(fst.st_mode)) + copy_file_set_lsn(fromfile, tofile, recptr); + } + FreeDir(xldir); + + /* + * Be paranoid here and fsync all files to ensure the copy is really done. + * But if fsync is disabled (enableFsync is false), we're done. 
+ */ + if (!enableFsync) + return; + + xldir = AllocateDir(todir); + if (xldir == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open directory \"%s\": %m", todir))); + + while ((xlde = ReadDir(xldir, todir)) != NULL) + { + struct stat fst; + + if (strcmp(xlde->d_name, ".") == 0 || + strcmp(xlde->d_name, "..") == 0) + continue; + + snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name); + + /* + * We don't need to sync subdirectories here since the recursive + * copydir_set_lsn call does it before it returns + */ + if (lstat(tofile, &fst) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", tofile))); + + if (S_ISREG(fst.st_mode)) + fsync_fname(tofile, false); + } + FreeDir(xldir); + + /* + * It's important to fsync the destination directory itself as individual + * file fsyncs don't guarantee that the directory entry for the file is + * synced. Recent versions of ext4 have made the window much wider but + * it's been true for ext3 and other filesystems in the past. + */ + fsync_fname(todir, true); + } + + /* + * copy_file_set_lsn: copy one file, optionally stamping page LSNs + * + * If recptr is different from InvalidXLogRecPtr and fromfile is a main-fork + * relation file, each valid page is written to tofile with its LSN set to recptr + */ + void + copy_file_set_lsn(char *fromfile, char *tofile, XLogRecPtr recptr) + { + char *buffer; + int srcfd; + int dstfd; + int nbytes; + off_t offset; + BlockNumber blkno = 0; /* number of the page being processed; passed to PageIsVerified and PageSetChecksumInplace */ + + /* Use palloc to ensure we get a maxaligned buffer */ + #define COPY_BUF_SIZE (8 * BLCKSZ) + + buffer = palloc(COPY_BUF_SIZE); + + /* + * To support incremental backups, we need to update the LSN in + * all relation files we are copying. 
+ * + * We are updating only the MAIN fork because at the moment + * blocks in FSM and VM forks are not guaranteed to have an + * up-to-date LSN. For any other file recptr is reset, so it is copied as-is. + */ + if (recptr != InvalidXLogRecPtr) + { + char *filename = last_dir_separator(fromfile); + ForkNumber fork; + int oidchars; + uint32 segno; + + if (filename && + *(filename + 1) && + parse_filename_for_nontemp_relation(filename + 1, + &oidchars, &fork, &segno) && fork == MAIN_FORKNUM) + blkno = segno * RELSEG_SIZE; /* presumably the first block number of this segment (RELSEG_SIZE blocks per segment) */ + else + recptr = InvalidXLogRecPtr; + } + + /* + * Open the source read-only and create the destination (O_EXCL: it must not exist yet) + */ + srcfd = OpenTransientFile(fromfile, O_RDONLY | PG_BINARY, 0); + if (srcfd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", fromfile))); + + dstfd = OpenTransientFile(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + S_IRUSR | S_IWUSR); + if (dstfd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", tofile))); + + /* + * Do the data copying, at most COPY_BUF_SIZE bytes per iteration. + */ + for (offset = 0;; offset += nbytes) + { + /* If we got a cancel signal during the copy of the file, quit */ + CHECK_FOR_INTERRUPTS(); + + nbytes = read(srcfd, buffer, COPY_BUF_SIZE); + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", fromfile))); + if (nbytes == 0) + break; + + /* + * If recptr is still valid here (main-fork relation file), the pages in + * the buffer get their LSN set to recptr before being written out + */ + if (recptr != InvalidXLogRecPtr) + { + char *page; + + /* + * If we are updating LSN of a file, we must be sure that the + * source file is not being extended. NOTE(review): a short read() not aligned to BLCKSZ also raises this error. 
+ */ + if (nbytes % BLCKSZ != 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("file \"%s\" size is not multiple of %d", + fromfile, BLCKSZ))); + + for (page = buffer; page < (buffer + nbytes); page += BLCKSZ, blkno++) + { + /* Update LSN (and then recompute the checksum) only if the page looks valid */ + if (!PageIsNew(page) && PageIsVerified(page, blkno)) + { + PageSetLSN(page, recptr); + PageSetChecksumInplace(page, blkno); + } + } + } + + errno = 0; + if ((int) write(dstfd, buffer, nbytes) != nbytes) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tofile))); + } + + /* + * We fsync the files later but first flush them to avoid spamming the + * cache and hopefully get the kernel to start writing them out before + * the fsync comes. Ignore any error, since it's only a hint. + */ + (void) pg_flush_data(dstfd, offset, nbytes); + } + + if (CloseTransientFile(dstfd)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", tofile))); + + CloseTransientFile(srcfd); /* NOTE(review): return value ignored here, unlike for dstfd above */ + + pfree(buffer); + } diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 107d70c..8f85752 100644 *** a/src/backend/replication/basebackup.c --- b/src/backend/replication/basebackup.c *************** sendDir(char *path, int basepathlen, boo *** 1219,1225 **** is_relfile = ( has_relfiles && parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forknum) && forknum == MAIN_FORKNUM); if (!is_relfile --- 1219,1226 ---- is_relfile = ( has_relfiles && parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forknum, ! 
NULL) && forknum == MAIN_FORKNUM); if (!is_relfile diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 02b5fee..2f7dca6 100644 *** a/src/backend/storage/file/reinit.c --- b/src/backend/storage/file/reinit.c *************** ResetUnloggedRelationsInDbspaceDir(const *** 190,196 **** /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum)) continue; /* Also skip it unless this is the init fork. */ --- 190,196 ---- /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum, NULL)) continue; /* Also skip it unless this is the init fork. */ *************** ResetUnloggedRelationsInDbspaceDir(const *** 243,249 **** /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum)) continue; /* We never remove the init fork. */ --- 243,249 ---- /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum, NULL)) continue; /* We never remove the init fork. */ *************** ResetUnloggedRelationsInDbspaceDir(const *** 313,319 **** /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum)) continue; /* Also skip it unless this is the init fork. */ --- 313,319 ---- /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum, NULL)) continue; /* Also skip it unless this is the init fork. */ *************** ResetUnloggedRelationsInDbspaceDir(const *** 364,370 **** /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum)) continue; /* Also skip it unless this is the init fork. 
*/ --- 364,370 ---- /* Skip anything that doesn't look like a relation data file. */ if (!parse_filename_for_nontemp_relation(de->d_name, &oidchars, ! &forkNum, NULL)) continue; /* Also skip it unless this is the init fork. */ diff --git a/src/common/relpath.c b/src/common/relpath.c index 83a1e3a..63972bd 100644 *** a/src/common/relpath.c --- b/src/common/relpath.c *************** GetRelationPath(Oid dbNode, Oid spcNode, *** 213,218 **** --- 213,222 ---- * This function returns true if the file appears to be in the correct format * for a non-temporary relation and false otherwise. * + * The segno parameter can be safely set to NULL. + * It should be of BlockNumber* type, but it is declared as uint32 + * to avoid depending on storage/block.h + * * NB: If this function returns true, the caller is entitled to assume that * *oidchars has been set to the a value no more than OIDCHARS, and thus * that a buffer of OIDCHARS+1 characters is sufficient to hold the OID *************** GetRelationPath(Oid dbNode, Oid spcNode, *** 221,227 **** */ bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ! ForkNumber *fork) { int pos; --- 225,231 ---- */ bool parse_filename_for_nontemp_relation(const char *name, int *oidchars, ! ForkNumber *fork, uint32 *segno) { int pos; *************** parse_filename_for_nontemp_relation(cons *** 246,257 **** } /* Check for a segment number. */ if (name[pos] == '.') { int segchar; for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) ! ; if (segchar <= 1) return false; pos += segchar; --- 250,264 ---- } /* Check for a segment number. */ + if (segno) + *segno = 0; if (name[pos] == '.') { int segchar; for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) ! if (segno) ! 
*segno = *segno * 10 + name[pos + segchar] - '0'; if (segchar <= 1) return false; pos += segchar; diff --git a/src/include/common/relpath.h b/src/include/common/relpath.h index 9736a78..9dd492f 100644 *** a/src/include/common/relpath.h --- b/src/include/common/relpath.h *************** extern char *GetDatabasePath(Oid dbNode, *** 53,59 **** extern char *GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, int backendId, ForkNumber forkNumber); extern bool parse_filename_for_nontemp_relation(const char *name, ! int *oidchars, ForkNumber *fork); /* * Wrapper macros for GetRelationPath. Beware of multiple --- 53,60 ---- extern char *GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, int backendId, ForkNumber forkNumber); extern bool parse_filename_for_nontemp_relation(const char *name, ! int *oidchars, ForkNumber *fork, ! uint32 *seqno); /* * Wrapper macros for GetRelationPath. Beware of multiple -- 2.3.0