From b33a856c68e1c48c8663a760f79b2b3d3bf0088c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 18 Mar 2020 16:24:59 +0100 Subject: [PATCH] Drop slot's LWLock before returning from SaveSlotToPath() When SaveSlotToPath() is called with elevel=LOG, the early exits didn't release the slot's io_in_progress_lock. This could result in a walsender being stuck on the lock forever. A possible way to get into this situation is if the offending code paths are triggered in a low disk space situation. Author: Pavan Deolasee Reported-by: Craig Ringer --- src/backend/replication/slot.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 1cec53d748..1bff2d6185 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1256,6 +1256,13 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY); if (fd < 0) { + /* + * If not an ERROR, then release the lock before returning the control + * back to the caller. In case of an ERROR, the error recovery path + * should automatically release the lock, but no harm in explicitly + * releasing even in that case. + */ + LWLockRelease(&slot->io_in_progress_lock); ereport(elevel, (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", @@ -1287,6 +1294,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) pgstat_report_wait_end(); CloseTransientFile(fd); + LWLockRelease(&slot->io_in_progress_lock); /* if write didn't set errno, assume problem is no disk space */ errno = save_errno ? 
save_errno : ENOSPC; @@ -1306,6 +1314,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) pgstat_report_wait_end(); CloseTransientFile(fd); + LWLockRelease(&slot->io_in_progress_lock); errno = save_errno; ereport(elevel, (errcode_for_file_access(), @@ -1317,6 +1326,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) if (CloseTransientFile(fd) != 0) { + LWLockRelease(&slot->io_in_progress_lock); ereport(elevel, (errcode_for_file_access(), errmsg("could not close file \"%s\": %m", @@ -1327,6 +1337,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) /* rename to permanent file, fsync file and directory */ if (rename(tmppath, path) != 0) { + LWLockRelease(&slot->io_in_progress_lock); ereport(elevel, (errcode_for_file_access(), errmsg("could not rename file \"%s\" to \"%s\": %m", -- 2.25.0