From fc72be32668f0c37b899fd922d6986f39d8a6a2a Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 27 Sep 2022 21:12:03 +1300 Subject: [PATCH] Restore pg_pread and friends. Commit cf112c12 was a little too hasty in getting rid of the pg_ prefixes where we use pread(), pwrite() and vectored variants. We dropped support for ancient Unixes that needed to use lseek() to implement replacements for those, but it turned out that Windows also changes the current position even when you pass in an offset to ReadFile() and WriteFile(), if the file handle is synchronous. Switching to asynchronous file handles would fix that, but have other complications. For now let's just put back the pg_ prefix and add some comments to highlight the non-standard side-effect. Reported-by: Bharath Rupireddy Discussion: https://postgr.es/m/20220923202439.GA1156054%40nathanxps13 --- .../pg_stat_statements/pg_stat_statements.c | 4 +- src/backend/access/heap/rewriteheap.c | 2 +- src/backend/access/transam/slru.c | 4 +- src/backend/access/transam/xlog.c | 4 +- src/backend/access/transam/xlogreader.c | 2 +- src/backend/access/transam/xlogrecovery.c | 2 +- src/backend/backup/basebackup.c | 2 +- src/backend/replication/walreceiver.c | 2 +- src/backend/storage/file/fd.c | 8 +-- src/backend/utils/init/miscinit.c | 2 +- src/bin/pg_test_fsync/pg_test_fsync.c | 50 +++++++++---------- src/include/access/xlogreader.h | 4 +- src/include/port.h | 9 ++++ src/include/port/pg_iovec.h | 13 ++++- src/include/port/win32_port.h | 4 +- src/port/preadv.c | 4 +- src/port/pwritev.c | 4 +- src/port/win32pread.c | 2 +- src/port/win32pwrite.c | 2 +- 19 files changed, 71 insertions(+), 53 deletions(-) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index ba868f0de9..73439c0199 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2103,9 +2103,9 @@ qtext_store(const char *query, int query_len, if (fd < 0) goto error; - if (pwrite(fd, query, query_len, off) != query_len) + if (pg_pwrite(fd, query, query_len, off) != query_len) goto error; - if (pwrite(fd, "\0", 1, off + query_len) != 1) + if (pg_pwrite(fd, "\0", 1, off + query_len) != 1) goto error; CloseTransientFile(fd); diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 2f08fbe8d3..b01b39b008 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -1150,7 +1150,7 @@ heap_xlog_logical_rewrite(XLogReaderState *r) /* write out tail end of mapping file (again) */ errno = 0; pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE); - if (pwrite(fd, data, len, xlrec->offset) != len) + if (pg_pwrite(fd, data, len, xlrec->offset) != len) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index c9a7b97949..b65cb49d7f 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -718,7 +718,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) errno = 0; pgstat_report_wait_start(WAIT_EVENT_SLRU_READ); - if (pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ) + if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ) { pgstat_report_wait_end(); slru_errcause = SLRU_READ_FAILED; @@ -873,7 +873,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) errno = 0; pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE); - if (pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ) + if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ) { pgstat_report_wait_end(); /* if write didn't set errno, assume problem is no disk space */ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1dd6df0fe1..7f9f350531 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2196,7 +2196,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) INSTR_TIME_SET_CURRENT(start); pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); - written = pwrite(openLogFile, from, nleft, startoffset); + written = pg_pwrite(openLogFile, from, nleft, startoffset); pgstat_report_wait_end(); /* @@ -3018,7 +3018,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, * enough. */ errno = 0; - if (pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1) + if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1) { /* if write didn't set errno, assume no disk space */ save_errno = errno ? errno : ENOSPC; diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 4d6c34e0fc..5a8fe81f82 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1544,7 +1544,7 @@ WALRead(XLogReaderState *state, /* Reset errno first; eases reporting non-errno-affecting errors */ errno = 0; - readbytes = pread(state->seg.ws_file, p, segbytes, (off_t) startoff); + readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff); #ifndef FRONTEND pgstat_report_wait_end(); diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index b41e682664..cb07694aea 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -3271,7 +3271,7 @@ retry: readOff = targetPageOff; pgstat_report_wait_start(WAIT_EVENT_WAL_READ); - r = pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); + r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); if (r != XLOG_BLCKSZ) { char fname[MAXFNAMELEN]; diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index 411cac9be3..e252ad7421 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -1828,7 +1828,7 @@ basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset, int rc; pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ); - rc = pread(fd, buf, nbytes, offset); + rc = pg_pread(fd, buf, nbytes, offset); pgstat_report_wait_end(); if (rc < 0) diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index f6ef0ace2c..3767466ef3 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -915,7 +915,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli) /* OK to write the logs */ errno = 0; - byteswritten = pwrite(recvFile, buf, segbytes, (off_t) startoff); + byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff); if (byteswritten <= 0) { char xlogfname[MAXFNAMELEN]; diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 073dab2be5..e4d954578c 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -2053,7 +2053,7 @@ FileRead(File file, char *buffer, int amount, off_t offset, retry: pgstat_report_wait_start(wait_event_info); - returnCode = pread(vfdP->fd, buffer, amount, offset); + returnCode = pg_pread(vfdP->fd, buffer, amount, offset); pgstat_report_wait_end(); if (returnCode < 0) @@ -2135,7 +2135,7 @@ FileWrite(File file, char *buffer, int amount, off_t offset, retry: errno = 0; pgstat_report_wait_start(wait_event_info); - returnCode = pwrite(VfdCache[file].fd, buffer, amount, offset); + returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset); pgstat_report_wait_end(); /* if write didn't set errno, assume problem is no disk space */ @@ -3740,7 +3740,7 @@ data_sync_elevel(int elevel) } /* - * A convenience wrapper for pwritev() that retries on partial write. If an + * A convenience wrapper for pg_pwritev() that retries on partial write. If an * error is returned, it is unspecified how much has been written. */ ssize_t @@ -3760,7 +3760,7 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) for (;;) { /* Write as much as we can. */ - part = pwritev(fd, iov, iovcnt, offset); + part = pg_pwritev(fd, iov, iovcnt, offset); if (part < 0) return -1; diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 683f616b1a..4d341d3f7f 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -1527,7 +1527,7 @@ AddToDataDirLockFile(int target_line, const char *str) len = strlen(destbuffer); errno = 0; pgstat_report_wait_start(WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE); - if (pwrite(fd, destbuffer, len, 0) != len) + if (pg_pwrite(fd, destbuffer, len, 0) != len) { pgstat_report_wait_end(); /* if write didn't set errno, assume problem is no disk space */ diff --git a/src/bin/pg_test_fsync/pg_test_fsync.c b/src/bin/pg_test_fsync/pg_test_fsync.c index 585b3ef731..5f8cbb75ff 100644 --- a/src/bin/pg_test_fsync/pg_test_fsync.c +++ b/src/bin/pg_test_fsync/pg_test_fsync.c @@ -312,10 +312,10 @@ test_sync(int writes_per_op) for (ops = 0; alarm_triggered == false; ops++) { for (writes = 0; writes < writes_per_op; writes++) - if (pwrite(tmpfile, - buf, - XLOG_BLCKSZ, - writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) die("write failed"); } STOP_TIMER; @@ -337,10 +337,10 @@ test_sync(int writes_per_op) for (ops = 0; alarm_triggered == false; ops++) { for (writes = 0; writes < writes_per_op; writes++) - if (pwrite(tmpfile, - buf, - XLOG_BLCKSZ, - writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) die("write failed"); fdatasync(tmpfile); } @@ -359,10 +359,10 @@ test_sync(int writes_per_op) for (ops = 0; alarm_triggered == false; ops++) { for (writes = 0; writes < writes_per_op; writes++) - if (pwrite(tmpfile, - buf, - XLOG_BLCKSZ, - writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) die("write failed"); if (fsync(tmpfile) != 0) die("fsync failed"); @@ -383,10 +383,10 @@ test_sync(int writes_per_op) for (ops = 0; alarm_triggered == false; ops++) { for (writes = 0; writes < writes_per_op; writes++) - if (pwrite(tmpfile, - buf, - XLOG_BLCKSZ, - writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) die("write failed"); if (pg_fsync_writethrough(tmpfile) != 0) die("fsync failed"); @@ -415,10 +415,10 @@ test_sync(int writes_per_op) for (ops = 0; alarm_triggered == false; ops++) { for (writes = 0; writes < writes_per_op; writes++) - if (pwrite(tmpfile, - buf, - XLOG_BLCKSZ, - writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (pg_pwrite(tmpfile, + buf, + XLOG_BLCKSZ, + writes * XLOG_BLCKSZ) != XLOG_BLCKSZ) /* * This can generate write failures if the filesystem has @@ -480,10 +480,10 @@ test_open_sync(const char *msg, int writes_size) for (ops = 0; alarm_triggered == false; ops++) { for (writes = 0; writes < 16 / writes_size; writes++) - if (pwrite(tmpfile, - buf, - writes_size * 1024, - writes * writes_size * 1024) != + if (pg_pwrite(tmpfile, + buf, + writes_size * 1024, + writes * writes_size * 1024) != writes_size * 1024) die("write failed"); } @@ -582,7 +582,7 @@ test_non_sync(void) START_TIMER; for (ops = 0; alarm_triggered == false; ops++) { - if (pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ) + if (pg_pwrite(tmpfile, buf, XLOG_BLCKSZ, 0) != XLOG_BLCKSZ) die("write failed"); } STOP_TIMER; diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 6dcde2523a..e87f91316a 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -378,11 +378,11 @@ extern void XLogReaderResetError(XLogReaderState *state); /* * Error information from WALRead that both backend and frontend caller can - * process. Currently only errors from pread can be reported. + * process. Currently only errors from pg_pread can be reported. */ typedef struct WALReadError { - int wre_errno; /* errno set by the last pread() */ + int wre_errno; /* errno set by the last pg_pread() */ int wre_off; /* Offset we tried to read from. */ int wre_req; /* Bytes requested to be read. */ int wre_read; /* Bytes read by the last read(). */ diff --git a/src/include/port.h b/src/include/port.h index 3562d471b5..69d8818d61 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -213,6 +213,15 @@ extern int pg_fprintf(FILE *stream, const char *fmt,...) pg_attribute_printf(2, extern int pg_vprintf(const char *fmt, va_list args) pg_attribute_printf(1, 0); extern int pg_printf(const char *fmt,...) pg_attribute_printf(1, 2); +#ifndef WIN32 +/* + * We add a pg_ prefix as a warning that the Windows implementations have the + * non-standard side-effect of changing the current file position. + */ +#define pg_pread pread +#define pg_pwrite pwrite +#endif + /* * We use __VA_ARGS__ for printf to prevent replacing references to * the "printf" format archetype in format() attribute declarations. diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h index ecdddba7fc..9e1d5dbab3 100644 --- a/src/include/port/pg_iovec.h +++ b/src/include/port/pg_iovec.h @@ -35,12 +35,21 @@ struct iovec /* Define a reasonable maximum that is safe to use on the stack. */ #define PG_IOV_MAX Min(IOV_MAX, 32) +/* + * Note that pg_preadv and pg_writev have a pg_ prefix as a warning that the + * Windows implementations have the side-effect of changing the file position. + */ + #if !HAVE_DECL_PREADV -extern ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset); +extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset); +#else +#define pg_preadv preadv #endif #if !HAVE_DECL_PWRITEV -extern ssize_t pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset); +extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset); +#else +#define pg_pwritev pwritev #endif #endif /* PG_IOVEC_H */ diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h index 707f8760ca..a22867d295 100644 --- a/src/include/port/win32_port.h +++ b/src/include/port/win32_port.h @@ -564,9 +564,9 @@ typedef unsigned short mode_t; #endif /* in port/win32pread.c */ -extern ssize_t pread(int fd, void *buf, size_t nbyte, off_t offset); +extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset); /* in port/win32pwrite.c */ -extern ssize_t pwrite(int fd, const void *buf, size_t nbyte, off_t offset); +extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset); #endif /* PG_WIN32_PORT_H */ diff --git a/src/port/preadv.c b/src/port/preadv.c index 188e10f065..ce5863b696 100644 --- a/src/port/preadv.c +++ b/src/port/preadv.c @@ -19,14 +19,14 @@ #include "port/pg_iovec.h" ssize_t -preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset) +pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset) { ssize_t sum = 0; ssize_t part; for (int i = 0; i < iovcnt; ++i) { - part = pread(fd, iov[i].iov_base, iov[i].iov_len, offset); + part = pg_pread(fd, iov[i].iov_base, iov[i].iov_len, offset); if (part < 0) { if (i == 0) diff --git a/src/port/pwritev.c b/src/port/pwritev.c index de9b7e4e3d..6712a8986c 100644 --- a/src/port/pwritev.c +++ b/src/port/pwritev.c @@ -19,14 +19,14 @@ #include "port/pg_iovec.h" ssize_t -pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset) +pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset) { ssize_t sum = 0; ssize_t part; for (int i = 0; i < iovcnt; ++i) { - part = pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset); + part = pg_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset); if (part < 0) { if (i == 0) diff --git a/src/port/win32pread.c b/src/port/win32pread.c index ebcdd33756..fb6f00bcbf 100644 --- a/src/port/win32pread.c +++ b/src/port/win32pread.c @@ -17,7 +17,7 @@ #include ssize_t -pread(int fd, void *buf, size_t size, off_t offset) +pg_pread(int fd, void *buf, size_t size, off_t offset) { OVERLAPPED overlapped = {0}; HANDLE handle; diff --git a/src/port/win32pwrite.c b/src/port/win32pwrite.c index 7f2e62e8a7..aa374d0ffc 100644 --- a/src/port/win32pwrite.c +++ b/src/port/win32pwrite.c @@ -17,7 +17,7 @@ #include ssize_t -pwrite(int fd, const void *buf, size_t size, off_t offset) +pg_pwrite(int fd, const void *buf, size_t size, off_t offset) { OVERLAPPED overlapped = {0}; HANDLE handle; -- 2.37.0 (Apple Git-136)