From e0e42fd722b0be07ab8f59e340013a89e5794734 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Mon, 15 Jun 2026 05:44:08 +0000
Subject: [PATCH v1] Fix pgstat_count_io_op_time() calls passing error returns
 as bytes

Several places pass the return value of pg_pread() or pg_pwrite()
directly as the bytes argument to pgstat_count_io_op_time(). Since the
bytes parameter is of type uint64, error returns of -1 are implicitly
cast to UINT64_MAX, corrupting the I/O statistics byte counters in
pg_stat_io.

Fix this by moving the pgstat_count_io_op_time() calls to after the
error handling, so they are only reached with a valid positive byte
count. This is consistent with how most other code already behaves:
smgr calls don't return on error, so pgstat calls are implicitly only
reached on success.

Reported-by: Peter Eisentraut <peter@eisentraut.org>
Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Discussion: https://postgr.es/m/0db864e6-4477-4eba-b2be-d3523cc86564@eisentraut.org
---
 src/backend/access/transam/xlog.c         | 6 +++---
 src/backend/access/transam/xlogreader.c   | 8 +++++---
 src/backend/access/transam/xlogrecovery.c | 6 ++++--
 src/backend/replication/walreceiver.c     | 6 +++---
 4 files changed, 15 insertions(+), 11 deletions(-)
  99.6% src/backend/access/transam/

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6c2304fef33..652287da55f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2455,9 +2455,6 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 				written = pg_pwrite(openLogFile, from, nleft, startoffset);
 				pgstat_report_wait_end();
 
-				pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
-										IOOP_WRITE, start, 1, written);
-
 				if (written <= 0)
 				{
 					char		xlogfname[MAXFNAMELEN];
@@ -2475,6 +2472,9 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 							 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
 									xlogfname, startoffset, nleft)));
 				}
+
+				pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
+										IOOP_WRITE, start, 1, written);
 				nleft -= written;
 				from += written;
 				startoffset += written;
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 3145c58a9b1..9d64ae34932 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1597,9 +1597,6 @@ WALRead(XLogReaderState *state,
 
 #ifndef FRONTEND
 		pgstat_report_wait_end();
-
-		pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
-								io_start, 1, readbytes);
 #endif
 
 		if (readbytes <= 0)
@@ -1612,6 +1609,11 @@ WALRead(XLogReaderState *state,
 			return false;
 		}
 
+#ifndef FRONTEND
+		pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
+								io_start, 1, readbytes);
+#endif
+
 		/* Update state for read */
 		recptr += readbytes;
 		nbytes -= readbytes;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 73b78a83fa7..4d61795b483 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -3390,8 +3390,10 @@ retry:
 
 		pgstat_report_wait_end();
 
-		pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
-								io_start, 1, r);
+		/* Count I/O stats only for successful short reads */
+		if (r > 0)
+			pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
+									io_start, 1, r);
 
 		XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
 		if (r < 0)
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index d19317703c1..05e2f690fa7 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -954,9 +954,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
 		byteswritten = pg_pwrite(recvFile, buf, segbytes, (pgoff_t) startoff);
 		pgstat_report_wait_end();
 
-		pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
-								IOOP_WRITE, start, 1, byteswritten);
-
 		if (byteswritten <= 0)
 		{
 			char		xlogfname[MAXFNAMELEN];
@@ -976,6 +973,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
 							xlogfname, startoff, segbytes)));
 		}
 
+		pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
+								IOOP_WRITE, start, 1, byteswritten);
+
 		/* Update state for write */
 		recptr += byteswritten;
 
-- 
2.34.1

