From 8f38a49f7e929fa385b3784725e8a1955db9fc57 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Date: Wed, 25 Jun 2025 05:06:40 +0000
Subject: [PATCH v1] Work around Linux kernel bug in do_pages_stat()

do_pages_stat() already handles the input arrays correctly in 32-bit
compat mode, but at the end of its "while (nr_pages)" loop it advances
the pages pointer using the wrong word size.

The workaround is to make pg_numa_query_pages() never pass more than
DO_PAGES_STAT_CHUNK_NR pages (see do_pages_stat()) to the kernel in a single
numa_move_pages() call.  do_pages_stat() then handles each request in one
chunk, so the wrongly advanced pages pointer is never used and the bad
pointer arithmetic has no effect.

Linux kernel bug reported here: https://marc.info/?l=linux-mm&m=175077821909222&w=2
---
 src/port/pg_numa.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c
index 4b487a2a4e8..86921a156b4 100644
--- a/src/port/pg_numa.c
+++ b/src/port/pg_numa.c
@@ -16,6 +16,7 @@
 #include "c.h"
 #include <unistd.h>
 
+#include "miscadmin.h"
 #include "port/pg_numa.h"
 
 /*
@@ -46,7 +47,31 @@ pg_numa_init(void)
 int
 pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
 {
+	/*
+	 * Work around a Linux kernel bug in 32-bit compat mode: do_pages_stat()
+	 * advances its pages pointer wrongly past DO_PAGES_STAT_CHUNK_NR pages.
+	 * Query in small chunks and return the first nonzero result, if any.
+	 */
+#if SIZEOF_SIZE_T == 4
+#define NUMA_QUERY_CHUNK_SIZE 16	/* must be <= DO_PAGES_STAT_CHUNK_NR */
+	unsigned long next = 0;
+
+	while (next < count)
+	{
+		unsigned long chunk = Min(count - next, NUMA_QUERY_CHUNK_SIZE);
+		int			rc;
+
+		CHECK_FOR_INTERRUPTS();
+		rc = numa_move_pages(pid, chunk, &pages[next], NULL, &status[next], 0);
+		if (rc != 0)
+			return rc;
+		next += chunk;			/* next <= count always, so it cannot wrap */
+	}
+
+	return 0;
+#else
 	return numa_move_pages(pid, count, pages, NULL, status, 0);
+#endif
 }
 
 int
-- 
2.34.1

