From 4513158e1ecd912873628b88191b15d24846cca2 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 13 Nov 2025 12:59:56 +0900 Subject: [PATCH v6] Fix Windows file I/O for offsets beyond 2GB Two bugs prevented files from exceeding 2GB on Windows when built with segment sizes larger than the default 1GB. First, off_t is only 32 bits on Windows with MSVC, causing signed overflow at 2GB. Change the file I/O layer to use pgoff_t consistently: fd.c, md.c, pg_iovec.h, file_utils.c, and their associated headers. This is safe on Unix where pgoff_t equals off_t. Second, the Windows pg_pwrite() and pg_pread() implementations only set the low 32 bits of the OVERLAPPED structure, leaving OffsetHigh at zero. This happens to work below 4GB but wraps around above that. Set both Offset and OffsetHigh properly. Add a regression test that validates I/O at 4GB+1. Testing beyond 4GB is necessary because OffsetHigh is naturally zero at smaller offsets and the bug would pass unnoticed. The test uses FileSize() to independently verify that writes reach the correct location. --- meson.build | 8 -- src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + src/test/modules/test_large_files/Makefile | 22 ++++ src/test/modules/test_large_files/README | 74 +++++++++++ .../expected/test_large_files.out | 19 +++ src/test/modules/test_large_files/meson.build | 36 ++++++ .../test_large_files/sql/test_large_files.sql | 14 +++ .../test_large_files--1.0.sql | 9 ++ .../test_large_files/test_large_files.c | 117 ++++++++++++++++++ .../test_large_files/test_large_files.control | 5 + 11 files changed, 298 insertions(+), 8 deletions(-) create mode 100644 src/test/modules/test_large_files/Makefile create mode 100644 src/test/modules/test_large_files/README create mode 100644 src/test/modules/test_large_files/expected/test_large_files.out create mode 100644 src/test/modules/test_large_files/meson.build create mode 100644 src/test/modules/test_large_files/sql/test_large_files.sql create mode 100644 src/test/modules/test_large_files/test_large_files--1.0.sql create mode 100644 src/test/modules/test_large_files/test_large_files.c create mode 100644 src/test/modules/test_large_files/test_large_files.control diff --git a/meson.build b/meson.build index 6e7ddd7468..b8f64c176d 100644 --- a/meson.build +++ b/meson.build @@ -452,14 +452,6 @@ else segsize = (get_option('segsize') * 1024 * 1024 * 1024) / blocksize endif -# If we don't have largefile support, can't handle segment size >= 2GB. -if cc.sizeof('off_t', args: test_c_args) < 8 - segsize_bytes = segsize * blocksize - if segsize_bytes >= (2 * 1024 * 1024 * 1024) - error('Large file support is not enabled. Segment size cannot be larger than 1GB.') - endif -endif - cdata.set('BLCKSZ', blocksize, description: '''Size of a disk block --- this also limits the size of a tuple. You can set it bigger if you need bigger tuples (although TOAST should reduce the need diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index d079b91b1a..a045065ad9 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -30,6 +30,7 @@ SUBDIRS = \ test_int128 \ test_integerset \ test_json_parser \ + test_large_files \ test_lfind \ test_lwlock_tranches \ test_misc \ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index f5114469b9..9888009720 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -29,6 +29,7 @@ subdir('test_ginpostinglist') subdir('test_int128') subdir('test_integerset') subdir('test_json_parser') +subdir('test_large_files') subdir('test_lfind') subdir('test_lwlock_tranches') subdir('test_misc') diff --git a/src/test/modules/test_large_files/Makefile b/src/test/modules/test_large_files/Makefile new file mode 100644 index 0000000000..1960e31e52 --- /dev/null +++ b/src/test/modules/test_large_files/Makefile @@ -0,0 +1,22 @@ +MODULE_big = test_large_files +OBJS = \ + $(WIN32RES) \ + test_large_files.o + +EXTENSION = test_large_files +DATA = test_large_files--1.0.sql + +REGRESS = test_large_files + +NO_INSTALLCHECK = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_large_files +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_large_files/README b/src/test/modules/test_large_files/README new file mode 100644 index 0000000000..467ec629fd --- /dev/null +++ b/src/test/modules/test_large_files/README @@ -0,0 +1,74 @@ +test_large_files +================ + +This test module validates PostgreSQL's handling of files larger than +4GB, specifically testing that pgoff_t (64-bit file offset type) is +used correctly throughout the file I/O layer. + +Background +---------- + +On Windows with MSVC, off_t is only 32 bits, causing signed integer +overflow at 2GB (2^31 bytes). Additionally, Windows' OVERLAPPED structure +requires both low and high 32-bit offset fields to be set for offsets +beyond 4GB. PostgreSQL defines pgoff_t as a 64-bit type (__int64 on +Windows, off_t on Unix where it's already 64-bit) to handle large files +correctly. + +Two bugs were fixed: + +1. Pervasive use of off_t where pgoff_t should be used in fd.c, md.c, + and related file I/O functions. This caused failures at exactly 2GB. + +2. Windows-specific bug in pg_pwrite()/pg_pread() where the OVERLAPPED + structure only set the low 32 bits of the file offset (Offset field), + leaving OffsetHigh at zero, causing wrap-around at 4GB. + +Test Design +----------- + +The test validates file I/O at 4GB + 1 byte: + +1. Writes "OFFSET_0" at byte 0 +2. Writes "TESTDATA" at byte 4GB+1 +3. Checks FileSize() reports ~4GB (not ~16 bytes from wrap-around) +4. Reads offset 0 to verify it wasn't corrupted by wrap-around +5. Reads offset 4GB+1 to verify data integrity + +This approach catches both bugs: +- The off_t truncation bug (would fail at 2GB writes) +- The OVERLAPPED OffsetHigh bug (only manifests at 4GB+ where high bits != 0) + +Testing at 4GB+1 is critical because at 2GB+1, OffsetHigh would naturally +be zero, so bugs in setting OffsetHigh wouldn't be detected. At 4GB+1, +OffsetHigh must be 1, so the test verifies it's set correctly. + +The test catches the bug even if both read and write have the same +truncation issue, because FileSize() provides independent verification. + +The test only runs on platforms with 64-bit pgoff_t (checked via +sizeof(pgoff_t) >= 8). + +Platform Support +---------------- + +- Linux/Unix: Automatically creates sparse files (fast, no disk space used) +- Windows NTFS: Creates sparse file efficiently +- 32-bit offset platforms: Test is skipped automatically + +Running the Test +---------------- + +The test only runs during 'make check' or 'meson test', not on +'make installcheck'. This is intentional, as the test creates temporary +files and is designed for development/CI testing rather than production +validation. + + make check + +or with meson: + + meson test test_large_files + +The test completes in seconds on most platforms. On Windows, the test +may take longer as the OS allocates the sparse file structure. diff --git a/src/test/modules/test_large_files/expected/test_large_files.out b/src/test/modules/test_large_files/expected/test_large_files.out new file mode 100644 index 0000000000..a2128cdd8d --- /dev/null +++ b/src/test/modules/test_large_files/expected/test_large_files.out @@ -0,0 +1,19 @@ +CREATE EXTENSION test_large_files; +SELECT test_large_files_offset_size() >= 8 AS has_large_file_support \gset +-- Only run test on platforms with 64-bit offsets +\if :has_large_file_support + -- Test file I/O at 4GB + 1 byte boundary + -- This validates that pgoff_t is used correctly throughout + -- the file I/O layer and catches both: + -- 1. off_t truncation bugs (affects all operations at 2GB+) + -- 2. Windows OVERLAPPED structure bugs (OffsetHigh must be set + -- correctly at 4GB+ where high 32 bits are non-zero) + SELECT test_large_files_test_4gb_boundary(); + test_large_files_test_4gb_boundary +------------------------------------ + 4GB boundary test passed +(1 row) + +\else + SELECT 'Skipped - 32-bit offsets not supported'::text AS test_large_files_test_4gb_boundary; +\endif diff --git a/src/test/modules/test_large_files/meson.build b/src/test/modules/test_large_files/meson.build new file mode 100644 index 0000000000..2110bcf23f --- /dev/null +++ b/src/test/modules/test_large_files/meson.build @@ -0,0 +1,36 @@ +# src/test/modules/test_large_files/meson.build + +test_large_files_sources = files( + 'test_large_files.c', +) + +if host_system == 'windows' + test_large_files_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_large_files', + '--FILEDESC', 'test_large_files - test module for large file I/O',]) +endif + +test_large_files = shared_module('test_large_files', + test_large_files_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_large_files + +test_install_data += files( + 'test_large_files.control', + 'test_large_files--1.0.sql', +) + +tests += { + 'name': 'test_large_files', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'test_large_files', + ], + # Don't run on installcheck - only during regular check + 'regress_args': ['--no-locale'], + 'runningcheck': false, + }, +} diff --git a/src/test/modules/test_large_files/sql/test_large_files.sql b/src/test/modules/test_large_files/sql/test_large_files.sql new file mode 100644 index 0000000000..4543b357f6 --- /dev/null +++ b/src/test/modules/test_large_files/sql/test_large_files.sql @@ -0,0 +1,14 @@ +CREATE EXTENSION test_large_files; +SELECT test_large_files_offset_size() >= 8 AS has_large_file_support \gset +-- Only run test on platforms with 64-bit offsets +\if :has_large_file_support + -- Test file I/O at 4GB + 1 byte boundary + -- This validates that pgoff_t is used correctly throughout + -- the file I/O layer and catches both: + -- 1. off_t truncation bugs (affects all operations at 2GB+) + -- 2. Windows OVERLAPPED structure bugs (OffsetHigh must be set + -- correctly at 4GB+ where high 32 bits are non-zero) + SELECT test_large_files_test_4gb_boundary(); +\else + SELECT 'Skipped - 32-bit offsets not supported'::text AS test_large_files_test_4gb_boundary; +\endif diff --git a/src/test/modules/test_large_files/test_large_files--1.0.sql b/src/test/modules/test_large_files/test_large_files--1.0.sql new file mode 100644 index 0000000000..9b13c398a1 --- /dev/null +++ b/src/test/modules/test_large_files/test_large_files--1.0.sql @@ -0,0 +1,9 @@ +CREATE FUNCTION test_large_files_offset_size() +RETURNS integer +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION test_large_files_test_4gb_boundary() +RETURNS text +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; diff --git a/src/test/modules/test_large_files/test_large_files.c b/src/test/modules/test_large_files/test_large_files.c new file mode 100644 index 0000000000..70e675d473 --- /dev/null +++ b/src/test/modules/test_large_files/test_large_files.c @@ -0,0 +1,117 @@ +/*------------------------------------------------------------------------- + * + * test_large_files.c + * Test module for large file I/O operations + * + * This module tests PostgreSQL's ability to handle file offsets larger + * than 2GB (2^31 bytes), validating that pgoff_t is correctly used + * throughout the file I/O layer. + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/fd.h" +#include "utils/builtins.h" +#include "utils/wait_event.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_large_files_offset_size); +Datum +test_large_files_offset_size(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT32(sizeof(pgoff_t)); +} + +PG_FUNCTION_INFO_V1(test_large_files_test_4gb_boundary); +Datum +test_large_files_test_4gb_boundary(PG_FUNCTION_ARGS) +{ + File file; + pgoff_t large_offset = ((pgoff_t) 4294967296LL) + 1; + pgoff_t expected_size = large_offset + 8; + pgoff_t actual_size; + char write_buf_0[8] = "OFFSET_0"; + char write_buf_large[8] = "TESTDATA"; + char read_buf[8]; + int nbytes; + + file = OpenTemporaryFile(false); + if (file < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create temporary file"))); + + nbytes = FileWrite(file, write_buf_0, 8, 0, WAIT_EVENT_DATA_FILE_WRITE); + if (nbytes != 8) + { + FileClose(file); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write at offset 0"))); + } + + nbytes = FileWrite(file, write_buf_large, 8, large_offset, + WAIT_EVENT_DATA_FILE_WRITE); + if (nbytes != 8) + { + FileClose(file); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write at large offset"))); + } + + actual_size = FileSize(file); + if (actual_size < expected_size) + { + FileClose(file); + ereport(ERROR, + (errmsg("file size is %lld bytes, expected at least %lld bytes - offset truncated", + (long long) actual_size, + (long long) expected_size))); + } + + memset(read_buf, 0, 8); + nbytes = FileRead(file, read_buf, 8, 0, WAIT_EVENT_DATA_FILE_READ); + if (nbytes != 8) + { + FileClose(file); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from offset 0"))); + } + + if (memcmp(read_buf, write_buf_0, 8) != 0) + { + FileClose(file); + ereport(ERROR, + (errmsg("data at offset 0 was corrupted - write wrapped around"))); + } + + memset(read_buf, 0, 8); + nbytes = FileRead(file, read_buf, 8, large_offset, + WAIT_EVENT_DATA_FILE_READ); + if (nbytes != 8) + { + FileClose(file); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read at large offset"))); + } + + if (memcmp(write_buf_large, read_buf, 8) != 0) + { + FileClose(file); + ereport(ERROR, + (errmsg("data mismatch at large offset"))); + } + + FileClose(file); + + PG_RETURN_TEXT_P(cstring_to_text("4GB boundary test passed")); +} diff --git a/src/test/modules/test_large_files/test_large_files.control b/src/test/modules/test_large_files/test_large_files.control new file mode 100644 index 0000000000..b0bff5bd86 --- /dev/null +++ b/src/test/modules/test_large_files/test_large_files.control @@ -0,0 +1,5 @@ +# test_large_files extension +comment = 'Test module for large file I/O operations' +default_version = '1.0' +module_pathname = '$libdir/test_large_files' +relocatable = true -- 2.46.0.windows.1