From 1eb87b403b67dbb3e7d59c8c82cd3d274d2372de Mon Sep 17 00:00:00 2001
From: Bryan Green
Date: Wed, 5 Nov 2025 22:24:35 -0600
Subject: [PATCH] Fix socket handle inheritance on Windows preventing restart

On Windows, socket handles are inheritable by default, causing child
processes spawned by backends (e.g., via COPY TO PROGRAM) to inherit
socket handles. Windows reference counting then prevents sockets from
being freed when the owning process exits, leading to "Address already
in use" errors on restart or zombie connections in netstat.

Fix by adding WSA_FLAG_NO_HANDLE_INHERIT to socket creation in
pgwin32_socket(), and calling SetHandleInformation() in
BackendInitialize() to make the inherited client socket non-inheritable
before spawning children. The latter is needed because handles passed
to child processes become inheritable again on Windows.
---
Review note: the TAP test was revised during review; the blob id on the
"index" line of the new test file below was not regenerated.

 src/backend/port/win32/socket.c               |   9 +-
 src/backend/tcop/backend_startup.c            |  12 ++
 src/bin/pg_ctl/meson.build                    |   1 +
 .../pg_ctl/t/005_socket_handle_inheritance.pl | 171 ++++++++++++++++++
 4 files changed, 191 insertions(+), 2 deletions(-)
 create mode 100644 src/bin/pg_ctl/t/005_socket_handle_inheritance.pl

diff --git a/src/backend/port/win32/socket.c b/src/backend/port/win32/socket.c
index a8538afe68..3d3530e71f 100644
--- a/src/backend/port/win32/socket.c
+++ b/src/backend/port/win32/socket.c
@@ -285,7 +285,11 @@ pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
 }
 
 /*
- * Create a socket, setting it to overlapped and non-blocking
+ * Create a socket, setting it to overlapped, non-blocking, and non-inheritable.
+ *
+ * We must prevent child processes from inheriting socket handles. Otherwise,
+ * the kernel's reference counting means listening sockets can stay bound even
+ * after postmaster exit, preventing restart.
  */
 SOCKET
 pgwin32_socket(int af, int type, int protocol)
@@ -293,7 +297,8 @@ pgwin32_socket(int af, int type, int protocol)
 	SOCKET		s;
 	unsigned long on = 1;
 
-	s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
+	s = WSASocket(af, type, protocol, NULL, 0,
+				  WSA_FLAG_OVERLAPPED | WSA_FLAG_NO_HANDLE_INHERIT);
 	if (s == INVALID_SOCKET)
 	{
 		TranslateSocketError();
diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c
index 14d5fc0b19..2a1dd6b689 100644
--- a/src/backend/tcop/backend_startup.c
+++ b/src/backend/tcop/backend_startup.c
@@ -177,6 +177,18 @@ BackendInitialize(ClientSocket *client_sock, CAC_state cac)
 	port = MyProcPort = pq_init(client_sock);
 	MemoryContextSwitchTo(oldcontext);
 
+#ifdef WIN32
+	/*
+	 * On Windows, the client socket inherited from the postmaster becomes
+	 * inheritable again in this process. Prevent child processes spawned
+	 * by this backend from inheriting it.
+	 */
+	if (!SetHandleInformation((HANDLE) port->sock, HANDLE_FLAG_INHERIT, 0))
+		ereport(WARNING,
+				(errmsg_internal("could not disable socket handle inheritance: error code %lu",
+								 GetLastError())));
+#endif
+
 	whereToSendOutput = DestRemote; /* now safe to ereport to client */
 
 	/* set these to empty in case they are needed before we set them up */
diff --git a/src/bin/pg_ctl/meson.build b/src/bin/pg_ctl/meson.build
index e92ba50f8a..a73248e4a7 100644
--- a/src/bin/pg_ctl/meson.build
+++ b/src/bin/pg_ctl/meson.build
@@ -27,6 +27,7 @@ tests += {
       't/002_status.pl',
       't/003_promote.pl',
       't/004_logrotate.pl',
+      't/005_socket_handle_inheritance.pl',
     ],
   },
 }
diff --git a/src/bin/pg_ctl/t/005_socket_handle_inheritance.pl b/src/bin/pg_ctl/t/005_socket_handle_inheritance.pl
new file mode 100644
index 0000000000..a58787ce89
--- /dev/null
+++ b/src/bin/pg_ctl/t/005_socket_handle_inheritance.pl
@@ -0,0 +1,171 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Test that socket handles are not inherited by child processes on Windows.
+#
+# Without the fix, child processes spawned via COPY TO PROGRAM inherit socket
+# handles from the backend. Windows reference counting prevents these sockets
+# from being freed when the postmaster exits, leaving the port bound to a dead
+# process (a "zombie" binding). This test verifies that after killing the
+# postmaster while a child process is still running, the listening port is
+# immediately freed rather than remaining in a zombie state.
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Socket;
+use Test::More;
+use Time::HiRes qw(sleep);
+
+# This test is Windows-specific
+if ($^O ne 'MSWin32')
+{
+	plan skip_all => 'test is specific to Windows socket handle inheritance';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+
+# Get the port number for verification
+my $port = $node->port;
+
+# Spawn a long-running child process via server-side COPY TO PROGRAM that will
+# outlive the postmaster. Without the fix, this child inherits socket handles.
+#
+# The command has to be run by the backend, so use COPY rather than psql's
+# client-side \copy. It also has to be issued from a background session:
+# COPY TO PROGRAM waits for the program to exit, so a synchronous safe_psql()
+# call would not return until the child process had already gone away.
+my $marker_name = 'ps_marker.txt';
+my $marker_file = $node->data_dir . '/' . $marker_name;
+unlink $marker_file if -e $marker_file;
+
+my $copy_session = $node->background_psql('postgres', on_error_stop => 0);
+$copy_session->query_until(
+	qr/copy_started/,
+	qq{\\echo copy_started
+COPY (SELECT 1) TO PROGRAM 'powershell -Command "echo marker > $marker_file; Start-Sleep 120"';
+});
+
+# Wait for PowerShell to spawn
+my $ps_spawned = 0;
+for (1 .. 100)
+{
+	if (-e $marker_file)
+	{
+		$ps_spawned = 1;
+		last;
+	}
+	sleep 0.1;
+}
+
+ok($ps_spawned, 'child process spawned successfully');
+
+# Stop the postmaster (simulates a crash), leaving the child process running.
+$node->stop('immediate');
+sleep 0.5;
+
+# Verify that the listening port is freed immediately. With the bug, the port
+# remains bound to the dead postmaster PID because the child process inherited
+# the socket handles. With the fix, the port is freed because socket handles
+# were not inherited.
+#
+# Match the local-address column exactly, so that a listener on another port
+# that merely contains the same digits is not mistaken for ours.
+my $netstat_output = join('',
+	grep { /^\s*TCP\s+\S+:${port}\s+\S+\s+LISTENING\b/ } `netstat -ano`);
+
+if ($netstat_output)
+{
+	fail('listening port remains bound after postmaster exit (zombie port)');
+	diag("Port is still bound - socket handles were inherited by child process");
+	diag("netstat output:\n$netstat_output");
+
+	if ($netstat_output =~ /LISTENING\s+(\d+)/)
+	{
+		my $bound_pid = $1;
+		my $process_name = get_process_name($bound_pid);
+
+		if ($process_name eq 'unknown' || $process_name eq '')
+		{
+			diag("Port bound to dead process (PID $bound_pid) - zombie binding detected");
+		}
+		else
+		{
+			diag("Port bound to: $process_name (PID $bound_pid)");
+		}
+	}
+}
+else
+{
+	pass('listening port freed immediately after postmaster exit');
+}
+
+# Additional verification: Confirm the port is actually available for binding.
+# This tests the real-world scenario that matters to users.
+my $can_bind = test_port_available($port);
+ok($can_bind, "port $port is available for new connections");
+
+# Cleanup. Kill the child process before closing the background session: if
+# the child inherited the client socket, psql would not see the connection
+# drop until the child is gone. The session's server is already stopped, so
+# quitting it is best-effort.
+cleanup_powershell_processes($marker_name);
+eval { $copy_session->quit; };
+unlink $marker_file if -e $marker_file;
+
+done_testing();
+
+# Test whether a new TCP listener can bind to the given port.
+#
+# SO_REUSEADDR is deliberately not set: on Windows that option lets a socket
+# bind to a port that another socket is still bound to, which would hide the
+# very leftover binding this test is looking for. The bind targets $host,
+# defaulting to the loopback address (presumably where the test node listens;
+# verify if the node is configured otherwise), since a wildcard bind is not
+# guaranteed to conflict with a socket bound to a specific address.
+#
+# Returns 1 if the bind succeeds, 0 otherwise.
+sub test_port_available
+{
+	my ($port, $host) = @_;
+	$host //= '127.0.0.1';
+
+	my $iaddr = inet_aton($host) or return 0;
+	socket(my $sock, PF_INET, SOCK_STREAM, getprotobyname('tcp'))
+	  or return 0;
+
+	my $bound = bind($sock, sockaddr_in($port, $iaddr));
+	close($sock);
+
+	return $bound ? 1 : 0;
+}
+
+# Get process name by PID. Returns 'unknown' when PowerShell reports no name
+# for that PID (for example because the process no longer exists).
+sub get_process_name
+{
+	my ($pid) = @_;
+	my $name =
+	  `powershell -Command "(Get-Process -Id $pid -ErrorAction SilentlyContinue).ProcessName" 2>nul`;
+	$name //= '';
+	chomp $name;
+	return $name || 'unknown';
+}
+
+# Clean up the child process left behind by the test.
+#
+# Only PowerShell processes whose command line mentions $marker are stopped
+# (excluding the PowerShell instance doing the cleanup), so unrelated
+# PowerShell sessions on the machine are left alone.
+sub cleanup_powershell_processes
+{
+	my ($marker) = @_;
+	$marker //= 'ps_marker.txt';
+
+	system(
+		qq{powershell -Command "Get-CimInstance Win32_Process | Where-Object { \$_.Name -eq 'powershell.exe' -and \$_.CommandLine -like '*$marker*' -and \$_.ProcessId -ne \$PID } | ForEach-Object { Stop-Process -Id \$_.ProcessId -Force -ErrorAction SilentlyContinue }" 2>nul}
+	);
+}
-- 
2.46.0.windows.1