From 530249a06d2173ea54d9e3af4a6e56921bf4471b Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Wed, 6 May 2026 07:39:28 -0400
Subject: [PATCH] Prevent thread-unsafe sharing of an ECPG default connection

ECPGconnect() published the new struct connection into all_connections,
the calling thread's per-thread default-connection slot (held in
thread-specific data via actual_connection_key), and the global
actual_connection before calling PQconnectdbParams().  If
PQconnectdbParams() then failed, ecpg_finish() removed the dead struct
from the list and -- by the same logic that ecpg_finish() uses for
DISCONNECT -- repointed the failing thread's per-thread slot at whatever
now headed all_connections, which is typically a sibling thread's
already-open connection.

After such a failed CONNECT the caller's whenever-sqlerror handler often
just logs and lets the thread continue; the next EXEC SQL statement
issued without an explicit connection name then resolved through the
poisoned per-thread slot or the actual_connection global default and ran
libpq calls on the sibling's PGconn concurrently with the sibling thread
itself.  libpq is not thread-safe at the per-connection level, so the
shared prep_stmts list got corrupted (segv in deallocate_one), the
connection input buffer got reallocated underneath an in-flight reader
(heap abort in pqCheckInBufferSpace), and conn->result occasionally went
NULL while another thread was mid-row-processing.

This was reproducible by running thread/prep and thread/alloc against a
server with max_connections set low enough to refuse some of the worker
threads' CONNECTs, e.g.

  max_connections = 10

in a TEMP_CONFIG passed to "make -C src/interfaces/ecpg/test check",
and is the family of crashes Alexander Lakhin observed on the dikkop
buildfarm animal.

Fix:

* Record on each successful CONNECT which thread opened that connection
  (new owner_thread field on struct connection).  When an EXEC SQL
  statement is issued without a connection name and the calling thread
  has nothing in its per-thread default-connection slot, only fall back
  to actual_connection if its owner matches the caller; otherwise return
  NULL so ecpg_init() raises ECPG_NO_CONN cleanly.  Threads that
  genuinely want to share a connection must do so explicitly by name (or
  via SET CONNECTION) and provide their own synchronization.

* In ECPGconnect()'s failure path, snapshot the prior per-thread slot
  value and global default before publishing the unopened struct, and
  restore them after ecpg_finish() returns.  This prevents the failing
  thread's slot from being aliased to a sibling's open connection.

A new ecpg_thread_id abstraction (pthread_t on POSIX, DWORD on Win32)
keeps ECPG_GET_THREAD_ID()/ECPG_THREAD_ID_EQUAL() portable across
threading models.

A new test thread/connfail forces a deterministic CONNECT failure (by
naming a database that does not exist) and verifies that the worker
thread's subsequent unnamed PREPARE returns ECPG_NO_CONN instead of
silently using the main thread's connection.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Discussion: https://postgr.es/m/75460b3c-255d-47eb-b889-d99de38e6758@gmail.com
---
 src/interfaces/ecpg/ecpglib/connect.c         |  73 +++++--
 src/interfaces/ecpg/ecpglib/ecpglib_extern.h  |   2 +
 .../ecpg/include/ecpg-pthread-win32.h         |  17 ++
 src/interfaces/ecpg/test/ecpg_schedule        |   1 +
 .../ecpg/test/expected/thread-connfail.c      | 192 ++++++++++++++++++
 .../ecpg/test/expected/thread-connfail.stderr |   0
 .../ecpg/test/expected/thread-connfail.stdout |   2 +
 src/interfaces/ecpg/test/thread/Makefile      |   3 +-
 src/interfaces/ecpg/test/thread/connfail.pgc  |  89 ++++++++
 src/interfaces/ecpg/test/thread/meson.build   |   1 +
 10 files changed, 359 insertions(+), 21 deletions(-)
 create mode 100644 src/interfaces/ecpg/test/expected/thread-connfail.c
 create mode 100644 src/interfaces/ecpg/test/expected/thread-connfail.stderr
 create mode 100644 src/interfaces/ecpg/test/expected/thread-connfail.stdout
 create mode 100644 src/interfaces/ecpg/test/thread/connfail.pgc

diff --git a/src/interfaces/ecpg/ecpglib/connect.c b/src/interfaces/ecpg/ecpglib/connect.c
index 78de9f298ba..e09f728a379 100644
--- a/src/interfaces/ecpg/ecpglib/connect.c
+++ b/src/interfaces/ecpg/ecpglib/connect.c
@@ -32,6 +32,30 @@ ecpg_pthreads_init(void)
 	pthread_once(&actual_connection_key_once, ecpg_actual_connection_init);
 }
 
+/*
+ * Resolve actual_connection as the implicit default for an unnamed EXEC SQL,
+ * but only if it was opened by the calling thread.  Otherwise we'd let one
+ * thread silently issue libpq calls on another thread's PGconn, which is
+ * not safe.  Threads that want to share a connection must do so explicitly
+ * by name (or via SET CONNECTION) and provide their own synchronization.
+ */
+static struct connection *
+ecpg_default_connection(void)
+{
+	struct connection *ret;
+
+	ret = pthread_getspecific(actual_connection_key);
+	if (ret != NULL)
+		return ret;
+
+	if (actual_connection != NULL &&
+		ECPG_THREAD_ID_EQUAL(actual_connection->owner_thread,
+							 ECPG_GET_THREAD_ID()))
+		return actual_connection;
+
+	return NULL;
+}
+
 static struct connection *
 ecpg_get_connection_nr(const char *connection_name)
 {
@@ -41,16 +65,7 @@ ecpg_get_connection_nr(const char *connection_name)
 	{
 		ecpg_pthreads_init();	/* ensure actual_connection_key is valid */
 
-		ret = pthread_getspecific(actual_connection_key);
-
-		/*
-		 * if no connection in TSD for this thread, get the global default
-		 * connection and hope the user knows what they're doing (i.e. using
-		 * their own mutex to protect that connection from concurrent accesses
-		 */
-		if (ret == NULL)
-			/* no TSD connection, going for global */
-			ret = actual_connection;
+		ret = ecpg_default_connection();
 	}
 	else
 	{
@@ -81,16 +96,7 @@ ecpg_get_connection(const char *connection_name)
 	{
 		ecpg_pthreads_init();	/* ensure actual_connection_key is valid */
 
-		ret = pthread_getspecific(actual_connection_key);
-
-		/*
-		 * if no connection in TSD for this thread, get the global default
-		 * connection and hope the user knows what they're doing (i.e. using
-		 * their own mutex to protect that connection from concurrent accesses
-		 */
-		if (ret == NULL)
-			/* no TSD connection here either, using global */
-			ret = actual_connection;
+		ret = ecpg_default_connection();
 	}
 	else
 	{
@@ -262,6 +268,8 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p
 	struct sqlca_t *sqlca = ECPGget_sqlca();
 	enum COMPAT_MODE compat = c;
 	struct connection *this;
+	struct connection *prev_actual_connection;
+	struct connection *prev_thread_connection;
 	int			i,
 				connect_params = 0;
 	bool		alloc_failed = (sqlca == NULL);
@@ -540,12 +548,25 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p
 
 	this->cache_head = NULL;
 	this->prep_stmts = NULL;
+	this->owner_thread = ECPG_GET_THREAD_ID();
 
 	if (all_connections == NULL)
 		this->next = NULL;
 	else
 		this->next = all_connections;
 
+	/*
+	 * Snapshot this thread's per-thread default connection (kept in
+	 * thread-specific data via actual_connection_key) and the global default,
+	 * so we can restore them if PQconnectdbParams() fails below.  Without
+	 * this, ecpg_finish() would re-point this thread's slot at whatever then
+	 * heads all_connections -- typically a sibling thread's already-open
+	 * PGconn, which would later be returned by ecpg_get_connection(NULL) and
+	 * used by libpq concurrently from two threads.
+	 */
+	prev_actual_connection = actual_connection;
+	prev_thread_connection = pthread_getspecific(actual_connection_key);
+
 	all_connections = this;
 	pthread_setspecific(actual_connection_key, all_connections);
 	actual_connection = all_connections;
@@ -668,6 +689,18 @@ ECPGconnect(int lineno, int c, const char *name, const char *user, const char *p
 		ecpg_log("ECPGconnect: %s", errmsg);
 
 		ecpg_finish(this);
+
+		/*
+		 * Restore this thread's per-thread default connection and the global
+		 * default to whatever they were before we published "this".
+		 * ecpg_finish() reset them to the new head of all_connections, but
+		 * that may be a different thread's open connection -- letting our
+		 * failed attempt aim this thread's slot at a sibling's PGconn would
+		 * be unsafe.
+		 */
+		pthread_setspecific(actual_connection_key, prev_thread_connection);
+		actual_connection = prev_actual_connection;
+
 		pthread_mutex_unlock(&connections_mutex);
 
 		ecpg_raise(lineno, ECPG_CONNECT, ECPG_SQLSTATE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION, db);
diff --git a/src/interfaces/ecpg/ecpglib/ecpglib_extern.h b/src/interfaces/ecpg/ecpglib/ecpglib_extern.h
index c92f0aa1081..b6c391bf717 100644
--- a/src/interfaces/ecpg/ecpglib/ecpglib_extern.h
+++ b/src/interfaces/ecpg/ecpglib/ecpglib_extern.h
@@ -4,6 +4,7 @@
 #define _ECPG_ECPGLIB_EXTERN_H
 
 #include "ecpg_config.h"
+#include "ecpg-pthread-win32.h"
 #include "ecpgtype.h"
 #include "libpq-fe.h"
 #include "sqlca.h"
@@ -103,6 +104,7 @@ struct connection
 	char	   *name;
 	PGconn	   *connection;
 	bool		autocommit;
+	ecpg_thread_id owner_thread;	/* thread that opened this connection */
 	struct ECPGtype_information_cache *cache_head;
 	struct prepared_statement *prep_stmts;
 	struct connection *next;
diff --git a/src/interfaces/ecpg/include/ecpg-pthread-win32.h b/src/interfaces/ecpg/include/ecpg-pthread-win32.h
index 7b6ba46b349..c742bb5232b 100644
--- a/src/interfaces/ecpg/include/ecpg-pthread-win32.h
+++ b/src/interfaces/ecpg/include/ecpg-pthread-win32.h
@@ -8,8 +8,25 @@
 #ifndef WIN32
 
 #include <pthread.h>
+
+/*
+ * Abstraction for capturing and comparing a thread identity.  Used by
+ * ecpglib to record which thread owns a given connection so that an
+ * unnamed EXEC SQL statement issued from a different thread (whose own
+ * CONNECT may have failed) does not silently fall through to it via the
+ * actual_connection global default -- libpq is not thread-safe at the
+ * per-connection level.
+ */
+typedef pthread_t ecpg_thread_id;
+#define ECPG_GET_THREAD_ID()			pthread_self()
+#define ECPG_THREAD_ID_EQUAL(a, b)		pthread_equal((a), (b))
+
 #else
 
+typedef DWORD ecpg_thread_id;
+#define ECPG_GET_THREAD_ID()			GetCurrentThreadId()
+#define ECPG_THREAD_ID_EQUAL(a, b)		((a) == (b))
+
 typedef struct pthread_mutex_t
 {
 	/* initstate = 0: not initialized; 1: init done; 2: init in progress */
diff --git a/src/interfaces/ecpg/test/ecpg_schedule b/src/interfaces/ecpg/test/ecpg_schedule
index d1f5d9452b7..d2c254fe9f3 100644
--- a/src/interfaces/ecpg/test/ecpg_schedule
+++ b/src/interfaces/ecpg/test/ecpg_schedule
@@ -64,3 +64,4 @@ test: thread/thread_implicit
 test: thread/prep
 test: thread/alloc
 test: thread/descriptor
+test: thread/connfail
diff --git a/src/interfaces/ecpg/test/expected/thread-connfail.c b/src/interfaces/ecpg/test/expected/thread-connfail.c
new file mode 100644
index 00000000000..53d8865e9a8
--- /dev/null
+++ b/src/interfaces/ecpg/test/expected/thread-connfail.c
@@ -0,0 +1,192 @@
+/* Processed by ecpg (regression mode) */
+/* These include files are added by the preprocessor */
+#include <ecpglib.h>
+#include <ecpgerrno.h>
+#include <sqlca.h>
+/* End of automatic include section */
+#define ECPGdebug(X,Y) ECPGdebug((X)+100,(Y))
+
+#line 1 "connfail.pgc"
+/*
+ * Verify that an EXEC SQL statement issued without a connection name from
+ * a worker thread whose own EXEC SQL CONNECT failed does not silently fall
+ * back to another thread's open PGconn.  Before the corresponding ecpglib
+ * fix, ecpg_get_connection(NULL) would return the global default
+ * connection regardless of which thread had opened it, letting the worker
+ * issue libpq calls on the main thread's connection concurrently with the
+ * main thread itself -- libpq is not thread-safe at the per-connection
+ * level, so cross-thread sharing must be opt-in by name, not implicit.
+ *
+ * The test forces a deterministic CONNECT failure (target database does
+ * not exist), then issues an unnamed EXEC SQL.  With the fix the unnamed
+ * statement must error with sqlcode = ECPG_NO_CONN (-220); without the
+ * fix it would silently succeed against the main thread's connection
+ * (or, under load, crash from concurrent libpq access).
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "ecpg_config.h"
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#else
+#include <pthread.h>
+#endif
+
+
+#line 1 "sqlca.h"
+#ifndef POSTGRES_SQLCA_H
+#define POSTGRES_SQLCA_H
+
+#ifndef PGDLLIMPORT
+#if  defined(WIN32) || defined(__CYGWIN__)
+#define PGDLLIMPORT __declspec (dllimport)
+#else
+#define PGDLLIMPORT
+#endif							/* __CYGWIN__ */
+#endif							/* PGDLLIMPORT */
+
+#define SQLERRMC_LEN	150
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+struct sqlca_t
+{
+	char		sqlcaid[8];
+	long		sqlabc;
+	long		sqlcode;
+	struct
+	{
+		int			sqlerrml;
+		char		sqlerrmc[SQLERRMC_LEN];
+	}			sqlerrm;
+	char		sqlerrp[8];
+	long		sqlerrd[6];
+	/* Element 0: empty						*/
+	/* 1: OID of processed tuple if applicable			*/
+	/* 2: number of rows processed				*/
+	/* after an INSERT, UPDATE or				*/
+	/* DELETE statement					*/
+	/* 3: empty						*/
+	/* 4: empty						*/
+	/* 5: empty						*/
+	char		sqlwarn[8];
+	/* Element 0: set to 'W' if at least one other is 'W'	*/
+	/* 1: if 'W' at least one character string		*/
+	/* value was truncated when it was			*/
+	/* stored into a host variable.             */
+
+	/*
+	 * 2: if 'W' a (hopefully) non-fatal notice occurred
+	 */	/* 3: empty */
+	/* 4: empty						*/
+	/* 5: empty						*/
+	/* 6: empty						*/
+	/* 7: empty						*/
+
+	char		sqlstate[5];
+};
+
+struct sqlca_t *ECPGget_sqlca(void);
+
+#ifndef POSTGRES_ECPG_INTERNAL
+#define sqlca (*ECPGget_sqlca())
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+#line 30 "connfail.pgc"
+
+
+#line 1 "regression.h"
+
+
+
+
+
+
+#line 31 "connfail.pgc"
+
+
+#ifdef WIN32
+static unsigned __stdcall
+fn(void *arg)
+#else
+static void *
+fn(void *arg)
+#endif
+{
+	/* exec sql begin declare section */
+		     
+	
+#line 42 "connfail.pgc"
+ char * query = "SELECT 1" ;
+/* exec sql end declare section */
+#line 43 "connfail.pgc"
+
+
+	(void) arg;
+
+	/*
+	 * Connect to a database name that the test instance does not have.
+	 * This is guaranteed to fail with ECPG_CONNECT.
+	 */
+	{ ECPGconnect(__LINE__, 0, "nonexistent_xyzzy_db" , NULL, NULL , NULL, 0); }
+#line 51 "connfail.pgc"
+
+	printf("worker connect sqlcode = %ld\n", sqlca.sqlcode);
+
+	/*
+	 * After our own connect failed, an unnamed EXEC SQL must fail with
+	 * ECPG_NO_CONN (-220), NOT silently use the main thread's connection.
+	 */
+	{ ECPGprepare(__LINE__, NULL, 0, "iworker", query);}
+#line 58 "connfail.pgc"
+
+	printf("worker prepare sqlcode = %ld\n", sqlca.sqlcode);
+
+	return 0;
+}
+
+int
+main(void)
+{
+#ifdef WIN32
+	HANDLE		t;
+	unsigned	id;
+#else
+	pthread_t	t;
+#endif
+
+	{ ECPGconnect(__LINE__, 0, "ecpg1_regression" , NULL, NULL , NULL, 0); }
+#line 74 "connfail.pgc"
+
+	{ ECPGsetcommit(__LINE__, "on", NULL);}
+#line 75 "connfail.pgc"
+
+
+#ifdef WIN32
+	t = (HANDLE) _beginthreadex(NULL, 0, fn, NULL, 0, &id);
+	WaitForSingleObject(t, INFINITE);
+	CloseHandle(t);
+#else
+	pthread_create(&t, NULL, fn, NULL);
+	pthread_join(t, NULL);
+#endif
+
+	{ ECPGdisconnect(__LINE__, "CURRENT");}
+#line 86 "connfail.pgc"
+
+
+	return 0;
+}
diff --git a/src/interfaces/ecpg/test/expected/thread-connfail.stderr b/src/interfaces/ecpg/test/expected/thread-connfail.stderr
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/interfaces/ecpg/test/expected/thread-connfail.stdout b/src/interfaces/ecpg/test/expected/thread-connfail.stdout
new file mode 100644
index 00000000000..9d91f3f7358
--- /dev/null
+++ b/src/interfaces/ecpg/test/expected/thread-connfail.stdout
@@ -0,0 +1,2 @@
+worker connect sqlcode = -402
+worker prepare sqlcode = -220
diff --git a/src/interfaces/ecpg/test/thread/Makefile b/src/interfaces/ecpg/test/thread/Makefile
index 1b4ddcff61b..04b600420d9 100644
--- a/src/interfaces/ecpg/test/thread/Makefile
+++ b/src/interfaces/ecpg/test/thread/Makefile
@@ -8,6 +8,7 @@ TESTS = thread_implicit thread_implicit.c \
         thread thread.c \
         prep prep.c \
         descriptor descriptor.c \
-        alloc alloc.c
+        alloc alloc.c \
+        connfail connfail.c
 
 all: $(TESTS)
diff --git a/src/interfaces/ecpg/test/thread/connfail.pgc b/src/interfaces/ecpg/test/thread/connfail.pgc
new file mode 100644
index 00000000000..3e2e5271d47
--- /dev/null
+++ b/src/interfaces/ecpg/test/thread/connfail.pgc
@@ -0,0 +1,89 @@
+/*
+ * Verify that an EXEC SQL statement issued without a connection name from
+ * a worker thread whose own EXEC SQL CONNECT failed does not silently fall
+ * back to another thread's open PGconn.  Before the corresponding ecpglib
+ * fix, ecpg_get_connection(NULL) would return the global default
+ * connection regardless of which thread had opened it, letting the worker
+ * issue libpq calls on the main thread's connection concurrently with the
+ * main thread itself -- libpq is not thread-safe at the per-connection
+ * level, so cross-thread sharing must be opt-in by name, not implicit.
+ *
+ * The test forces a deterministic CONNECT failure (target database does
+ * not exist), then issues an unnamed EXEC SQL.  With the fix the unnamed
+ * statement must error with sqlcode = ECPG_NO_CONN (-220); without the
+ * fix it would silently succeed against the main thread's connection
+ * (or, under load, crash from concurrent libpq access).
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "ecpg_config.h"
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+#else
+#include <pthread.h>
+#endif
+
+exec sql include sqlca;
+exec sql include ../regression;
+
+#ifdef WIN32
+static unsigned __stdcall
+fn(void *arg)
+#else
+static void *
+fn(void *arg)
+#endif
+{
+	exec sql begin declare section;
+	char	   *query = "SELECT 1";
+	exec sql end declare section;
+
+	(void) arg;
+
+	/*
+	 * Connect to a database name that the test instance does not have.
+	 * This is guaranteed to fail with ECPG_CONNECT.
+	 */
+	exec sql connect to nonexistent_xyzzy_db;
+	printf("worker connect sqlcode = %ld\n", sqlca.sqlcode);
+
+	/*
+	 * After our own connect failed, an unnamed EXEC SQL must fail with
+	 * ECPG_NO_CONN (-220), NOT silently use the main thread's connection.
+	 */
+	exec sql prepare iworker from :query;
+	printf("worker prepare sqlcode = %ld\n", sqlca.sqlcode);
+
+	return 0;
+}
+
+int
+main(void)
+{
+#ifdef WIN32
+	HANDLE		t;
+	unsigned	id;
+#else
+	pthread_t	t;
+#endif
+
+	exec sql connect to REGRESSDB1;
+	exec sql set autocommit to on;
+
+#ifdef WIN32
+	t = (HANDLE) _beginthreadex(NULL, 0, fn, NULL, 0, &id);
+	WaitForSingleObject(t, INFINITE);
+	CloseHandle(t);
+#else
+	pthread_create(&t, NULL, fn, NULL);
+	pthread_join(t, NULL);
+#endif
+
+	exec sql disconnect;
+
+	return 0;
+}
diff --git a/src/interfaces/ecpg/test/thread/meson.build b/src/interfaces/ecpg/test/thread/meson.build
index b23289730b7..3af4604b90c 100644
--- a/src/interfaces/ecpg/test/thread/meson.build
+++ b/src/interfaces/ecpg/test/thread/meson.build
@@ -6,6 +6,7 @@ pgc_files = [
   'prep',
   'descriptor',
   'alloc',
+  'connfail',
 ]
 
 foreach pgc_file : pgc_files
-- 
2.43.0

