Re: Postgres 7.4.7 hang in async_notify

From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: pgsql-bugs(at)counterstorm(dot)com
Cc: pgsql-bugs(at)postgresql(dot)org
Subject: Re: Postgres 7.4.7 hang in async_notify
Date: 2005-06-02 21:06:02
Message-ID: 28071.1117746362@sss.pgh.pa.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-bugs

pgsql-bugs(at)counterstorm(dot)com writes:
> We saw the problem with async_notify again (See thread with subject
> "Postgres 7.4.6 hang in async_notify" in first message to this list
> dated "Mon, 25 Apr 2005 15:42:35 -0400") in a production setting.

I've applied a patch that (I think) will fix this. Attached is the
7.4-branch version of it.

regards, tom lane

Index: src/backend/libpq/be-secure.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/libpq/be-secure.c,v
retrieving revision 1.43.2.1
diff -c -r1.43.2.1 be-secure.c
*** src/backend/libpq/be-secure.c 18 Dec 2003 22:49:34 -0000 1.43.2.1
--- src/backend/libpq/be-secure.c 2 Jun 2005 20:43:58 -0000
***************
*** 79,85 ****
#include <sys/stat.h>
#include <signal.h>
#include <fcntl.h>
- #include <errno.h>
#include <ctype.h>
#include <sys/socket.h>
#include <unistd.h>
--- 79,84 ----
***************
*** 97,102 ****
--- 96,103 ----

#include "libpq/libpq.h"
#include "miscadmin.h"
+ #include "tcop/tcopprot.h"
+

#ifdef USE_SSL
static DH *load_dh_file(int keylength);
***************
*** 301,308 ****
--- 302,315 ----
}
else
#endif
+ {
+ prepare_for_client_read();
+
n = recv(port->sock, ptr, len, 0);

+ client_read_ended();
+ }
+
return n;
}

***************
*** 395,400 ****
--- 402,474 ----
/* SSL specific code */
/* ------------------------------------------------------------ */
#ifdef USE_SSL
+
+ /*
+ * Private substitute BIO: this wraps the SSL library's standard socket BIO
+ * so that we can enable and disable interrupts just while calling recv().
+ * We cannot have interrupts occurring while the bulk of openssl runs,
+ * because it uses malloc() and possibly other non-reentrant libc facilities.
+ *
+ * As of openssl 0.9.7, we can use the reasonably clean method of interposing
+ * a wrapper around the standard socket BIO's sock_read() method. This relies
+ * on the fact that sock_read() doesn't call anything non-reentrant, in fact
+ * not much of anything at all except recv(). If this ever changes we'd
+ * probably need to duplicate the code of sock_read() in order to push the
+ * interrupt enable/disable down yet another level.
+ */
+
+ static bool my_bio_initialized = false;
+ static BIO_METHOD my_bio_methods;
+ static int (*std_sock_read) (BIO *h, char *buf, int size);
+
+ static int
+ my_sock_read(BIO *h, char *buf, int size)
+ {
+ int res;
+
+ prepare_for_client_read();
+
+ res = std_sock_read(h, buf, size);
+
+ client_read_ended();
+
+ return res;
+ }
+
+ static BIO_METHOD *
+ my_BIO_s_socket(void)
+ {
+ if (!my_bio_initialized)
+ {
+ memcpy(&my_bio_methods, BIO_s_socket(), sizeof(BIO_METHOD));
+ std_sock_read = my_bio_methods.bread;
+ my_bio_methods.bread = my_sock_read;
+ my_bio_initialized = true;
+ }
+ return &my_bio_methods;
+ }
+
+ /* This should exactly match openssl's SSL_set_fd except for using my BIO */
+ static int
+ my_SSL_set_fd(SSL *s, int fd)
+ {
+ int ret=0;
+ BIO *bio=NULL;
+
+ bio=BIO_new(my_BIO_s_socket());
+
+ if (bio == NULL)
+ {
+ SSLerr(SSL_F_SSL_SET_FD,ERR_R_BUF_LIB);
+ goto err;
+ }
+ BIO_set_fd(bio,fd,BIO_NOCLOSE);
+ SSL_set_bio(s,bio,bio);
+ ret=1;
+ err:
+ return(ret);
+ }
+
/*
* Load precomputed DH parameters.
*
***************
*** 718,724 ****
Assert(!port->peer);

if (!(port->ssl = SSL_new(SSL_context)) ||
! !SSL_set_fd(port->ssl, port->sock) ||
SSL_accept(port->ssl) <= 0)
{
ereport(COMMERROR,
--- 792,798 ----
Assert(!port->peer);

if (!(port->ssl = SSL_new(SSL_context)) ||
! !my_SSL_set_fd(port->ssl, port->sock) ||
SSL_accept(port->ssl) <= 0)
{
ereport(COMMERROR,
Index: src/backend/tcop/postgres.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/tcop/postgres.c,v
retrieving revision 1.375.2.2
diff -c -r1.375.2.2 postgres.c
*** src/backend/tcop/postgres.c 26 Sep 2004 00:26:53 -0000 1.375.2.2
--- src/backend/tcop/postgres.c 2 Jun 2005 20:43:58 -0000
***************
*** 109,114 ****
--- 109,121 ----
static bool xact_started = false;

/*
+ * Flag to indicate that we are doing the outer loop's read-from-client,
+ * as opposed to any random read from client that might happen within
+ * commands like COPY FROM STDIN.
+ */
+ static bool DoingCommandRead = false;
+
+ /*
* Flags to implement skip-till-Sync-after-error behavior for messages of
* the extended query protocol.
*/
***************
*** 400,405 ****
--- 407,456 ----
return result;
}

+ /*
+ * prepare_for_client_read -- set up to possibly block on client input
+ *
+ * This must be called immediately before any low-level read from the
+ * client connection. It is necessary to do it at a sufficiently low level
+ * that there won't be any other operations except the read kernel call
+ * itself between this call and the subsequent client_read_ended() call.
+ * In particular there mustn't be use of malloc() or other potentially
+ * non-reentrant libc functions. This restriction makes it safe for us
+ * to allow interrupt service routines to execute nontrivial code while
+ * we are waiting for input.
+ */
+ void
+ prepare_for_client_read(void)
+ {
+ if (DoingCommandRead)
+ {
+ /* Enable immediate processing of asynchronous signals */
+ EnableNotifyInterrupt();
+
+ /* Allow "die" interrupt to be processed while waiting */
+ ImmediateInterruptOK = true;
+
+ /* And don't forget to detect one that already arrived */
+ QueryCancelPending = false;
+ CHECK_FOR_INTERRUPTS();
+ }
+ }
+
+ /*
+ * client_read_ended -- get out of the client-input state
+ */
+ void
+ client_read_ended(void)
+ {
+ if (DoingCommandRead)
+ {
+ ImmediateInterruptOK = false;
+ QueryCancelPending = false; /* forget any CANCEL signal */
+
+ DisableNotifyInterrupt();
+ }
+ }
+

/*
* Parse a query string and pass it through the rewriter.
***************
*** 2706,2711 ****
--- 2757,2763 ----
CritSectionCount = 0; /* should be unnecessary, but... */
disable_sig_alarm(true);
QueryCancelPending = false; /* again in case timeout occurred */
+ DoingCommandRead = false;
DisableNotifyInterrupt();
debug_query_string = NULL;

***************
*** 2814,2833 ****
}

/*
! * (2) deal with pending asynchronous NOTIFY from other backends,
! * and enable async.c's signal handler to execute NOTIFY directly.
! * Then set up other stuff needed before blocking for input.
*/
! QueryCancelPending = false; /* forget any earlier CANCEL
! * signal */
!
! EnableNotifyInterrupt();
!
! /* Allow "die" interrupt to be processed while waiting */
! ImmediateInterruptOK = true;
! /* and don't forget to detect one that already arrived */
! QueryCancelPending = false;
! CHECK_FOR_INTERRUPTS();

/*
* (3) read a command (loop blocks here)
--- 2866,2878 ----
}

/*
! * (2) Allow asynchronous signals to be executed immediately
! * if they come in while we are waiting for client input.
! * (This must be conditional since we don't want, say, reads on
! * behalf of COPY FROM STDIN doing the same thing.)
*/
! QueryCancelPending = false; /* forget any earlier CANCEL signal */
! DoingCommandRead = true;

/*
* (3) read a command (loop blocks here)
***************
*** 2837,2846 ****
/*
* (4) disable async signal conditions again.
*/
! ImmediateInterruptOK = false;
! QueryCancelPending = false; /* forget any CANCEL signal */
!
! DisableNotifyInterrupt();

/*
* (5) check for any other interesting events that happened while
--- 2882,2888 ----
/*
* (4) disable async signal conditions again.
*/
! DoingCommandRead = false;

/*
* (5) check for any other interesting events that happened while
Index: src/include/tcop/tcopprot.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/tcop/tcopprot.h,v
retrieving revision 1.60
diff -c -r1.60 tcopprot.h
*** src/include/tcop/tcopprot.h 4 Aug 2003 02:40:15 -0000 1.60
--- src/include/tcop/tcopprot.h 2 Jun 2005 20:43:58 -0000
***************
*** 49,54 ****
--- 49,56 ----
extern void die(SIGNAL_ARGS);
extern void quickdie(SIGNAL_ARGS);
extern void authdie(SIGNAL_ARGS);
+ extern void prepare_for_client_read(void);
+ extern void client_read_ended(void);
extern int PostgresMain(int argc, char *argv[], const char *username);
extern void ResetUsage(void);
extern void ShowUsage(const char *title);

In response to

Responses

Browse pgsql-bugs by date

  From Date Subject
Next Message pgsql-bugs 2005-06-02 21:23:16 Re: Postgres 7.4.7 hang in async_notify
Previous Message Mauro Delfino 2005-06-02 19:22:07 BUG #1698: Different behavior in UNIQUE and DISTINCT