From 2dee9b996ed8f242ceb5d0f1924a9c0455620e53 Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.haribabu@gmail.com>
Date: Wed, 27 Mar 2019 17:56:48 +1100
Subject: [PATCH v13 4/8] New prefer-read target_session_attrs type

With this prefer-read option type, application can prefer
connecting to a read-only server if available from the list
of hosts, otherwise connect it to read-write server
---
 doc/src/sgml/libpq.sgml               |  21 ++--
 src/interfaces/libpq/fe-connect.c     | 161 ++++++++++++++++++++++----
 src/interfaces/libpq/libpq-fe.h       |   3 +-
 src/interfaces/libpq/libpq-int.h      |  13 ++-
 src/test/recovery/t/001_stream_rep.pl |  14 ++-
 5 files changed, 177 insertions(+), 35 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index edda94196b..29f9ae5c78 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1648,12 +1648,12 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
       <term><literal>target_session_attrs</literal></term>
       <listitem>
        <para>
-        The supported options for this parameter are, <literal>any</literal> and
-        <literal>read-write</literal>. The default value of this parameter,
-        <literal>any</literal>, regards all connections as acceptable.
-        If multiple hosts were specified in the connection string, based on the
-        specified value, any remaining servers will be tried before confirming
-        succesful connection or failure.
+        The supported options for this parameter are, <literal>any</literal>,
+        <literal>read-write</literal> and <literal>prefer-read</literal>.
+        The default value of this parameter, <literal>any</literal>, regards
+        all connections as acceptable. If multiple hosts were specified in the
+        connection string, based on the specified value, any remaining servers
+        will be tried before confirming succesful connection or failure.
        </para>
 
        <para>
@@ -1662,6 +1662,13 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         is considered acceptable.
        </para>
 
+       <para>
+        If this parameter is set to <literal>prefer-read</literal>, only a
+        connection in which read-only transactions are accepted by default
+        is preferred. If no such connections can be found, then a connection
+        in which read-write transactions accepted will be considered.
+       </para>
+
        <para>
         To find out whether the server supports read-write transactions are not,
         query <literal>SHOW transaction_read_only</literal> will be sent upon any
@@ -1671,7 +1678,7 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         configuration parameter that is reported by the server upon successful connection.
        </para>
       </listitem>
-    </varlistentry>
+     </varlistentry>
     </variablelist>
    </para>
   </sect2>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index abac93d6da..8673b8e903 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -341,7 +341,7 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 
 	{"target_session_attrs", "PGTARGETSESSIONATTRS",
 		DefaultTargetSessionAttrs, NULL,
-		"Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+		"Target-Session-Attrs", "", 12, /* sizeof("prefer-read") = 12 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
 	/* Terminating entry --- MUST BE LAST */
@@ -1299,6 +1299,8 @@ connectOptions2(PGconn *conn)
 			conn->requested_session_type = SESSION_TYPE_ANY;
 		else if (strcmp(conn->target_session_attrs, "read-write") == 0)
 			conn->requested_session_type = SESSION_TYPE_READ_WRITE;
+		else if (strcmp(conn->target_session_attrs, "prefer-read") == 0)
+			conn->requested_session_type = SESSION_TYPE_PREFER_READ;
 		else
 		{
 			conn->status = CONNECTION_BAD;
@@ -2232,13 +2234,31 @@ keep_going:						/* We will come back to here until there is
 
 		if (conn->whichhost + 1 >= conn->nconnhost)
 		{
-			/*
-			 * Oops, no more hosts.  An appropriate error message is already
-			 * set up, so just set the right status.
-			 */
-			goto error_return;
+			if (conn->read_write_host_index >= 0)
+			{
+				/*
+				 * Getting here means, failed to connect to read-only servers
+				 * and now try connect to read-write server again.
+				 */
+				conn->whichhost = conn->read_write_host_index;
+
+				/*
+				 * Reset the host index value to avoid recursion during the
+				 * second connection attempt.
+				 */
+				conn->read_write_host_index = -2;
+			}
+			else
+			{
+				/*
+				 * Oops, no more hosts.  An appropriate error message is
+				 * already set up, so just set the right status.
+				 */
+				goto error_return;
+			}
 		}
-		conn->whichhost++;
+		else
+			conn->whichhost++;
 
 		/* Drop any address info for previous host */
 		release_conn_addrinfo(conn);
@@ -3445,7 +3465,8 @@ keep_going:						/* We will come back to here until there is
 		case CONNECTION_CHECK_TARGET:
 			{
 				/*
-				 * If a read-write connection is required, see if we have one.
+				 * If a read-write or prefer-read connection is required, see
+				 * if we have one.
 				 *
 				 * Servers before 7.4 lack the transaction_read_only GUC, but
 				 * by the same token they don't have any read-only mode, so we
@@ -3460,8 +3481,8 @@ keep_going:						/* We will come back to here until there is
 						 * Save existing error messages across the PQsendQuery
 						 * attempt.  This is necessary because PQsendQuery is
 						 * going to reset conn->errorMessage, so we would lose
-						 * error messages related to previous hosts we have tried
-						 * and failed to connect to.
+						 * error messages related to previous hosts we have
+						 * tried and failed to connect to.
 						 */
 						if (!saveErrorMessage(conn, &savedMessage))
 							goto error_return;
@@ -3473,16 +3494,30 @@ keep_going:						/* We will come back to here until there is
 							restoreErrorMessage(conn, &savedMessage);
 							goto error_return;
 						}
+
 						conn->status = CONNECTION_CHECK_WRITABLE;
+
 						restoreErrorMessage(conn, &savedMessage);
 						return PGRES_POLLING_READING;
 					}
-					else if (conn->transaction_read_only)
+					else if ((conn->transaction_read_only &&
+							  conn->requested_session_type == SESSION_TYPE_READ_WRITE) ||
+							 (!conn->transaction_read_only &&
+							  conn->requested_session_type == SESSION_TYPE_PREFER_READ))
 					{
-						/* Not writable; fail this connection. */
+						/* Not a requested type; fail this connection. */
 						const char *displayed_host;
 						const char *displayed_port;
 
+						/*
+						 * The following scenario is possible only for the
+						 * prefer-read mode for the next pass of the list of
+						 * connections as it couldn't find any servers that
+						 * are default read-only.
+						 */
+						if (conn->read_write_host_index == -2)
+							goto consume_checked_target_connection;
+
 						/* Append error report to conn->errorMessage. */
 						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
 							displayed_host = conn->connhost[conn->whichhost].hostaddr;
@@ -3492,16 +3527,28 @@ keep_going:						/* We will come back to here until there is
 						if (displayed_port == NULL || displayed_port[0] == '\0')
 							displayed_port = DEF_PGPORT_STR;
 
-						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
-														"connection to server "
-														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+						if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a writable "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+						else
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a readonly "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+							conn->read_write_host_index == -1)
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3509,8 +3556,36 @@ keep_going:						/* We will come back to here until there is
 						conn->try_next_host = true;
 						goto keep_going;
 					}
+
+					/* obtained the requested type, consume it */
+					goto consume_checked_target_connection;
 				}
 
+				/*
+				 * Requested type is prefer-read, then record this host index
+				 * and try the other before considering it later
+				 */
+				if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+					conn->read_write_host_index != -2)
+				{
+					/* Close connection politely. */
+					conn->status = CONNECTION_OK;
+					sendTerminateConn(conn);
+
+					/* Record read-write host index */
+					if (conn->read_write_host_index == -1)
+						conn->read_write_host_index = conn->whichhost;
+
+					/*
+					 * Try next host if any, but we don't want to consider
+					 * additional addresses for this host.
+					 */
+					conn->try_next_host = true;
+					goto keep_going;
+				}
+
+		consume_checked_target_connection:
+
 				/* We can release the address list now. */
 				release_conn_addrinfo(conn);
 
@@ -3608,11 +3683,33 @@ keep_going:						/* We will come back to here until there is
 					PQntuples(res) == 1)
 				{
 					char	   *val;
+					bool		readonly_server;
 
 					val = PQgetvalue(res, 0, 0);
-					if (strncmp(val, "on", 2) == 0)
+					readonly_server = (strncmp(val, "on", 2) == 0);
+
+					/*
+					 * Server is read-only and requested mode is read-write,
+					 * ignore it. Server is read-write and requested mode is
+					 * prefer-read, record it for the first time and try to
+					 * consume in the next scan (it means no read-only server
+					 * is found in the first scan).
+					 */
+					if ((readonly_server &&
+						 conn->requested_session_type == SESSION_TYPE_READ_WRITE) ||
+						(!readonly_server &&
+						 conn->requested_session_type == SESSION_TYPE_PREFER_READ))
 					{
-						/* Not writable; fail this connection. */
+						/*
+						 * The following scenario is possible only for the
+						 * prefer-read mode for the next pass of the list of
+						 * connections as it couldn't find any servers that
+						 * are default read-only.
+						 */
+						if (conn->read_write_host_index == -2)
+							goto consume_checked_write_connection;
+
+						/* Not a requested type; fail this connection. */
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
 
@@ -3625,16 +3722,28 @@ keep_going:						/* We will come back to here until there is
 						if (displayed_port == NULL || displayed_port[0] == '\0')
 							displayed_port = DEF_PGPORT_STR;
 
-						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
-														"connection to server "
-														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+						if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a writable "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+						else
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a readonly "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+							conn->read_write_host_index == -1)
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3643,7 +3752,8 @@ keep_going:						/* We will come back to here until there is
 						goto keep_going;
 					}
 
-					/* Session is read-write, so we're good. */
+			consume_checked_write_connection:
+					/* Session is requested type, so we're good. */
 					PQclear(res);
 					termPQExpBuffer(&savedMessage);
 
@@ -3830,6 +3940,7 @@ makeEmptyPGconn(void)
 #endif
 
 	conn->requested_session_type = SESSION_TYPE_ANY;
+	conn->read_write_host_index = -1;
 
 	/*
 	 * We try to send at least 8K at a time, which is the usual size of pipe
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 0612e68c62..563f8b98ce 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -74,7 +74,8 @@ typedef enum
 typedef enum
 {
 	SESSION_TYPE_ANY = 0,		/* Any session (default) */
-	SESSION_TYPE_READ_WRITE		/* Read-write session */
+	SESSION_TYPE_READ_WRITE,	/* Read-write session */
+	SESSION_TYPE_PREFER_READ	/* Prefer read only session */
 }			TargetSessionAttrsType;
 
 typedef enum
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 7d26b94f9f..174f370818 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -365,7 +365,10 @@ struct pg_conn
 	char	   *krbsrvname;		/* Kerberos service name */
 #endif
 
-	/* Type of connection to make.  Possible values: any, read-write. */
+	/*
+	 * Type of connection to make.  Possible values: any, read-write,
+	 * prefer-read.
+	 */
 	char	   *target_session_attrs;
 	TargetSessionAttrsType requested_session_type;
 
@@ -402,6 +405,14 @@ struct pg_conn
 	pg_conn_host *connhost;		/* details about each named host */
 	char	   *connip;			/* IP address for current network connection */
 
+	/*
+	 * First read-write host index in the connection string.
+	 *
+	 * Initial value is -1, then the index of the first read-write host, -2
+	 * during the second attempt of connection to avoid recursion.
+	 */
+	int			read_write_host_index;
+
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
 								 * unconnected */
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..af465be505 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -121,6 +121,18 @@ test_target_session_attrs($node_master, $node_standby_1, $node_master, "any",
 test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 	"any", 0);
 
+# Connect to standby1 in "prefer-read" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1, "prefer-read",
+	0);
+
+# Connect to standby1 in "prefer-read" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+	"prefer-read", 0);
+
+# Connect to node_master in "prefer-read" mode with only master list.
+test_target_session_attrs($node_master, $node_master, $node_master,
+	"prefer-read", 0);
+
 # Test for SHOW commands using a WAL sender connection with a replication
 # role.
 note "testing SHOW commands for replication connection";
-- 
2.17.1

