From cfe184cbb45134360713405bb4d39b674a298672 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 25 Nov 2020 07:46:25 +0100 Subject: [PATCH v3] Add result_format_auto_binary_types setting The current way binary results are requested in the extended query protocol is too cumbersome for some practical uses. Some clients, for example the JDBC driver, have built-in support for handling specific data types in binary. Such a client would always have to request a result row description (Describe statement) before sending a Bind message, in order to be able to pick out the result columns it should request in binary. The feedback was that this extra round trip is often not worth it in terms of performance, and so it is not done and binary format is not used when it could be. The solution is to allow a client to register for a session which types it wants to always get in binary. This is done by a new GUC setting. For example, to get int2, int4, int8 in binary by default, you could set SET result_format_auto_binary_types = int2,int4,int8; To request result formats based on this setting, send format code -1 (instead of 0 or 1) in the Bind message. Discussion: https://www.postgresql.org/message-id/flat/40cbb35d-774f-23ed-3079-03f938aacdae@2ndquadrant.com --- doc/src/sgml/config.sgml | 53 ++++++++ doc/src/sgml/libpq.sgml | 10 +- doc/src/sgml/protocol.sgml | 7 +- src/backend/tcop/pquery.c | 246 ++++++++++++++++++++++++++++++++++- src/backend/utils/misc/guc.c | 12 ++ src/include/tcop/pquery.h | 5 + 6 files changed, 323 insertions(+), 10 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 3795c57004..20049fe7df 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8530,6 +8530,59 @@ Statement Behavior + + result_format_auto_binary_types (string) + + result_format_auto_binary_types + configuration parameter + + + + + This parameter specifies which data types are to be sent from the + server in binary format for rows returned in the extended query + protocol when result format code -1 is specified in the Bind message. It is intended + to be used by client libraries that prefer to handle specific, but not + all, data types in binary format. The typical usage would be that the + client library sets this value when it starts a connection. (A client + library that wants to handle all types in binary + doesn't need to use this because it can just specify the format code + for all types at once in the protocol message.) + + + + The value is a comma-separated list of type names. For example, if + you want to automatically get values of the types int2, + int4, and int8 in binary while leaving the + rest in text, an appropriate setting would be + int2,int4,int8 or + smallint,int,bigint. Type names may also be + double-quoted, as in SQL syntax. The parsing of this value is run + with a temporary search path that only contains the + pg_catalog schema. Therefore, if data types in + other schemas are to be included, they must be fully schema-qualified. + + + + A non-existing type is ignored (but a message is written to the server + log). This allows using the same value for different databases that + might have different extensions installed. Because validating this + setting requires system catalog access, invalid settings are not + diagnosed at the time the value is set but only when the value is used + the first time, which happens when the first extended query protocol + result is returned to the client. + + + + This setting applies only to result rows from the extended query + protocol, so it does not affect usage of + psql or pg_dump + for example. Also, it does not affect the format of query parameters. + + + + diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 9d4b6ab4a8..2a4ec7ff63 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -2707,10 +2707,12 @@ Main Functions resultFormat - Specify zero to obtain results in text format, or one to obtain - results in binary format. (There is not currently a provision - to obtain different result columns in different formats, - although that is possible in the underlying protocol.) + Specify 0 to obtain results in text format, or 1 to obtain results + in binary format, or -1 to have the setting of be applied by the + server. (There is not currently a provision to obtain different + result columns in different formats, although that is possible in + the underlying protocol.) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index cee28889e1..d0e447803a 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -3567,7 +3567,7 @@ Message Formats - + Bind (F) @@ -3709,8 +3709,9 @@ Message Formats - The result-column format codes. Each must presently be - zero (text) or one (binary). + The result-column format codes. Each must be 0 for text, or 1 + for binary, or -1 to apply the setting of . diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 96ea74f118..a06d5f3635 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -18,14 +18,19 @@ #include #include "access/xact.h" +#include "catalog/namespace.h" #include "commands/prepare.h" #include "executor/tstoreReceiver.h" #include "miscadmin.h" +#include "parser/parse_type.h" +#include "parser/scansup.h" #include "pg_trace.h" #include "tcop/pquery.h" #include "tcop/utility.h" +#include "utils/inval.h" #include "utils/memutils.h" #include "utils/snapmgr.h" +#include "utils/syscache.h" /* @@ -597,6 +602,235 @@ PortalStart(Portal portal, ParamListInfo params, portal->status = PORTAL_READY; } +/* + * Support for result_format_auto_binary_types setting + * + * Interpreting this setting requires catalog access, so we cannot do most of + * the work in the usual check or assign hooks. Instead, we do it the first + * time it is used. + */ + +/* GUC variable */ +char *result_format_auto_binary_types; + +/* remembers whether the internal representation is up to date */ +static bool result_format_auto_binary_types_internal_valid = false; + +/* + * SplitGUCTypeList() --- parse a list of types + * + * Similar to SplitIdentifierString() etc., but does not strip quotes or alter + * the list elements in any way, since this is done by parseTypeString() + * later. It only checks for comma as a separator and keeps track of balanced + * double quotes. + */ +static bool +SplitGUCTypeList(char *rawstring, List **namelist) +{ + const char separator = ','; + char *nextp = rawstring; + bool done = false; + + *namelist = NIL; + + while (scanner_isspace(*nextp)) + nextp++; /* skip leading whitespace */ + + if (*nextp == '\0') + return true; /* allow empty string */ + + /* At the top of the loop, we are at start of a new list element. */ + do + { + char *curname; + char *endp; + bool in_quote = false; + + curname = nextp; + + while (*nextp) + { + if (!*nextp) + break; + + if (*nextp == '"') + in_quote = !in_quote; + + if (*nextp == separator && !in_quote) + break; + + nextp++; + } + + endp = nextp; + if (curname == nextp) + return false; /* empty element not allowed */ + + if (*nextp == separator) + nextp++; + else if (*nextp == '\0') + { + if (in_quote) + return false; + done = true; + } + else + return false; /* invalid syntax */ + + /* Now safe to overwrite separator with a null */ + *endp = '\0'; + + /* + * Finished isolating current name --- add it to list + */ + *namelist = lappend(*namelist, curname); + + /* Loop back if we didn't reach end of string */ + } while (!done); + + return true; +} + +bool +check_result_format_auto_binary_types(char **newval, void **extra, GucSource source) +{ + char *rawstring; + List *elemlist; + + rawstring = pstrdup(*newval); + if (!SplitGUCTypeList(rawstring, &elemlist)) + { + GUC_check_errdetail("List syntax is invalid."); + pfree(rawstring); + list_free(elemlist); + return false; + } + + pfree(rawstring); + list_free(elemlist); + + return true; +} + +/* + * GUC assign hook invalidates internal representation when the setting changes + */ +void +assign_result_format_auto_binary_types(const char *newval, void *extra) +{ + result_format_auto_binary_types_internal_valid = false; +} + +/* + * Invalidate when anything relevant in the system catalogs changes + */ +static void +invalidate_result_format_auto_binary_types_internal(Datum arg, int cacheid, uint32 hashvalue) +{ + result_format_auto_binary_types_internal_valid = false; +} + +/* + * Error context callback, so the error messages are clearer, since they + * happen as part of query processing, not GUC processing. + */ +static void +_parse_rfabt_error_callback(void *arg) +{ + const char *value = (const char *) arg; + + errcontext("parsing %s element \"%s\"", + "result_format_auto_binary_types", + value); +} + +/* + * Subroutine for PortalSetResultFormat(): Return format code for type by + * using result_format_auto_binary_types setting. + */ +static int16 +resolve_result_format(Oid typeid) +{ + static List *result_format_auto_binary_types_internal = NIL; /* type OID list */ + static bool syscache_callback_set = false; + + if (!syscache_callback_set) + { + CacheRegisterSyscacheCallback(TYPEOID, invalidate_result_format_auto_binary_types_internal, (Datum) 0); + syscache_callback_set = true; + } + + if (!result_format_auto_binary_types_internal_valid) + { + MemoryContext oldcontext; + char *rawstring; + List *elemlist; + ListCell *lc; + + rawstring = pstrdup(result_format_auto_binary_types); + if (!SplitGUCTypeList(rawstring, &elemlist)) + { + pfree(rawstring); + list_free(elemlist); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid list syntax in parameter \"%s\"", + "result_format_auto_binary_types"))); + } + + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + list_free(result_format_auto_binary_types_internal); + result_format_auto_binary_types_internal = NIL; + + foreach(lc, elemlist) + { + char *str = lfirst(lc); + ErrorContextCallback myerrcontext; + OverrideSearchPath temppath = { .addCatalog = true }; + Oid typeid; + + myerrcontext.callback = _parse_rfabt_error_callback; + myerrcontext.arg = str; + myerrcontext.previous = error_context_stack; + error_context_stack = &myerrcontext; + + /* use pg_catalog-only search_path */ + PushOverrideSearchPath(&temppath); + + parseTypeString(str, &typeid, NULL, true); + + PopOverrideSearchPath(); + + if (typeid) + result_format_auto_binary_types_internal = + list_append_unique_oid(result_format_auto_binary_types_internal, + typeid); + else + /* + * We ignore nonexisting types, so that one setting can be + * shared between different databases that might have + * different extensions installed. But we should diagnose + * that we are ignoring a type, otherwise typos and similar + * might never get noticed. + */ + ereport(LOG, + errmsg("type %s does not exist", str)); + + error_context_stack = myerrcontext.previous; + } + + MemoryContextSwitchTo(oldcontext); + + pfree(rawstring); + list_free(elemlist); + + result_format_auto_binary_types_internal_valid = true; + } + + return list_member_oid(result_format_auto_binary_types_internal, typeid) ? 1 : 0; +} + /* * PortalSetResultFormat * Select the format codes for a portal's output. @@ -628,7 +862,11 @@ PortalSetResultFormat(Portal portal, int nFormats, int16 *formats) (errcode(ERRCODE_PROTOCOL_VIOLATION), errmsg("bind message has %d result formats but query has %d columns", nFormats, natts))); - memcpy(portal->formats, formats, natts * sizeof(int16)); + + for (i = 0; i < natts; i++) + portal->formats[i] = (formats[i] == -1 + ? resolve_result_format(TupleDescAttr(portal->tupDesc, i)->atttypid) + : formats[i]); } else if (nFormats > 0) { @@ -636,11 +874,13 @@ PortalSetResultFormat(Portal portal, int nFormats, int16 *formats) int16 format1 = formats[0]; for (i = 0; i < natts; i++) - portal->formats[i] = format1; + portal->formats[i] = (format1 == -1 + ? resolve_result_format(TupleDescAttr(portal->tupDesc, i)->atttypid) + : format1); } else { - /* use default format for all columns */ + /* by default use text format for all columns */ for (i = 0; i < natts; i++) portal->formats[i] = 0; } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index bb34630e8e..79491857fa 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -83,6 +83,7 @@ #include "storage/predicate.h" #include "storage/proc.h" #include "storage/standby.h" +#include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/acl.h" @@ -4448,6 +4449,17 @@ static struct config_string ConfigureNamesString[] = check_backtrace_functions, assign_backtrace_functions, NULL }, + { + {"result_format_auto_binary_types", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Which data types to send in binary for format -1 in the extended query protocol."), + NULL, + GUC_LIST_INPUT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE + }, + &result_format_auto_binary_types, + "", + check_result_format_auto_binary_types, assign_result_format_auto_binary_types, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL diff --git a/src/include/tcop/pquery.h b/src/include/tcop/pquery.h index 437642cc72..bceca24d2d 100644 --- a/src/include/tcop/pquery.h +++ b/src/include/tcop/pquery.h @@ -15,10 +15,12 @@ #define PQUERY_H #include "nodes/parsenodes.h" +#include "utils/guc.h" #include "utils/portal.h" extern PGDLLIMPORT Portal ActivePortal; +extern char *result_format_auto_binary_types; extern PortalStrategy ChoosePortalStrategy(List *stmts); @@ -30,6 +32,9 @@ extern List *FetchStatementTargetList(Node *stmt); extern void PortalStart(Portal portal, ParamListInfo params, int eflags, Snapshot snapshot); +extern bool check_result_format_auto_binary_types(char **newval, void **extra, GucSource source); +extern void assign_result_format_auto_binary_types(const char *newval, void *extra); + extern void PortalSetResultFormat(Portal portal, int nFormats, int16 *formats); base-commit: a7e65dc88b6f088fc2fcf5a660d866de644b1300 -- 2.29.2