diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 139aa2b..b4a2898 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1778,6 +1778,27 @@ + parse_ident + + parse_ident(str text, + [ strictmode boolean DEFAULT true ] ) + + text[] + Split qualified identifier into array parts. + When strictmode is false, extra characters after the identifier are ignored. + This is useful for parsing identifiers for objects like functions and arrays that may have trailing + characters. By default, extra characters after the last identifier are considered an error. + second parameter is false, then chars after last identifier are ignored. Note that this function + does not truncate quoted identifiers. If you care about that you should cast the result of this + function to name[]. + + parse_ident('"SomeSchema".someTable') + "SomeSchema,sometable" + + + + + pg_client_encoding pg_client_encoding() diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 923fe58..61d5b80 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -965,3 +965,10 @@ RETURNS jsonb LANGUAGE INTERNAL STRICT IMMUTABLE AS 'jsonb_set'; + +CREATE OR REPLACE FUNCTION + parse_ident(str text, strictmode boolean DEFAULT true) +RETURNS text[] +LANGUAGE INTERNAL +STRICT IMMUTABLE +AS 'parse_ident'; diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c index 2b4ab20..7aa5b76 100644 --- a/src/backend/parser/scansup.c +++ b/src/backend/parser/scansup.c @@ -130,6 +130,15 @@ scanstr(const char *s) char * downcase_truncate_identifier(const char *ident, int len, bool warn) { + return downcase_identifier(ident, len, warn, true); +} + +/* + * a workhorse for downcase_truncate_identifier + */ +char * +downcase_identifier(const char *ident, int len, bool warn, bool truncate) +{ char *result; int i; bool enc_is_single_byte; @@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn) } result[i] = '\0'; - if (i >= NAMEDATALEN) + if (i >= NAMEDATALEN && truncate) truncate_identifier(result, i, warn); return result; } + /* * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes. * diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index 6a306f3..3072c32 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -21,12 +21,15 @@ #include #include "access/sysattr.h" +#include "access/htup_details.h" #include "catalog/catalog.h" +#include "catalog/namespace.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "commands/dbcommands.h" #include "funcapi.h" #include "miscadmin.h" +#include "parser/scansup.h" #include "parser/keywords.h" #include "postmaster/syslogger.h" #include "rewrite/rewriteHandler.h" @@ -38,6 +41,7 @@ #include "utils/ruleutils.h" #include "tcop/tcopprot.h" #include "utils/acl.h" +#include "utils/array.h" #include "utils/builtins.h" #include "utils/timestamp.h" @@ -598,3 +602,173 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS); } + + +/* + * This simple parser utility are compatible with lexer implementation, + * used only in parse_ident function + */ +static bool +is_ident_start(unsigned char c) +{ + if (c == '_') + return true; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + return true; + + if (c >= 0200 && c <= 0377) + return true; + + return false; +} + +static bool +is_ident_cont(unsigned char c) +{ + if (c >= '0' && c <= '9') + return true; + + return is_ident_start(c); +} + +/* + * parse_ident - parse SQL composed identifier to separate identifiers. + * When strict mode is active (second parameter), then any chars after + * last identifiers are disallowed. + */ +Datum +parse_ident(PG_FUNCTION_ARGS) +{ + text *qualname; + char *qualname_str; + bool strict_mode; + ArrayBuildState *astate = NULL; + char *nextp; + bool after_dot = false; + + qualname = PG_GETARG_TEXT_PP(0); + qualname_str = text_to_cstring(qualname); + strict_mode = PG_GETARG_BOOL(1); + + nextp = qualname_str; + + /* skip leading whitespace */ + while (isspace((unsigned char) *nextp)) + nextp++; + + for (;;) + { + char *curname; + char *endp; + bool missing_ident; + + missing_ident = true; + + if (*nextp == '\"') + { + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '\"'); + if (endp == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unclosed double quotes"), + errdetail("string \"%s\" is not valid identifier", + text_to_cstring(qualname)))); + if (endp[1] != '\"') + break; + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + nextp = endp + 1; + *endp = '\0'; + + if (endp - curname == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("identifier should not be empty: \"%s\"", + text_to_cstring(qualname)))); + + astate = accumArrayResult(astate, + CStringGetTextDatum(curname), false, + TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + else + { + if (is_ident_start((unsigned char) *nextp)) + { + char *downname; + int len; + text *part; + + curname = nextp++; + while (is_ident_cont((unsigned char) *nextp)) + nextp++; + + len = nextp - curname; + + /* + * We don't would to truncate identifier implicitli in this + * moment, so we use downcase_identifier instead + * dowcase_truncate_identifier. The user can do truncating + * later if it is necessary for his purpouse. + */ + downname = downcase_identifier(curname, len, false, false); + part = cstring_to_text_with_len(downname, len); + astate = accumArrayResult(astate, + PointerGetDatum(part), false, + TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + } + + if (missing_ident) + { + /* choose text of adequate error message */ + if (*nextp == '.') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing identifier before \".\" symbol: \"%s\"", + text_to_cstring(qualname)))); + else if (after_dot) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing identifier after \".\" symbol: \"%s\"", + text_to_cstring(qualname)))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing identifier: \"%s\"", + text_to_cstring(qualname)))); + } + + while (isspace((unsigned char) *nextp)) + nextp++; + + if (*nextp == '.') + { + after_dot = true; + nextp++; + while (isspace((unsigned char) *nextp)) + nextp++; + continue; + } + else if (*nextp == '\0') + { + break; + } + else + { + if (strict_mode) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("identifier contains disallowed characters: \"%s\"", + text_to_cstring(qualname)))); + break; + } + } + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 5ded13e..d7c09a7 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3451,6 +3451,9 @@ DESCR("I/O"); DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ )); DESCR("convert namespace name to regnamespace"); +DATA(insert OID = 3318 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strictmode}" _null_ _null_ parse_ident _null_ _null_ _null_ )); +DESCR("parse qualified identifier to array of identifiers"); + DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ )); diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h index 4f4164b..4f95c81 100644 --- a/src/include/parser/scansup.h +++ b/src/include/parser/scansup.h @@ -20,6 +20,9 @@ extern char *scanstr(const char *s); extern char *downcase_truncate_identifier(const char *ident, int len, bool warn); +extern char *downcase_identifier(const char *ident, int len, + bool warn, bool truncate); + extern void truncate_identifier(char *ident, int len, bool warn); extern bool scanner_isspace(char ch); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 5e8e832..b1b6ef6 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -504,6 +504,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS); extern Datum pg_collation_for(PG_FUNCTION_ARGS); extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS); extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS); +extern Datum parse_ident(PG_FUNCTION_ARGS); /* oid.c */ extern Datum oidin(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out index b359d52..66569f4 100644 --- a/src/test/regress/expected/name.out +++ b/src/test/regress/expected/name.out @@ -124,3 +124,55 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; (2 rows) DROP TABLE NAME_TBL; +DO $$ +DECLARE r text[]; +BEGIN + r := parse_ident('Schemax.Tabley'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + r := parse_ident('"SchemaX"."TableY"'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); +END; +$$; +NOTICE: schemax.tabley +NOTICE: "SchemaX"."TableY" +SELECT parse_ident('foo.boo'); + parse_ident +------------- + {foo,boo} +(1 row) + +SELECT parse_ident('foo.boo[]'); -- should fail +ERROR: identifier contains disallowed characters: "foo.boo[]" +SELECT parse_ident('foo.boo[]', strictmode => false); -- ok + parse_ident +------------- + {foo,boo} +(1 row) + +-- should fail +SELECT parse_ident(' '); +ERROR: missing identifier: " " +SELECT parse_ident(' .aaa'); +ERROR: missing identifier before "." symbol: " .aaa" +SELECT parse_ident(' aaa . '); +ERROR: missing identifier after "." symbol: " aaa . " +SELECT parse_ident('aaa.a%b'); +ERROR: identifier contains disallowed characters: "aaa.a%b" +SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ; + length | length +--------+-------- + 414 | 289 +(1 row) + +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"'); + parse_ident +----------------------------------------------------------------------------------------------------------- + {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} +(1 row) + +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[]; + parse_ident +------------------------------------------------------------------------------------------------------ + {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} +(1 row) + diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql index 1c7a671..2e70e4e 100644 --- a/src/test/regress/sql/name.sql +++ b/src/test/regress/sql/name.sql @@ -52,3 +52,28 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]'; SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; DROP TABLE NAME_TBL; + +DO $$ +DECLARE r text[]; +BEGIN + r := parse_ident('Schemax.Tabley'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + r := parse_ident('"SchemaX"."TableY"'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); +END; +$$; + +SELECT parse_ident('foo.boo'); +SELECT parse_ident('foo.boo[]'); -- should fail +SELECT parse_ident('foo.boo[]', strictmode => false); -- ok + +-- should fail +SELECT parse_ident(' '); +SELECT parse_ident(' .aaa'); +SELECT parse_ident(' aaa . '); +SELECT parse_ident('aaa.a%b'); + +SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ; + +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"'); +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];