From a09f95bf56a7b6400ebce9e6faad90a434d21ac8 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Sun, 1 Nov 2020 15:53:17 -0500 Subject: [PATCH] Support negative indexes in split_part Negative indexes count from the right instead of the left. For example: split_part('a@b@c@d', -2) -> c The motivation is to provide a straightforward alternative to the complicated and inefficient idioms available for this today, e.g.: reverse(split_part(reverse(haystack), reverse(needle), 1)) (regexp_match(haystack, needle || '$'))[1] Discussion: http://postgr.es/m/CAPWqQZR%2B-5pAZNSSrnmYczRaX-huemc%3DoO8URvDZvUA-M%3DMOBA%40mail.gmail.com --- doc/src/sgml/func.sgml | 4 +- src/backend/utils/adt/varlena.c | 62 ++++++++++++++++++++++++--- src/test/regress/expected/strings.out | 62 ++++++++++++++++++++++++++- src/test/regress/sql/strings.sql | 20 +++++++++ 4 files changed, 139 insertions(+), 9 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7ef2ec9972..d0880ab1f3 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3356,7 +3356,9 @@ repeat('Pg', 4) PgPgPgPg Splits string at occurrences of delimiter and returns - the n'th field (counting from one). + the n'th field (counting from one), + or the |n|'th-to-last field if + n is negative. split_part('abc~@~def~@~ghi', '~@~', 2) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d7bc330541..c90550a183 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1471,6 +1471,19 @@ text_position_get_match_pos(TextPositionState *state) } } +/* + * Resets to the initial state installed by text_position_setup. + * The next call to text_position_next will search from the beginning + * of the string. + */ +static void +text_position_reset(TextPositionState *state) +{ + state->last_match = NULL; + state->refpoint = state->str1; + state->refpos = 0; +} + static void text_position_cleanup(TextPositionState *state) { @@ -4582,7 +4595,7 @@ replace_text_regexp(text *src_text, void *regexp, /* * split_part * parse input string - * return ord item (1 based) + * return ord item (1 based, negative counts from end) * based on provided field separator */ Datum @@ -4600,10 +4613,10 @@ split_part(PG_FUNCTION_ARGS) bool found; /* field number is 1 based */ - if (fldnum < 1) + if (fldnum == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("field position must be greater than zero"))); + errmsg("field position must not be zero"))); inputstring_len = VARSIZE_ANY_EXHDR(inputstring); fldsep_len = VARSIZE_ANY_EXHDR(fldsep); @@ -4615,8 +4628,8 @@ split_part(PG_FUNCTION_ARGS) /* empty field separator */ if (fldsep_len < 1) { - /* if first field, return input string, else empty string */ - if (fldnum == 1) + /* if first or last field, return input string, else empty string */ + if (fldnum == 1 || fldnum == -1) PG_RETURN_TEXT_P(inputstring); else PG_RETURN_TEXT_P(cstring_to_text("")); @@ -4632,14 +4645,49 @@ split_part(PG_FUNCTION_ARGS) if (!found) { text_position_cleanup(&state); - /* if field 1 requested, return input string, else empty string */ - if (fldnum == 1) + /* if first or last field, return input string, else empty string */ + if (fldnum == 1 || fldnum == -1) PG_RETURN_TEXT_P(inputstring); else PG_RETURN_TEXT_P(cstring_to_text("")); } end_ptr = text_position_get_match_ptr(&state); + /* + * negative fields count from the right + * convert to positive field number by counting total number of fields + */ + if (fldnum < 0) + { + /* searching from the first match, so string has two fields to start */ + int fldlen = 2; + + while (text_position_next(&state)) + fldlen++; + + /* special case of last field does not require extra pass */ + if (fldnum == -1) + { + start_ptr = text_position_get_match_ptr(&state) + fldsep_len; + end_ptr = VARDATA_ANY(inputstring) + inputstring_len; + text_position_cleanup(&state); + PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr, + end_ptr - start_ptr)); + } + + /* nonexistent field, so return empty string */ + if (-fldnum > fldlen) + { + text_position_cleanup(&state); + PG_RETURN_TEXT_P(cstring_to_text("")); + } + + /* reset to pointing at first match, but now with positive fldnum */ + fldnum += fldlen + 1; + text_position_reset(&state); + text_position_next(&state); + } + while (found && --fldnum > 0) { /* identify bounds of next field */ diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 8c034c9599..3226f3c0f7 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -1552,8 +1552,44 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo"; -- -- test split_part -- +select split_part('','@',1) AS "empty string"; + empty string +-------------- + +(1 row) + +select split_part('','@',-1) AS "empty string"; + empty string +-------------- + +(1 row) + +select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase"; + joeuser@mydatabase +-------------------- + joeuser@mydatabase +(1 row) + +select split_part('joeuser@mydatabase','',2) AS "empty string"; + empty string +-------------- + +(1 row) + +select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase"; + joeuser@mydatabase +-------------------- + joeuser@mydatabase +(1 row) + +select split_part('joeuser@mydatabase','',-2) AS "empty string"; + empty string +-------------- + +(1 row) + select split_part('joeuser@mydatabase','@',0) AS "an error"; -ERROR: field position must be greater than zero +ERROR: field position must not be zero select split_part('joeuser@mydatabase','@',1) AS "joeuser"; joeuser --------- @@ -1578,6 +1614,30 @@ select split_part('@joeuser@mydatabase@','@',2) AS "joeuser"; joeuser (1 row) +select split_part('joeuser@mydatabase','@',-1) AS "mydatabase"; + mydatabase +------------ + mydatabase +(1 row) + +select split_part('joeuser@mydatabase','@',-2) AS "joeuser"; + joeuser +--------- + joeuser +(1 row) + +select split_part('joeuser@mydatabase','@',-3) AS "empty string"; + empty string +-------------- + +(1 row) + +select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase"; + mydatabase +------------ + mydatabase +(1 row) + -- -- test to_hex -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 14901a2692..3f33a501f7 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -533,6 +533,18 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo"; -- -- test split_part -- +select split_part('','@',1) AS "empty string"; + +select split_part('','@',-1) AS "empty string"; + +select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase"; + +select split_part('joeuser@mydatabase','',2) AS "empty string"; + +select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase"; + +select split_part('joeuser@mydatabase','',-2) AS "empty string"; + select split_part('joeuser@mydatabase','@',0) AS "an error"; select split_part('joeuser@mydatabase','@',1) AS "joeuser"; @@ -543,6 +555,14 @@ select split_part('joeuser@mydatabase','@',3) AS "empty string"; select split_part('@joeuser@mydatabase@','@',2) AS "joeuser"; +select split_part('joeuser@mydatabase','@',-1) AS "mydatabase"; + +select split_part('joeuser@mydatabase','@',-2) AS "joeuser"; + +select split_part('joeuser@mydatabase','@',-3) AS "empty string"; + +select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase"; + -- -- test to_hex -- -- 2.25.1