From a09f95bf56a7b6400ebce9e6faad90a434d21ac8 Mon Sep 17 00:00:00 2001
From: Nikhil Benesch <nikhil.benesch@gmail.com>
Date: Sun, 1 Nov 2020 15:53:17 -0500
Subject: [PATCH] Support negative indexes in split_part

Negative indexes count from the right instead of the left.
For example:

    split_part('a@b@c@d', -2) -> c

The motivation is to provide a straightforward alternative to the
complicated and inefficient idioms available for this today, e.g.:

     reverse(split_part(reverse(haystack), reverse(needle), 1))
     (regexp_match(haystack, needle || '$'))[1]

Discussion: http://postgr.es/m/CAPWqQZR%2B-5pAZNSSrnmYczRaX-huemc%3DoO8URvDZvUA-M%3DMOBA%40mail.gmail.com
---
 doc/src/sgml/func.sgml                |  4 +-
 src/backend/utils/adt/varlena.c       | 62 ++++++++++++++++++++++++---
 src/test/regress/expected/strings.out | 62 ++++++++++++++++++++++++++-
 src/test/regress/sql/strings.sql      | 20 +++++++++
 4 files changed, 139 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7ef2ec9972..d0880ab1f3 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3356,7 +3356,9 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
        <para>
         Splits <parameter>string</parameter> at occurrences
         of <parameter>delimiter</parameter> and returns
-        the <parameter>n</parameter>'th field (counting from one).
+        the <parameter>n</parameter>'th field (counting from one),
+        or the |<parameter>n</parameter>|'th-to-last field if
+        <parameter>n</parameter> is negative.
        </para>
        <para>
         <literal>split_part('abc~@~def~@~ghi', '~@~', 2)</literal>
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d7bc330541..c90550a183 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1471,6 +1471,19 @@ text_position_get_match_pos(TextPositionState *state)
 	}
 }
 
+/*
+ * Resets to the initial state installed by text_position_setup.
+ * The next call to text_position_next will search from the beginning
+ * of the string.
+ */
+static void
+text_position_reset(TextPositionState *state)
+{
+	state->last_match = NULL;
+	state->refpoint = state->str1;
+	state->refpos = 0;
+}
+
 static void
 text_position_cleanup(TextPositionState *state)
 {
@@ -4582,7 +4595,7 @@ replace_text_regexp(text *src_text, void *regexp,
 /*
  * split_part
  * parse input string
- * return ord item (1 based)
+ * return ord item (1 based, negative counts from end)
  * based on provided field separator
  */
 Datum
@@ -4600,10 +4613,10 @@ split_part(PG_FUNCTION_ARGS)
 	bool		found;
 
 	/* field number is 1 based */
-	if (fldnum < 1)
+	if (fldnum == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("field position must be greater than zero")));
+				 errmsg("field position must not be zero")));
 
 	inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
 	fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
@@ -4615,8 +4628,8 @@ split_part(PG_FUNCTION_ARGS)
 	/* empty field separator */
 	if (fldsep_len < 1)
 	{
-		/* if first field, return input string, else empty string */
-		if (fldnum == 1)
+		/* if first or last field, return input string, else empty string */
+		if (fldnum == 1 || fldnum == -1)
 			PG_RETURN_TEXT_P(inputstring);
 		else
 			PG_RETURN_TEXT_P(cstring_to_text(""));
@@ -4632,14 +4645,49 @@ split_part(PG_FUNCTION_ARGS)
 	if (!found)
 	{
 		text_position_cleanup(&state);
-		/* if field 1 requested, return input string, else empty string */
-		if (fldnum == 1)
+		/* if first or last field, return input string, else empty string */
+		if (fldnum == 1 || fldnum == -1)
 			PG_RETURN_TEXT_P(inputstring);
 		else
 			PG_RETURN_TEXT_P(cstring_to_text(""));
 	}
 	end_ptr = text_position_get_match_ptr(&state);
 
+	/*
+	 * negative fields count from the right
+	 * convert to positive field number by counting total number of fields
+	 */
+	if (fldnum < 0)
+	{
+		/* searching from the first match, so string has two fields to start */
+		int			fldlen = 2;
+
+		while (text_position_next(&state))
+			fldlen++;
+
+		/* special case of last field does not require extra pass */
+		if (fldnum == -1)
+		{
+			start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
+			end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
+			text_position_cleanup(&state);
+			PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
+													  end_ptr - start_ptr));
+		}
+
+		/* nonexistent field, so return empty string */
+		if (-fldnum > fldlen)
+		{
+			text_position_cleanup(&state);
+			PG_RETURN_TEXT_P(cstring_to_text(""));
+		}
+
+		/* reset to pointing at first match, but now with positive fldnum */
+		fldnum += fldlen + 1;
+		text_position_reset(&state);
+		text_position_next(&state);
+	}
+
 	while (found && --fldnum > 0)
 	{
 		/* identify bounds of next field */
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 8c034c9599..3226f3c0f7 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -1552,8 +1552,44 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
 --
 -- test split_part
 --
+select split_part('','@',1) AS "empty string";
+ empty string 
+--------------
+ 
+(1 row)
+
+select split_part('','@',-1) AS "empty string";
+ empty string 
+--------------
+ 
+(1 row)
+
+select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase";
+ joeuser@mydatabase 
+--------------------
+ joeuser@mydatabase
+(1 row)
+
+select split_part('joeuser@mydatabase','',2) AS "empty string";
+ empty string 
+--------------
+ 
+(1 row)
+
+select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase";
+ joeuser@mydatabase 
+--------------------
+ joeuser@mydatabase
+(1 row)
+
+select split_part('joeuser@mydatabase','',-2) AS "empty string";
+ empty string 
+--------------
+ 
+(1 row)
+
 select split_part('joeuser@mydatabase','@',0) AS "an error";
-ERROR:  field position must be greater than zero
+ERROR:  field position must not be zero
 select split_part('joeuser@mydatabase','@',1) AS "joeuser";
  joeuser 
 ---------
@@ -1578,6 +1614,30 @@ select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
  joeuser
 (1 row)
 
+select split_part('joeuser@mydatabase','@',-1) AS "mydatabase";
+ mydatabase 
+------------
+ mydatabase
+(1 row)
+
+select split_part('joeuser@mydatabase','@',-2) AS "joeuser";
+ joeuser 
+---------
+ joeuser
+(1 row)
+
+select split_part('joeuser@mydatabase','@',-3) AS "empty string";
+ empty string 
+--------------
+ 
+(1 row)
+
+select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
+ mydatabase 
+------------
+ mydatabase
+(1 row)
+
 --
 -- test to_hex
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 14901a2692..3f33a501f7 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -533,6 +533,18 @@ SELECT replace('yabadoo', 'bad', '') AS "yaoo";
 --
 -- test split_part
 --
+select split_part('','@',1) AS "empty string";
+
+select split_part('','@',-1) AS "empty string";
+
+select split_part('joeuser@mydatabase','',1) AS "joeuser@mydatabase";
+
+select split_part('joeuser@mydatabase','',2) AS "empty string";
+
+select split_part('joeuser@mydatabase','',-1) AS "joeuser@mydatabase";
+
+select split_part('joeuser@mydatabase','',-2) AS "empty string";
+
 select split_part('joeuser@mydatabase','@',0) AS "an error";
 
 select split_part('joeuser@mydatabase','@',1) AS "joeuser";
@@ -543,6 +555,14 @@ select split_part('joeuser@mydatabase','@',3) AS "empty string";
 
 select split_part('@joeuser@mydatabase@','@',2) AS "joeuser";
 
+select split_part('joeuser@mydatabase','@',-1) AS "mydatabase";
+
+select split_part('joeuser@mydatabase','@',-2) AS "joeuser";
+
+select split_part('joeuser@mydatabase','@',-3) AS "empty string";
+
+select split_part('@joeuser@mydatabase@','@',-2) AS "mydatabase";
+
 --
 -- test to_hex
 --
-- 
2.25.1