From 6a9fe8117e1b91958111c679d02a2bd7944fae22 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 1 May 2023 18:31:40 -0400
Subject: [PATCH v1 1/2] Simplify and speed up ReadArrayStr().

ReadArrayStr() seems to have been written on the assumption that
non-rectangular input is fine and that it should pad with NULLs
wherever elements are missing.  We disallowed non-rectangular input
ages ago (commit 0e13d627b), but never simplified this function
as a follow-up.  In particular, the existing code recomputes each
element's linear location from scratch, which is quite unnecessary
for rectangular input: we can just assign the elements sequentially,
saving lots of arithmetic.  Add some more commentary while at it.

(This leaves ArrayGetOffset0() unused, but I'm unsure whether to
remove that function.)
---
 src/backend/utils/adt/arrayfuncs.c | 69 ++++++++++++++----------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 87c987fb27..39b5efc661 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -93,7 +93,7 @@ static bool array_isspace(char ch);
 static int	ArrayCount(const char *str, int *dim, char typdelim,
 					   Node *escontext);
 static bool ReadArrayStr(char *arrayStr, const char *origStr,
-						 int nitems, int ndim, int *dim,
+						 int nitems,
 						 FmgrInfo *inputproc, Oid typioparam, int32 typmod,
 						 char typdelim,
 						 int typlen, bool typbyval, char typalign,
@@ -391,7 +391,7 @@ array_in(PG_FUNCTION_ARGS)
 	dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
 	nullsPtr = (bool *) palloc(nitems * sizeof(bool));
 	if (!ReadArrayStr(p, string,
-					  nitems, ndim, dim,
+					  nitems,
 					  &my_extra->proc, typioparam, typmod,
 					  typdelim,
 					  typlen, typbyval, typalign,
@@ -457,7 +457,8 @@ array_isspace(char ch)
 
 /*
  * ArrayCount
- *	 Determines the dimensions for an array string.
+ *	 Determines the dimensions for an array string.  This includes
+ *	 syntax-checking the array structure decoration (braces and delimiters).
  *
  * Returns number of dimensions as function result.  The axis lengths are
  * returned in dim[], which must be of size MAXDIM.
@@ -704,16 +705,14 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
 /*
  * ReadArrayStr :
  *	 parses the array string pointed to by "arrayStr" and converts the values
- *	 to internal format.  Unspecified elements are initialized to nulls.
- *	 The array dimensions must already have been determined.
+ *	 to internal format.  The array dimensions must have been determined,
+ *	 and the case of an empty array must have been handled earlier.
  *
  * Inputs:
  *	arrayStr: the string to parse.
  *			  CAUTION: the contents of "arrayStr" will be modified!
  *	origStr: the unmodified input string, used only in error messages.
  *	nitems: total number of array elements, as already determined.
- *	ndim: number of array dimensions
- *	dim[]: array axis lengths
  *	inputproc: type-specific input procedure for element datatype.
  *	typioparam, typmod: auxiliary values to pass to inputproc.
  *	typdelim: the value delimiter (type-specific).
@@ -738,8 +737,6 @@ static bool
 ReadArrayStr(char *arrayStr,
 			 const char *origStr,
 			 int nitems,
-			 int ndim,
-			 int *dim,
 			 FmgrInfo *inputproc,
 			 Oid typioparam,
 			 int32 typmod,
@@ -753,20 +750,13 @@ ReadArrayStr(char *arrayStr,
 			 int32 *nbytes,
 			 Node *escontext)
 {
-	int			i,
+	int			i = 0,
 				nest_level = 0;
 	char	   *srcptr;
 	bool		in_quotes = false;
 	bool		eoArray = false;
 	bool		hasnull;
 	int32		totbytes;
-	int			indx[MAXDIM] = {0},
-				prod[MAXDIM];
-
-	mda_get_prod(ndim, dim, prod);
-
-	/* Initialize is-null markers to true */
-	memset(nulls, true, nitems * sizeof(bool));
 
 	/*
 	 * We have to remove " and \ characters to create a clean item value to
@@ -789,11 +779,20 @@ ReadArrayStr(char *arrayStr,
 		bool		itemdone = false;
 		bool		leadingspace = true;
 		bool		hasquoting = false;
-		char	   *itemstart;
-		char	   *dstptr;
-		char	   *dstendptr;
+		char	   *itemstart;	/* start of de-escaped text */
+		char	   *dstptr;		/* next output point for de-escaped text */
+		char	   *dstendptr;	/* last significant output char + 1 */
 
-		i = -1;
+		/*
+		 * Parse next array element, collecting the de-escaped text into
+		 * itemstart..dstendptr-1.
+		 *
+		 * Notice that we do not set "itemdone" until we see a separator
+		 * (typdelim character) or the array's final right brace.  Since the
+		 * array is already verified to be nonempty and rectangular, there is
+		 * guaranteed to be another element to be processed in the first case,
+		 * while in the second case of course we'll exit the outer loop.
+		 */
 		itemstart = dstptr = dstendptr = srcptr;
 
 		while (!itemdone)
@@ -840,13 +839,7 @@ ReadArrayStr(char *arrayStr,
 				case '{':
 					if (!in_quotes)
 					{
-						if (nest_level >= ndim)
-							ereturn(escontext, false,
-									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-									 errmsg("malformed array literal: \"%s\"",
-											origStr)));
 						nest_level++;
-						indx[nest_level - 1] = 0;
 						srcptr++;
 					}
 					else
@@ -860,14 +853,9 @@ ReadArrayStr(char *arrayStr,
 									(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 									 errmsg("malformed array literal: \"%s\"",
 											origStr)));
-						if (i == -1)
-							i = ArrayGetOffset0(ndim, indx, prod);
-						indx[nest_level - 1] = 0;
 						nest_level--;
 						if (nest_level == 0)
 							eoArray = itemdone = true;
-						else
-							indx[nest_level - 1]++;
 						srcptr++;
 					}
 					else
@@ -878,10 +866,7 @@ ReadArrayStr(char *arrayStr,
 						*dstptr++ = *srcptr++;
 					else if (*srcptr == typdelim)
 					{
-						if (i == -1)
-							i = ArrayGetOffset0(ndim, indx, prod);
 						itemdone = true;
-						indx[ndim - 1]++;
 						srcptr++;
 					}
 					else if (array_isspace(*srcptr))
@@ -905,15 +890,18 @@ ReadArrayStr(char *arrayStr,
 			}
 		}
 
+		/* Terminate de-escaped string */
 		Assert(dstptr < srcptr);
 		*dstendptr = '\0';
 
-		if (i < 0 || i >= nitems)
+		/* Safety check that we don't write past the output arrays */
+		if (i >= nitems)
 			ereturn(escontext, false,
 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 					 errmsg("malformed array literal: \"%s\"",
 							origStr)));
 
+		/* Convert the de-escaped string into the next output array entries */
 		if (Array_nulls && !hasquoting &&
 			pg_strcasecmp(itemstart, "NULL") == 0)
 		{
@@ -934,8 +922,17 @@ ReadArrayStr(char *arrayStr,
 				return false;
 			nulls[i] = false;
 		}
+
+		i++;
 	}
 
+	/* Cross-check that we filled all the output array entries */
+	if (i != nitems)
+		ereturn(escontext, false,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("malformed array literal: \"%s\"",
+						origStr)));
+
 	/*
 	 * Check for nulls, compute total data space needed
 	 */
-- 
2.31.1

