From cd760f5d333a2f1c7b871f05d4a231c0381c660d Mon Sep 17 00:00:00 2001
From: James Coleman <jtc331@gmail.com>
Date: Mon, 27 Apr 2020 08:51:28 -0400
Subject: [PATCH v2 2/2] Try using dynahash

---
 src/backend/executor/execExpr.c           |  29 ++--
 src/backend/executor/execExprInterp.c     | 197 ++++++++++++----------
 src/include/executor/execExpr.h           |   7 +-
 src/test/regress/expected/expressions.out |   7 +
 src/test/regress/sql/expressions.sql      |   2 +
 5 files changed, 138 insertions(+), 104 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index c202cc7e89..6249db5426 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -31,6 +31,7 @@
 #include "postgres.h"
 
 #include "access/nbtree.h"
+#include "access/hash.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_type.h"
 #include "executor/execExpr.h"
@@ -949,10 +950,13 @@ ExecInitExprRec(Expr *node, ExprState *state,
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
 				Oid			func;
+				Oid			hash_func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
+				FmgrInfo   *hash_finfo;
+				FunctionCallInfo hash_fcinfo;
 				AclResult	aclresult;
 				bool		useBinarySearch = false;
 
@@ -983,11 +987,6 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				{
 					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
 					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
-					Oid			orderingOp;
-					Oid			orderingFunc;
-					Oid			opfamily;
-					Oid			opcintype;
-					int16		strategy;
 					int			nitems;
 
 					/*
@@ -998,21 +997,24 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH)
 					{
 						/*
-						 * Find the ordering op that matches the originally
+						 * Find the hash op that matches the originally
 						 * planned equality op.
 						 */
-						orderingOp = get_ordering_op_for_equality_op(opexpr->opno, NULL);
-						get_ordering_op_properties(orderingOp, &opfamily, &opcintype, &strategy);
-						orderingFunc = get_opfamily_proc(opfamily, opcintype, opcintype, BTORDER_PROC);
+						useBinarySearch = get_op_hash_functions(opexpr->opno, NULL, &hash_func);
 
 						/*
 						 * But we may not have one, so fall back to the
 						 * default implementation if necessary.
 						 */
-						if (OidIsValid(orderingFunc))
+						if (useBinarySearch)
 						{
-							useBinarySearch = true;
-							func = orderingFunc;
+							hash_finfo = palloc0(sizeof(FmgrInfo));
+							hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
+							fmgr_info(hash_func, hash_finfo);
+							fmgr_info_set_expr((Node *) node, hash_finfo);
+							InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
+													 opexpr->inputcollid, NULL, NULL);
+							InvokeFunctionExecuteHook(hash_func);
 						}
 					}
 				}
@@ -1042,6 +1044,9 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					scratch.d.scalararraybinsearchop.finfo = finfo;
 					scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo;
 					scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr;
+					scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo;
+					scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo;
+					scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr;
 				}
 				else
 				{
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 5bebafbf0c..e54e807c6b 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -178,8 +178,6 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   AggStatePerGroup pergroup,
 					   ExprContext *aggcontext, int setno);
 
-static int	compare_array_elements(const void *a, const void *b, void *arg);
-
 /*
  * Prepare ExprState for interpreted execution.
  */
@@ -3593,6 +3591,51 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	*op->resnull = resultnull;
 }
 
+static ExprEvalStep *current_saop_op;
+
+/*
+ * Hash function for elements.
+ *
+ * We use the element type's default hash opclass, and the column collation
+ * if the type is collation-sensitive.
+ */
+/* XXX: Name function to be specific to saop binsearch? */
+static uint32
+element_hash(const void *key, Size keysize)
+{
+	Datum hash;
+	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.hash_fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) key);
+	fcinfo->args[0].isnull = false;
+
+	/* The keysize parameter is superfluous here */
+	hash = current_saop_op->d.scalararraybinsearchop.hash_fn_addr(fcinfo);
+
+	return DatumGetUInt32(hash);
+}
+
+/*
+ * Matching function for elements, to be used in hashtable lookups.
+ */
+/* XXX: Name function to be specific to saop binsearch? */
+static int
+element_match(const void *key1, const void *key2, Size keysize)
+{
+	Datum result;
+	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) key1);
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].value = *((const Datum *) key2);
+	fcinfo->args[1].isnull = false;
+
+	/* The keysize parameter is superfluous here */
+	result = current_saop_op->d.scalararraybinsearchop.fn_addr(fcinfo);
+
+	return DatumGetBool(result) ? 0 : 1;
+}
+
 /*
  * Evaluate "scalar op ANY (const array)".
  *
@@ -3613,16 +3656,19 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
 	bool		strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict;
 	ArrayType  *arr;
+	Datum		scalar = fcinfo->args[0].value;
 	Datum		result;
 	bool		resultnull;
-	bool	   *elem_nulls;
-	int			l = 0,
-				r,
-				res;
+	bool		hashfound;
+	int			res;
+
+	/* If we're only executing once, do we need a way to fall back to the regular loop? */
 
 	/* We don't setup a binary search op if the array const is null. */
 	Assert(!*op->resnull);
 
+	current_saop_op = op;
+
 	/*
 	 * If the scalar is NULL, and the function is strict, return NULL; no
 	 * point in executing the search.
@@ -3634,104 +3680,88 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	}
 
 	/* Preprocess the array the first time we execute the op. */
-	if (op->d.scalararraybinsearchop.elem_values == NULL)
+	if (op->d.scalararraybinsearchop.elements_tab == NULL)
 	{
-		/* Cache the original lhs so we can scribble on it. */
-		Datum		scalar = fcinfo->args[0].value;
-		bool		scalar_isnull = fcinfo->args[0].isnull;
-		int			num_nonnulls = 0;
-		MemoryContext old_cxt;
-		MemoryContext array_cxt;
 		int16		typlen;
 		bool		typbyval;
 		char		typalign;
+		HASHCTL		elem_hash_ctl;
+		int			nitems;
+		int			num_nulls = 0;
+		char	   *s;
+		bits8	   *bitmap;
+		int			bitmask;
 
 		arr = DatumGetArrayTypeP(*op->resvalue);
+		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
 
 		get_typlenbyvalalign(ARR_ELEMTYPE(arr),
 							 &typlen,
 							 &typbyval,
 							 &typalign);
 
-		array_cxt = AllocSetContextCreate(
-										  econtext->ecxt_per_query_memory,
-										  "scalararraybinsearchop context",
-										  ALLOCSET_SMALL_SIZES);
-		old_cxt = MemoryContextSwitchTo(array_cxt);
+		MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl));
+		elem_hash_ctl.keysize = sizeof(Datum);
+		elem_hash_ctl.entrysize = sizeof(Datum);
+		elem_hash_ctl.hash = element_hash;
+		elem_hash_ctl.match = element_match;
+		elem_hash_ctl.hcxt = econtext->ecxt_per_query_memory;
+		op->d.scalararraybinsearchop.elements_tab = hash_create("Scalar array op expr elements",
+								   nitems,
+								   &elem_hash_ctl,
+								   HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+		s = (char *) ARR_DATA_PTR(arr);
+		bitmap = ARR_NULLBITMAP(arr);
+		bitmask = 1;
+		for (int i = 0; i < nitems; i++)
+		{
+			Datum		element;
+
+			/* Get array element, checking for NULL. */
+			if (bitmap && (*bitmap & bitmask) == 0)
+			{
+				num_nulls++;
+			}
+			else
+			{
+				element = fetch_att(s, typbyval, typlen);
+				s = att_addlength_pointer(s, typlen, s);
+				s = (char *) att_align_nominal(s, typalign);
 
-		deconstruct_array(arr,
-						  ARR_ELEMTYPE(arr),
-						  typlen, typbyval, typalign,
-						  &op->d.scalararraybinsearchop.elem_values, &elem_nulls, &op->d.scalararraybinsearchop.num_elems);
+				hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &element, HASH_ENTER, NULL);
+			}
 
-		/* Remove null entries from the array. */
-		for (int j = 0; j < op->d.scalararraybinsearchop.num_elems; j++)
-		{
-			if (!elem_nulls[j])
-				op->d.scalararraybinsearchop.elem_values[num_nonnulls++] = op->d.scalararraybinsearchop.elem_values[j];
+			/* Advance bitmap pointer if any. */
+			if (bitmap)
+			{
+				bitmask <<= 1;
+				if (bitmask == 0x100)
+				{
+					bitmap++;
+					bitmask = 1;
+				}
+			}
 		}
 
 		/*
 		 * Remember if we had any nulls so that we know if we need to execute
 		 * non-strict functions with a null lhs value if no match is found.
 		 */
-		op->d.scalararraybinsearchop.has_nulls = num_nonnulls < op->d.scalararraybinsearchop.num_elems;
-		op->d.scalararraybinsearchop.num_elems = num_nonnulls;
+		op->d.scalararraybinsearchop.has_nulls = num_nulls > 0;
 
 		/*
-		 * Setup the fcinfo for sorting. We've removed nulls, so both lhs and
-		 * rhs values are guaranteed to be non-null.
+		 * We only setup a binary search op if we have > 8 elements, so we don't
+		 * need to worry about adding an optimization for the empty array case.
 		 */
-		fcinfo->args[0].isnull = false;
-		fcinfo->args[1].isnull = false;
-
-		/* Sort the array and remove duplicate elements. */
-		qsort_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
-				  compare_array_elements, op);
-		op->d.scalararraybinsearchop.num_elems = qunique_arg(op->d.scalararraybinsearchop.elem_values, op->d.scalararraybinsearchop.num_elems, sizeof(Datum),
-															 compare_array_elements, op);
-
-		/* Restore the lhs value after we scribbed on it for sorting. */
-		fcinfo->args[0].value = scalar;
-		fcinfo->args[0].isnull = scalar_isnull;
-
-		MemoryContextSwitchTo(old_cxt);
+		Assert(nitems > 0);
 	}
 
-	/*
-	 * We only setup a binary search op if we have > 8 elements, so we don't
-	 * need to worry about adding an optimization for the empty array case.
-	 */
-	Assert(!(op->d.scalararraybinsearchop.num_elems <= 0 && !op->d.scalararraybinsearchop.has_nulls));
-
-	/* Assume no match will be found until proven otherwise. */
-	result = BoolGetDatum(false);
+	/* Check the hash to see if we have a match. */
+	hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &scalar, HASH_FIND, &hashfound);
+	result = BoolGetDatum(hashfound);
 	resultnull = false;
 
-	/* Binary search through the array. */
-	r = op->d.scalararraybinsearchop.num_elems - 1;
-	while (l <= r)
-	{
-		int			i = (l + r) / 2;
-
-		fcinfo->args[1].value = op->d.scalararraybinsearchop.elem_values[i];
-
-		/* Call comparison function */
-		fcinfo->isnull = false;
-		res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
-
-		if (res == 0)
-		{
-			result = BoolGetDatum(true);
-			resultnull = false;
-			break;
-		}
-		else if (res > 0)
-			l = i + 1;
-		else
-			r = i - 1;
-	}
-
 	/*
 	 * If we didn't find a match in the array, we still might need to handle
 	 * the possibility of null values (we've previously removed them from the
@@ -3761,19 +3791,6 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	*op->resnull = resultnull;
 }
 
-/* XXX: Name function to be specific to saop binsearch? */
-static int
-compare_array_elements(const void *a, const void *b, void *arg)
-{
-	ExprEvalStep *op = (ExprEvalStep *) arg;
-	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
-
-	fcinfo->args[0].value = *((const Datum *) a);
-	fcinfo->args[1].value = *((const Datum *) b);
-
-	return DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
-}
-
 /*
  * Evaluate a NOT NULL domain constraint.
  */
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index ac4478d060..2e93b1f990 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -554,13 +554,16 @@ typedef struct ExprEvalStep
 		/* for EEOP_SCALARARRAYOP_BINSEARCH */
 		struct
 		{
-			int			num_elems;
 			bool		has_nulls;
-			Datum	   *elem_values;
+			HTAB	   *elements_tab;
 			FmgrInfo   *finfo;	/* function's lookup data */
 			FunctionCallInfo fcinfo_data;	/* arguments etc */
 			/* faster to access without additional indirection: */
 			PGFunction	fn_addr;	/* actual call address */
+			FmgrInfo   *hash_finfo;	/* function's lookup data */
+			FunctionCallInfo hash_fcinfo_data;	/* arguments etc */
+			/* faster to access without additional indirection: */
+			PGFunction	hash_fn_addr;	/* actual call address */
 		}			scalararraybinsearchop;
 
 		/* for EEOP_XMLEXPR */
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 55b57b9c59..f37dfe1ce2 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -197,3 +197,10 @@ select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
  
 (1 row)
 
+select 'a' in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+ ?column? 
+----------
+ t
+(1 row)
+
+-- TODO: test non-strict op?
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index 3cb850d838..c30fe66c5e 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -76,3 +76,5 @@ select 1 in (null, null, null, null, null, null, null, null, null, null, null);
 select 1 in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
 select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
 select null::int in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select 'a' in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+-- TODO: test non-strict op?
-- 
2.17.1

