From d455a57701df064f96e5f2a3be2b1c736bacc485 Mon Sep 17 00:00:00 2001
From: James Coleman <jtc331@gmail.com>
Date: Tue, 28 Apr 2020 21:33:12 -0400
Subject: [PATCH v3 3/3] Try simple hash

---
 src/backend/executor/execExpr.c       | 24 +++----
 src/backend/executor/execExprInterp.c | 99 +++++++++++++++++----------
 src/include/executor/execExpr.h       | 26 ++++++-
 3 files changed, 100 insertions(+), 49 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 6249db5426..37c1669153 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -950,13 +950,10 @@ ExecInitExprRec(Expr *node, ExprState *state,
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
 				Oid			func;
-				Oid			hash_func;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
 				FunctionCallInfo fcinfo;
-				FmgrInfo   *hash_finfo;
-				FunctionCallInfo hash_fcinfo;
 				AclResult	aclresult;
 				bool		useBinarySearch = false;
 
@@ -996,18 +993,20 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
 					if (nitems >= MIN_ARRAY_SIZE_FOR_BINARY_SEARCH)
 					{
+						Oid			hash_func;
+
 						/*
-						 * Find the hash op that matches the originally
-						 * planned equality op.
+						 * Find the hash op that matches the originally planned
+						 * equality op. If we don't have one, we'll just fall
+						 * back to the default linear scan implementation.
 						 */
 						useBinarySearch = get_op_hash_functions(opexpr->opno, NULL, &hash_func);
 
-						/*
-						 * But we may not have one, so fall back to the
-						 * default implementation if necessary.
-						 */
 						if (useBinarySearch)
 						{
+							FmgrInfo   *hash_finfo;
+							FunctionCallInfo hash_fcinfo;
+
 							hash_finfo = palloc0(sizeof(FmgrInfo));
 							hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
 							fmgr_info(hash_func, hash_finfo);
@@ -1015,6 +1014,10 @@ ExecInitExprRec(Expr *node, ExprState *state,
 							InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
 													 opexpr->inputcollid, NULL, NULL);
 							InvokeFunctionExecuteHook(hash_func);
+
+							scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo;
+							scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo;
+							scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr;
 						}
 					}
 				}
@@ -1044,9 +1047,6 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					scratch.d.scalararraybinsearchop.finfo = finfo;
 					scratch.d.scalararraybinsearchop.fcinfo_data = fcinfo;
 					scratch.d.scalararraybinsearchop.fn_addr = finfo->fn_addr;
-					scratch.d.scalararraybinsearchop.hash_finfo = hash_finfo;
-					scratch.d.scalararraybinsearchop.hash_fcinfo_data = hash_fcinfo;
-					scratch.d.scalararraybinsearchop.hash_fn_addr = hash_finfo->fn_addr;
 				}
 				else
 				{
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index e54e807c6b..aa7f5ae0df 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -178,6 +178,27 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   AggStatePerGroup pergroup,
 					   ExprContext *aggcontext, int setno);
 
+static bool
+element_match(struct saophash_hash *tb, Datum key1, Datum key2);
+static uint32 element_hash(struct saophash_hash *tb, Datum key);
+
+/*
+ * Define parameters for ScalarArrayOpExpr hash table code generation. The interface is
+ * *also* declared in execnodes.h (to generate the types, which are externally
+ * visible).
+ */
+#define SH_PREFIX saophash
+#define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntryData
+#define SH_KEY_TYPE Datum
+#define SH_KEY key
+#define SH_HASH_KEY(tb, key) element_hash(tb, key)
+#define SH_EQUAL(tb, a, b) element_match(tb, a, b)
+#define SH_SCOPE extern
+#define SH_STORE_HASH
+#define SH_GET_HASH(tb, a) a->hash
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
 /*
  * Prepare ExprState for interpreted execution.
  */
@@ -3591,8 +3612,6 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
 	*op->resnull = resultnull;
 }
 
-static ExprEvalStep *current_saop_op;
-
 /*
  * Hash function for elements.
  *
@@ -3601,16 +3620,16 @@ static ExprEvalStep *current_saop_op;
  */
 /* XXX: Name function to be specific to saop binsearch? */
 static uint32
-element_hash(const void *key, Size keysize)
+element_hash(struct saophash_hash *tb, Datum key)
 {
+	ScalarArrayOpExprHashTable elements_tab = (ScalarArrayOpExprHashTable) tb->private_data;
+	FunctionCallInfo fcinfo = elements_tab->op->d.scalararraybinsearchop.hash_fcinfo_data;
 	Datum hash;
-	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.hash_fcinfo_data;
 
-	fcinfo->args[0].value = *((const Datum *) key);
+	fcinfo->args[0].value = key;
 	fcinfo->args[0].isnull = false;
 
-	/* The keysize parameter is superfluous here */
-	hash = current_saop_op->d.scalararraybinsearchop.hash_fn_addr(fcinfo);
+	hash = elements_tab->op->d.scalararraybinsearchop.hash_fn_addr(fcinfo);
 
 	return DatumGetUInt32(hash);
 }
@@ -3619,21 +3638,22 @@ element_hash(const void *key, Size keysize)
  * Matching function for elements, to be used in hashtable lookups.
  */
 /* XXX: Name function to be specific to saop binsearch? */
-static int
-element_match(const void *key1, const void *key2, Size keysize)
+static bool
+element_match(struct saophash_hash *tb, Datum key1, Datum key2)
 {
 	Datum result;
-	FunctionCallInfo fcinfo = current_saop_op->d.scalararraybinsearchop.fcinfo_data;
 
-	fcinfo->args[0].value = *((const Datum *) key1);
+	ScalarArrayOpExprHashTable elements_tab = (ScalarArrayOpExprHashTable) tb->private_data;
+	FunctionCallInfo fcinfo = elements_tab->op->d.scalararraybinsearchop.fcinfo_data;
+
+	fcinfo->args[0].value = key1;
 	fcinfo->args[0].isnull = false;
-	fcinfo->args[1].value = *((const Datum *) key2);
+	fcinfo->args[1].value = key2;
 	fcinfo->args[1].isnull = false;
 
-	/* The keysize parameter is superfluous here */
-	result = current_saop_op->d.scalararraybinsearchop.fn_addr(fcinfo);
+	result = elements_tab->op->d.scalararraybinsearchop.fn_addr(fcinfo);
 
-	return DatumGetBool(result) ? 0 : 1;
+	return DatumGetBool(result);
 }
 
 /*
@@ -3653,22 +3673,21 @@ element_match(const void *key1, const void *key2, Size keysize)
 void
 ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 {
+	ScalarArrayOpExprHashTable elements_tab = op->d.scalararraybinsearchop.elements_tab;
 	FunctionCallInfo fcinfo = op->d.scalararraybinsearchop.fcinfo_data;
 	bool		strictfunc = op->d.scalararraybinsearchop.finfo->fn_strict;
 	ArrayType  *arr;
 	Datum		scalar = fcinfo->args[0].value;
+	bool		scalar_isnull = fcinfo->args[0].isnull;
 	Datum		result;
 	bool		resultnull;
 	bool		hashfound;
-	int			res;
 
 	/* If we're only executing once, do we need a way to fall back to the regular loop? */
 
 	/* We don't setup a binary search op if the array const is null. */
 	Assert(!*op->resnull);
 
-	current_saop_op = op;
-
 	/*
 	 * If the scalar is NULL, and the function is strict, return NULL; no
 	 * point in executing the search.
@@ -3680,17 +3699,17 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	}
 
 	/* Preprocess the array the first time we execute the op. */
-	if (op->d.scalararraybinsearchop.elements_tab == NULL)
+	if (elements_tab == NULL)
 	{
 		int16		typlen;
 		bool		typbyval;
 		char		typalign;
-		HASHCTL		elem_hash_ctl;
 		int			nitems;
 		int			num_nulls = 0;
 		char	   *s;
 		bits8	   *bitmap;
 		int			bitmask;
+		MemoryContext oldcontext;
 
 		arr = DatumGetArrayTypeP(*op->resvalue);
 		nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
@@ -3700,16 +3719,14 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 							 &typbyval,
 							 &typalign);
 
-		MemSet(&elem_hash_ctl, 0, sizeof(elem_hash_ctl));
-		elem_hash_ctl.keysize = sizeof(Datum);
-		elem_hash_ctl.entrysize = sizeof(Datum);
-		elem_hash_ctl.hash = element_hash;
-		elem_hash_ctl.match = element_match;
-		elem_hash_ctl.hcxt = econtext->ecxt_per_query_memory;
-		op->d.scalararraybinsearchop.elements_tab = hash_create("Scalar array op expr elements",
-								   nitems,
-								   &elem_hash_ctl,
-								   HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+		oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory);
+
+		elements_tab = (ScalarArrayOpExprHashTable) palloc(sizeof(ScalarArrayOpExprHashTableData));
+		op->d.scalararraybinsearchop.elements_tab = elements_tab;
+		elements_tab->op = op;
+		elements_tab->hashtab = saophash_create(CurrentMemoryContext, nitems, elements_tab);
+
+		MemoryContextSwitchTo(oldcontext);
 
 		s = (char *) ARR_DATA_PTR(arr);
 		bitmap = ARR_NULLBITMAP(arr);
@@ -3729,7 +3746,7 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 				s = att_addlength_pointer(s, typlen, s);
 				s = (char *) att_align_nominal(s, typalign);
 
-				hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &element, HASH_ENTER, NULL);
+				saophash_insert(elements_tab->hashtab, element, &hashfound);
 			}
 
 			/* Advance bitmap pointer if any. */
@@ -3758,7 +3775,8 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	}
 
 	/* Check the hash to see if we have a match. */
-	hash_search(op->d.scalararraybinsearchop.elements_tab, (const void *) &scalar, HASH_FIND, &hashfound);
+	hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar);
+
 	result = BoolGetDatum(hashfound);
 	resultnull = false;
 
@@ -3771,18 +3789,27 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	{
 		if (strictfunc)
 		{
-			/* Had nulls, so strict function implies null. */
+			/* Had nulls and is a strict function, so instead of executing the
+			 * function onces with a null rhs, we can assume null. */
 			result = (Datum) 0;
 			resultnull = true;
 		}
 		else
 		{
-			/* Execute function will null rhs just once. */
+			/* TODO: No regression test for this branch. */
+			/*
+			 * Execute function will null rhs just once.
+			 *
+			 * The hash lookup path will have scribbled on the lhs argument so
+			 * we need to set it up also (even though we entered this function
+			 * with it already set).
+			 */
+			fcinfo->args[0].value = scalar;
+			fcinfo->args[0].isnull = scalar_isnull;
 			fcinfo->args[1].value = (Datum) 0;
 			fcinfo->args[1].isnull = true;
 
-			res = DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
-			result = BoolGetDatum(res == 0);
+			result = op->d.scalararraybinsearchop.fn_addr(fcinfo);
 			resultnull = fcinfo->isnull;
 		}
 	}
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 2e93b1f990..847d344e8d 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -21,6 +21,30 @@
 struct ExprEvalStep;
 struct SubscriptingRefState;
 
+typedef struct ScalarArrayOpExprHashEntryData *ScalarArrayOpExprHashEntry;
+typedef struct ScalarArrayOpExprHashTableData *ScalarArrayOpExprHashTable;
+
+typedef struct ScalarArrayOpExprHashEntryData
+{
+	Datum key;
+	uint32		status;			/* hash status */
+	uint32		hash;			/* hash value (cached) */
+} ScalarArrayOpExprHashEntryData;
+
+/* define parameters necessary to generate the tuple hash table interface */
+#define SH_PREFIX saophash
+#define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntryData
+#define SH_KEY_TYPE Datum
+#define SH_SCOPE extern
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
+typedef struct ScalarArrayOpExprHashTableData
+{
+	saophash_hash *hashtab;	/* underlying hash table */
+	struct ExprEvalStep *op;
+}			ScalarArrayOpExprHashTableData;
+
 /* Bits in ExprState->flags (see also execnodes.h for public flag bits): */
 /* expression's interpreter has been initialized */
 #define EEO_FLAG_INTERPRETER_INITIALIZED	(1 << 1)
@@ -555,7 +579,7 @@ typedef struct ExprEvalStep
 		struct
 		{
 			bool		has_nulls;
-			HTAB	   *elements_tab;
+			ScalarArrayOpExprHashTable	   elements_tab;
 			FmgrInfo   *finfo;	/* function's lookup data */
 			FunctionCallInfo fcinfo_data;	/* arguments etc */
 			/* faster to access without additional indirection: */
-- 
2.17.1

