From a0e049aaceefd5e288d3c0e4b066cdb01c6985ea Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Fri, 24 Apr 2020 15:47:38 +0200
Subject: [PATCH 3/4] bloom filter

---
 src/backend/executor/execExpr.c       |  46 +++-
 src/backend/executor/execExprInterp.c | 340 ++++++++++++++++++++++++++
 src/backend/utils/cache/lsyscache.c   |  96 +++++++++
 src/backend/utils/misc/guc.c          |  21 ++
 src/include/executor/execExpr.h       |  30 +++
 src/include/utils/lsyscache.h         |   2 +
 6 files changed, 533 insertions(+), 2 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 29a5b06852..6d5356f75f 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -947,12 +947,16 @@ ExecInitExprRec(Expr *node, ExprState *state,
 			{
 				ScalarArrayOpExpr *opexpr = (ScalarArrayOpExpr *) node;
 				Oid			func;
+				Oid			hashfunc;
 				Expr	   *scalararg;
 				Expr	   *arrayarg;
 				FmgrInfo   *finfo;
+				FmgrInfo   *hash_finfo;
 				FunctionCallInfo fcinfo;
+				FunctionCallInfo hash_fcinfo;
 				AclResult	aclresult;
 				bool		useBinarySearch = false;
+				bool		useBloomFilter = false;
 
 				Assert(list_length(opexpr->args) == 2);
 				scalararg = (Expr *) linitial(opexpr->args);
@@ -971,6 +975,11 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				fcinfo = palloc0(SizeForFunctionCallInfo(2));
 				func = opexpr->opfuncid;
 
+				/* Set up the hash function lookup information (bloom filter) */
+				hash_finfo = palloc0(sizeof(FmgrInfo));
+				hash_fcinfo = palloc0(SizeForFunctionCallInfo(2));
+				hashfunc = InvalidOid;
+
 				/*
 				 * If we have a constant array and want OR semantics, then we
 				 * can use a binary search to implement the op instead of
@@ -978,7 +987,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				 */
 				if (opexpr->useOr && arrayarg && IsA(arrayarg, Const) &&
 					!((Const *) arrayarg)->constisnull &&
-					enable_saop_binsearch)
+					(enable_saop_bloom || enable_saop_binsearch))
 				{
 					Datum		arrdatum = ((Const *) arrayarg)->constvalue;
 					ArrayType  *arr = (ArrayType *) DatumGetPointer(arrdatum);
@@ -988,6 +997,8 @@ ExecInitExprRec(Expr *node, ExprState *state,
 					Oid			opcintype;
 					int16		strategy;
 					int			nitems;
+					Oid			left_hashfn;
+					Oid			right_hashfn;
 
 					/*
 					 * Only do the optimization if we have a large enough
@@ -1012,6 +1023,14 @@ ExecInitExprRec(Expr *node, ExprState *state,
 						{
 							useBinarySearch = true;
 							func = orderingFunc;
+
+							if (enable_saop_bloom && nitems >= enable_saop_threshold &&
+								get_op_hash_ext_functions(opexpr->opno, &left_hashfn, &right_hashfn) &&
+								left_hashfn == right_hashfn)	/* one hash fn must fit both sides */
+							{
+								useBloomFilter = true;
+								hashfunc = left_hashfn;
+							}
 						}
 					}
 				}
@@ -1022,6 +1041,15 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				InitFunctionCallInfoData(*fcinfo, finfo, 2,
 										 opexpr->inputcollid, NULL, NULL);
 
+				if (OidIsValid(hashfunc))
+				{
+					InvokeFunctionExecuteHook(hashfunc);
+					fmgr_info(hashfunc, hash_finfo);
+					fmgr_info_set_expr((Node *) node, hash_finfo);
+					InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, 2,
+											 opexpr->inputcollid, NULL, NULL);
+				}
+
 				/* Evaluate scalar directly into left function argument */
 				ExecInitExprRec(scalararg, state,
 								&fcinfo->args[0].value, &fcinfo->args[0].isnull);
@@ -1035,7 +1063,21 @@ ExecInitExprRec(Expr *node, ExprState *state,
 				ExecInitExprRec(arrayarg, state, resv, resnull);
 
 				/* And perform the operation */
-				if (useBinarySearch)
+				if (useBloomFilter)
+				{
+					scratch.opcode = EEOP_SCALARARRAYOP_BLOOM;
+
+					/* pre-sorted array */
+					scratch.d.scalararraybloomop.finfo = finfo;
+					scratch.d.scalararraybloomop.fcinfo_data = fcinfo;
+					scratch.d.scalararraybloomop.fn_addr = finfo->fn_addr;
+
+					/* bloom filter */
+					scratch.d.scalararraybloomop.hash_finfo = hash_finfo;
+					scratch.d.scalararraybloomop.hash_fcinfo_data = hash_fcinfo;
+					scratch.d.scalararraybloomop.hash_fn_addr = hash_finfo->fn_addr;
+				}
+				else if (useBinarySearch)	/* func is the ordering proc by now */
 				{
 					scratch.opcode = EEOP_SCALARARRAYOP_BINSEARCH;
 					scratch.d.scalararraybinsearchop.finfo = finfo;
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index f72ff52f02..48a63391f5 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -88,7 +88,9 @@
 #endif							/* HAVE_COMPUTED_GOTO */
 
 bool		enable_saop_binsearch = true;
+bool		enable_saop_bloom = true;
 int			enable_saop_threshold = 8;
+double		saop_bloom_false_positives = 0.05;
 
 /*
  * Macros for opcode dispatch.
@@ -182,6 +184,7 @@ ExecAggPlainTransByRef(AggState *aggstate, AggStatePerTrans pertrans,
 					   ExprContext *aggcontext, int setno);
 
 static int compare_array_elements(const void *a, const void *b, void *arg);
+static int compare_array_elements_bloom(const void *a, const void *b, void *arg);
 
 /*
  * Prepare ExprState for interpreted execution.
@@ -432,6 +435,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_CONVERT_ROWTYPE,
 		&&CASE_EEOP_SCALARARRAYOP,
 		&&CASE_EEOP_SCALARARRAYOP_BINSEARCH,
+		&&CASE_EEOP_SCALARARRAYOP_BLOOM,
 		&&CASE_EEOP_XMLEXPR,
 		&&CASE_EEOP_AGGREF,
 		&&CASE_EEOP_GROUPING_FUNC,
@@ -1479,6 +1483,14 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_SCALARARRAYOP_BLOOM)
+		{
+			/* too complex for an inline implementation */
+			ExecEvalScalarArrayOpBloom(state, op, econtext);
+
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_DOMAIN_NOTNULL)
 		{
 			/* too complex for an inline implementation */
@@ -3764,6 +3776,321 @@ ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *
 	*op->resnull = resultnull;
 }
 
+/*
+ * bloom_filter_add
+ *		Set the bits that "value" maps to in a bloom filter of m bits.
+ *
+ * The value is hashed k times, once per seed, by calling the two-argument
+ * (value, seed) hash function hash_fn_addr through fcinfo, whose argument
+ * slots are overwritten.  m must be positive.
+ */
+static void
+bloom_filter_add(PGFunction hash_fn_addr, FunctionCallInfo fcinfo,
+				 char *filter, int m, int k, Datum *seeds, Datum value)
+{
+	int			i;
+
+	/* The value to hash is the same for every seed */
+	fcinfo->args[0].value = value;
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].isnull = false;
+
+	for (i = 0; i < k; i++)
+	{
+		uint64		h;
+		int			byteIdx;
+		int			bitIdx;
+
+		fcinfo->args[1].value = seeds[i];
+
+		h = DatumGetUInt64(hash_fn_addr(fcinfo));
+
+		bitIdx = h % m;
+
+		byteIdx = bitIdx / 8;
+		bitIdx = bitIdx % 8;
+
+		filter[byteIdx] |= (0x01 << bitIdx);
+	}
+}
+
+/*
+ * bloom_filter_check
+ *		Test whether "value" may have been added to the bloom filter.
+ *
+ * Must be called with the same hash function, m, k and seeds that were used
+ * to build the filter.  Returns false if the value is definitely not in the
+ * filter, true if it may be (false positives are possible).
+ */
+static bool
+bloom_filter_check(PGFunction hash_fn_addr, FunctionCallInfo fcinfo,
+				   char *filter, int m, int k, Datum *seeds, Datum value)
+{
+	int			i;
+
+	/* The value to hash is the same for every seed */
+	fcinfo->args[0].value = value;
+	fcinfo->args[0].isnull = false;
+	fcinfo->args[1].isnull = false;
+
+	for (i = 0; i < k; i++)
+	{
+		uint64		h;
+		int			byteIdx;
+		int			bitIdx;
+
+		fcinfo->args[1].value = seeds[i];
+
+		h = DatumGetUInt64(hash_fn_addr(fcinfo));
+
+		bitIdx = h % m;
+
+		byteIdx = bitIdx / 8;
+		bitIdx = bitIdx % 8;
+
+		/* A single unset bit proves the value was never added */
+		if (!(filter[byteIdx] & (0x01 << bitIdx)))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Evaluate "scalar op ANY (const array)" using a bloom filter.
+ *
+ * This is a variant of ExecEvalScalarArrayOpBinSearch: it only supports ANY
+ * (i.e., OR semantics) and, on first execution, sorts the array after
+ * removing null and duplicate entries.  In addition it builds a bloom filter
+ * over the non-null elements, so that most scalars without a match can be
+ * rejected without running the binary search at all.
+ *
+ * Source array is in our result area, scalar arg is already evaluated into
+ * fcinfo->args[0].
+ *
+ * fn_addr is the ordering (three-way comparison) function used for sorting
+ * and searching; hash_fn_addr is the seeded hash function used for the
+ * bloom filter.
+ */
+void
+ExecEvalScalarArrayOpBloom(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
+{
+	FunctionCallInfo fcinfo = op->d.scalararraybloomop.fcinfo_data;
+	bool		strictfunc = op->d.scalararraybloomop.finfo->fn_strict;
+	ArrayType  *arr;
+	Datum		result;
+	bool		resultnull;
+	bool	   *elem_nulls;
+	int			l = 0,
+				r,
+				res;
+
+	/* We don't setup a bloom filter op if the array const is null. */
+	Assert(!*op->resnull);
+
+	/*
+	 * If the scalar is NULL, and the function is strict, return NULL; no
+	 * point in executing the search.
+	 */
+	if (fcinfo->args[0].isnull && strictfunc)
+	{
+		*op->resnull = true;
+		return;
+	}
+
+	/* Preprocess the array the first time we execute the op. */
+	if (op->d.scalararraybloomop.elem_values == NULL)
+	{
+		/* Cache the original lhs so we can scribble on it. */
+		Datum		scalar = fcinfo->args[0].value;
+		bool		scalar_isnull = fcinfo->args[0].isnull;
+		int			num_nonnulls = 0;
+		MemoryContext old_cxt;
+		MemoryContext array_cxt;
+		int16		typlen;
+		bool		typbyval;
+		char		typalign;
+
+		/* bloom filter parameters */
+		double		p;
+		int			m;
+
+		/*
+		 * Everything built here must survive for the whole query, including
+		 * a detoasted copy of the array if one has to be made, because
+		 * by-reference element values point into the array.
+		 */
+		array_cxt = AllocSetContextCreate(
+										  econtext->ecxt_per_query_memory,
+										  "scalararraybloomop context",
+										  ALLOCSET_SMALL_SIZES);
+		old_cxt = MemoryContextSwitchTo(array_cxt);
+
+		arr = DatumGetArrayTypeP(*op->resvalue);
+
+		get_typlenbyvalalign(ARR_ELEMTYPE(arr),
+							 &typlen,
+							 &typbyval,
+							 &typalign);
+
+		deconstruct_array(arr,
+						  ARR_ELEMTYPE(arr),
+						  typlen, typbyval, typalign,
+						  &op->d.scalararraybloomop.elem_values, &elem_nulls, &op->d.scalararraybloomop.num_elems);
+
+		/*
+		 * Size the bloom filter with the usual formulas: for n elements and
+		 * a target false positive rate p, use m = -n * ln(p) / ln(2)^2 bits
+		 * and k = (m / n) * ln(2) hash functions.  Clamp p into (0, 1] so
+		 * that a setting of zero, or above one, can't produce an infinite or
+		 * negative size; and always use at least one hash function.
+		 */
+		p = Min(Max(saop_bloom_false_positives, 1e-6), 1.0);
+		m = ceil((op->d.scalararraybloomop.num_elems * log(p)) / log(1 / pow(2, log(2))));
+
+		/* round the size up to a power of two, and to at least one byte */
+		op->d.scalararraybloomop.num_bits = 8;
+		while (op->d.scalararraybloomop.num_bits < m)
+			op->d.scalararraybloomop.num_bits *= 2;
+
+		op->d.scalararraybloomop.filter = palloc0(op->d.scalararraybloomop.num_bits / 8);
+		op->d.scalararraybloomop.num_hashes = Max(1, (int) round((m / (double) op->d.scalararraybloomop.num_elems) * log(2)));
+
+		op->d.scalararraybloomop.seeds = palloc(sizeof(Datum) * op->d.scalararraybloomop.num_hashes);
+		for (int j = 0; j < op->d.scalararraybloomop.num_hashes; j++)
+			op->d.scalararraybloomop.seeds[j] = Int64GetDatum(random());
+
+		/* Remove null entries from the array. */
+		for (int j = 0; j < op->d.scalararraybloomop.num_elems; j++)
+		{
+			if (!elem_nulls[j])
+				op->d.scalararraybloomop.elem_values[num_nonnulls++] = op->d.scalararraybloomop.elem_values[j];
+		}
+
+		/*
+		 * Remember if we had any nulls so that we know if we need to execute
+		 * non-strict functions with a null rhs value if no match is found.
+		 */
+		op->d.scalararraybloomop.has_nulls = num_nonnulls < op->d.scalararraybloomop.num_elems;
+		op->d.scalararraybloomop.num_elems = num_nonnulls;
+
+		/* build the bloom filter from the remaining (non-null) elements */
+		for (int j = 0; j < num_nonnulls; j++)
+		{
+			bloom_filter_add(op->d.scalararraybloomop.hash_fn_addr,
+							 op->d.scalararraybloomop.hash_fcinfo_data,
+							 op->d.scalararraybloomop.filter,
+							 op->d.scalararraybloomop.num_bits,
+							 op->d.scalararraybloomop.num_hashes,
+							 op->d.scalararraybloomop.seeds,
+							 op->d.scalararraybloomop.elem_values[j]);
+		}
+
+		/*
+		 * Setup the fcinfo for sorting. We've removed nulls, so both lhs and
+		 * rhs values are guaranteed to be non-null.
+		 */
+		fcinfo->args[0].isnull = false;
+		fcinfo->args[1].isnull = false;
+
+		/* Sort the array and remove duplicate elements. */
+		qsort_arg(op->d.scalararraybloomop.elem_values, op->d.scalararraybloomop.num_elems, sizeof(Datum),
+				  compare_array_elements_bloom, op);
+		op->d.scalararraybloomop.num_elems = qunique_arg(op->d.scalararraybloomop.elem_values, op->d.scalararraybloomop.num_elems, sizeof(Datum),
+														 compare_array_elements_bloom, op);
+
+		/* Restore the lhs value after we scribbled on it for sorting. */
+		fcinfo->args[0].value = scalar;
+		fcinfo->args[0].isnull = scalar_isnull;
+
+		MemoryContextSwitchTo(old_cxt);
+	}
+
+	/*
+	 * We only setup a bloom filter op if the array has at least
+	 * enable_saop_threshold elements, so we don't need to worry about adding
+	 * an optimization for the empty array case.
+	 */
+	Assert(!(op->d.scalararraybloomop.num_elems <= 0 && !op->d.scalararraybloomop.has_nulls));
+
+	/* Assume no match will be found until proven otherwise. */
+	result = BoolGetDatum(false);
+	resultnull = false;
+
+	/*
+	 * First check the bloom filter, and only do the binary search if the
+	 * filter says there might be a match.  A NULL scalar (which gets here
+	 * only with a non-strict function) has no value to hash, so it skips
+	 * the filter.
+	 */
+	if (fcinfo->args[0].isnull ||
+		bloom_filter_check(op->d.scalararraybloomop.hash_fn_addr,
+						   op->d.scalararraybloomop.hash_fcinfo_data,
+						   op->d.scalararraybloomop.filter,
+						   op->d.scalararraybloomop.num_bits,
+						   op->d.scalararraybloomop.num_hashes,
+						   op->d.scalararraybloomop.seeds,
+						   fcinfo->args[0].value))
+	{
+		/*
+		 * Binary search through the array.  The rhs null flag may still be
+		 * set from a null-rhs call made for an earlier scalar, so reset it.
+		 */
+		fcinfo->args[1].isnull = false;
+
+		r = op->d.scalararraybloomop.num_elems - 1;
+		while (l <= r)
+		{
+			int			i = (l + r) / 2;
+
+			fcinfo->args[1].value = op->d.scalararraybloomop.elem_values[i];
+
+			/* Call comparison function */
+			fcinfo->isnull = false;
+			res = DatumGetInt32(op->d.scalararraybloomop.fn_addr(fcinfo));
+
+			if (res == 0)
+			{
+				result = BoolGetDatum(true);
+				resultnull = false;
+				break;
+			}
+			else if (res > 0)
+				l = i + 1;
+			else
+				r = i - 1;
+		}
+	}
+
+	/*
+	 * If we didn't find a match in the array, we still might need to handle
+	 * the possibility of null values (we've previously removed them from the
+	 * array).
+	 */
+	if (!DatumGetBool(result) && op->d.scalararraybloomop.has_nulls)
+	{
+		if (strictfunc)
+		{
+			/* Had nulls, so strict function implies null. */
+			result = (Datum) 0;
+			resultnull = true;
+		}
+		else
+		{
+			/* Execute function with null rhs just once. */
+			fcinfo->args[1].value = (Datum) 0;
+			fcinfo->args[1].isnull = true;
+			fcinfo->isnull = false;
+
+			res = DatumGetInt32(op->d.scalararraybloomop.fn_addr(fcinfo));
+			result = BoolGetDatum(res == 0);
+			resultnull = fcinfo->isnull;
+		}
+	}
+
+	*op->resvalue = result;
+	*op->resnull = resultnull;
+}
+
 /* XXX: Name function to be specific to saop binsearch? */
 static int
 compare_array_elements(const void *a, const void *b, void *arg)
@@ -3777,6 +4104,19 @@ compare_array_elements(const void *a, const void *b, void *arg)
 	return DatumGetInt32(op->d.scalararraybinsearchop.fn_addr(fcinfo));
 }
 
+/* qsort_arg/qunique_arg comparator for the bloom op; arg is its ExprEvalStep */
+static int
+compare_array_elements_bloom(const void *a, const void *b, void *arg)
+{
+	ExprEvalStep *op = (ExprEvalStep *) arg;
+	FunctionCallInfo fcinfo = op->d.scalararraybloomop.fcinfo_data;
+
+	fcinfo->args[0].value = *((const Datum *) a);
+	fcinfo->args[1].value = *((const Datum *) b);
+
+	return DatumGetInt32(op->d.scalararraybloomop.fn_addr(fcinfo));
+}
+
 /*
  * Evaluate a NOT NULL domain constraint.
  */
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index a7d63f107f..f7dcf3de5b 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -585,6 +585,102 @@ get_op_hash_functions(Oid opno,
 	return result;
 }
 
+/*
+ * get_op_hash_ext_functions
+ *		Get the OID(s) of the extended hash support function(s) compatible with
+ *		the given operator, operating on its LHS and/or RHS datatype as required.
+ *
+ * A function for the LHS type is sought and returned into *lhs_procno if
+ * lhs_procno isn't NULL.  Similarly, a function for the RHS type is sought
+ * and returned into *rhs_procno if rhs_procno isn't NULL.
+ *
+ * If the given operator is not cross-type, the results should be the same
+ * function, but in cross-type situations they will be different.
+ *
+ * Returns true if able to find the requested function(s), false if not
+ * (extended hash support functions are optional, so that can happen).
+ */
+bool
+get_op_hash_ext_functions(Oid opno,
+						  RegProcedure *lhs_procno, RegProcedure *rhs_procno)
+{
+	bool		result = false;
+	CatCList   *catlist;
+	int			i;
+
+	/* Ensure output args are initialized on failure */
+	if (lhs_procno)
+		*lhs_procno = InvalidOid;
+	if (rhs_procno)
+		*rhs_procno = InvalidOid;
+
+	/*
+	 * Search pg_amop to see if the target operator is registered as the "="
+	 * operator of any hash opfamily.  If the operator is registered in
+	 * multiple opfamilies, assume we can use any one.
+	 */
+	catlist = SearchSysCacheList1(AMOPOPID, ObjectIdGetDatum(opno));
+
+	for (i = 0; i < catlist->n_members; i++)
+	{
+		HeapTuple	tuple = &catlist->members[i]->tuple;
+		Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+		if (aform->amopmethod == HASH_AM_OID &&
+			aform->amopstrategy == HTEqualStrategyNumber)
+		{
+			/*
+			 * Get the matching support function(s).  An opfamily may
+			 * legitimately lack the extended hash function, so just
+			 * continue looking if so.
+			 */
+			if (lhs_procno)
+			{
+				*lhs_procno = get_opfamily_proc(aform->amopfamily,
+												aform->amoplefttype,
+												aform->amoplefttype,
+												HASHEXTENDED_PROC);
+				if (!OidIsValid(*lhs_procno))
+					continue;
+				/* Matching LHS found, done if caller doesn't want RHS */
+				if (!rhs_procno)
+				{
+					result = true;
+					break;
+				}
+				/* Only one lookup needed if given operator is single-type */
+				if (aform->amoplefttype == aform->amoprighttype)
+				{
+					*rhs_procno = *lhs_procno;
+					result = true;
+					break;
+				}
+			}
+			if (rhs_procno)
+			{
+				*rhs_procno = get_opfamily_proc(aform->amopfamily,
+												aform->amoprighttype,
+												aform->amoprighttype,
+												HASHEXTENDED_PROC);
+				if (!OidIsValid(*rhs_procno))
+				{
+					/* Forget any LHS function from this opfamily */
+					if (lhs_procno)
+						*lhs_procno = InvalidOid;
+					continue;
+				}
+				/* Matching RHS found, so done */
+				result = true;
+				break;
+			}
+		}
+	}
+
+	ReleaseSysCacheList(catlist);
+
+	return result;
+}
+
 /*
  * get_op_btree_interpretation
  *		Given an operator's OID, find out which btree opfamilies it belongs to,
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ff9143a4ab..437d2d4f5a 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2072,6 +2072,17 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"enable_saop_bloom", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the use of bloom filters when processing SAOP conditions."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&enable_saop_bloom,
+		true,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
@@ -3692,6 +3703,16 @@ static struct config_real ConfigureNamesReal[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"saop_bloom_false_positives", PGC_SUSET, QUERY_TUNING_COST,
+			gettext_noop("Target false positives rate for bloom filter."),
+			NULL
+		},
+		&saop_bloom_false_positives,
+		0.05, 0.000001, 1.0,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index feddde264d..32ae3b7a9a 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -18,7 +18,9 @@
 #include "nodes/execnodes.h"
 
 extern PGDLLIMPORT bool enable_saop_binsearch;
+extern PGDLLIMPORT bool enable_saop_bloom;
 extern PGDLLIMPORT int enable_saop_threshold;
+extern PGDLLIMPORT double saop_bloom_false_positives;
 
 /* forward references to avoid circularity */
 struct ExprEvalStep;
@@ -217,6 +219,7 @@ typedef enum ExprEvalOp
 	EEOP_CONVERT_ROWTYPE,
 	EEOP_SCALARARRAYOP,
 	EEOP_SCALARARRAYOP_BINSEARCH,
+	EEOP_SCALARARRAYOP_BLOOM,
 	EEOP_XMLEXPR,
 	EEOP_AGGREF,
 	EEOP_GROUPING_FUNC,
@@ -564,8 +567,34 @@ typedef struct ExprEvalStep
 			FunctionCallInfo fcinfo_data;	/* arguments etc */
 			/* faster to access without additional indirection: */
 			PGFunction	fn_addr;	/* actual call address */
+
 		}			scalararraybinsearchop;
 
+		/* for EEOP_SCALARARRAYOP_BLOOM */
+		struct
+		{
+			bool		has_nulls;
+
+			/* pre-sorted array */
+			int			num_elems;
+			Datum	   *elem_values;
+			FmgrInfo   *finfo;	/* function's lookup data */
+			FunctionCallInfo fcinfo_data;	/* arguments etc */
+			/* faster to access without additional indirection: */
+			PGFunction	fn_addr;	/* actual call address */
+
+			/* bloom filter */
+			Datum	   *seeds;	/* one hash seed per hash function */
+			int			num_hashes; /* number of hash functions (seeds) */
+			int			num_bits;	/* size of bloom filter, in bits */
+			char	   *filter; /* bloom filter bitmap */
+			FmgrInfo   *hash_finfo; /* function's lookup data */
+			FunctionCallInfo hash_fcinfo_data;	/* arguments etc */
+			/* faster to access without additional indirection: */
+			PGFunction	hash_fn_addr;	/* actual call address */
+
+		}			scalararraybloomop;
+
 		/* for EEOP_XMLEXPR */
 		struct
 		{
@@ -745,6 +774,7 @@ extern void ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op,
 								   ExprContext *econtext);
 extern void ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalScalarArrayOpBinSearch(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
+extern void ExecEvalScalarArrayOpBloom(ExprState *state, ExprEvalStep *op, ExprContext *econtext);
 extern void ExecEvalConstraintNotNull(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalConstraintCheck(ExprState *state, ExprEvalStep *op);
 extern void ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index c9c68e2f4f..852262c70b 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -80,6 +80,8 @@ extern bool get_compatible_hash_operators(Oid opno,
 										  Oid *lhs_opno, Oid *rhs_opno);
 extern bool get_op_hash_functions(Oid opno,
 								  RegProcedure *lhs_procno, RegProcedure *rhs_procno);
+extern bool get_op_hash_ext_functions(Oid opno,
+									  RegProcedure *lhs_procno, RegProcedure *rhs_procno);
 extern List *get_op_btree_interpretation(Oid opno);
 extern bool equality_ops_are_compatible(Oid opno1, Oid opno2);
 extern Oid	get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype,
-- 
2.21.1