From deb998bcbaac39ce90b6ad47b4828e2857ae1a57 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 17 Mar 2020 02:57:12 +0100 Subject: [PATCH 2/2] Support clauses of the form Var op Var --- src/backend/statistics/extended_stats.c | 67 ++++++++++++----- src/backend/statistics/mcv.c | 71 +++++++++++++++++- .../statistics/extended_stats_internal.h | 2 +- src/test/regress/expected/stats_ext.out | 72 +++++++++++++++++++ src/test/regress/sql/stats_ext.sql | 22 ++++++ 5 files changed, 215 insertions(+), 19 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 93d67791c5..f8947d9097 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -987,14 +987,18 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, { RangeTblEntry *rte = root->simple_rte_array[relid]; OpExpr *expr = (OpExpr *) clause; - Var *var; + Var *var, + *var2; /* Only expressions with two arguments are considered compatible. */ if (list_length(expr->args) != 2) return false; - /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_clause_args(expr->args, &var, NULL, NULL)) + /* + * Check if the expression has the right shape (one Var and one Const, + * or two Vars). + */ + if (!examine_clause_args(expr->args, &var, &var2, NULL, NULL)) return false; /* @@ -1034,7 +1038,20 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, !get_func_leakproof(get_opcode(expr->opno))) return false; - return statext_is_compatible_clause_internal(root, (Node *) var, + /* + * Check compatibility of the first Var - we get this one for both + * types of supported expressions (Var op Const) and (Var op Var). + */ + if (!statext_is_compatible_clause_internal(root, (Node *) var, + relid, attnums)) + return false; + + /* For (Var op Const) we don't get the second Var, and we're done. */ + if (!var2) + return true; + + /* For (Var op Var) check compatibility of the second Var. */ + return statext_is_compatible_clause_internal(root, (Node *) var2, relid, attnums); } @@ -1050,7 +1067,7 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; /* Check if the expression has the right shape (one Var, one Const) */ - if (!examine_clause_args(expr->args, &var, NULL, NULL)) + if (!examine_clause_args(expr->args, &var, NULL, NULL, NULL)) return false; /* @@ -1456,22 +1473,24 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, } /* - * examine_opclause_expression + * examine_clause_args * Split expression into Var and Const parts. * - * Attempts to match the arguments to either (Var op Const) or (Const op Var), - * possibly with a RelabelType on top. When the expression matches this form, - * returns true, otherwise returns false. + * Attempts to match the arguments to either (Var op Const) or (Const op Var) + * or (Var op Var), possibly with a RelabelType on top. When the expression + * matches this form, returns true, otherwise returns false. * * Optionally returns pointers to the extracted Var/Const nodes, when passed * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies * on which side of the operator we found the Var node. */ bool -examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp) +examine_clause_args(List *args, Var **var1p, Var **var2p, Const **cstp, + bool *varonleftp) { - Var *var; - Const *cst; + Var *var1 = NULL; + Var *var2 = NULL; + Const *cst = NULL; bool varonleft; Node *leftop, *rightop; @@ -1491,22 +1510,38 @@ examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp) if (IsA(leftop, Var) && IsA(rightop, Const)) { - var = (Var *) leftop; + var1 = (Var *) leftop; cst = (Const *) rightop; varonleft = true; } else if (IsA(leftop, Const) && IsA(rightop, Var)) { - var = (Var *) rightop; + var1 = (Var *) rightop; cst = (Const *) leftop; varonleft = false; } + else if (IsA(leftop, Var) && IsA(rightop, Var)) + { + var1 = (Var *) leftop; + var2 = (Var *) rightop; + varonleft = false; + + /* + * Both variables have to be for the same relation (otherwise it's + * a join clause, and we don't deal with those yet. + */ + if (var1->varno != var2->varno) + return false; + } else return false; /* return pointers to the extracted parts if requested */ - if (varp) - *varp = var; + if (var1p) + *var1p = var1; + + if (var2p) + *var2p = var2; if (cstp) *cstp = cst; diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 343011178b..3e58d36d68 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1581,13 +1581,17 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, /* valid only after examine_clause_args returns true */ Var *var; + Var *var2; Const *cst; bool varonleft; fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var and const from the expression */ - if (examine_clause_args(expr->args, &var, &cst, &varonleft)) + if (!examine_clause_args(expr->args, &var, &var2, &cst, &varonleft)) + continue; + + if (cst) /* Var op Const */ { int idx; @@ -1651,6 +1655,68 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, matches[i] = RESULT_MERGE(matches[i], is_or, match); } } + else /* Var op Var */ + { + int idx; + int idx2; + + Assert(var2); + + /* match the attribute to a dimension of the statistic */ + idx = bms_member_index(keys, var->varattno); + idx2 = bms_member_index(keys, var2->varattno); + + /* + * Walk through the MCV items and evaluate the current clause. + * We can skip items that were already ruled out, and + * terminate if there are no remaining MCV items that might + * possibly match. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + bool match = true; + MCVItem *item = &mcvlist->items[i]; + + /* + * When either of the MCV items is NULL we can treat this + * as a mismatch. We must not call the operator because + * of strictness. + */ + if (item->isnull[idx] || item->isnull[idx2]) + { + matches[i] = RESULT_MERGE(matches[i], is_or, false); + continue; + } + + /* + * Skip MCV items that can't change result in the bitmap. + * Once the value gets false for AND-lists, or true for + * OR-lists, we don't need to look at more clauses. + */ + if (RESULT_IS_FINAL(matches[i], is_or)) + continue; + + /* + * First check whether the constant is below the lower + * boundary (in that case we can skip the bucket, because + * there's no overlap). + * + * We don't store collations used to build the statistics, + * but we can use the collation for the attribute itself, + * as stored in varcollid. We do reset the statistics after + * a type change (including collation change), so this is + * OK. We may need to relax this after allowing extended + * statistics on expressions. + */ + match = DatumGetBool(FunctionCall2Coll(&opproc, + var->varcollid, + item->values[idx], + item->values[idx2])); + + /* update the match bitmap with the result */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } } else if (IsA(clause, ScalarArrayOpExpr)) { @@ -1665,7 +1731,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var and const from the expression */ - if (examine_clause_args(expr->args, &var, &cst, &varonleft)) + if (examine_clause_args(expr->args, &var, NULL, &cst, &varonleft)) { int idx; @@ -1679,6 +1745,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, /* ScalarArrayOpExpr has the Var always on the left */ Assert(varonleft); + Assert(cst); if (!cst->constisnull) { diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 6c039a81c2..ee34a897c8 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -96,7 +96,7 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums); -extern bool examine_clause_args(List *args, Var **varp, +extern bool examine_clause_args(List *args, Var **var1p, Var **var2p, Const **cstp, bool *varonleftp); extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 3aba4a37c6..481be40141 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -940,6 +940,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 343 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c'); + estimated | actual +-----------+-------- + 1667 | 3750 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c'); + estimated | actual +-----------+-------- + 1667 | 0 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); estimated | actual -----------+-------- @@ -1081,6 +1093,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 200 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c'); + estimated | actual +-----------+-------- + 3750 | 3750 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c'); + estimated | actual +-----------+-------- + 1 | 0 +(1 row) + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); estimated | actual -----------+-------- @@ -1284,6 +1308,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND 3750 | 2500 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d'); + estimated | actual +-----------+-------- + 25 | 2500 +(1 row) + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists; ANALYZE mcv_lists; @@ -1317,6 +1347,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND 2500 | 2500 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d'); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + -- mcv with pass-by-ref fixlen types, e.g. uuid CREATE TABLE mcv_lists_uuid ( a UUID, @@ -1408,6 +1444,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND 1094 | 0 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b'); + estimated | actual +-----------+-------- + 9950 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c'); + estimated | actual +-----------+-------- + 50 | 2500 +(1 row) + CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c FROM mcv_lists_bool; ANALYZE mcv_lists_bool; @@ -1435,6 +1483,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND 1 | 0 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b'); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c'); + estimated | actual +-----------+-------- + 2500 | 2500 +(1 row) + -- check the ability to use multiple MCV lists CREATE TABLE mcv_lists_multi ( a INTEGER, @@ -1493,6 +1553,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR 2649 | 1572 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d'); + estimated | actual +-----------+-------- + 1 | 5000 +(1 row) + -- create separate MCV statistics CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi; CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi; @@ -1539,6 +1605,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR 1571 | 1572 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d'); + estimated | actual +-----------+-------- + 5000 | 5000 +(1 row) + DROP TABLE mcv_lists_multi; -- Permission tests. Users should not be able to see specific data values in -- the extended statistics, if they lack permission to see those values in diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 54ceb1f4ee..f5e4468129 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -498,6 +498,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); @@ -549,6 +553,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c'); + SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)'); @@ -656,6 +664,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d'); + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists; @@ -674,6 +684,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d'); + -- mcv with pass-by-ref fixlen types, e.g. uuid CREATE TABLE mcv_lists_uuid ( a UUID, @@ -746,6 +758,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c'); + CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c FROM mcv_lists_bool; @@ -759,6 +775,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c'); + -- check the ability to use multiple MCV lists CREATE TABLE mcv_lists_multi ( a INTEGER, @@ -785,6 +805,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d'); -- create separate MCV statistics CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi; @@ -799,6 +820,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)'); SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d'); DROP TABLE mcv_lists_multi; -- 2.21.1