From dacca343647a4357ac79b6d2dded3dc2fd140aa0 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 12 Nov 2019 18:01:07 +0100 Subject: [PATCH 2/9] Apply all available functional dependencies When considering functional dependencies during selectivity estimation, it's not necessary to bother with selection the best extended statistic. We can simply consider all applicable functional dependencies at once. Author: Tomas Vondra Discussion: https://postgr.es/m/20191028152048.jc6pqv5hb7j77ocp@development --- src/backend/statistics/dependencies.c | 102 +++++++++++++++--------- src/test/regress/expected/stats_ext.out | 58 ++++++++++++++ src/test/regress/sql/stats_ext.sql | 36 +++++++++ 3 files changed, 157 insertions(+), 39 deletions(-) diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index ce31c959a9..c6570c21f3 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum); static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum); -static MVDependency *find_strongest_dependency(StatisticExtInfo *stats, - MVDependencies *dependencies, +static MVDependency *find_strongest_dependency(MVDependencies **dependencies, + int ndependencies, Bitmapset *attnums); static void @@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) * (see the comment in dependencies_clauselist_selectivity). */ static MVDependency * -find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, +find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums) { - int i; + int i, j; MVDependency *strongest = NULL; /* number of attnums in clauses */ @@ -876,36 +876,39 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, * fully-matched dependencies. We do the cheap checks first, before * matching it against the attnums. */ - for (i = 0; i < dependencies->ndeps; i++) + for (i = 0; i < ndependencies; i++) { - MVDependency *dependency = dependencies->deps[i]; - - /* - * Skip dependencies referencing more attributes than available - * clauses, as those can't be fully matched. - */ - if (dependency->nattributes > nattnums) - continue; - - if (strongest) + for (j = 0; j < dependencies[i]->ndeps; j++) { - /* skip dependencies on fewer attributes than the strongest. */ - if (dependency->nattributes < strongest->nattributes) - continue; + MVDependency *dependency = dependencies[i]->deps[j]; - /* also skip weaker dependencies when attribute count matches */ - if (strongest->nattributes == dependency->nattributes && - strongest->degree > dependency->degree) + /* + * Skip dependencies referencing more attributes than available + * clauses, as those can't be fully matched. + */ + if (dependency->nattributes > nattnums) continue; - } - /* - * this dependency is stronger, but we must still check that it's - * fully matched to these attnums. We perform this check last as it's - * slightly more expensive than the previous checks. - */ - if (dependency_is_fully_matched(dependency, attnums)) - strongest = dependency; /* save new best match */ + if (strongest) + { + /* skip dependencies on fewer attributes than the strongest. */ + if (dependency->nattributes < strongest->nattributes) + continue; + + /* also skip weaker dependencies when attribute count matches */ + if (strongest->nattributes == dependency->nattributes && + strongest->degree > dependency->degree) + continue; + } + + /* + * this dependency is stronger, but we must still check that it's + * fully matched to these attnums. We perform this check last as it's + * slightly more expensive than the previous checks. + */ + if (dependency_is_fully_matched(dependency, attnums)) + strongest = dependency; /* save new best match */ + } } return strongest; @@ -949,10 +952,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root, Selectivity s1 = 1.0; ListCell *l; Bitmapset *clauses_attnums = NULL; - StatisticExtInfo *stat; - MVDependencies *dependencies; Bitmapset **list_attnums; int listidx; + MVDependencies **dependencies = NULL; + int ndependencies = 0; /* check if there's any stats that might be useful for us. */ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) @@ -1001,20 +1004,41 @@ dependencies_clauselist_selectivity(PlannerInfo *root, return 1.0; } - /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES, - list_attnums, list_length(clauses)); + /* + * Load all functional dependencies matching at least two parameters. + */ + ndependencies = 0; + dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) * + list_length(rel->statlist)); + + foreach(l,rel->statlist) + { + StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l); + Bitmapset *matched; + int num_matched; + + /* skip statistics that are not of the correct type */ + if (stat->kind != STATS_EXT_DEPENDENCIES) + continue; + + matched = bms_intersect(clauses_attnums, stat->keys); + num_matched = bms_num_members(matched); + bms_free(matched); + + if (num_matched < 2) + continue; + + dependencies[ndependencies++] + = statext_dependencies_load(stat->statOid); + } /* if no matching stats could be found then we've nothing to do */ - if (!stat) + if (!ndependencies) { pfree(list_attnums); return 1.0; } - /* load the dependency items stored in the statistics object */ - dependencies = statext_dependencies_load(stat->statOid); - /* * Apply the dependencies recursively, starting with the widest/strongest * ones, and proceeding to the smaller/weaker ones. At the end of each @@ -1027,7 +1051,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, MVDependency *dependency; /* the widest/strongest dependency, fully matched by clauses */ - dependency = find_strongest_dependency(stat, dependencies, + dependency = find_strongest_dependency(dependencies, ndependencies, clauses_attnums); /* if no suitable dependency was found, we're done */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 3311d8305c..d42a372197 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -451,6 +451,64 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 50 | 50 (1 row) +-- check the ability to use multiple functional dependencies +CREATE TABLE functional_dependencies_multi ( + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +); +-- +INSERT INTO functional_dependencies_multi (a, b, c, d) + SELECT + mod(i,7), + mod(i,7), + mod(i,11), + mod(i,11) + FROM generate_series(1,5000) s(i); +ANALYZE functional_dependencies_multi; +-- estimates without any functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 102 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 41 | 454 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 1 | 64 +(1 row) + +-- create separate functional dependencies +CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi; +CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi; +ANALYZE functional_dependencies_multi; +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); + estimated | actual +-----------+-------- + 714 | 714 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 454 | 454 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + estimated | actual +-----------+-------- + 65 | 64 +(1 row) + +DROP TABLE functional_dependencies_multi; -- MCV lists CREATE TABLE mcv_lists ( filler1 TEXT, diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index ee0d40ac18..992cf4b2cc 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -291,6 +291,42 @@ ANALYZE functional_dependencies; SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1'); +-- check the ability to use multiple functional dependencies +CREATE TABLE functional_dependencies_multi ( + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +); + +-- +INSERT INTO functional_dependencies_multi (a, b, c, d) + SELECT + mod(i,7), + mod(i,7), + mod(i,11), + mod(i,11) + FROM generate_series(1,5000) s(i); + +ANALYZE functional_dependencies_multi; + +-- estimates without any functional dependencies +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + +-- create separate functional dependencies +CREATE STATISTICS functional_dependencies_multi_1 (dependencies) ON a, b FROM functional_dependencies_multi; +CREATE STATISTICS functional_dependencies_multi_2 (dependencies) ON c, d FROM functional_dependencies_multi; + +ANALYZE functional_dependencies_multi; + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE c = 0 AND d = 0'); +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0'); + +DROP TABLE functional_dependencies_multi; + -- MCV lists CREATE TABLE mcv_lists ( filler1 TEXT, -- 2.21.0