From 174a6425036e2d4ca7d3d68c635cd55a58a9b9e6 Mon Sep 17 00:00:00 2001 From: jesperpedersen Date: Tue, 9 Jul 2019 06:44:57 -0400 Subject: [PATCH] UniqueKey --- src/backend/nodes/print.c | 39 +++++++ src/backend/optimizer/path/Makefile | 2 +- src/backend/optimizer/path/allpaths.c | 8 ++ src/backend/optimizer/path/costsize.c | 5 + src/backend/optimizer/path/indxpath.c | 39 +++++++ src/backend/optimizer/path/uniquekey.c | 149 +++++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 12 +- src/backend/optimizer/util/pathnode.c | 12 ++ src/include/nodes/nodes.h | 1 + src/include/nodes/pathnodes.h | 18 +++ src/include/nodes/print.h | 2 +- src/include/optimizer/pathnode.h | 1 + src/include/optimizer/paths.h | 8 ++ 13 files changed, 293 insertions(+), 3 deletions(-) create mode 100644 src/backend/optimizer/path/uniquekey.c diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 4ecde6b421..ed5684bf19 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -459,6 +459,45 @@ print_pathkeys(const List *pathkeys, const List *rtable) printf(")\n"); } +/* + * print_unique_keys - + * unique_key an UniqueKey + */ +void +print_unique_keys(const List *unique_keys, const List *rtable) +{ + ListCell *l; + + printf("("); + foreach(l, unique_keys) + { + UniqueKey *unique_key = (UniqueKey *) lfirst(l); + EquivalenceClass *eclass = (EquivalenceClass *) unique_key->eq_clause; + ListCell *k; + bool first = true; + + /* chase up */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; + + printf("("); + foreach(k, eclass->ec_members) + { + EquivalenceMember *mem = (EquivalenceMember *) lfirst(k); + + if (first) + first = false; + else + printf(", "); + print_expr((Node *) mem->em_expr, rtable); + } + printf(")"); + if (lnext(unique_keys, l)) + printf(", "); + } + printf(")\n"); +} + /* * print_tl * print targetlist in a more legible way. diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 6864a62132..8249a6b6db 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -13,6 +13,6 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \ - joinpath.o joinrels.o pathkeys.o tidpath.o + joinpath.o joinrels.o pathkeys.o tidpath.o uniquekey.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index e9ee32b7f4..acd22653c2 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -3957,6 +3957,14 @@ print_path(PlannerInfo *root, Path *path, int indent) print_pathkeys(path->pathkeys, root->parse->rtable); } + if (path->unique_keys) + { + for (i = 0; i < indent; i++) + printf("\t"); + printf(" unique_keys: "); + print_unique_keys(path->unique_keys, root->parse->rtable); + } + if (join) { JoinPath *jp = (JoinPath *) path; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 3a9a994733..62d7815a76 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -705,6 +705,11 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, path->path.parallel_aware = true; } + /* Consider cost based on unique key */ + if (path->path.unique_keys) + { + } + /* * Now interpolate based on estimated index order correlation to get total * disk I/O cost for main table accesses. diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 5f339fdfde..f053ee6794 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -189,6 +189,7 @@ static Expr *match_clause_to_ordering_op(IndexOptInfo *index, static bool ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel, EquivalenceClass *ec, EquivalenceMember *em, void *arg); +static List *get_uniquekeys_for_index(PlannerInfo *root, List *pathkeys); /* @@ -874,6 +875,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, List *orderbyclausecols; List *index_pathkeys; List *useful_pathkeys; + List *useful_uniquekeys; bool found_lower_saop_clause; bool pathkeys_possibly_useful; bool index_is_ordered; @@ -1036,11 +1038,14 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, if (index_clauses != NIL || useful_pathkeys != NIL || useful_predicate || index_only_scan) { + useful_uniquekeys = get_uniquekeys_for_index(root, useful_pathkeys); + ipath = create_index_path(root, index, index_clauses, orderbyclauses, orderbyclausecols, useful_pathkeys, + useful_uniquekeys, index_is_ordered ? ForwardScanDirection : NoMovementScanDirection, @@ -1063,6 +1068,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, orderbyclauses, orderbyclausecols, useful_pathkeys, + useful_uniquekeys, index_is_ordered ? ForwardScanDirection : NoMovementScanDirection, @@ -1093,11 +1099,14 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys); if (useful_pathkeys != NIL) { + useful_uniquekeys = get_uniquekeys_for_index(root, useful_pathkeys); + ipath = create_index_path(root, index, index_clauses, NIL, NIL, useful_pathkeys, + useful_uniquekeys, BackwardScanDirection, index_only_scan, outer_relids, @@ -1115,6 +1124,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, NIL, NIL, useful_pathkeys, + useful_uniquekeys, BackwardScanDirection, index_only_scan, outer_relids, @@ -3369,6 +3379,35 @@ match_clause_to_ordering_op(IndexOptInfo *index, return clause; } +/* + * get_uniquekeys_for_index + */ +static List * +get_uniquekeys_for_index(PlannerInfo *root, List *pathkeys) +{ + ListCell *lc; + + if (pathkeys) + { + List *uniquekeys = NIL; + foreach(lc, pathkeys) + { + UniqueKey *unique_key; + PathKey *pk = (PathKey *) lfirst(lc); + EquivalenceClass *ec = (EquivalenceClass *) pk->pk_eclass; + + unique_key = makeNode(UniqueKey); + unique_key->eq_clause = ec; + + lappend(uniquekeys, unique_key); + } + + if (uniquekeys_contained_in(root->canon_uniquekeys, uniquekeys)) + return uniquekeys; + } + + return NIL; +} /**************************************************************************** * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c new file mode 100644 index 0000000000..9eecaef56b --- /dev/null +++ b/src/backend/optimizer/path/uniquekey.c @@ -0,0 +1,149 @@ +/*------------------------------------------------------------------------- + * + * uniquekey.c + * Utilities for matching and building unique keys + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekey.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "nodes/pg_list.h" + +static UniqueKey *make_canonical_uniquekey(PlannerInfo *root, EquivalenceClass *eclass); +static List* build_uniquekeys(PlannerInfo *root, List *pathkeys); + +/* + * Build unique keys for GROUP BY + */ +List* +build_group_uniquekeys(PlannerInfo *root) +{ + return build_uniquekeys(root, root->group_pathkeys); +} + +/* + * Build unique keys for DISTINCT + */ +List* +build_distinct_uniquekeys(PlannerInfo *root) +{ + return build_uniquekeys(root, root->distinct_pathkeys); +} + +/* + * uniquekeys_contained_in + * Are the keys2 included in the keys1 superset + */ +bool +uniquekeys_contained_in(List *keys1, List *keys2) +{ + ListCell *key1, + *key2; + + /* + * Fall out quickly if we are passed two identical lists. This mostly + * catches the case where both are NIL, but that's common enough to + * warrant the test. + */ + if (keys1 == keys2) + return true; + + foreach(key2, keys2) + { + bool found = false; + UniqueKey *uniquekey2 = (UniqueKey *) lfirst(key2); + + foreach(key1, keys1) + { + UniqueKey *uniquekey1 = (UniqueKey *) lfirst(key1); + + if (uniquekey1->eq_clause == uniquekey2->eq_clause) + { + found = true; + break; + } + } + + if (!found) + return false; + } + + return true; +} + +/* + * make_canonical_uniquekey + * Given the parameters for a UniqueKey, find any pre-existing matching + * uniquekey in the query's list of "canonical" uniquekeys. Make a new + * entry if there's not one already. + * + * Note that this function must not be used until after we have completed + * merging EquivalenceClasses. (We don't try to enforce that here; instead, + * equivclass.c will complain if a merge occurs after root->canon_uniquekeys + * has become nonempty.) + */ +static UniqueKey * +make_canonical_uniquekey(PlannerInfo *root, + EquivalenceClass *eclass) +{ + UniqueKey *uk; + ListCell *lc; + MemoryContext oldcontext; + + /* The passed eclass might be non-canonical, so chase up to the top */ + while (eclass->ec_merged) + eclass = eclass->ec_merged; + + foreach(lc, root->canon_uniquekeys) + { + uk = (UniqueKey *) lfirst(lc); + if (eclass == uk->eq_clause) + return uk; + } + + /* + * Be sure canonical uniquekeys are allocated in the main planning context. + * Not an issue in normal planning, but it is for GEQO. + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + uk = makeNode(UniqueKey); + uk->eq_clause = eclass; + + root->canon_uniquekeys = lappend(root->canon_uniquekeys, uk); + + MemoryContextSwitchTo(oldcontext); + + return uk; +} + +/* + * Build a list of unique keys + */ +static List* +build_uniquekeys(PlannerInfo *root, List *pathkeys) +{ + List *result = NIL; + ListCell *l; + + /* Create a uniquekey and add it to the list */ + foreach(l, pathkeys) + { + UniqueKey *unique_key; + PathKey *pk = (PathKey *) lfirst(l); + EquivalenceClass *ec = (EquivalenceClass *) pk->pk_eclass; + + unique_key = make_canonical_uniquekey(root, ec); + result = lappend(result, unique_key); + } + + return result; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ca3b7f29e1..dab3142e51 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3650,13 +3650,23 @@ standard_qp_callback(PlannerInfo *root, void *extra) * much easier, since we know that the parser ensured that one is a * superset of the other. */ + root->query_uniquekeys = NIL; + if (root->group_pathkeys) + { root->query_pathkeys = root->group_pathkeys; + + if (!root->parse->hasAggs) + root->query_uniquekeys = build_group_uniquekeys(root); + } else if (root->window_pathkeys) root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) + { root->query_pathkeys = root->distinct_pathkeys; + root->query_uniquekeys = build_distinct_uniquekeys(root); + } else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else @@ -6216,7 +6226,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) /* Estimate the cost of index scan */ indexScanPath = create_index_path(root, indexInfo, - NIL, NIL, NIL, NIL, + NIL, NIL, NIL, NIL, NIL, ForwardScanDirection, false, NULL, 1.0, false); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 0ac73984d2..13766e0bd1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -941,6 +941,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = parallel_workers; pathnode->pathkeys = NIL; /* seqscan has unordered result */ + pathnode->unique_keys = NIL; cost_seqscan(pathnode, root, rel, pathnode->param_info); @@ -965,6 +966,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* samplescan has unordered result */ + pathnode->unique_keys = NIL; cost_samplescan(pathnode, root, rel, pathnode->param_info); @@ -1001,6 +1003,7 @@ create_index_path(PlannerInfo *root, List *indexorderbys, List *indexorderbycols, List *pathkeys, + List *uniquekeys, ScanDirection indexscandir, bool indexonly, Relids required_outer, @@ -1019,6 +1022,7 @@ create_index_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = pathkeys; + pathnode->path.unique_keys = uniquekeys; pathnode->indexinfo = index; pathnode->indexclauses = indexclauses; @@ -1062,6 +1066,7 @@ create_bitmap_heap_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = parallel_degree; pathnode->path.pathkeys = NIL; /* always unordered */ + pathnode->path.unique_keys = NIL; pathnode->bitmapqual = bitmapqual; @@ -1923,6 +1928,7 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = pathkeys; + pathnode->unique_keys = NIL; cost_functionscan(pathnode, root, rel, pathnode->param_info); @@ -1949,6 +1955,7 @@ create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + pathnode->unique_keys = NIL; cost_tablefuncscan(pathnode, root, rel, pathnode->param_info); @@ -1975,6 +1982,7 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + pathnode->unique_keys = NIL; cost_valuesscan(pathnode, root, rel, pathnode->param_info); @@ -2000,6 +2008,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */ + pathnode->unique_keys = NIL; cost_ctescan(pathnode, root, rel, pathnode->param_info); @@ -2026,6 +2035,7 @@ create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + pathnode->unique_keys = NIL; cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info); @@ -2052,6 +2062,7 @@ create_resultscan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + pathnode->unique_keys = NIL; cost_resultscan(pathnode, root, rel, pathnode->param_info); @@ -2078,6 +2089,7 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->parallel_safe = rel->consider_parallel; pathnode->parallel_workers = 0; pathnode->pathkeys = NIL; /* result is always unordered */ + pathnode->unique_keys = NIL; /* Cost is the same as for a regular CTE scan */ cost_ctescan(pathnode, root, rel, pathnode->param_info); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 4e2fb39105..a9b67c64f8 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,7 @@ typedef enum NodeTag T_EquivalenceMember, T_PathKey, T_PathTarget, + T_UniqueKey, T_RestrictInfo, T_IndexClause, T_PlaceHolderVar, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 441e64eca9..485986a61a 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -267,6 +267,8 @@ struct PlannerInfo List *canon_pathkeys; /* list of "canonical" PathKeys */ + List *canon_uniquekeys; /* list of "canonical" UniqueKeys */ + List *left_join_clauses; /* list of RestrictInfos for mergejoinable * outer join clauses w/nonnullable var on * left */ @@ -295,6 +297,8 @@ struct PlannerInfo List *query_pathkeys; /* desired pathkeys for query_planner() */ + List *query_uniquekeys; /* */ + List *group_pathkeys; /* groupClause pathkeys, if any */ List *window_pathkeys; /* pathkeys of bottom window, if any */ List *distinct_pathkeys; /* distinctClause pathkeys, if any */ @@ -1071,6 +1075,15 @@ typedef struct ParamPathInfo List *ppi_clauses; /* join clauses available from outer rels */ } ParamPathInfo; +/* + * UniqueKey + */ +typedef struct UniqueKey +{ + NodeTag type; + + EquivalenceClass *eq_clause; /* equivalence class */ +} UniqueKey; /* * Type "Path" is used as-is for sequential-scan paths, as well as some other @@ -1100,6 +1113,9 @@ typedef struct ParamPathInfo * * "pathkeys" is a List of PathKey nodes (see above), describing the sort * ordering of the path's output rows. + * + * "unique_keys", if not NIL, is a list of UniqueKey nodes (see above), + * describing the XXX. */ typedef struct Path { @@ -1123,6 +1139,8 @@ typedef struct Path List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of PathKey nodes; see above */ + + List *unique_keys; /* the unique keys, or NIL if none */ } Path; /* Macro for extracting a path's parameterization relids; beware double eval */ diff --git a/src/include/nodes/print.h b/src/include/nodes/print.h index cbff56a724..196d3a0783 100644 --- a/src/include/nodes/print.h +++ b/src/include/nodes/print.h @@ -16,7 +16,6 @@ #include "executor/tuptable.h" - #define nodeDisplay(x) pprint(x) extern void print(const void *obj); @@ -28,6 +27,7 @@ extern char *pretty_format_node_dump(const char *dump); extern void print_rt(const List *rtable); extern void print_expr(const Node *expr, const List *rtable); extern void print_pathkeys(const List *pathkeys, const List *rtable); +extern void print_unique_keys(const List *unique_keys, const List *rtable); extern void print_tl(const List *tlist, const List *rtable); extern void print_slot(TupleTableSlot *slot); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 182ffeef4b..374cac157b 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -44,6 +44,7 @@ extern IndexPath *create_index_path(PlannerInfo *root, List *indexorderbys, List *indexorderbycols, List *pathkeys, + List *uniquekeys, ScanDirection indexscandir, bool indexonly, Relids required_outer, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7345137d1d..10b6d2a8c7 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -235,4 +235,12 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +/* + * uniquekey.c + * Utilities for matching and building unique keys + */ +extern List *build_group_uniquekeys(PlannerInfo *root); +extern List *build_distinct_uniquekeys(PlannerInfo *root); +extern bool uniquekeys_contained_in(List *keys1, List *keys2); + #endif /* PATHS_H */ -- 2.21.0