From 56fc55058ca44f18a4a3c878a5588b01c67df0e0 Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Tue, 9 Jul 2019 00:12:45 +0200
Subject: [PATCH 1/4] fix pathkey processing in generate_gather_paths

---
 src/backend/optimizer/path/allpaths.c   | 275 ++++++++++++++++++++++++
 src/backend/optimizer/plan/createplan.c |  10 +-
 2 files changed, 283 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 3efc807164..34a0fb4d32 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2665,6 +2665,235 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	add_path(rel, create_worktablescan_path(root, rel, required_outer));
 }
 
+/*
+ * Find an equivalence class member expression, all of whose Vars come from
+ * the indicated relation.
+ */
+static Expr *
+find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
+{
+	ListCell   *lc_em;
+
+	foreach(lc_em, ec->ec_members)
+	{
+		EquivalenceMember *em = lfirst(lc_em);
+
+		if (bms_is_subset(em->em_relids, rel->relids) &&
+			!bms_is_empty(em->em_relids))
+		{
+			/*
+			 * If there is more than one equivalence member whose Vars are
+			 * taken entirely from this relation, we'll be content to choose
+			 * any one of those.
+			 */
+			return em->em_expr;
+		}
+	}
+
+	/* We didn't find any suitable equivalence class expression */
+	return NULL;
+}
+
+/*
+ * get_useful_ecs_for_relation
+ *		Determine which EquivalenceClasses might be involved in useful
+ *		orderings of this relation.
+ *
+ * This function is in some respects a mirror image of the core function
+ * pathkeys_useful_for_merging: rather than testing whether the ordering of
+ * an existing path is useful, we start from the equivalence classes and
+ * pick those that might give a useful ordering if this relation's output
+ * were sorted explicitly (e.g. by an incremental sort under Gather Merge).
+ *
+ * This function returns a list of potentially-useful equivalence classes,
+ * but it does not guarantee that an EquivalenceMember exists which contains
+ * Vars only from the given relation.  For example, given t1 JOIN t2 ON
+ * t1.x + t2.x = 0, this function will say that the equivalence class
+ * containing t1.x + t2.x is potentially useful for t1, even though no
+ * expression in it can be computed from t1 alone and hence no sort on it
+ * can be generated below the join.  It's not our job to figure that out
+ * here; we're only interested in identifying relevant ECs.
+ */
+static List *
+get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel)
+{
+	List	   *useful_eclass_list = NIL;
+	ListCell   *lc;
+	Relids		relids;
+
+	/*
+	 * First, consider whether any active EC is potentially useful for a merge
+	 * join against this relation.
+	 */
+	if (rel->has_eclass_joins)
+	{
+		foreach(lc, root->eq_classes)
+		{
+			EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc);
+
+			if (eclass_useful_for_merging(root, cur_ec, rel))
+				useful_eclass_list = lappend(useful_eclass_list, cur_ec);
+		}
+	}
+
+	/*
+	 * Next, consider whether there are any non-EC derivable join clauses that
+	 * are merge-joinable.  If the joininfo list is empty, we can exit
+	 * quickly.
+	 */
+	if (rel->joininfo == NIL)
+		return useful_eclass_list;
+
+	/* If this is a child rel, we must use the topmost parent rel to search. */
+	if (IS_OTHER_REL(rel))
+	{
+		Assert(!bms_is_empty(rel->top_parent_relids));
+		relids = rel->top_parent_relids;
+	}
+	else
+		relids = rel->relids;
+
+	/* Check each join clause in turn. */
+	foreach(lc, rel->joininfo)
+	{
+		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
+
+		/* Consider only mergejoinable clauses */
+		if (restrictinfo->mergeopfamilies == NIL)
+			continue;
+
+		/* Make sure we've got canonical ECs. */
+		update_mergeclause_eclasses(root, restrictinfo);
+
+		/*
+		 * restrictinfo->mergeopfamilies != NIL is sufficient to guarantee
+		 * that left_ec and right_ec will be initialized, per comments in
+		 * distribute_qual_to_rels.
+		 *
+		 * We want to identify which side of this merge-joinable clause
+		 * contains columns from the relation produced by this RelOptInfo. We
+		 * test for overlap, not containment, because there could be extra
+		 * relations on either side.  For example, suppose we've got something
+		 * like ((A JOIN B ON A.x = B.x) JOIN C ON A.y = C.y) LEFT JOIN D ON
+		 * A.y = D.y.  The input rel might be the joinrel between A and B, and
+		 * we'll consider the join clause A.y = D.y. relids contains a
+		 * relation not involved in the join class (B) and the equivalence
+		 * class for the left-hand side of the clause contains a relation not
+		 * involved in the input rel (C).  Despite the fact that we have only
+		 * overlap and not containment in either direction, A.y is potentially
+		 * useful as a sort column.
+		 *
+		 * Note that it's even possible that relids overlaps neither side of
+		 * the join clause.  For example, consider A LEFT JOIN B ON A.x = B.x
+		 * AND A.x = 1.  The clause A.x = 1 will appear in B's joininfo list,
+		 * but overlaps neither side of B.  In that case, we just skip this
+		 * join clause, since it doesn't suggest a useful sort order for this
+		 * relation.
+		 */
+		if (bms_overlap(relids, restrictinfo->right_ec->ec_relids))
+			useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
+														restrictinfo->right_ec);
+		else if (bms_overlap(relids, restrictinfo->left_ec->ec_relids))
+			useful_eclass_list = list_append_unique_ptr(useful_eclass_list,
+														restrictinfo->left_ec);
+	}
+
+	return useful_eclass_list;
+}
+
+/*
+ * get_useful_pathkeys_for_relation
+ *		Determine which orderings of a relation might be useful.
+ *
+ * Getting data in sorted order can be useful either because the requested
+ * order matches the final output ordering for the overall query we're
+ * planning, or because it enables an efficient merge join.  Here, we try
+ * to figure out which pathkeys to consider.
+ */
+static List *
+get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
+{
+	List	   *useful_pathkeys_list = NIL;
+	List	   *useful_eclass_list;
+	EquivalenceClass *query_ec = NULL;
+	ListCell   *lc;
+
+	/*
+	 * Sorting by the query_pathkeys is always worth considering, because it
+	 * might let us avoid an explicit sort of the whole result later on.
+	 */
+	if (root->query_pathkeys)
+	{
+		bool		query_pathkeys_ok = true;
+
+		foreach(lc, root->query_pathkeys)
+		{
+			PathKey    *pathkey = (PathKey *) lfirst(lc);
+			EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+
+			/*
+			 * We only consider the query pathkeys as a whole, so every one
+			 * of them must be usable: reject equivalence classes containing
+			 * volatile expressions and those with no member computable from
+			 * this relation alone.  Checking ec_has_volatile first avoids a
+			 * needless search of the member list.
+			 */
+			if (pathkey_ec->ec_has_volatile ||
+				find_em_expr_for_rel(pathkey_ec, rel) == NULL)
+			{
+				query_pathkeys_ok = false;
+				break;
+			}
+		}
+
+		if (query_pathkeys_ok)
+			useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys));
+	}
+
+	/* Get the list of interesting EquivalenceClasses. */
+	useful_eclass_list = get_useful_ecs_for_relation(root, rel);
+
+	/* Extract unique EC for query, if any, so we don't consider it again. */
+	if (list_length(root->query_pathkeys) == 1)
+	{
+		PathKey    *query_pathkey = linitial(root->query_pathkeys);
+
+		query_ec = query_pathkey->pk_eclass;
+	}
+
+	/*
+	 * As a heuristic, the only pathkeys we consider here are those of length
+	 * one.  It's surely possible to consider more, but each ordering we
+	 * return makes the caller build additional paths, so we need to be a bit
+	 * cautious here.
+	 */
+	foreach(lc, useful_eclass_list)
+	{
+		EquivalenceClass *cur_ec = lfirst(lc);
+		Expr	   *em_expr;
+		PathKey    *pathkey;
+
+		/* If redundant with what we did above, skip it. */
+		if (cur_ec == query_ec)
+			continue;
+
+		/* If no expression is computable from this rel, skip it. */
+		em_expr = find_em_expr_for_rel(cur_ec, rel);
+		if (em_expr == NULL)
+			continue;
+
+		/* Looks like we can generate a pathkey, so let's do it. */
+		pathkey = make_canonical_pathkey(root, cur_ec,
+										 linitial_oid(cur_ec->ec_opfamilies),
+										 BTLessStrategyNumber,
+										 false);
+		useful_pathkeys_list = lappend(useful_pathkeys_list,
+									   list_make1(pathkey));
+	}
+
+	return useful_pathkeys_list;
+}
+
 /*
  * generate_gather_paths
  *		Generate parallel access paths for a relation by pushing a Gather or
@@ -2719,6 +2948,10 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 	{
 		Path	   *subpath = (Path *) lfirst(lc);
 		GatherMergePath *path;
+		bool		is_sorted;
+		int			presorted_keys;
+		List	   *useful_pathkeys_list = NIL;	/* List of all pathkeys */
+		ListCell   *lc2;
 
 		if (subpath->pathkeys == NIL)
 			continue;
@@ -2727,6 +2960,48 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
 		path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
 										subpath->pathkeys, NULL, rowsp);
 		add_path(rel, &path->path);
+
+		/*
+		 * Consider incremental sort for interesting orderings.
+		 *
+		 * XXX The list of useful pathkeys depends only on root and rel, so
+		 * it could be computed once, before looping over the partial paths.
+		 */
+		useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel);
+
+		foreach(lc2, useful_pathkeys_list)
+		{
+			List	   *useful_pathkeys = lfirst(lc2);
+
+			is_sorted = pathkeys_common_contained_in(useful_pathkeys,
+													 subpath->pathkeys,
+													 &presorted_keys);
+
+			if (!is_sorted && (presorted_keys > 0))
+			{
+				Path	   *sorted_path;
+
+				/*
+				 * Sort the original partial path.  Don't overwrite subpath:
+				 * the remaining orderings must be matched against (and
+				 * built on top of) the unsorted input path as well.
+				 */
+				sorted_path = (Path *)
+					create_incremental_sort_path(root,
+												 rel,
+												 subpath,
+												 useful_pathkeys,
+												 presorted_keys,
+												 -1);
+
+				path = create_gather_merge_path(root, rel, sorted_path,
+												rel->reltarget,
+												sorted_path->pathkeys,
+												NULL, rowsp);
+
+				add_path(rel, &path->path);
+			}
+		}
 	}
 }
 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index bfb52f21ab..c2877942cb 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -5932,7 +5932,10 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
 				}
 			}
 			if (!j)
-				elog(ERROR, "could not find pathkey item to sort");
+			{
+				Assert(false);	/* core dump in assert-enabled builds */
+				elog(ERROR, "could not find pathkey item to sort");
+			}
 
 			/*
 			 * Do we need to insert a Result node?
@@ -6491,7 +6494,10 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
 		}
 
 		if (!tle)
-			elog(ERROR, "could not find pathkey item to sort");
+		{
+			Assert(false);	/* core dump in assert-enabled builds */
+			elog(ERROR, "could not find pathkey item to sort");
+		}
 
 		/*
 		 * Look up the correct equality operator from the PathKey's slightly
-- 
2.20.1