From 091627c63cfb7ab47bfb76f6a96f94370aeea28d Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 9 Jul 2019 02:14:18 +0200 Subject: [PATCH 4/4] rework where incremental sort paths are created --- src/backend/optimizer/path/allpaths.c | 269 ----------------------- src/backend/optimizer/plan/planner.c | 299 ++++++++++++++++++++++++++ 2 files changed, 299 insertions(+), 269 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 34a0fb4d32..3efc807164 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -2665,242 +2665,6 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) add_path(rel, create_worktablescan_path(root, rel, required_outer)); } - - -/* - * Find an equivalence class member expression, all of whose Vars, come from - * the indicated relation. - */ -static Expr * -find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) -{ - ListCell *lc_em; - - foreach(lc_em, ec->ec_members) - { - EquivalenceMember *em = lfirst(lc_em); - - if (bms_is_subset(em->em_relids, rel->relids) && - !bms_is_empty(em->em_relids)) - { - /* - * If there is more than one equivalence member whose Vars are - * taken entirely from this relation, we'll be content to choose - * any one of those. - */ - return em->em_expr; - } - } - - /* We didn't find any suitable equivalence class expression */ - return NULL; -} - -/* - * get_useful_ecs_for_relation - * Determine which EquivalenceClasses might be involved in useful - * orderings of this relation. - * - * This function is in some respects a mirror image of the core function - * pathkeys_useful_for_merging: for a regular table, we know what indexes - * we have and want to test whether any of them are useful. For a foreign - * table, we don't know what indexes are present on the remote side but - * want to speculate about which ones we'd like to use if they existed. 
- * - * This function returns a list of potentially-useful equivalence classes, - * but it does not guarantee that an EquivalenceMember exists which contains - * Vars only from the given relation. For example, given ft1 JOIN t1 ON - * ft1.x + t1.x = 0, this function will say that the equivalence class - * containing ft1.x + t1.x is potentially useful. Supposing ft1 is remote and - * t1 is local (or on a different server), it will turn out that no useful - * ORDER BY clause can be generated. It's not our job to figure that out - * here; we're only interested in identifying relevant ECs. - */ -static List * -get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel) -{ - List *useful_eclass_list = NIL; - ListCell *lc; - Relids relids; - - /* - * First, consider whether any active EC is potentially useful for a merge - * join against this relation. - */ - if (rel->has_eclass_joins) - { - foreach(lc, root->eq_classes) - { - EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc); - - if (eclass_useful_for_merging(root, cur_ec, rel)) - useful_eclass_list = lappend(useful_eclass_list, cur_ec); - } - } - - /* - * Next, consider whether there are any non-EC derivable join clauses that - * are merge-joinable. If the joininfo list is empty, we can exit - * quickly. - */ - if (rel->joininfo == NIL) - return useful_eclass_list; - - /* If this is a child rel, we must use the topmost parent rel to search. */ - if (IS_OTHER_REL(rel)) - { - Assert(!bms_is_empty(rel->top_parent_relids)); - relids = rel->top_parent_relids; - } - else - relids = rel->relids; - - /* Check each join clause in turn. */ - foreach(lc, rel->joininfo) - { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); - - /* Consider only mergejoinable clauses */ - if (restrictinfo->mergeopfamilies == NIL) - continue; - - /* Make sure we've got canonical ECs. 
*/ - update_mergeclause_eclasses(root, restrictinfo); - - /* - * restrictinfo->mergeopfamilies != NIL is sufficient to guarantee - * that left_ec and right_ec will be initialized, per comments in - * distribute_qual_to_rels. - * - * We want to identify which side of this merge-joinable clause - * contains columns from the relation produced by this RelOptInfo. We - * test for overlap, not containment, because there could be extra - * relations on either side. For example, suppose we've got something - * like ((A JOIN B ON A.x = B.x) JOIN C ON A.y = C.y) LEFT JOIN D ON - * A.y = D.y. The input rel might be the joinrel between A and B, and - * we'll consider the join clause A.y = D.y. relids contains a - * relation not involved in the join class (B) and the equivalence - * class for the left-hand side of the clause contains a relation not - * involved in the input rel (C). Despite the fact that we have only - * overlap and not containment in either direction, A.y is potentially - * useful as a sort column. - * - * Note that it's even possible that relids overlaps neither side of - * the join clause. For example, consider A LEFT JOIN B ON A.x = B.x - * AND A.x = 1. The clause A.x = 1 will appear in B's joininfo list, - * but overlaps neither side of B. In that case, we just skip this - * join clause, since it doesn't suggest a useful sort order for this - * relation. - */ - if (bms_overlap(relids, restrictinfo->right_ec->ec_relids)) - useful_eclass_list = list_append_unique_ptr(useful_eclass_list, - restrictinfo->right_ec); - else if (bms_overlap(relids, restrictinfo->left_ec->ec_relids)) - useful_eclass_list = list_append_unique_ptr(useful_eclass_list, - restrictinfo->left_ec); - } - - return useful_eclass_list; -} - -/* - * get_useful_pathkeys_for_relation - * Determine which orderings of a relation might be useful. 
- * - * Getting data in sorted order can be useful either because the requested - * order matches the final output ordering for the overall query we're - * planning, or because it enables an efficient merge join. Here, we try - * to figure out which pathkeys to consider. - */ -static List * -get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) -{ - List *useful_pathkeys_list = NIL; - List *useful_eclass_list; - EquivalenceClass *query_ec = NULL; - ListCell *lc; - - /* - * Pushing the query_pathkeys to the remote server is always worth - * considering, because it might let us avoid a local sort. - */ - if (root->query_pathkeys) - { - bool query_pathkeys_ok = true; - - foreach(lc, root->query_pathkeys) - { - PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceClass *pathkey_ec = pathkey->pk_eclass; - Expr *em_expr; - - /* - * The planner and executor don't have any clever strategy for - * taking data sorted by a prefix of the query's pathkeys and - * getting it to be sorted by all of those pathkeys. We'll just - * end up resorting the entire data set. So, unless we can push - * down all of the query pathkeys, forget it. - * - * is_foreign_expr would detect volatile expressions as well, but - * checking ec_has_volatile here saves some cycles. - */ - if (pathkey_ec->ec_has_volatile || - !(em_expr = find_em_expr_for_rel(pathkey_ec, rel))) - { - query_pathkeys_ok = false; - break; - } - } - - if (query_pathkeys_ok) - useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys)); - } - - /* Get the list of interesting EquivalenceClasses. */ - useful_eclass_list = get_useful_ecs_for_relation(root, rel); - - /* Extract unique EC for query, if any, so we don't consider it again. */ - if (list_length(root->query_pathkeys) == 1) - { - PathKey *query_pathkey = linitial(root->query_pathkeys); - - query_ec = query_pathkey->pk_eclass; - } - - /* - * As a heuristic, the only pathkeys we consider here are those of length - * one. 
It's surely possible to consider more, but since each one we - * choose to consider will generate a round-trip to the remote side, we - * need to be a bit cautious here. It would sure be nice to have a local - * cache of information about remote index definitions... - */ - foreach(lc, useful_eclass_list) - { - EquivalenceClass *cur_ec = lfirst(lc); - Expr *em_expr; - PathKey *pathkey; - - /* If redundant with what we did above, skip it. */ - if (cur_ec == query_ec) - continue; - - /* If no pushable expression for this rel, skip it. */ - em_expr = find_em_expr_for_rel(cur_ec, rel); - if (em_expr == NULL) - continue; - - /* Looks like we can generate a pathkey, so let's do it. */ - pathkey = make_canonical_pathkey(root, cur_ec, - linitial_oid(cur_ec->ec_opfamilies), - BTLessStrategyNumber, - false); - useful_pathkeys_list = lappend(useful_pathkeys_list, - list_make1(pathkey)); - } - - return useful_pathkeys_list; -} - /* * generate_gather_paths * Generate parallel access paths for a relation by pushing a Gather or @@ -2955,10 +2719,6 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) { Path *subpath = (Path *) lfirst(lc); GatherMergePath *path; - bool is_sorted; - int presorted_keys; - List *useful_pathkeys_list = NIL; /* List of all pathkeys */ - ListCell *lc; if (subpath->pathkeys == NIL) continue; @@ -2967,35 +2727,6 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) path = create_gather_merge_path(root, rel, subpath, rel->reltarget, subpath->pathkeys, NULL, rowsp); add_path(rel, &path->path); - - /* consider incremental sort for interesting orderings */ - useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); - - foreach(lc, useful_pathkeys_list) - { - List *useful_pathkeys = lfirst(lc); - - is_sorted = pathkeys_common_contained_in(useful_pathkeys, - subpath->pathkeys, - &presorted_keys); - - if (!is_sorted && (presorted_keys > 0)) - { - /* Also consider incremental sort. 
*/ - subpath = (Path *) create_incremental_sort_path(root, - rel, - subpath, - useful_pathkeys, - presorted_keys, - -1); - - path = create_gather_merge_path(root, rel, subpath, rel->reltarget, - subpath->pathkeys, NULL, rowsp); - - add_path(rel, &path->path); - } - } - } } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 16996b1bc2..ecad427c40 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5068,6 +5068,48 @@ create_ordered_paths(PlannerInfo *root, add_path(ordered_rel, path); } + + /* also consider incremental sort plus Gather Merge on all partial paths */ + { + ListCell *lc; + foreach (lc, input_rel->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path = input_path; + bool is_sorted; + int presorted_keys; + double total_groups; + + /* already handled above */ + if (input_path == cheapest_partial_path) + continue; + + is_sorted = pathkeys_common_contained_in(root->sort_pathkeys, + input_path->pathkeys, &presorted_keys); + + /* skip fully sorted paths and paths with no presorted keys */ + if (is_sorted || presorted_keys == 0) + continue; + + sorted_path = (Path *) create_incremental_sort_path(root, ordered_rel, + input_path, root->sort_pathkeys, + presorted_keys, limit_tuples); + + /* a partial path must be gathered before add_path() on ordered_rel */ + total_groups = input_path->rows * input_path->parallel_workers; + sorted_path = (Path *) create_gather_merge_path(root, ordered_rel, + sorted_path, sorted_path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); + } + } } /* @@ -6484,6 +6526,80 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, } } + + /* + * Consider an incremental sort on top of any input path that has some + * presorted group keys but is not fully sorted.
+ */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } + } + /* * Instead of operating directly on the input relation, we can * consider finalizing a partially aggregated path. 
@@ -6530,6 +6646,53 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, havingQual, dNumGroups)); } + + /* consider incremental sort on partially grouped paths with some presorted group keys */ + foreach(lc, partially_grouped_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + } } @@ -6798,6 +6961,57 @@ create_partial_grouping_paths(PlannerInfo *root, dNumPartialGroups)); } } + + /* + * Consider an incremental sort on top of any input path that has some + * presorted group keys but is not fully sorted. + */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* also ignore already sorted paths */ + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* add incremental sort */ + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ?
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } } if (can_sort && cheapest_partial_path != NULL) @@ -6842,6 +7056,52 @@ create_partial_grouping_paths(PlannerInfo *root, dNumPartialPartialGroups)); } } + + /* consider incremental sort */ + foreach(lc, input_rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); + } } if (can_hash && cheapest_total_path != NULL) @@ -6938,6 +7198,7 @@ create_partial_grouping_paths(PlannerInfo *root, static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) { + ListCell *lc; Path *cheapest_partial_path; /* Try Gather for unordered paths and Gather Merge for ordered ones. 
*/ @@ -6967,6 +7228,44 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) add_path(rel, path); } + + /* also consider incremental sort plus Gather Merge on partial paths with some presorted group keys */ + foreach (lc, rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + double total_groups = path->rows * path->parallel_workers; /* row estimate handed to Gather Merge */ + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + path = (Path *) + create_gather_merge_path(root, + rel, + path, + rel->reltarget, + root->group_pathkeys, + NULL, + &total_groups); + + add_path(rel, path); + } + } /* -- 2.20.1