From 4959c782bbfea165876754021ad9b9287898edd6 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sun, 28 Jul 2019 15:59:05 +0200 Subject: [PATCH 4/4] Consider incremental sort paths in additional places --- contrib/postgres_fdw/postgres_fdw.c | 29 -- src/backend/optimizer/geqo/geqo_eval.c | 2 +- src/backend/optimizer/path/allpaths.c | 208 +++++++++++++- src/backend/optimizer/path/equivclass.c | 28 ++ src/backend/optimizer/plan/planner.c | 346 +++++++++++++++++++++++- src/include/optimizer/paths.h | 3 + 6 files changed, 580 insertions(+), 36 deletions(-) diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 2175dff824..9fc53cad68 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -6523,35 +6523,6 @@ conversion_error_callback(void *arg) } } -/* - * Find an equivalence class member expression, all of whose Vars, come from - * the indicated relation. - */ -Expr * -find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) -{ - ListCell *lc_em; - - foreach(lc_em, ec->ec_members) - { - EquivalenceMember *em = lfirst(lc_em); - - if (bms_is_subset(em->em_relids, rel->relids) && - !bms_is_empty(em->em_relids)) - { - /* - * If there is more than one equivalence member whose Vars are - * taken entirely from this relation, we'll be content to choose - * any one of those. - */ - return em->em_expr; - } - } - - /* We didn't find any suitable equivalence class expression */ - return NULL; -} - /* * Find an equivalence class member expression to be computed as a sort column * in the given target. diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 6d897936d7..ff33acc7b6 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -274,7 +274,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, * grouping_planner). 
*/ if (old_clump->size + new_clump->size < num_gene) - generate_gather_paths(root, joinrel, false); + generate_useful_gather_paths(root, joinrel, false); /* Find and save the cheapest paths for this joinrel */ set_cheapest(joinrel); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index ccf46dd0aa..93d967e812 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -556,7 +556,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, */ if (rel->reloptkind == RELOPT_BASEREL && bms_membership(root->all_baserels) != BMS_SINGLETON) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); @@ -2727,6 +2727,210 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) } } +/* + * get_useful_pathkeys_for_relation + * Determine which orderings of a relation might be useful. + * + * Getting data in sorted order can be useful either because the requested + * order matches the final output ordering for the overall query we're + * planning, or because it enables an efficient merge join. Here, we try + * to figure out which pathkeys to consider. + * + * This allows us to do incremental sort on top of an index scan under a gather + * merge node, i.e. parallelized. + * + * XXX At the moment this can only ever return a list with a single element, + * because it looks at query_pathkeys only. So we might return the pathkeys + * directly, but it seems plausible we'll want to consider other orderings + * in the future. For example, we might want to consider pathkeys useful for + * merge joins. 
+ */ +static List * +get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel) +{ + List *useful_pathkeys_list = NIL; + + /* + * Considering query_pathkeys is always worth it, because it might allow us + * to avoid a total sort when we have a partially presorted path available. + */ + if (root->query_pathkeys) + { + ListCell *lc; + List *pathkeys = NIL; + + foreach(lc, root->query_pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceClass *pathkey_ec = pathkey->pk_eclass; + + /* + * We can only build an Incremental Sort for pathkeys which contain + * an EC member in the current relation, so ignore any suffix of the + * list as soon as we find a pathkey without an EC member in the + * relation. + * + * By still returning the prefix of the pathkeys list that does meet + * criteria of EC membership in the current relation, we enable not + * just an incremental sort on the entirety of query_pathkeys but + * also incremental sort below a JOIN. + */ + if (!find_em_expr_for_rel(pathkey_ec, rel)) + break; + + pathkeys = lappend(pathkeys, pathkey); + } + + if (pathkeys) + useful_pathkeys_list = lappend(useful_pathkeys_list, pathkeys); + } + + return useful_pathkeys_list; +} + +/* + * generate_useful_gather_paths + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. + * + * Unlike plain generate_gather_paths, this looks not only at pathkeys of + * input paths (aiming to preserve the ordering), but also considers ordering + * that might be useful for nodes above the gather merge node, and tries to + * add a sort (regular or incremental) to provide that. + */ +void +generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows) +{ + ListCell *lc; + double rows; + double *rowsp = NULL; + List *useful_pathkeys_list = NIL; + Path *cheapest_partial_path = NULL; + + /* If there are no partial paths, there's nothing to do here. 
*/ + if (rel->partial_pathlist == NIL) + return; + + /* Should we override the rel's rowcount estimate? */ + if (override_rows) + rowsp = &rows; + + /* generate the regular gather (merge) paths */ + generate_gather_paths(root, rel, override_rows); + + /* consider incremental sort for interesting orderings */ + useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel); + + /* used for explicit (full) sort paths */ + cheapest_partial_path = linitial(rel->partial_pathlist); + + /* + * Consider incremental sort paths for each interesting ordering. + * + * XXX I wonder if we need to consider adding a projection here, as + * create_ordered_paths does. + */ + foreach(lc, useful_pathkeys_list) + { + List *useful_pathkeys = lfirst(lc); + ListCell *lc2; + bool is_sorted; + int presorted_keys; + + foreach(lc2, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc2); + GatherMergePath *path; + + /* unordered paths are only useful as input to the full sort below */ + if (subpath->pathkeys == NIL && subpath != cheapest_partial_path) + continue; + + rows = subpath->rows * subpath->parallel_workers; /* per-path estimate for rowsp */ + + is_sorted = pathkeys_common_contained_in(useful_pathkeys, + subpath->pathkeys, + &presorted_keys); + + /* + * When the partial path is already sorted, we can just add a gather + * merge on top, and we're done - no point in adding explicit sort. + * + * XXX Can't we skip this (maybe only for the cheapest partial path) + * when the path is already sorted? Then it's likely duplicate with + * the path created by generate_gather_paths. + */ + if (is_sorted) + { + path = create_gather_merge_path(root, rel, subpath, rel->reltarget, + subpath->pathkeys, NULL, rowsp); + + add_path(rel, &path->path); + continue; + } + + /* + * Consider regular sort for the cheapest partial path (for each + * useful pathkeys). We know the path is not sorted, because we'd + * not get here otherwise. 
+ * + * XXX This is not redundant with the gather merge path created in + * generate_gather_paths, because that merely preserves ordering of + * the cheapest partial path, while here we add an explicit sort to + * match the useful ordering. + */ + if (cheapest_partial_path == subpath) + { + Path *tmp; + + tmp = (Path *) create_sort_path(root, + rel, + subpath, + useful_pathkeys, + -1.0); + + rows = tmp->rows * tmp->parallel_workers; + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + + /* Fall through */ + } + + /* + * Consider incremental sort, but only when the subpath is already + * partially sorted on a pathkey prefix. + */ + if (enable_incrementalsort && presorted_keys > 0) + { + Path *tmp; + + tmp = (Path *) create_incremental_sort_path(root, + rel, + subpath, + useful_pathkeys, + presorted_keys, + -1.0); + + path = create_gather_merge_path(root, rel, + tmp, + rel->reltarget, + tmp->pathkeys, + NULL, + rowsp); + + add_path(rel, &path->path); + } + } + } +} + /* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. @@ -2899,7 +3103,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * once we know the final targetlist (see grouping_planner). */ if (lev < levels_needed) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* Find and save the cheapest paths for this rel */ set_cheapest(rel); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 4ef12547ee..b99cec00cb 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -774,6 +774,34 @@ get_eclass_for_sort_expr(PlannerInfo *root, return newec; } +/* + * Find an equivalence class member expression, all of whose Vars, come from + * the indicated relation. 
+ */ +Expr * +find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) +{ + ListCell *lc_em; + + foreach(lc_em, ec->ec_members) + { + EquivalenceMember *em = lfirst(lc_em); + + if (bms_is_subset(em->em_relids, rel->relids) && + !bms_is_empty(em->em_relids)) + { + /* + * If there is more than one equivalence member whose Vars are + * taken entirely from this relation, we'll be content to choose + * any one of those. + */ + return em->em_expr; + } + } + + /* We didn't find any suitable equivalence class expression */ + return NULL; +} /* * generate_base_implied_equalities diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 753e23676b..fb094e3be0 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -5077,6 +5077,67 @@ create_ordered_paths(PlannerInfo *root, add_path(ordered_rel, path); } + + /* + * Consider incremental sort with a gather merge on partial paths. + * + * XXX This is probably duplicate with the paths we already generate + * in generate_useful_gather_paths in apply_scanjoin_target_to_paths. + */ + if (enable_incrementalsort) + { + ListCell *lc; + + foreach(lc, input_rel->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *sorted_path = input_path; + bool is_sorted; + int presorted_keys; + double total_groups; + + /* + * We don't care if this is the cheapest partial path - we can't + * simply skip it, because it may be partially sorted in which + * case we want to consider adding incremental sort (instead of + * full sort, which is what happens above). + */ + + is_sorted = pathkeys_common_contained_in(root->sort_pathkeys, + input_path->pathkeys, + &presorted_keys); + + /* No point in adding incremental sort on fully sorted paths. */ + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* Since we have presorted keys, consider incremental sort. 
*/ + sorted_path = (Path *) create_incremental_sort_path(root, + ordered_rel, + input_path, + root->sort_pathkeys, + presorted_keys, + limit_tuples); + total_groups = input_path->rows * + input_path->parallel_workers; + sorted_path = (Path *) + create_gather_merge_path(root, ordered_rel, + sorted_path, + sorted_path->pathtarget, + root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (sorted_path->pathtarget != target) + sorted_path = apply_projection_to_path(root, ordered_rel, + sorted_path, target); + + add_path(ordered_rel, sorted_path); + } + } } /* @@ -6431,7 +6492,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, foreach(lc, input_rel->pathlist) { Path *path = (Path *) lfirst(lc); + Path *path_original = path; bool is_sorted; + int presorted_keys; is_sorted = pathkeys_contained_in(root->group_pathkeys, path->pathkeys); @@ -6490,6 +6553,80 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, Assert(false); } } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incrementalsort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* We've already skipped fully sorted paths above. 
*/ + Assert(!is_sorted); + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + /* Now decide what to stick atop it */ + if (parse->groupingSets) + { + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, + gd, agg_costs, dNumGroups); + } + else if (parse->hasAggs) + { + /* + * We have aggregation, possibly with plain GROUP BY. Make + * an AggPath. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_SIMPLE, + parse->groupClause, + havingQual, + agg_costs, + dNumGroups)); + } + else if (parse->groupClause) + { + /* + * We have GROUP BY without aggregation or grouping sets. + * Make a GroupPath. + */ + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); + } + else + { + /* Other cases should have been handled above */ + Assert(false); + } } /* @@ -6501,12 +6638,18 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, foreach(lc, partially_grouped_rel->pathlist) { Path *path = (Path *) lfirst(lc); + Path *path_original = path; + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); /* * Insert a Sort node, if required. But there's no point in * sorting anything but the cheapest path. 
*/ - if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + if (!is_sorted) { if (path != partially_grouped_rel->cheapest_total_path) continue; @@ -6537,6 +6680,56 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, parse->groupClause, havingQual, dNumGroups)); + + /* + * Consider incremental sort too, unless the path is already + * sorted or incremental sort is disabled. Unsorted paths get + * here only if they are the cheapest total path (see above). + */ + if (is_sorted || !enable_incrementalsort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* We've already skipped fully sorted paths above. */ + Assert(!is_sorted); + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + path, + grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + havingQual, + agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + path, + parse->groupClause, + havingQual, + dNumGroups)); } } } @@ -6808,6 +7001,58 @@ create_partial_grouping_paths(PlannerInfo *root, dNumPartialGroups)); } } + + /* Consider incremental sort on all (non-partial) input paths, if enabled. 
*/ + if (enable_incrementalsort) + { + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* Ignore already sorted paths */ + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + /* Since we have presorted keys, consider incremental sort. */ + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } + } + } if (can_sort && cheapest_partial_path != NULL) @@ -6816,7 +7061,9 @@ create_partial_grouping_paths(PlannerInfo *root, foreach(lc, input_rel->partial_pathlist) { Path *path = (Path *) lfirst(lc); + Path *path_original = path; bool is_sorted; + int presorted_keys; is_sorted = pathkeys_contained_in(root->group_pathkeys, path->pathkeys); @@ -6851,6 +7098,56 @@ create_partial_grouping_paths(PlannerInfo *root, NIL, dNumPartialPartialGroups)); } + + /* + * Now we may consider incremental sort on this path, but only + * when the path is not already sorted and when incremental sort + * is enabled. + */ + if (is_sorted || !enable_incrementalsort) + continue; + + /* Restore the input path (we might have added Sort on top). */ + path = path_original; + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + /* We've already skipped fully sorted paths above. 
*/ + Assert(!is_sorted); + + /* no shared prefix, no point in building incremental sort */ + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + if (parse->hasAggs) + add_partial_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialPartialGroups)); + else + add_partial_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialPartialGroups)); } } @@ -6948,10 +7245,11 @@ create_partial_grouping_paths(PlannerInfo *root, static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) { + ListCell *lc; Path *cheapest_partial_path; /* Try Gather for unordered paths and Gather Merge for ordered ones. */ - generate_gather_paths(root, rel, true); + generate_useful_gather_paths(root, rel, true); /* Try cheapest partial path + explicit Sort + Gather Merge. 
*/ cheapest_partial_path = linitial(rel->partial_pathlist); @@ -6977,6 +7275,46 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel) add_path(rel, path); } + + if (!enable_incrementalsort) + return; + + /* also consider incremental sort on partial paths, if enabled */ + foreach(lc, rel->partial_pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + int presorted_keys; + double total_groups = path->rows * path->parallel_workers; /* must be set: read by Gather Merge */ + + is_sorted = pathkeys_common_contained_in(root->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (is_sorted) + continue; + + if (presorted_keys == 0) + continue; + + path = (Path *) create_incremental_sort_path(root, + rel, + path, + root->group_pathkeys, + presorted_keys, + -1.0); + + path = (Path *) + create_gather_merge_path(root, + rel, + path, + rel->reltarget, + root->group_pathkeys, + NULL, + &total_groups); + + add_path(rel, path); + } } /* @@ -7078,7 +7416,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, * paths by doing it after the final scan/join target has been * applied. */ - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* Can't use parallel query above this level. */ rel->partial_pathlist = NIL; @@ -7232,7 +7570,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, * one of the generated paths may turn out to be the cheapest one. 
*/ if (rel->consider_parallel && !IS_OTHER_REL(rel)) - generate_gather_paths(root, rel, false); + generate_useful_gather_paths(root, rel, false); /* * Reassess which paths are the cheapest, now that we've potentially added diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 85f5fe37ea..665f4065a4 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -54,6 +54,8 @@ extern RelOptInfo *standard_join_search(PlannerInfo *root, int levels_needed, extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows); +extern void generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, + bool override_rows); extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages, int max_workers); extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, @@ -135,6 +137,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, Index sortref, Relids rel, bool create_it); +extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel); extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, -- 2.21.1