From b8f33c0074f3546139a396a683926c29db2a193f Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Sun, 25 Oct 2020 12:38:58 +1300 Subject: [PATCH v1] Allow run-time pruning on nested Append/MergeAppend nodes Previously we only tagged on the required information to allow the executor to perform run-time partition pruning for Append/MergeAppend nodes belonging to base relations. It was thought that nested Append/MergeAppend nodes were just about always pulled up into the top-level Append/MergeAppend and that making the run-time pruning info for any sub Append/MergeAppend nodes was a waste of time. However, that was likely badly thought through. Some examples of cases we're unable to pull up nested Append/MergeAppends are: 1) When a Parallel Append node with a mix of parallel and non-parallel paths would need to be pulled into a Parallel Append. 2) When planning an ordered Append scan, a sub-partition which is unordered may require a nested MergeAppend path to ensure sub-partitions don't mix up the order of tuples being fed into the top-level Append. Unfortunately, it was not as simple as just removing the lines in createplan.c which were purposefully not building the run-time pruning info for anything but RELOPT_BASEREL relations. The code in add_paths_to_append_rel() was far too sloppy about which partitioned_rels it included for the Append/MergeAppend paths. The original code there would always assume accumulate_append_subpath() would pull each sub-Append and sub-MergeAppend path into the top-level path. While it does not appear that there were any actual bugs caused by those surplus RT indexes, what it did mean is that later in planning, when we built the run-time pruning info, we wasted effort and built PartitionedRelPruneInfos for partitioned tables that we had no subpaths for the executor to run-time prune. Here we tighten that up so that partitioned_rels only ever contains the RT indexes for partitioned tables which actually have subpaths in the given Append/MergeAppend. 
We can now Assert that every PartitionedRelPruneInfo has a non-empty present_parts. That should allow us to catch any weird corner cases that have been missed. In passing, it seems there is no longer a good reason to have the AppendPath and MergeAppendPath's partitioned_rels fields as a List of IntLists. We can simply have a List of Relids. We still know which is the root-level partition as these always have a lower relid than their children. Once upon a time this field did get passed to the executor to instruct it which partitioned tables needed to be locked. We now have something much more robust for that. Here we also get rid of the RelOptInfo partitioned_child_rels field. This is what was previously used to (sometimes incorrectly) set the Append/MergeAppend path's partitioned_rels field. That was the only user of that field, so we can happily just nuke it out of existence. I also couldn't resist changing some nearby code to make use of the newly added for_each_from macro so we can skip the first element in the list without checking if the current item was the first one on each iteration. A bug report from Andreas Kretschmer prompted all this work; however, after some consideration, I'm not personally classing this as a bug fix. So no backpatch. In Andreas' test case, it just wasn't that clear that there was a nested Append since the top-level Append just had a single sub-path and was pulled up a level, per 8edd0e794. 
Author: David Rowley Discussion: https://postgr.es/m/flat/CAApHDvqSchs%2BubdybcfFaSPB%2B%2BEA7kqMaoqajtP0GtZvzOOR3g%40mail.gmail.com --- src/backend/nodes/outfuncs.c | 1 - src/backend/optimizer/path/allpaths.c | 233 +++++++++++------- src/backend/optimizer/plan/createplan.c | 2 - src/backend/optimizer/util/relnode.c | 3 - src/backend/partitioning/partprune.c | 25 +- src/include/nodes/pathnodes.h | 16 +- src/test/regress/expected/partition_prune.out | 102 ++++++++ src/test/regress/sql/partition_prune.sql | 49 ++++ 8 files changed, 318 insertions(+), 113 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 08a049232e..7049d9eef3 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2310,7 +2310,6 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_BITMAPSET_FIELD(top_parent_relids); WRITE_BOOL_FIELD(partbounds_merged); WRITE_BITMAPSET_FIELD(all_partrels); - WRITE_NODE_FIELD(partitioned_child_rels); } static void diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b399592ff8..ea7410dd42 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -104,8 +104,13 @@ static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); +static List *accumulate_partitioned_rels(List *partitioned_rels, + List *sub_partitioned_rels, + bool flatten_partitioned_rels); static void accumulate_append_subpath(Path *path, - List **subpaths, List **special_subpaths); + List **subpaths, List **special_subpaths, + List **partitioned_rels, + bool flatten_partitioned_rels); static Path *get_singleton_append_subpath(Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -959,17 +964,6 @@ set_append_rel_size(PlannerInfo *root, 
RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); - /* - * Initialize partitioned_child_rels to contain this RT index. - * - * Note that during the set_append_rel_pathlist() phase, we will bubble up - * the indexes of partitioned relations that appear down in the tree, so - * that when we've created Paths for all the children, the root - * partitioned table's list will contain all such indexes. - */ - if (rte->relkind == RELKIND_PARTITIONED_TABLE) - rel->partitioned_child_rels = list_make1_int(rti); - /* * If this is a partitioned baserel, set the consider_partitionwise_join * flag; currently, we only consider partitionwise joins with the baserel @@ -1269,12 +1263,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, if (IS_DUMMY_REL(childrel)) continue; - /* Bubble up childrel's partitioned children. */ - if (rel->part_scheme) - rel->partitioned_child_rels = - list_concat(rel->partitioned_child_rels, - childrel->partitioned_child_rels); - /* * Child is live, so add it to the live_childrels list for use below. */ @@ -1312,56 +1300,34 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *all_child_outers = NIL; ListCell *l; List *partitioned_rels = NIL; + List *partial_partitioned_rels = NIL; + List *pa_partitioned_rels = NIL; double partial_rows = -1; + bool flatten_partitioned_rels; /* If appropriate, consider parallel append */ pa_subpaths_valid = enable_parallel_append && rel->consider_parallel; + /* What we do with the partitioned_rels list is different for UNION ALL */ + flatten_partitioned_rels = (rel->rtekind != RTE_SUBQUERY); + /* - * AppendPath generated for partitioned tables must record the RT indexes - * of partitioned tables that are direct or indirect children of this - * Append rel. - * - * AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel' - * itself does not represent a partitioned relation, but the child sub- - * queries may contain references to partitioned relations. 
The loop - * below will look for such children and collect them in a list to be - * passed to the path creation function. (This assumes that we don't need - * to look through multiple levels of subquery RTEs; if we ever do, we - * could consider stuffing the list we generate here into sub-query RTE's - * RelOptInfo, just like we do for partitioned rels, which would be used - * when populating our parent rel with paths. For the present, that - * appears to be unnecessary.) + * For partitioned tables, we accumulate a list of the partitioned RT + * indexes for the subpaths that are directly under this Append. This is + * used later for run-time partition pruning. We must maintain separate + * lists for each Append Path that we create as accumulate_append_subpath + * sometimes can't flatten sub-Appends into the top-level Append. + * We needn't bother doing this for join rels as no run-time pruning is + * done on those. */ - if (rel->part_scheme != NULL) + if (rel->reloptkind != RELOPT_JOINREL && rel->part_scheme != NULL) { - if (IS_SIMPLE_REL(rel)) - partitioned_rels = list_make1(rel->partitioned_child_rels); - else if (IS_JOIN_REL(rel)) - { - int relid = -1; - List *partrels = NIL; - - /* - * For a partitioned joinrel, concatenate the component rels' - * partitioned_child_rels lists. 
- */ - while ((relid = bms_next_member(rel->relids, relid)) >= 0) - { - RelOptInfo *component; - - Assert(relid >= 1 && relid < root->simple_rel_array_size); - component = root->simple_rel_array[relid]; - Assert(component->part_scheme != NULL); - Assert(list_length(component->partitioned_child_rels) >= 1); - partrels = list_concat(partrels, - component->partitioned_child_rels); - } + partitioned_rels = list_make1(bms_make_singleton(rel->relid)); + partial_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); - partitioned_rels = list_make1(partrels); - } - - Assert(list_length(partitioned_rels) >= 1); + /* skip this one if we're not going to make a Parallel Append path */ + if (pa_subpaths_valid) + pa_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); } /* @@ -1375,14 +1341,6 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ListCell *lcp; Path *cheapest_partial_path = NULL; - /* - * For UNION ALLs with non-empty partitioned_child_rels, accumulate - * the Lists of child relations. - */ - if (rel->rtekind == RTE_SUBQUERY && childrel->partitioned_child_rels != NIL) - partitioned_rels = lappend(partitioned_rels, - childrel->partitioned_child_rels); - /* * If child has an unparameterized cheapest-total path, add that to * the unparameterized Append path we are constructing for the parent. 
@@ -1394,7 +1352,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (childrel->pathlist != NIL && childrel->cheapest_total_path->param_info == NULL) accumulate_append_subpath(childrel->cheapest_total_path, - &subpaths, NULL); + &subpaths, NULL, &partitioned_rels, + flatten_partitioned_rels); else subpaths_valid = false; @@ -1403,7 +1362,9 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, { cheapest_partial_path = linitial(childrel->partial_pathlist); accumulate_append_subpath(cheapest_partial_path, - &partial_subpaths, NULL); + &partial_subpaths, NULL, + &partial_partitioned_rels, + flatten_partitioned_rels); } else partial_subpaths_valid = false; @@ -1432,7 +1393,9 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, Assert(cheapest_partial_path != NULL); accumulate_append_subpath(cheapest_partial_path, &pa_partial_subpaths, - &pa_nonpartial_subpaths); + &pa_nonpartial_subpaths, + &pa_partitioned_rels, + flatten_partitioned_rels); } else @@ -1452,7 +1415,9 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ accumulate_append_subpath(nppath, &pa_nonpartial_subpaths, - NULL); + NULL, + &pa_partitioned_rels, + flatten_partitioned_rels); } } @@ -1572,7 +1537,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, NIL, partial_subpaths, NIL, NULL, parallel_workers, enable_parallel_append, - partitioned_rels, -1); + partial_partitioned_rels, -1); /* * Make sure any subsequent partial paths use the same row count @@ -1621,7 +1586,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, pa_partial_subpaths, NIL, NULL, parallel_workers, true, - partitioned_rels, partial_rows); + pa_partitioned_rels, partial_rows); add_partial_path(rel, (Path *) appendpath); } @@ -1651,6 +1616,10 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, { Relids required_outer = (Relids) lfirst(l); ListCell 
*lcr; + List *part_rels = NIL; + + if (rel->reloptkind != RELOPT_JOINREL && rel->part_scheme != NULL) + part_rels = list_make1(bms_make_singleton(rel->relid)); /* Select the child paths for an Append with this parameterization */ subpaths = NIL; @@ -1676,14 +1645,17 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, subpaths_valid = false; break; } - accumulate_append_subpath(subpath, &subpaths, NULL); + accumulate_append_subpath(subpath, &subpaths, NULL, &part_rels, + flatten_partitioned_rels); } if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, NIL, required_outer, 0, false, - partitioned_rels, -1)); + part_rels, -1)); + else + list_free(part_rels); /* XXX need we bother? */ } /* @@ -1697,17 +1669,14 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, { RelOptInfo *childrel = (RelOptInfo *) linitial(live_childrels); - foreach(l, childrel->partial_pathlist) + /* skip the cheapest partial path, since we already used that above */ + for_each_from(l, childrel->partial_pathlist, 1) { Path *path = (Path *) lfirst(l); AppendPath *appendpath; - /* - * Skip paths with no pathkeys. Also skip the cheapest partial - * path, since we already used that above. - */ - if (path->pathkeys == NIL || - path == linitial(childrel->partial_pathlist)) + /* skip paths with no pathkeys. 
*/ + if (path->pathkeys == NIL) continue; appendpath = create_append_path(root, rel, NIL, list_make1(path), @@ -1757,6 +1726,18 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, List *partition_pathkeys_desc = NIL; bool partition_pathkeys_partial = true; bool partition_pathkeys_desc_partial = true; + List *startup_partitioned_rels = NIL; + List *total_partitioned_rels = NIL; + bool flatten_partitioned_rels; + + /* Set up the method for building the partitioned rels lists */ + flatten_partitioned_rels = (rel->rtekind != RTE_SUBQUERY); + + if (rel->reloptkind != RELOPT_JOINREL && rel->part_scheme != NULL) + { + startup_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); + total_partitioned_rels = list_make1(bms_make_singleton(rel->relid)); + } /* * Some partitioned table setups may allow us to use an Append node @@ -1898,9 +1879,13 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, * child paths for the MergeAppend. */ accumulate_append_subpath(cheapest_startup, - &startup_subpaths, NULL); + &startup_subpaths, NULL, + &startup_partitioned_rels, + flatten_partitioned_rels); accumulate_append_subpath(cheapest_total, - &total_subpaths, NULL); + &total_subpaths, NULL, + &total_partitioned_rels, + flatten_partitioned_rels); } } @@ -1916,7 +1901,7 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, NULL, 0, false, - partitioned_rels, + startup_partitioned_rels, -1)); if (startup_neq_total) add_path(rel, (Path *) create_append_path(root, @@ -1927,7 +1912,7 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, NULL, 0, false, - partitioned_rels, + total_partitioned_rels, -1)); } else @@ -1938,14 +1923,14 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, startup_subpaths, pathkeys, NULL, - partitioned_rels)); + startup_partitioned_rels)); if (startup_neq_total) add_path(rel, (Path *) create_merge_append_path(root, rel, total_subpaths, pathkeys, NULL, - partitioned_rels)); + 
total_partitioned_rels)); } } } @@ -2024,6 +2009,54 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, return cheapest; } +/* + * accumulate_partitioned_rels + * Record 'sub_partitioned_rels' in the 'partitioned_rels' list, + * flattening as appropriate. + */ +static List * +accumulate_partitioned_rels(List *partitioned_rels, + List *sub_partitioned_rels, + bool flatten) +{ + if (flatten) + { + /* + * We're only called with flatten == true when the partitioned_rels + * list has at most 1 element. So we can just add the members from + * sub list's first element onto the first element of + * partitioned_rels. Only later in planning when doing UNION ALL + * Append processing will we see flatten == false. partitioned_rels + * may end up with more than 1 element then, but we never expect to be + * called with flatten == true again after that, so we needn't bother + * doing anything here for anything but the initial element. + */ + if (partitioned_rels != NIL && sub_partitioned_rels != NIL) + { + Relids partrels = (Relids) linitial(partitioned_rels); + Relids subpartrels = (Relids) linitial(sub_partitioned_rels); + + /* Ensure the above comment holds true */ + Assert(list_length(partitioned_rels) == 1); + Assert(list_length(sub_partitioned_rels) == 1); + + linitial(partitioned_rels) = bms_add_members(partrels, subpartrels); + } + } + else + { + /* + * Handle UNION ALL to partitioned tables. This always occurs after + * we've done the accumulation for sub-partitioned tables, so there's + * no need to consider how adding multiple elements to the top level + * list affects the flatten == true case above. + */ + partitioned_rels = list_concat(partitioned_rels, sub_partitioned_rels); + } + + return partitioned_rels; +} + /* * accumulate_append_subpath * Add a subpath to the list being built for an Append or MergeAppend. 
@@ -2044,9 +2077,22 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, * children to subpaths and the rest to special_subpaths. If the latter is * NULL, we don't flatten the path at all (unless it contains only partial * paths). + * + * When pulling up sub-Appends and sub-Merge Appends, we also gather the + * path's list of partitioned tables and store in 'partitioned_rels'. + * When 'flatten_partitioned_rels' is true, 'partitioned_rels' will contain at + * most one element which is a RelIds of the partitioned relations which there + * are subpaths for. In this case we just add the RT indexes for the + * partitioned tables for the subpath we're pulling up to the single entry in + * 'partitioned_rels'. When 'flatten_partitioned_rels' is false we + * concatenate the path's partitioned rel list onto the top-level list. This + * done for UNION ALLs which could have a partitioned table in each union + * branch. */ static void -accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) +accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths, + List **partitioned_rels, + bool flatten_partitioned_rels) { if (IsA(path, AppendPath)) { @@ -2055,6 +2101,9 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) if (!apath->path.parallel_aware || apath->first_partial_path == 0) { *subpaths = list_concat(*subpaths, apath->subpaths); + *partitioned_rels = accumulate_partitioned_rels(*partitioned_rels, + apath->partitioned_rels, + flatten_partitioned_rels); return; } else if (special_subpaths != NULL) @@ -2070,6 +2119,9 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) apath->first_partial_path); *special_subpaths = list_concat(*special_subpaths, new_special_subpaths); + *partitioned_rels = accumulate_partitioned_rels(*partitioned_rels, + apath->partitioned_rels, + flatten_partitioned_rels); return; } } @@ -2078,6 +2130,9 @@ 
accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) MergeAppendPath *mpath = (MergeAppendPath *) path; *subpaths = list_concat(*subpaths, mpath->subpaths); + *partitioned_rels = accumulate_partitioned_rels(*partitioned_rels, + mpath->partitioned_rels, + flatten_partitioned_rels); return; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 94280a730c..40abe6f9f6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1228,7 +1228,6 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) * do partition pruning. */ if (enable_partition_pruning && - rel->reloptkind == RELOPT_BASEREL && best_path->partitioned_rels != NIL) { List *prunequal; @@ -1395,7 +1394,6 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, * do partition pruning. */ if (enable_partition_pruning && - rel->reloptkind == RELOPT_BASEREL && best_path->partitioned_rels != NIL) { List *prunequal; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index a203e6f1ff..76245c1ff3 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -257,7 +257,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; - rel->partitioned_child_rels = NIL; /* * Pass assorted information down the inheritance hierarchy. @@ -672,7 +671,6 @@ build_join_rel(PlannerInfo *root, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; - joinrel->partitioned_child_rels = NIL; /* Compute information relevant to the foreign relations. 
*/ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -850,7 +848,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; - joinrel->partitioned_child_rels = NIL; joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 6268623d56..fdbfddb30b 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -141,7 +141,7 @@ typedef struct PruneStepResult static List *make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *relid_subplan_map, - List *partitioned_rels, List *prunequal, + Relids partrelids, List *prunequal, Bitmapset **matchedsubplans); static void gen_partprune_steps(RelOptInfo *rel, List *clauses, PartClauseTarget target, @@ -267,13 +267,13 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, prunerelinfos = NIL; foreach(lc, partitioned_rels) { - List *rels = (List *) lfirst(lc); + Relids partrelids = (Relids) lfirst(lc); List *pinfolist; Bitmapset *matchedsubplans = NULL; pinfolist = make_partitionedrel_pruneinfo(root, parentrel, relid_subplan_map, - rels, prunequal, + partrelids, prunequal, &matchedsubplans); /* When pruning is possible, record the matched subplans */ @@ -342,7 +342,7 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, static List * make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *relid_subplan_map, - List *partitioned_rels, List *prunequal, + Relids partrelids, List *prunequal, Bitmapset **matchedsubplans) { RelOptInfo *targetpart = NULL; @@ -351,6 +351,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *relid_subpart_map; Bitmapset *subplansfound = NULL; ListCell *lc; + int rti; int i; /* @@ -364,9 +365,9 @@ 
make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size); i = 1; - foreach(lc, partitioned_rels) + rti = -1; + while ((rti = bms_next_member(partrelids, rti)) > 0) { - Index rti = lfirst_int(lc); RelOptInfo *subpart = find_base_rel(root, rti); PartitionedRelPruneInfo *pinfo; List *partprunequal; @@ -379,14 +380,12 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, * Fill the mapping array. * * relid_subpart_map maps relid of a non-leaf partition to the index - * in 'partitioned_rels' of that rel (which will also be the index in - * the returned PartitionedRelPruneInfo list of the info for that + * in 'partrelids' of that rel (which will also be the index in the + * returned PartitionedRelPruneInfo list of the info for that * partition). We use 1-based indexes here, so that zero can * represent an un-filled array entry. */ Assert(rti < root->simple_rel_array_size); - /* No duplicates please */ - Assert(relid_subpart_map[rti] == 0); relid_subpart_map[rti] = i++; /* @@ -582,6 +581,12 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, present_parts = bms_add_member(present_parts, i); } + /* + * Ensure there were no stray PartitionedRelPruneInfo generated for + * partitioned tables that had no sub-paths for. + */ + Assert(!bms_is_empty(present_parts)); + /* Record the maps and other information. 
*/ pinfo->present_parts = present_parts; pinfo->nparts = nparts; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 3dd16b9ad5..6c23b7bc8c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -601,9 +601,6 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_rels - RelOptInfos for each partition * all_partrels - Relids set of all partition relids * partexprs, nullable_partexprs - Partition key expressions - * partitioned_child_rels - RT indexes of unpruned partitions of - * this relation that are partitioned tables - * themselves, in hierarchical order * * The partexprs and nullable_partexprs arrays each contain * part_scheme->partnatts elements. Each of the elements is a list of @@ -751,7 +748,6 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ - List *partitioned_child_rels; /* List of RT indexes */ } RelOptInfo; /* @@ -1398,8 +1394,10 @@ typedef struct CustomPath typedef struct AppendPath { Path path; - /* RT indexes of non-leaf tables in a partition tree */ - List *partitioned_rels; + List *partitioned_rels; /* List of Relids for each non-leaf + * partitioned table in the partition + * tree. One for each partition hierarchy. + */ List *subpaths; /* list of component Paths */ /* Index of first partial path in subpaths; list_length(subpaths) if none */ int first_partial_path; @@ -1424,8 +1422,10 @@ extern bool is_dummy_rel(RelOptInfo *rel); typedef struct MergeAppendPath { Path path; - /* RT indexes of non-leaf tables in a partition tree */ - List *partitioned_rels; + List *partitioned_rels; /* List of Relids for each non-leaf + * partitioned table in the partition + * tree. One for each partition hierarchy. 
+ */ List *subpaths; /* list of component Paths */ double limit_tuples; /* hard limit on output tuples, or -1 */ } MergeAppendPath; diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 50d2a7e4b9..80e71b8e2b 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -3671,6 +3671,108 @@ explain (costs off) update listp1 set a = 1 where a = 2; reset constraint_exclusion; reset enable_partition_pruning; drop table listp; +-- Ensure run-time pruning works correctly for nested Append nodes +set parallel_setup_cost to 0; +set parallel_tuple_cost to 0; +create table listp (a int) partition by list(a); +create table listp_12 partition of listp for values in(1,2) partition by list(a); +create table listp_12_1 partition of listp_12 for values in(1); +create table listp_12_2 partition of listp_12 for values in(2); +-- Force the 2nd subnode of the Append to be non-parallel. This results in +-- a nested Append node because the mixed parallel / non-parallel paths cannot +-- be pulled into the top-level Append. +alter table listp_12_1 set (parallel_workers = 0); +-- Ensure that listp_12_2 is not scanned. (The nested Append is not seen in +-- the plan as it's pulled in setref.c due to having just a single subnode). +explain (analyze on, costs off, timing off, summary off) +select * from listp where a = (select 1); + QUERY PLAN +---------------------------------------------------------------------- + Gather (actual rows=0 loops=1) + Workers Planned: 2 + Params Evaluated: $0 + Workers Launched: 2 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Parallel Append (actual rows=0 loops=3) + -> Seq Scan on listp_12_1 listp_1 (actual rows=0 loops=1) + Filter: (a = $0) + -> Parallel Seq Scan on listp_12_2 listp_2 (never executed) + Filter: (a = $0) +(11 rows) + +-- Like the above but throw some more complexity at the planner by adding +-- a UNION ALL. 
We expect both sides of the union not to scan the +-- non-required partitions. +explain (analyze on, costs off, timing off, summary off) +select * from listp where a = (select 1) + union all +select * from listp where a = (select 2); + QUERY PLAN +----------------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Gather (actual rows=0 loops=1) + Workers Planned: 2 + Params Evaluated: $0 + Workers Launched: 2 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Parallel Append (actual rows=0 loops=3) + -> Seq Scan on listp_12_1 listp_1 (actual rows=0 loops=1) + Filter: (a = $0) + -> Parallel Seq Scan on listp_12_2 listp_2 (never executed) + Filter: (a = $0) + -> Gather (actual rows=0 loops=1) + Workers Planned: 2 + Params Evaluated: $1 + Workers Launched: 2 + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + -> Parallel Append (actual rows=0 loops=3) + -> Seq Scan on listp_12_1 listp_4 (never executed) + Filter: (a = $1) + -> Parallel Seq Scan on listp_12_2 listp_5 (actual rows=0 loops=1) + Filter: (a = $1) +(23 rows) + +drop table listp; +reset parallel_tuple_cost; +reset parallel_setup_cost; +-- Test case for run-time pruning with a nested Merge Append +set enable_sort to 0; +create table rangep (a int, b int) partition by range (a); +create table rangep_0_to_100 partition of rangep for values from (0) to (100) partition by list (b); +-- We need 3 sub-partitions. 1 to validate pruning worked and another two +-- because a single remaining partition would be pulled up to the main Append. 
+create table rangep_0_to_100_1 partition of rangep_0_to_100 for values in(1); +create table rangep_0_to_100_2 partition of rangep_0_to_100 for values in(2); +create table rangep_0_to_100_3 partition of rangep_0_to_100 for values in(3); +create table rangep_100_to_200 partition of rangep for values from (100) to (200); +create index on rangep (a); +-- Ensure run-time pruning works on the nested Merge Append +explain (analyze on, costs off, timing off, summary off) +select * from rangep where b IN((select 1),(select 2)) order by a; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + -> Merge Append (actual rows=0 loops=1) + Sort Key: rangep_2.a + -> Index Scan using rangep_0_to_100_1_a_idx on rangep_0_to_100_1 rangep_2 (actual rows=0 loops=1) + Filter: (b = ANY (ARRAY[$0, $1])) + -> Index Scan using rangep_0_to_100_2_a_idx on rangep_0_to_100_2 rangep_3 (actual rows=0 loops=1) + Filter: (b = ANY (ARRAY[$0, $1])) + -> Index Scan using rangep_0_to_100_3_a_idx on rangep_0_to_100_3 rangep_4 (never executed) + Filter: (b = ANY (ARRAY[$0, $1])) + -> Index Scan using rangep_100_to_200_a_idx on rangep_100_to_200 rangep_5 (actual rows=0 loops=1) + Filter: (b = ANY (ARRAY[$0, $1])) +(15 rows) + +reset enable_sort; +drop table rangep; -- -- Check that gen_prune_steps_from_opexps() works well for various cases of -- clauses for different partition keys diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 1e904a8c5b..939a9b1193 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -1051,6 +1051,55 @@ reset enable_partition_pruning; drop table listp; +-- Ensure run-time pruning works correctly for nested Append nodes +set parallel_setup_cost to 0; +set 
parallel_tuple_cost to 0; + +create table listp (a int) partition by list(a); +create table listp_12 partition of listp for values in(1,2) partition by list(a); +create table listp_12_1 partition of listp_12 for values in(1); +create table listp_12_2 partition of listp_12 for values in(2); + +-- Force the 2nd subnode of the Append to be non-parallel. This results in +-- a nested Append node because the mixed parallel / non-parallel paths cannot +-- be pulled into the top-level Append. +alter table listp_12_1 set (parallel_workers = 0); + +-- Ensure that listp_12_2 is not scanned. (The nested Append is not seen in +-- the plan as it's pulled in setref.c due to having just a single subnode). +explain (analyze on, costs off, timing off, summary off) +select * from listp where a = (select 1); + +-- Like the above but throw some more complexity at the planner by adding +-- a UNION ALL. We expect both sides of the union not to scan the +-- non-required partitions. +explain (analyze on, costs off, timing off, summary off) +select * from listp where a = (select 1) + union all +select * from listp where a = (select 2); + +drop table listp; +reset parallel_tuple_cost; +reset parallel_setup_cost; + +-- Test case for run-time pruning with a nested Merge Append +set enable_sort to 0; +create table rangep (a int, b int) partition by range (a); +create table rangep_0_to_100 partition of rangep for values from (0) to (100) partition by list (b); +-- We need 3 sub-partitions. 1 to validate pruning worked and another two +-- because a single remaining partition would be pulled up to the main Append. 
+create table rangep_0_to_100_1 partition of rangep_0_to_100 for values in(1); +create table rangep_0_to_100_2 partition of rangep_0_to_100 for values in(2); +create table rangep_0_to_100_3 partition of rangep_0_to_100 for values in(3); +create table rangep_100_to_200 partition of rangep for values from (100) to (200); +create index on rangep (a); + +-- Ensure run-time pruning works on the nested Merge Append +explain (analyze on, costs off, timing off, summary off) +select * from rangep where b IN((select 1),(select 2)) order by a; +reset enable_sort; +drop table rangep; + -- -- Check that gen_prune_steps_from_opexps() works well for various cases of -- clauses for different partition keys -- 2.27.0