From 4bc98cfd662d2d8022fabd8cd2c515ce22e9e8ff Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 27 Feb 2018 15:20:02 +0900 Subject: [PATCH v13 2/2] Allow partition elimination to occur during execution The query planner supports eliminating partitions of a partitioned table during query planning. This has its limitations as it can only perform the elimination using information which is available during planning. Allowing this partition elimination to occur during execution allows the values of Params to be used for elimination too, thus opening the door for PREPAREd statements to participate too. We can also perform partition elimination in the following cases: 1. Parameterized Nested Loop Joins: The parameter from the outer side of the join can be used to determine the minimum set of inner side partitions to scan. 2. Initplans: Once an initplan has been executed we can then determine which partitions match the value from the initplan. Unlike the case of partition elimination during query planning, when the elimination takes place during execution, the Append's subnodes are still visible in the EXPLAIN output. In order to determine if pruning has actually taken place, the EXPLAIN ANALYZE output must be viewed. If a certain Append subplan was never executed due to elimination of the partition then the execution timing area will state "(never executed)". Whereas, for example, in the case of parameterized nested loops, the number of loops stated in the EXPLAIN ANALYZE output for certain subplans may appear lower than for others due to those subplans having been scanned fewer times. For now, only Append nodes used in a SELECT query allow this execution-time elimination to take place. The features added here are likely easily extendable into MergeAppend and possibly also for DML statements too, such as UPDATE and DELETE. 
--- src/backend/catalog/partition.c | 50 ++ src/backend/executor/nodeAppend.c | 330 +++++++- src/backend/nodes/copyfuncs.c | 32 + src/backend/optimizer/path/allpaths.c | 12 +- src/backend/optimizer/path/joinrels.c | 2 +- src/backend/optimizer/plan/createplan.c | 49 +- src/backend/optimizer/plan/planner.c | 3 +- src/backend/optimizer/prep/prepunion.c | 4 +- src/backend/optimizer/util/clauses.c | 19 + src/backend/optimizer/util/partprune.c | 266 +++++- src/backend/optimizer/util/pathnode.c | 37 +- src/backend/optimizer/util/plancat.c | 36 +- src/include/catalog/partition.h | 9 + src/include/nodes/execnodes.h | 8 + src/include/nodes/nodes.h | 1 + src/include/nodes/plannodes.h | 7 + src/include/nodes/primnodes.h | 23 + src/include/nodes/relation.h | 2 + src/include/optimizer/clauses.h | 3 + src/include/optimizer/partprune.h | 5 + src/include/optimizer/pathnode.h | 2 +- src/test/regress/expected/partition_prune.out | 1115 +++++++++++++++++++++++++ src/test/regress/sql/partition_prune.sql | 312 +++++++ 23 files changed, 2219 insertions(+), 108 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 6a2761c350..c4542b00cf 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -4203,3 +4203,53 @@ satisfies_hash_partition(PG_FUNCTION_ARGS) PG_RETURN_BOOL(rowHash % modulus == remainder); } + +/* + * build_partition_expressions + * Build a list of partition key expressions. Plain attribute keys will + * be built as Vars with the varno set to 'varno'. 
+ */ +List ** +build_partition_expressions(PartitionKey partkey, Index varno) +{ + int partnatts; + int cnt; + List **partexprs; + ListCell *lc; + + partnatts = partkey->partnatts; + partexprs = (List **) palloc(sizeof(List *) * partnatts); + lc = list_head(partkey->partexprs); + + for (cnt = 0; cnt < partnatts; cnt++) + { + Expr *partexpr; + AttrNumber attno = partkey->partattrs[cnt]; + + if (attno != InvalidAttrNumber) + { + /* Single column partition key is stored as a Var node. */ + Assert(attno > 0); + + partexpr = (Expr *) makeVar(varno, attno, + partkey->parttypid[cnt], + partkey->parttypmod[cnt], + partkey->parttypcoll[cnt], 0); + } + else + { + if (lc == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + /* Re-stamp the expression with given varno. */ + partexpr = (Expr *) copyObject(lfirst(lc)); + if (varno != 1) + ChangeVarNodes((Node *) partexpr, 1, varno, 0); + lc = lnext(lc); + } + + partexprs[cnt] = list_make1(partexpr); + } + + return partexprs; +} diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 7a3dd2ee2d..b1f147d357 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -60,6 +60,10 @@ #include "executor/execdebug.h" #include "executor/nodeAppend.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/partprune.h" +#include "optimizer/plancat.h" +#include "utils/memutils.h" /* Shared state for parallel-aware Append. 
*/ struct ParallelAppendState @@ -76,12 +80,26 @@ struct ParallelAppendState bool pa_finished[FLEXIBLE_ARRAY_MEMBER]; }; +struct PartitionPruneContextCache +{ + PartitionPruneContext *context; + PartitionClauseInfo *partclauseinfo; + PartitionPruneContextCache *subcache; +}; + #define INVALID_SUBPLAN_INDEX -1 static TupleTableSlot *ExecAppend(PlanState *pstate); static bool choose_next_subplan_locally(AppendState *node); static bool choose_next_subplan_for_leader(AppendState *node); static bool choose_next_subplan_for_worker(AppendState *node); +static void set_valid_runtime_subplans(AppendState *node); +static void set_valid_runtime_subplans_recurse(AppendState *node, + PartitionPruneInfo *pinfo, + PartitionPruneContextCache *ctxcache, + Bitmapset **validsubplans); +static void mark_invalid_subplans_as_finished(AppendState *node); + /* ---------------------------------------------------------------- * ExecInitAppend @@ -127,6 +145,34 @@ ExecInitAppend(Append *node, EState *estate, int eflags) appendstate->ps.ExecProcNode = ExecAppend; appendstate->appendplans = appendplanstates; appendstate->as_nplans = nplans; + appendstate->as_valid_subplans = NULL; + appendstate->part_prune_params = NULL; /* determined later */ + appendstate->part_prune_info = node->part_prune_info; + appendstate->contextcache = NULL; /* populate this as needed below */ + + if (node->part_prune_info) + { + /* + * When run-time partition pruning is enabled we make calls to a query + * planner function to determine which partitions will match. The + * planner is not too careful about freeing memory, so we'll ensure we + * call the function in a temporary memory context to avoid any memory + * leaking in the executor's memory context. + */ + appendstate->prune_context = + AllocSetContextCreate(CurrentMemoryContext, + "Partition Prune", + ALLOCSET_DEFAULT_SIZES); + } + else + { + /* + * When run-time partition pruning is not enabled we can just mark + * all subplans as valid. 
+ */ + appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1); + appendstate->prune_context = NULL; + } /* * Initialize result tuple type and slot. @@ -149,18 +195,14 @@ ExecInitAppend(Append *node, EState *estate, int eflags) /* * Miscellaneous initialization * - * Append plans don't have expression contexts because they never call - * ExecQual or ExecProject. + * create expression context for node */ + ExecAssignExprContext(estate, &appendstate->ps); + appendstate->ps.ps_ProjInfo = NULL; - /* - * Parallel-aware append plans must choose the first subplan to execute by - * looking at shared memory, but non-parallel-aware append plans can - * always start with the first subplan. - */ - appendstate->as_whichplan = - appendstate->ps.plan->parallel_aware ? INVALID_SUBPLAN_INDEX : 0; + /* Let choose_next_subplan_* function handle setting the first subplan */ + appendstate->as_whichplan = INVALID_SUBPLAN_INDEX; /* If parallel-aware, this will be overridden later. */ appendstate->choose_next_subplan = choose_next_subplan_locally; @@ -251,6 +293,17 @@ ExecReScanAppend(AppendState *node) { int i; + /* + * If any of the parameters being used for partition pruning have changed, + * then we'd better unset the valid subplans so that they can be reselected + * for the new parameter values. + */ + if (bms_overlap(node->ps.chgParam, node->part_prune_params)) + { + bms_free(node->as_valid_subplans); + node->as_valid_subplans = NULL; + } + for (i = 0; i < node->as_nplans; i++) { PlanState *subnode = node->appendplans[i]; @@ -270,8 +323,8 @@ ExecReScanAppend(AppendState *node) ExecReScan(subnode); } - node->as_whichplan = - node->ps.plan->parallel_aware ? 
INVALID_SUBPLAN_INDEX : 0; + /* Let choose_next_subplan_* function handle setting the first subplan */ + node->as_whichplan = INVALID_SUBPLAN_INDEX; } /* ---------------------------------------------------------------- @@ -360,22 +413,35 @@ static bool choose_next_subplan_locally(AppendState *node) { int whichplan = node->as_whichplan; + int nextplan; - /* We should never see INVALID_SUBPLAN_INDEX in this case. */ - Assert(whichplan >= 0 && whichplan <= node->as_nplans); + /* + * If first call then have the bms member function choose the first valid + * subplan by initializing whichplan to -1. If there happen to be no + * valid subplans then the bms member function will handle that by + * returning a negative number which will allow us to exit returning a + * false value. + */ + if (whichplan == INVALID_SUBPLAN_INDEX) + { + if (node->as_valid_subplans == NULL) + set_valid_runtime_subplans(node); + + whichplan = -1; + } + + /* Ensure whichplan is within the expected range */ + Assert(whichplan >= -1 && whichplan <= node->as_nplans); if (ScanDirectionIsForward(node->ps.state->es_direction)) - { - if (whichplan >= node->as_nplans - 1) - return false; - node->as_whichplan++; - } + nextplan = bms_next_member(node->as_valid_subplans, whichplan); else - { - if (whichplan <= 0) - return false; - node->as_whichplan--; - } + nextplan = bms_prev_member(node->as_valid_subplans, whichplan); + + if (nextplan < 0) + return false; + + node->as_whichplan = nextplan; return true; } @@ -408,6 +474,17 @@ choose_next_subplan_for_leader(AppendState *node) { /* Start with last subplan. */ node->as_whichplan = node->as_nplans - 1; + + /* + * If we've yet to determine the valid subplans for these parameters + * then do so now. If run-time pruning is disabled then the valid + * subplans will always be set to all subplans. 
+ */ + if (node->as_valid_subplans == NULL) + { + set_valid_runtime_subplans(node); + mark_invalid_subplans_as_finished(node); + } } /* Loop until we find a subplan to execute. */ @@ -460,6 +537,17 @@ choose_next_subplan_for_worker(AppendState *node) if (node->as_whichplan != INVALID_SUBPLAN_INDEX) node->as_pstate->pa_finished[node->as_whichplan] = true; + /* + * If we've yet to determine the valid subplans for these parameters then + * do so now. If run-time pruning is disabled then the valid subplans + * will always be set to all subplans. + */ + else if (node->as_valid_subplans == NULL) + { + set_valid_runtime_subplans(node); + mark_invalid_subplans_as_finished(node); + } + /* If all the plans are already done, we have nothing to do */ if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX) { @@ -525,3 +613,199 @@ choose_next_subplan_for_worker(AppendState *node) return true; } + +/* + * set_valid_runtime_subplans + * Determine which subset of subplan nodes we need to scan based on + * the details stored in node's 'part_prune_info'. All subplans which + * provably cannot possibly have matching records are eliminated and the + * remainder are set in the AppendState's 'as_valid_subplans' variable. + */ +static void +set_valid_runtime_subplans(AppendState *node) +{ + MemoryContext oldcontext; + Bitmapset *validsubplans = NULL; + + /* Should never be called when already set */ + Assert(node->as_valid_subplans == NULL); + + if (!node->contextcache) + node->contextcache = palloc0(sizeof(PartitionPruneContextCache)); + + /* + * Switch to a temp context to avoid leaking memory in the + * executor's memory context. 
+ */ + oldcontext = MemoryContextSwitchTo(node->prune_context); + + set_valid_runtime_subplans_recurse(node, node->part_prune_info, + node->contextcache, + &validsubplans); + + MemoryContextSwitchTo(oldcontext); + + /* Move to the correct memory context */ + node->as_valid_subplans = bms_copy(validsubplans); + + MemoryContextReset(node->prune_context); +} + +static void +set_valid_runtime_subplans_recurse(AppendState *node, + PartitionPruneInfo *pinfo, + PartitionPruneContextCache *ctxcache, + Bitmapset **validsubplans) +{ + PartitionPruneContext *context; + Bitmapset *partset; + int i; + + check_stack_depth(); + + /* + * If the PartitionPruneContext has not yet been initialized for this rel, + * then do that now. + */ + if (!ctxcache->context) + { + PartitionDesc partdesc; + Relation rel; + PartitionKey partkey; + MemoryContext oldContext; + List **partexprs; + int partnatts; + int i; + + oldContext = MemoryContextSwitchTo(node->ps.state->es_query_cxt); + + ctxcache->context = context = palloc(sizeof(PartitionPruneContext)); + ctxcache->subcache = palloc0(sizeof(PartitionPruneContextCache) * + pinfo->nparts); + + rel = relation_open(pinfo->parentoid, NoLock); + + partkey = RelationGetPartitionKey(rel); + partdesc = RelationGetPartitionDesc(rel); + + context->relid = pinfo->relid; + context->strategy = partkey->strategy; + context->partnatts = partnatts = partkey->partnatts; + partexprs = build_partition_expressions(partkey, pinfo->relid); + context->partkeys = (Expr **) palloc(sizeof(Expr *) * partnatts); + + for (i = 0; i < partnatts; i++) + context->partkeys[i] = (Expr *) linitial(partexprs[i]); + + context->partopfamily = partkey->partopfamily; + context->partopcintype = partkey->partopcintype; + context->partcollation = partkey->partcollation; + context->partsupfunc = partkey->partsupfunc; + context->nparts = pinfo->nparts; + context->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey); + + if 
+ (OidIsValid(get_default_oid_from_partdesc(partdesc))) + context->has_default_part = true; + else + context->has_default_part = false; + + context->partition_qual = RelationGetPartitionQual(rel); + + context->planstate = &node->ps; + context->econtext = node->ps.ps_ExprContext; + context->paramids = NULL; + + ctxcache->partclauseinfo = generate_partition_clauses(context, pinfo->prunequal); + + node->part_prune_params = bms_add_members(node->part_prune_params, + context->paramids); + + relation_close(rel, NoLock); + + MemoryContextSwitchTo(oldContext); + } + else + context = ctxcache->context; + + /* + * Detect if any impossibilities were discovered during + * generate_partition_clauses + */ + if (ctxcache->partclauseinfo->constfalse) + { + bms_free(*validsubplans); + *validsubplans = NULL; + return; + } + + /* + * We only need to determine the matching partitions if there are any + * params matching the partition key at this level. If there are no + * matching params, then we can simply return all subnodes which belong + * to this parent partition. The planner should have already determined + * these to be the minimum possible set. We must still recursively visit + * any subpartitions as we may find their partition keys match some + * params. + */ + if (!bms_is_empty(context->paramids)) + partset = get_partitions_from_clauses(context, + ctxcache->partclauseinfo); + else + partset = pinfo->allsubnodes; + + /* Translate partset into subnode indexes */ + i = -1; + while ((i = bms_next_member(partset, i)) >= 0) + { + if (pinfo->subnodeindex[i] >= 0) + *validsubplans = bms_add_member(*validsubplans, + pinfo->subnodeindex[i]); + else if (pinfo->subpartindex[i] != NULL) + set_valid_runtime_subplans_recurse(node, + pinfo->subpartindex[i], + &ctxcache->subcache[i], + validsubplans); + else + { + /* + * If this happens then we're somehow missing an Append subnode. 
+ * This shouldn't happen and could only happen if a more + * restrictive clause list was used for partition elimination + * during planning than what was used here. + */ + elog(ERROR, "partition missing from Append subplans"); + } + } + +} + +/* + * mark_invalid_subplans_as_finished + * Marks the ParallelAppendState's pa_finished as true for each invalid + * subplan. + * + * This function should only be called for parallel Append with run-time + * pruning enabled. + */ +static void +mark_invalid_subplans_as_finished(AppendState *node) +{ + int i; + + /* Only valid to call this while in parallel Append mode */ + Assert(node->as_pstate); + + /* Shouldn't have been called when run-time pruning is not enabled */ + Assert(node->part_prune_info != NULL); + + /* Nothing to do if all plans are valid */ + if (bms_num_members(node->as_valid_subplans) == node->as_nplans) + return; + + /* Mark all non-valid plans as finished */ + for (i = 0; i < node->as_nplans; i++) + { + if (!bms_is_member(i, node->as_valid_subplans)) + node->as_pstate->pa_finished[i] = true; + } +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 169c697c08..629cdc05a0 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -244,6 +244,7 @@ _copyAppend(const Append *from) COPY_NODE_FIELD(partitioned_rels); COPY_NODE_FIELD(appendplans); COPY_SCALAR_FIELD(first_partial_plan); + COPY_NODE_FIELD(part_prune_info); return newnode; } @@ -2132,6 +2133,34 @@ _copyOnConflictExpr(const OnConflictExpr *from) return newnode; } +static PartitionPruneInfo * +_copyPartitionPruneInfo(const PartitionPruneInfo *from) +{ + PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo); + int i; + + COPY_SCALAR_FIELD(relid); + COPY_SCALAR_FIELD(parentoid); + COPY_NODE_FIELD(prunequal); + COPY_BITMAPSET_FIELD(allsubnodes); + COPY_SCALAR_FIELD(nparts); + COPY_POINTER_FIELD(subnodeindex, from->nparts * sizeof(int)); + COPY_POINTER_FIELD(subpartindex, from->nparts * + 
sizeof(PartitionPruneInfo *)); + + /* + * The above copied the entire array, but we still need to create copies + * of each PartitionPruneInfo contained in that array. + */ + for (i = 0; i < from->nparts; i++) + { + if (newnode->subpartindex[i] != NULL) + COPY_NODE_FIELD(subpartindex[i]); + } + + return newnode; +} + /* **************************************************************** * relation.h copy functions * @@ -5028,6 +5057,9 @@ copyObjectImpl(const void *from) case T_PlaceHolderInfo: retval = _copyPlaceHolderInfo(from); break; + case T_PartitionPruneInfo: + retval = _copyPartitionPruneInfo(from); + break; /* * VALUE NODES diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 08570ce25d..5bcb19947c 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1587,7 +1587,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, * if we have zero or one live subpath due to constraint exclusion.) */ if (subpaths_valid) - add_path(rel, (Path *) create_append_path(rel, subpaths, NIL, + add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, NULL, 0, false, partitioned_rels, -1)); @@ -1629,8 +1629,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, Assert(parallel_workers > 0); /* Generate a partial append path. 
*/ - appendpath = create_append_path(rel, NIL, partial_subpaths, NULL, - parallel_workers, + appendpath = create_append_path(root, rel, NIL, partial_subpaths, + NULL, parallel_workers, enable_parallel_append, partitioned_rels, -1); @@ -1678,7 +1678,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, max_parallel_workers_per_gather); Assert(parallel_workers > 0); - appendpath = create_append_path(rel, pa_nonpartial_subpaths, + appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, pa_partial_subpaths, NULL, parallel_workers, true, partitioned_rels, partial_rows); @@ -1734,7 +1734,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) - create_append_path(rel, subpaths, NIL, + create_append_path(root, rel, subpaths, NIL, required_outer, 0, false, partitioned_rels, -1)); } @@ -2000,7 +2000,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel) rel->pathlist = NIL; rel->partial_pathlist = NIL; - add_path(rel, (Path *) create_append_path(rel, NIL, NIL, NULL, + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL, 0, false, NIL, -1)); /* diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 3f1c1b3477..2e289d475e 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1230,7 +1230,7 @@ mark_dummy_rel(RelOptInfo *rel) rel->partial_pathlist = NIL; /* Set up the dummy path */ - add_path(rel, (Path *) create_append_path(rel, NIL, NIL, NULL, + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL, 0, false, NIL, -1)); /* Set or update cheapest_total_path and related fields */ diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9ae1bf31d5..4adf6d07c2 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -29,6 +29,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include 
"optimizer/cost.h" +#include "optimizer/partprune.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" @@ -204,7 +205,8 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, List *partitioned_rels); + List *tlist, List *partitioned_rels, + PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1022,6 +1024,8 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) List *tlist = build_path_tlist(root, &best_path->path); List *subplans = NIL; ListCell *subpaths; + RelOptInfo *rel = best_path->path.parent; + PartitionPruneInfo *pinfo = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1059,6 +1063,41 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) subplans = lappend(subplans, subplan); } + + if (best_path->trypartitionprune) + { + List *prunequal; + + /* Not for join rels */ + Assert(bms_membership(rel->relids) == BMS_SINGLETON); + + prunequal = + extract_actual_clauses(best_path->path.parent->baserestrictinfo, + false); + + if (best_path->path.param_info) + { + + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + /* + * If any quals exist that could possibly be useful to use for + * performing further partition pruning during execution, then + * we'll generate a PartitionPruneInfo to store these quals and + * allow translation of partition indexes into subpath indexes. 
+ */ + if (prunequal != NIL) + pinfo = make_partition_pruneinfo(root, best_path->path.parent, + best_path->partitioned_rels, + best_path->subpaths, prunequal); + } /* * XXX ideally, if there's just one child, we'd not bother to generate an * Append node but just return the single child. At the moment this does @@ -1067,7 +1106,8 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) */ plan = make_append(subplans, best_path->first_partial_path, - tlist, best_path->partitioned_rels); + tlist, best_path->partitioned_rels, + pinfo); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -5315,7 +5355,8 @@ make_foreignscan(List *qptlist, static Append * make_append(List *appendplans, int first_partial_plan, - List *tlist, List *partitioned_rels) + List *tlist, List *partitioned_rels, + PartitionPruneInfo *partpruneinfo) { Append *node = makeNode(Append); Plan *plan = &node->plan; @@ -5327,7 +5368,7 @@ make_append(List *appendplans, int first_partial_plan, node->partitioned_rels = partitioned_rels; node->appendplans = appendplans; node->first_partial_plan = first_partial_plan; - + node->part_prune_info = partpruneinfo; return node; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1671f450b0..6d0d96cd81 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3737,7 +3737,8 @@ create_grouping_paths(PlannerInfo *root, paths = lappend(paths, path); } path = (Path *) - create_append_path(grouped_rel, + create_append_path(root, + grouped_rel, paths, NIL, NULL, diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index f01119eff1..146e202bb0 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -593,7 +593,7 @@ generate_union_path(SetOperationStmt *op, PlannerInfo *root, /* * Append the child results together. 
*/ - path = (Path *) create_append_path(result_rel, pathlist, NIL, + path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NULL, 0, false, NIL, -1); /* We have to manually jam the right tlist into the path; ick */ path->pathtarget = create_pathtarget(root, tlist); @@ -705,7 +705,7 @@ generate_nonunion_path(SetOperationStmt *op, PlannerInfo *root, /* * Append the child results together. */ - path = (Path *) create_append_path(result_rel, pathlist, NIL, + path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NULL, 0, false, NIL, -1); /* We have to manually jam the right tlist into the path; ick */ diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 0c1f23951a..dccbcc19cc 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2473,6 +2473,25 @@ eval_const_expressions(PlannerInfo *root, Node *node) } /*-------------------- + * eval_const_expressions_from_list + * + * This is similar to eval_const_expressions except that it takes a ParamListInfo + * argument instead of a PlannerInfo to create the context. 
+ */ +Node * +eval_const_expressions_from_list(ParamListInfo prmlist, Node *node) +{ + eval_const_expressions_context context; + + context.boundParams = prmlist; /* bound Params */ + context.root = NULL; + context.active_fns = NIL; /* nothing being recursively simplified */ + context.case_val = NULL; /* no CASE being examined */ + context.estimate = false; /* safe transformations only */ + return eval_const_expressions_mutator(node, &context); +} + +/*-------------------- * estimate_expression_value * * This function attempts to estimate the value of an expression for diff --git a/src/backend/optimizer/util/partprune.c b/src/backend/optimizer/util/partprune.c index 905bd3571c..a59bac005c 100644 --- a/src/backend/optimizer/util/partprune.c +++ b/src/backend/optimizer/util/partprune.c @@ -38,6 +38,11 @@ * while also taking into account strategies of the operators in the matched * clauses. * + * make_partition_pruneinfo() + * + * Generates a PartitionPruneInfo node for use in the executor to allow it + * to perform partition pruning during execution. 
+ * * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -53,10 +58,14 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" +#include "executor/executor.h" +#include "executor/nodeSubplan.h" +#include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/partprune.h" +#include "optimizer/pathnode.h" #include "optimizer/planner.h" #include "optimizer/predtest.h" #include "optimizer/prep.h" @@ -104,15 +113,16 @@ static Bitmapset *get_partitions_from_or_args(PartitionPruneContext *context, static void remove_redundant_clauses(PartitionPruneContext *context, PartitionClauseInfo *partclauseinfo, List **minimalclauses); -static bool partition_cmp_args(Oid parttypid, Oid partopfamily, - PartClause *pc, PartClause *leftarg, PartClause *rightarg, - bool *result); +static bool partition_cmp_args(PartitionPruneContext *context, Oid parttypid, + Oid partopfamily, PartClause *pc, PartClause *leftarg, + PartClause *rightarg, bool *result); static bool extract_bounding_datums(PartitionPruneContext *context, PartitionClauseInfo *clauseinfo, List **minimalclauses, PartScanKeyInfo *keys); static PartOpStrategy partition_op_strategy(char part_strategy, PartClause *pc, bool *incl); -static bool partkey_datum_from_expr(Oid parttypid, Expr *expr, Datum *value); +static bool partkey_datum_from_expr(PartitionPruneContext *context, Oid parttypid, + Expr *expr, Datum *value); /* * prune_append_rel_partitions @@ -158,6 +168,8 @@ prune_append_rel_partitions(PlannerInfo *root, RelOptInfo *rel) context.boundinfo = rel->boundinfo; context.has_default_part = rel->has_default_part; context.partition_qual = rel->partition_qual; + context.econtext = NULL; + context.paramids = NULL; /* process clauses */ partclauseinfo = generate_partition_clauses(&context, clauses); @@ -534,6 +546,10 @@ 
extract_partition_clauses(PartitionPruneContext *context, List *clauses) pc->inputcollid = opclause->inputcollid; pc->value = valueexpr; + if (IsA(valueexpr, Param)) + context->paramids = bms_add_member(context->paramids, + ((Param *) valueexpr)->paramid); + /* * We don't turn a <> operator clause into a key right away. * Instead, the caller will hand over such clauses to @@ -695,6 +711,11 @@ extract_partition_clauses(PartitionPruneContext *context, List *clauses) leftop, rightop, InvalidOid, saop_coll); + + if (IsA(rightop, Param)) + context->paramids = bms_add_member(context->paramids, + ((Param *) rightop)->paramid); + elem_clauses = lappend(elem_clauses, elem_clause); } @@ -966,7 +987,8 @@ remove_redundant_clauses(PartitionPruneContext *context, if (hash_clause == NULL) hash_clause = pc; /* check if another clause would contradict the one we have */ - else if (partition_cmp_args(context->partopcintype[i], + else if (partition_cmp_args(context, + context->partopcintype[i], context->partopfamily[i], pc, pc, hash_clause, &test_result)) @@ -1023,7 +1045,8 @@ remove_redundant_clauses(PartitionPruneContext *context, * then because 7 < 5 is false, we leave a < 5 where it is and * effectively discard a < 7 as being redundant. */ - if (partition_cmp_args(context->partopcintype[i], + if (partition_cmp_args(context, + context->partopcintype[i], context->partopfamily[i], pc, pc, btree_clauses[s], &test_result)) @@ -1080,7 +1103,8 @@ remove_redundant_clauses(PartitionPruneContext *context, * eq clause is a = 3, then because 3 < 5, we no longer need * a < 5, because a = 3 is more restrictive. 
*/ - if (partition_cmp_args(context->partopcintype[i], + if (partition_cmp_args(context, + context->partopcintype[i], context->partopfamily[i], chk, eq, chk, &test_result)) @@ -1111,7 +1135,8 @@ remove_redundant_clauses(PartitionPruneContext *context, PartClause *lt = btree_clauses[BTLessStrategyNumber - 1], *le = btree_clauses[BTLessEqualStrategyNumber - 1]; - if (partition_cmp_args(context->partopcintype[i], + if (partition_cmp_args(context, + context->partopcintype[i], context->partopfamily[i], le, lt, le, &test_result)) @@ -1130,7 +1155,8 @@ remove_redundant_clauses(PartitionPruneContext *context, PartClause *gt = btree_clauses[BTGreaterStrategyNumber - 1], *ge = btree_clauses[BTGreaterEqualStrategyNumber - 1]; - if (partition_cmp_args(context->partopcintype[i], + if (partition_cmp_args(context, + context->partopcintype[i], context->partopfamily[i], ge, gt, ge, &test_result)) @@ -1168,7 +1194,8 @@ remove_redundant_clauses(PartitionPruneContext *context, * incompatible with the operator. */ static bool -partition_cmp_args(Oid partopcintype, Oid partopfamily, +partition_cmp_args(PartitionPruneContext *context, + Oid partopcintype, Oid partopfamily, PartClause *pc, PartClause *leftarg, PartClause *rightarg, bool *result) { @@ -1181,10 +1208,12 @@ partition_cmp_args(Oid partopcintype, Oid partopfamily, * Try to extract an actual value from each arg. This may fail if the * value is unknown in this context, in which case we cannot compare. 
*/ - if (!partkey_datum_from_expr(partopcintype, leftarg->value, &left_value)) + if (!partkey_datum_from_expr(context, + partopcintype, leftarg->value, &left_value)) return false; - if (!partkey_datum_from_expr(partopcintype, rightarg->value, &right_value)) + if (!partkey_datum_from_expr(context, + partopcintype, rightarg->value, &right_value)) return false; /* @@ -1308,12 +1337,14 @@ extract_bounding_datums(PartitionPruneContext *context, case PART_OP_EQUAL: Assert(incl); if (need_next_eq && - partkey_datum_from_expr(context->partopcintype[i], + partkey_datum_from_expr(context, + context->partopcintype[i], value, &keys->eqkeys[i])) keys->n_eqkeys++; if (need_next_max && - partkey_datum_from_expr(context->partopcintype[i], + partkey_datum_from_expr(context, + context->partopcintype[i], value, &keys->maxkeys[i])) { keys->n_maxkeys++; @@ -1321,7 +1352,8 @@ extract_bounding_datums(PartitionPruneContext *context, } if (need_next_min && - partkey_datum_from_expr(context->partopcintype[i], + partkey_datum_from_expr(context, + context->partopcintype[i], value, &keys->minkeys[i])) { keys->n_minkeys++; @@ -1331,7 +1363,8 @@ extract_bounding_datums(PartitionPruneContext *context, case PART_OP_LESS: if (need_next_max && - partkey_datum_from_expr(context->partopcintype[i], + partkey_datum_from_expr(context, + context->partopcintype[i], value, &keys->maxkeys[i])) { keys->n_maxkeys++; @@ -1343,7 +1376,8 @@ extract_bounding_datums(PartitionPruneContext *context, case PART_OP_GREATER: if (need_next_min && - partkey_datum_from_expr(context->partopcintype[i], + partkey_datum_from_expr(context, + context->partopcintype[i], value, &keys->minkeys[i])) { keys->n_minkeys++; @@ -1388,7 +1422,8 @@ extract_bounding_datums(PartitionPruneContext *context, PartClause *pc = (PartClause *) lfirst(lc); Datum datum; - if (partkey_datum_from_expr(context->partopcintype[0], + if (partkey_datum_from_expr(context, + context->partopcintype[0], pc->value, &datum)) keys->ne_datums[i++] = datum; } @@ 
-1469,7 +1504,8 @@ partition_op_strategy(char part_strategy, PartClause *pc, bool *incl) * set. True is returned otherwise. */ static bool -partkey_datum_from_expr(Oid partopcintype, Expr *expr, Datum *value) +partkey_datum_from_expr(PartitionPruneContext *context, + Oid partopcintype, Expr *expr, Datum *value) { Oid exprtype = exprType((Node *) expr); @@ -1493,25 +1529,193 @@ partkey_datum_from_expr(Oid partopcintype, Expr *expr, Datum *value) if (expr == NULL) return false; - /* - * Transform into a form that the following code can do something - * useful with. - */ - expr = evaluate_expr(expr, - exprType((Node *) expr), - exprTypmod((Node *) expr), - exprCollation((Node *) expr)); + if (context->planstate && context->econtext) + { + ExprState *exprstate; + bool isNull; + + exprstate = ExecInitExpr(expr, context->planstate); + + *value = ExecEvalExprSwitchContext(exprstate, + context->econtext, + &isNull); + + if (isNull) + return false; + + return true; + } + else + { + /* + * Transform into a form that the following code can do something + * useful with. + */ + expr = evaluate_expr(expr, + exprType((Node *) expr), + exprTypmod((Node *) expr), + exprCollation((Node *) expr)); + } } /* * Add more expression types here as needed to support the requirements * of the higher-level code. */ - if (IsA(expr, Const)) + switch (nodeTag(expr)) { - *value = ((Const *) expr)->constvalue; - return true; + case T_Const: + *value = ((Const *) expr)->constvalue; + return true; + + case T_Param: + if (context->planstate && context->econtext) + { + ExprState *exprstate; + bool isNull; + + exprstate = ExecInitExpr(expr, context->planstate); + *value = ExecEvalExprSwitchContext(exprstate, + context->econtext, + &isNull); + + if (isNull) + return false; + + return true; + } + + default: + return false; + } +} + +/* + * make_partition_pruneinfo + * Build PartitionPruneInfo tree to allow the output of + * get_partitions_from_clauses to be translated into + * 'subpaths' indexes. 
This is required in order to allow + * us to perform any further partition pruning during execution. + */ +PartitionPruneInfo * +make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *rel, + List *partition_rels, List *subpaths, + List *prunequal) +{ + PartitionPruneInfo *pinfo; + AppendRelInfo *appinfo; + RangeTblEntry *rte; + ListCell *lc; + int i; + int partidx; + int nparts = rel->nparts; + + check_stack_depth(); + + rte = root->simple_rte_array[rel->relid]; + + pinfo = makeNode(PartitionPruneInfo); + pinfo->relid = rel->relid; + pinfo->parentoid = rte->relid; + pinfo->prunequal = prunequal; + pinfo->allsubnodes = NULL; + pinfo->nparts = nparts; + pinfo->subnodeindex = (int *) palloc(sizeof(int) * nparts); + pinfo->subpartindex = (PartitionPruneInfo **) + palloc0(sizeof(PartitionPruneInfo *) * nparts); + /* + * -1 represents a partition that has been pruned. Set them all to this + * initially. We'll determine the subpath index for the non-pruned + * ones below. + */ + for (i = 0; i < nparts; i++) + pinfo->subnodeindex[i] = -1; + + i = -1; + foreach(lc, subpaths) + { + Path *path = (Path *) lfirst(lc); + + i++; /* track subnode index */ + + /* Find the AppendRelInfo for the Append child */ + appinfo = find_childrel_appendrelinfo(root, path->parent); + + /* + * Skip subpaths which belong to relations not directly parented by + * rel. We'll process any we skip here below when looping through + * partition_rels. + */ + if (appinfo->parent_relid != rel->relid) + continue; + + /* Determine the element in part_rels which belongs to this subpath. */ + for (partidx = 0; partidx < nparts; partidx++) + { + if (rel->part_rels[partidx]->relid != appinfo->child_relid) + continue; + + /* found it! Save the subnode index */ + pinfo->subnodeindex[partidx] = i; + pinfo->allsubnodes = bms_add_member(pinfo->allsubnodes, partidx); + break; + } } - return false; + /* + * Some of the relations returned by get_partitions_from_clauses may be + * other partitioned tables.
Unlike the case above, these won't be + * subpaths of the Append. To handle these we must create a + * sub-PartitionPruneInfo to allow us to determine if subnodes which + * belong to sub-partitioned tables are required during partition pruning. + */ + foreach(lc, partition_rels) + { + Index rti = lfirst_int(lc); + RelOptInfo *subpart = find_base_rel(root, rti); + + /* + * partition_rels contains the rti of the base relation being queried. + * We only care about sub-partition parents here, so skip this. + */ + if (subpart->reloptkind == RELOPT_BASEREL) + continue; + + appinfo = find_childrel_appendrelinfo(root, subpart); + + /* + * We only want to deal with sub-partition parents that are directly + * below rel. We'll deal with any we skip here later in a recursive + * call which is made below. + */ + if (appinfo->parent_relid != rel->relid) + continue; + + /* + * Handle sub-partition parents by building a sub-PartitionPruneInfo. + */ + for (partidx = 0; partidx < nparts; partidx++) + { + List *subprunequal; + + if (rel->part_rels[partidx]->relid != appinfo->child_relid) + continue; + + /* Adjust the prune qual to be compatible with this subpartition */ + subprunequal = (List *) adjust_appendrel_attrs(root, + (Node *) prunequal, + 1, + &appinfo); + + pinfo->subpartindex[partidx] = make_partition_pruneinfo(root, + subpart, + partition_rels, + subpaths, + subprunequal); + pinfo->allsubnodes = bms_add_member(pinfo->allsubnodes, partidx); + break; + } + } + + return pinfo; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index fe3b4582d4..448c05adaa 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1210,7 +1210,8 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, * Note that we must handle subpaths = NIL, representing a dummy access path. 
*/ AppendPath * -create_append_path(RelOptInfo *rel, +create_append_path(PlannerInfo *root, + RelOptInfo *rel, List *subpaths, List *partial_subpaths, Relids required_outer, int parallel_workers, bool parallel_aware, @@ -1224,8 +1225,36 @@ create_append_path(RelOptInfo *rel, pathnode->path.pathtype = T_Append; pathnode->path.parent = rel; pathnode->path.pathtarget = rel->reltarget; - pathnode->path.param_info = get_appendrel_parampathinfo(rel, - required_outer); + pathnode->trypartitionprune = false; + + /* + * When generating an Append path for a partitioned table we'll try to + * enable additional partition pruning at run-time. Useful pruning quals + * may be in parameterized path quals, so we'll go all the way and + * generate the qual list for the Append's parameterized paths. We need + * only bother trying this for RELOPT_BASEREL rels, as + * RELOPT_OTHER_MEMBER_REL's Append paths are merged into the base rel's + * Append subpaths. + */ + if (rel->reloptkind == RELOPT_BASEREL && root) + { + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); + + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_PARTITIONED_TABLE) + { + pathnode->path.param_info = get_baserel_parampathinfo(root, + rel, + required_outer); + pathnode->trypartitionprune = true; + } + else + pathnode->path.param_info = get_appendrel_parampathinfo(rel, + required_outer); + } + else + pathnode->path.param_info = get_appendrel_parampathinfo(rel, + required_outer); pathnode->path.parallel_aware = parallel_aware; pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = parallel_workers; @@ -3567,7 +3596,7 @@ reparameterize_path(PlannerInfo *root, Path *path, i++; } return (Path *) - create_append_path(rel, childpaths, partialpaths, + create_append_path(root, rel, childpaths, partialpaths, required_outer, apath->path.parallel_workers, apath->path.parallel_aware, diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 
6eba13c244..621e8f13b1 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1984,9 +1984,6 @@ set_baserel_partition_key_exprs(Relation relation, { PartitionKey partkey = RelationGetPartitionKey(relation); int partnatts; - int cnt; - List **partexprs; - ListCell *lc; Index varno = rel->relid; Assert(IS_SIMPLE_REL(rel) && rel->relid > 0); @@ -1995,39 +1992,8 @@ set_baserel_partition_key_exprs(Relation relation, Assert(partkey != NULL); partnatts = partkey->partnatts; - partexprs = (List **) palloc(sizeof(List *) * partnatts); - lc = list_head(partkey->partexprs); - for (cnt = 0; cnt < partnatts; cnt++) - { - Expr *partexpr; - AttrNumber attno = partkey->partattrs[cnt]; - - if (attno != InvalidAttrNumber) - { - /* Single column partition key is stored as a Var node. */ - Assert(attno > 0); - - partexpr = (Expr *) makeVar(varno, attno, - partkey->parttypid[cnt], - partkey->parttypmod[cnt], - partkey->parttypcoll[cnt], 0); - } - else - { - if (lc == NULL) - elog(ERROR, "wrong number of partition key expressions"); - - /* Re-stamp the expression with given varno. */ - partexpr = (Expr *) copyObject(lfirst(lc)); - ChangeVarNodes((Node *) partexpr, 1, varno, 0); - lc = lnext(lc); - } - - partexprs[cnt] = list_make1(partexpr); - } - - rel->partexprs = partexprs; + rel->partexprs = build_partition_expressions(partkey, varno); /* * A base relation can not have nullable partition key expressions. 
We diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 4e9281d3d5..f63eb70335 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -16,6 +16,7 @@ #include "fmgr.h" #include "executor/tuptable.h" #include "nodes/execnodes.h" +#include "nodes/relation.h" #include "parser/parse_node.h" #include "utils/rel.h" @@ -67,6 +68,12 @@ typedef struct PartitionPruneContext /* Partition boundary info */ PartitionBoundInfo boundinfo; + + PlanState *planstate; + ExprContext *econtext; + + /* ParamIds of clauses being used to determine partitions */ + Bitmapset *paramids; } PartitionPruneContext; /* @@ -162,4 +169,6 @@ extern int get_partition_for_tuple(Relation relation, Datum *values, extern Bitmapset *get_partitions_for_keys(PartitionPruneContext *context, PartScanKeyInfo *keys); +extern List **build_partition_expressions(PartitionKey partkey, Index varno); + #endif /* PARTITION_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index a953820f43..7db3a79e71 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1010,11 +1010,14 @@ typedef struct ModifyTableState * * nplans how many plans are in the array * whichplan which plan is being executed (0 .. 
n-1) + * valid_subplans for runtime pruning, valid appendplans indexes to scan * ---------------- */ struct AppendState; typedef struct AppendState AppendState; +struct PartitionPruneContextCache; +typedef struct PartitionPruneContextCache PartitionPruneContextCache; struct ParallelAppendState; typedef struct ParallelAppendState ParallelAppendState; @@ -1026,6 +1029,11 @@ struct AppendState int as_whichplan; ParallelAppendState *as_pstate; /* parallel coordination info */ Size pstate_len; /* size of parallel coordination info */ + Bitmapset *as_valid_subplans; /* mask of non-pruned subplans */ + Bitmapset *part_prune_params; /* ParamIds useful for partition pruning */ + PartitionPruneInfo *part_prune_info; /* details for partition pruning */ + PartitionPruneContextCache *contextcache; /* cache of prune contexts */ + MemoryContext prune_context; /* used when calling planner pruning code */ bool (*choose_next_subplan) (AppendState *); }; diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c097da6425..d693f3711c 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -191,6 +191,7 @@ typedef enum NodeTag T_FromExpr, T_OnConflictExpr, T_IntoClause, + T_PartitionPruneInfo, /* * TAGS FOR EXPRESSION STATE NODES (execnodes.h) diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f2e19eae68..2264d54c2a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -250,6 +250,13 @@ typedef struct Append List *partitioned_rels; List *appendplans; int first_partial_plan; + + /* + * Mapping details for run-time subplan pruning. This allows translation + * from partition index into subplan indexes. This is set to NULL when + * run-time subplan pruning is disabled. 
+ */ + PartitionPruneInfo *part_prune_info; } Append; /* ---------------- diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 1b4b0d75af..ffb6daffdd 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1506,4 +1506,27 @@ typedef struct OnConflictExpr List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } OnConflictExpr; +/*---------- + * PartitionPruneInfo - Allows pruning of Append subplans + * + * Here we store mapping details to allow translation of a partitioned table's + * index into an Append node's subplan index. This structure is used to + * recursively search for all subplan nodes when there are sub-partitioned + * tables in the Append plan. + *---------- + */ +typedef struct PartitionPruneInfo +{ + NodeTag type; + int relid; /* relation index of parent partition rel */ + Oid parentoid; /* Oid of parent partition rel */ + List *prunequal; /* qual list for pruning partitions */ + Bitmapset *allsubnodes; /* partition ids having a subnode or sub-partition */ + int nparts; /* length of the following arrays */ + int *subnodeindex; /* subnode index indexed by partition id */ + + /* sub-PartitionPruneInfo indexed by partition id */ + struct PartitionPruneInfo **subpartindex; +} PartitionPruneInfo; + #endif /* PRIMNODES_H */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 34d79f284b..dbf280cc6e 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1304,6 +1304,8 @@ typedef struct AppendPath /* Index of first partial path in subpaths */ int first_partial_path; + + bool trypartitionprune; /* Attempt to enable partition pruning? 
*/ } AppendPath; #define IS_DUMMY_PATH(p) \ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 3c2f54964b..bedffc4189 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -80,6 +80,9 @@ extern void CommuteRowCompareExpr(RowCompareExpr *clause); extern Node *eval_const_expressions(PlannerInfo *root, Node *node); +extern Node *eval_const_expressions_from_list(ParamListInfo prmlist, + Node *node); + extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Query *inline_set_returning_function(PlannerInfo *root, diff --git a/src/include/optimizer/partprune.h b/src/include/optimizer/partprune.h index b654691e9b..9d62e13c72 100644 --- a/src/include/optimizer/partprune.h +++ b/src/include/optimizer/partprune.h @@ -68,4 +68,9 @@ extern PartitionClauseInfo *generate_partition_clauses( extern Bitmapset *get_partitions_from_clauses(PartitionPruneContext *context, PartitionClauseInfo *partclauseinfo); +extern PartitionPruneInfo *make_partition_pruneinfo(PlannerInfo *root, + RelOptInfo *rel, + List *partition_rels, List *subpaths, + List *prunequal); + #endif /* PARTPRUNE_H */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index ef7173fbf8..bde18582e9 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -64,7 +64,7 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, List *bitmapquals); extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, Relids required_outer); -extern AppendPath *create_append_path(RelOptInfo *rel, +extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, Relids required_outer, int parallel_workers, bool parallel_aware, diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 948cad4c3d..424b3a7b1b 100644 --- 
a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -1475,3 +1475,1118 @@ explain (costs off) select * from like_op_noprune where a like '%BC'; (5 rows) drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, hp, rp, coll_pruning_multi, like_op_noprune; +-- +-- Test runtime partition pruning +-- +create table ab (a int not null, b int not null) partition by list (a); +create table ab_a2 partition of ab for values in(2) partition by list (b); +create table ab_a2_b1 partition of ab_a2 for values in (1); +create table ab_a2_b2 partition of ab_a2 for values in (2); +create table ab_a2_b3 partition of ab_a2 for values in (3); +create table ab_a1 partition of ab for values in(1) partition by list (b); +create table ab_a1_b1 partition of ab_a1 for values in (1); +create table ab_a1_b2 partition of ab_a1 for values in (2); +create table ab_a1_b3 partition of ab_a1 for values in (3); +create table ab_a3 partition of ab for values in(3) partition by list (b); +create table ab_a3_b1 partition of ab_a3 for values in (1); +create table ab_a3_b2 partition of ab_a3 for values in (2); +create table ab_a3_b3 partition of ab_a3 for values in (3); +prepare ab_q1 (int, int, int) as +select * from ab where a between $1 and $2 and b <= $3; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. 
+execute ab_q1 (1, 8, 3); + a | b +---+--- +(0 rows) + +execute ab_q1 (1, 8, 3); + a | b +---+--- +(0 rows) + +execute ab_q1 (1, 8, 3); + a | b +---+--- +(0 rows) + +execute ab_q1 (1, 8, 3); + a | b +---+--- +(0 rows) + +execute ab_q1 (1, 8, 3); + a | b +---+--- +(0 rows) + +explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 2, 3); + QUERY PLAN +--------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on ab_a1_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a1_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a1_b3 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a2_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a2_b3 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a3_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a3_b3 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) +(19 rows) + +explain (analyze, costs off, summary off, timing off) execute ab_q1 (1, 2, 3); + QUERY PLAN +--------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on ab_a1_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a1_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a1_b3 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a2_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= 
$2) AND (b <= $3)) + -> Seq Scan on ab_a2_b3 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a3_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) + -> Seq Scan on ab_a3_b3 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b <= $3)) +(19 rows) + +deallocate ab_q1; +-- runtime pruning after optimizer pruning +prepare ab_q1 (int, int) as +select a from ab where a between $1 and $2 and b < 3; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute ab_q1 (1, 8); + a +--- +(0 rows) + +execute ab_q1 (1, 8); + a +--- +(0 rows) + +execute ab_q1 (1, 8); + a +--- +(0 rows) + +execute ab_q1 (1, 8); + a +--- +(0 rows) + +execute ab_q1 (1, 8); + a +--- +(0 rows) + +explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 2); + QUERY PLAN +------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on ab_a1_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a1_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a2_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a3_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) +(13 rows) + +explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 4); + QUERY PLAN +------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on ab_a1_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a1_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq 
Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a2_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a3_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) + -> Seq Scan on ab_a3_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 3)) +(13 rows) + +-- parallel append +prepare ab_q2 (int, int) as +select avg(a) from ab where a between $1 and $2 and b < 4; +-- encourage use of parallel plans +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set min_parallel_table_scan_size = 0; +set max_parallel_workers_per_gather = 2; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute ab_q2 (1, 8); + avg +----- + +(1 row) + +execute ab_q2 (1, 8); + avg +----- + +(1 row) + +execute ab_q2 (1, 8); + avg +----- + +(1 row) + +execute ab_q2 (1, 8); + avg +----- + +(1 row) + +execute ab_q2 (1, 8); + avg +----- + +(1 row) + +explain (analyze, costs off, summary off, timing off) execute ab_q2 (2, 2); + QUERY PLAN +------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=3 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Partial Aggregate (actual rows=1 loops=3) + -> Parallel Append (actual rows=0 loops=3) + -> Parallel Seq Scan on ab_a1_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a1_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a1_b3 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a2_b2 (actual rows=0 loops=1) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a2_b3 (actual rows=0 loops=1) + Filter: ((a >= 
$1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a3_b1 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a3_b2 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) + -> Parallel Seq Scan on ab_a3_b3 (never executed) + Filter: ((a >= $1) AND (a <= $2) AND (b < 4)) +(24 rows) + +-- Test run-time pruning with IN lists. +prepare ab_q3 (int, int, int) as +select avg(a) from ab where a in($1,$2,$3) and b < 4; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute ab_q3 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q3 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q3 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q3 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q3 (1, 2, 3); + avg +----- + +(1 row) + +explain (analyze, costs off, summary off, timing off) execute ab_q3 (1, 1, 1); + QUERY PLAN +------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=3 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Partial Aggregate (actual rows=1 loops=3) + -> Parallel Append (actual rows=0 loops=3) + -> Parallel Seq Scan on ab_a1_b1 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a1_b2 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a1_b3 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b1 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b2 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b3 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b1 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq 
Scan on ab_a3_b2 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b3 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) +(24 rows) + +explain (analyze, costs off, summary off, timing off) execute ab_q3 (2, 3, 3); + QUERY PLAN +------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=3 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Partial Aggregate (actual rows=1 loops=3) + -> Parallel Append (actual rows=0 loops=3) + -> Parallel Seq Scan on ab_a1_b1 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a1_b2 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a1_b3 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b2 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b3 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b1 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b2 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b3 (actual rows=0 loops=1) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) +(24 rows) + +-- try some params whose values do not belong to any partition +explain (analyze, costs off, summary off, timing off) execute ab_q3 (33, 44, 55); + QUERY PLAN +------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=3 loops=1) + Workers Planned: 2 + Workers Launched: 2 + -> Partial Aggregate 
(actual rows=1 loops=3) + -> Parallel Append (actual rows=0 loops=3) + -> Parallel Seq Scan on ab_a1_b1 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a1_b2 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a1_b3 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b1 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b2 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a2_b3 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b1 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b2 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) + -> Parallel Seq Scan on ab_a3_b3 (never executed) + Filter: ((b < 4) AND (a = ANY (ARRAY[$1, $2, $3]))) +(24 rows) + +-- test parallel Append with IN list and parameterized nested loops +create table lprt_a (a int not null); +-- insert some values we won't find in ab +insert into lprt_a select 0 from generate_series(1,100); +-- and insert some values that we should find. 
+insert into lprt_a values(1),(1); +analyze lprt_a; +create index ab_a2_b1_a_idx on ab_a2_b1 (a); +create index ab_a2_b2_a_idx on ab_a2_b2 (a); +create index ab_a2_b3_a_idx on ab_a2_b3 (a); +create index ab_a1_b1_a_idx on ab_a1_b1 (a); +create index ab_a1_b2_a_idx on ab_a1_b2 (a); +create index ab_a1_b3_a_idx on ab_a1_b3 (a); +create index ab_a3_b1_a_idx on ab_a3_b1 (a); +create index ab_a3_b2_a_idx on ab_a3_b2 (a); +create index ab_a3_b3_a_idx on ab_a3_b3 (a); +set enable_hashjoin = 0; +set enable_mergejoin = 0; +prepare ab_q4 (int, int, int) as +select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in($1,$2,$3); +execute ab_q4 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q4 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q4 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q4 (1, 2, 3); + avg +----- + +(1 row) + +execute ab_q4 (1, 2, 3); + avg +----- + +(1 row) + +explain (analyze, costs off, summary off, timing off) execute ab_q4 (0, 0, 1); + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=2 loops=1) + Workers Planned: 1 + Workers Launched: 1 + -> Partial Aggregate (actual rows=1 loops=2) + -> Nested Loop (actual rows=0 loops=2) + -> Parallel Seq Scan on lprt_a a (actual rows=51 loops=2) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Append (actual rows=0 loops=102) + -> Index Only Scan using ab_a1_b1_a_idx on ab_a1_b1 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b2_a_idx on ab_a1_b2 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b3_a_idx on ab_a1_b3 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b1_a_idx on ab_a2_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b2_a_idx on ab_a2_b2 (never executed) + 
Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b3_a_idx on ab_a2_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b1_a_idx on ab_a3_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b2_a_idx on ab_a3_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b3_a_idx on ab_a3_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 +(36 rows) + +insert into lprt_a values(3),(3); +explain (analyze, costs off, summary off, timing off) execute ab_q4 (1, 0, 3); + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=2 loops=1) + Workers Planned: 1 + Workers Launched: 1 + -> Partial Aggregate (actual rows=1 loops=2) + -> Nested Loop (actual rows=0 loops=2) + -> Parallel Seq Scan on lprt_a a (actual rows=52 loops=2) + Filter: (a = ANY ('{1,0,3}'::integer[])) + -> Append (actual rows=0 loops=104) + -> Index Only Scan using ab_a1_b1_a_idx on ab_a1_b1 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b2_a_idx on ab_a1_b2 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b3_a_idx on ab_a1_b3 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b1_a_idx on ab_a2_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b2_a_idx on ab_a2_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b3_a_idx on ab_a2_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b1_a_idx on ab_a3_b1 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b2_a_idx on ab_a3_b2 (actual rows=0 loops=2) + Index 
Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b3_a_idx on ab_a3_b3 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 +(36 rows) + +explain (analyze, costs off, summary off, timing off) execute ab_q4 (1, 0, 0); + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=2 loops=1) + Workers Planned: 1 + Workers Launched: 1 + -> Partial Aggregate (actual rows=1 loops=2) + -> Nested Loop (actual rows=0 loops=2) + -> Parallel Seq Scan on lprt_a a (actual rows=51 loops=2) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: 1 + -> Append (actual rows=0 loops=102) + -> Index Only Scan using ab_a1_b1_a_idx on ab_a1_b1 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b2_a_idx on ab_a1_b2 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b3_a_idx on ab_a1_b3 (actual rows=0 loops=2) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b1_a_idx on ab_a2_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b2_a_idx on ab_a2_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b3_a_idx on ab_a2_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b1_a_idx on ab_a3_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b2_a_idx on ab_a3_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b3_a_idx on ab_a3_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 +(37 rows) + +delete from lprt_a where a = 1; +explain (analyze, costs off, summary off, timing off) execute ab_q4 (1, 0, 0); + QUERY PLAN 
+------------------------------------------------------------------------------------------------- + Finalize Aggregate (actual rows=1 loops=1) + -> Gather (actual rows=2 loops=1) + Workers Planned: 1 + Workers Launched: 1 + -> Partial Aggregate (actual rows=1 loops=2) + -> Nested Loop (actual rows=0 loops=2) + -> Parallel Seq Scan on lprt_a a (actual rows=50 loops=2) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: 1 + -> Append (actual rows=0 loops=100) + -> Index Only Scan using ab_a1_b1_a_idx on ab_a1_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b2_a_idx on ab_a1_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a1_b3_a_idx on ab_a1_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b1_a_idx on ab_a2_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b2_a_idx on ab_a2_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a2_b3_a_idx on ab_a2_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b1_a_idx on ab_a3_b1 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b2_a_idx on ab_a3_b2 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 + -> Index Only Scan using ab_a3_b3_a_idx on ab_a3_b3 (never executed) + Index Cond: (a = a.a) + Heap Fetches: 0 +(37 rows) + +reset enable_hashjoin; +reset enable_mergejoin; +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset min_parallel_table_scan_size; +reset max_parallel_workers_per_gather; +-- Test run-time partition pruning with an initplan +explain (analyze, costs off, summary off, timing off) +select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 from lprt_a); + QUERY PLAN +------------------------------------------------------------------------- + Append (actual 
rows=0 loops=1) + InitPlan 1 (returns $0) + -> Aggregate (actual rows=1 loops=1) + -> Seq Scan on lprt_a (actual rows=102 loops=1) + InitPlan 2 (returns $1) + -> Aggregate (actual rows=1 loops=1) + -> Seq Scan on lprt_a lprt_a_1 (actual rows=102 loops=1) + -> Bitmap Heap Scan on ab_a1_b1 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a1_b1_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a1_b2 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a1_b3 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a2_b1 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a2_b1_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a2_b2 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a2_b2_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a2_b3 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a2_b3_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a3_b1 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a3_b1_a_idx (never executed) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a3_b2 (actual rows=0 loops=1) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a3_b2_a_idx (actual rows=0 loops=1) + Index Cond: (a = $0) + -> Bitmap Heap Scan on ab_a3_b3 (never executed) + Recheck Cond: (a = $0) + Filter: (b = $1) + -> Bitmap Index Scan on ab_a3_b3_a_idx (never executed) + Index Cond: (a = $0) +(52 rows) + +deallocate ab_q1; +deallocate ab_q2; +deallocate ab_q3; +deallocate ab_q4; +drop table ab, lprt_a; +-- join +create table tbl1(col1 
int); +insert into tbl1 values (501), (505); +-- basic table +create table tprt (col1 int) partition by range (col1); +create table tprt_1 partition of tprt for values from (1) to (501); +create table tprt_2 partition of tprt for values from (501) to (1001); +create table tprt_3 partition of tprt for values from (1001) to (2001); +create table tprt_4 partition of tprt for values from (2001) to (3001); +create table tprt_5 partition of tprt for values from (3001) to (4001); +create table tprt_6 partition of tprt for values from (4001) to (5001); +create index tprt1_idx on tprt_1 (col1); +create index tprt2_idx on tprt_2 (col1); +create index tprt3_idx on tprt_3 (col1); +create index tprt4_idx on tprt_4 (col1); +create index tprt5_idx on tprt_5 (col1); +create index tprt6_idx on tprt_6 (col1); +insert into tprt values (10), (20), (501), (502), (505), (1001), (4500); +set enable_hashjoin = off; +set enable_mergejoin = off; +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 > tprt.col1; + QUERY PLAN +------------------------------------------------------------------------------- + Nested Loop (actual rows=6 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Append (actual rows=3 loops=2) + -> Index Only Scan using tprt1_idx on tprt_1 (actual rows=2 loops=2) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 4 + -> Index Only Scan using tprt2_idx on tprt_2 (actual rows=2 loops=1) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 2 + -> Index Only Scan using tprt3_idx on tprt_3 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt4_idx on tprt_4 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt5_idx on tprt_5 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt6_idx on tprt_6 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 +(21 rows) + +explain (analyze, 
costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 = tprt.col1; + QUERY PLAN +------------------------------------------------------------------------------- + Nested Loop (actual rows=2 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Append (actual rows=1 loops=2) + -> Index Only Scan using tprt1_idx on tprt_1 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 2 + -> Index Only Scan using tprt3_idx on tprt_3 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt4_idx on tprt_4 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt5_idx on tprt_5 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt6_idx on tprt_6 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 +(21 rows) + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 > tprt.col1 +order by tbl1.col1, tprt.col1; + col1 | col1 +------+------ + 501 | 10 + 501 | 20 + 505 | 10 + 505 | 20 + 505 | 501 + 505 | 502 +(6 rows) + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 = tprt.col1 +order by tbl1.col1, tprt.col1; + col1 | col1 +------+------ + 501 | 501 + 505 | 505 +(2 rows) + +-- multiple partitions +insert into tbl1 values (1001), (1010), (1011); +explain (analyze, costs off, summary off, timing off) +select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1; + QUERY PLAN +------------------------------------------------------------------------------- + Nested Loop (actual rows=23 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Append (actual rows=5 loops=5) + -> Index Only Scan using tprt1_idx on tprt_1 (actual rows=2 loops=5) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 10 + -> Index Only Scan using tprt2_idx on tprt_2 (actual 
rows=3 loops=4) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 11 + -> Index Only Scan using tprt3_idx on tprt_3 (actual rows=1 loops=2) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 2 + -> Index Only Scan using tprt4_idx on tprt_4 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt5_idx on tprt_5 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt6_idx on tprt_6 (never executed) + Index Cond: (col1 < tbl1.col1) + Heap Fetches: 0 +(21 rows) + +explain (analyze, costs off, summary off, timing off) +select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1; + QUERY PLAN +------------------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Append (actual rows=1 loops=5) + -> Index Only Scan using tprt1_idx on tprt_1 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 2 + -> Index Only Scan using tprt3_idx on tprt_3 (actual rows=0 loops=3) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 1 + -> Index Only Scan using tprt4_idx on tprt_4 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt5_idx on tprt_5 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt6_idx on tprt_6 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 +(21 rows) + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 > tprt.col1 +order by tbl1.col1, tprt.col1; + col1 | col1 +------+------ + 501 | 10 + 501 | 20 + 505 | 10 + 505 | 20 + 505 | 501 + 505 | 502 + 1001 | 10 + 1001 | 20 + 1001 | 501 + 1001 | 502 + 1001 | 505 + 1010 | 10 + 1010 | 20 + 1010 | 501 + 1010 | 502 + 1010 | 505 + 1010 | 1001 + 1011 | 10 + 1011 | 20 + 1011 | 501 + 1011 | 502 + 
1011 | 505 + 1011 | 1001 +(23 rows) + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 = tprt.col1 +order by tbl1.col1, tprt.col1; + col1 | col1 +------+------ + 501 | 501 + 505 | 505 + 1001 | 1001 +(3 rows) + +-- last partition +delete from tbl1; +insert into tbl1 values (4400); +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 < tprt.col1; + QUERY PLAN +------------------------------------------------------------------------------- + Nested Loop (actual rows=1 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Append (actual rows=1 loops=1) + -> Index Only Scan using tprt1_idx on tprt_1 (never executed) + Index Cond: (col1 > tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt2_idx on tprt_2 (never executed) + Index Cond: (col1 > tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt3_idx on tprt_3 (never executed) + Index Cond: (col1 > tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt4_idx on tprt_4 (never executed) + Index Cond: (col1 > tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt5_idx on tprt_5 (never executed) + Index Cond: (col1 > tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt6_idx on tprt_6 (actual rows=1 loops=1) + Index Cond: (col1 > tbl1.col1) + Heap Fetches: 1 +(21 rows) + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 < tprt.col1 +order by tbl1.col1, tprt.col1; + col1 | col1 +------+------ + 4400 | 4500 +(1 row) + +-- no matching partition +delete from tbl1; +insert into tbl1 values (10000); +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 = tprt.col1; + QUERY PLAN +------------------------------------------------------------------------ + Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Append (actual rows=0 loops=1) + -> Index Only Scan using tprt1_idx on tprt_1 (never executed) + Index Cond: (col1 = tbl1.col1) + 
Heap Fetches: 0 + -> Index Only Scan using tprt2_idx on tprt_2 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt3_idx on tprt_3 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt4_idx on tprt_4 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt5_idx on tprt_5 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 + -> Index Only Scan using tprt6_idx on tprt_6 (never executed) + Index Cond: (col1 = tbl1.col1) + Heap Fetches: 0 +(21 rows) + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 = tprt.col1 +order by tbl1.col1, tprt.col1; + col1 | col1 +------+------ +(0 rows) + +drop table tbl1, tprt; +-- test with columns defined in varying orders between each level +create table part_abc (a int not null, b int not null, c int not null) partition by list (a); +create table part_bac (b int not null, a int not null, c int not null) partition by list (b); +create table part_cab (c int not null, a int not null, b int not null) partition by list (c); +create table part_abc_p1 (a int not null, b int not null, c int not null); +alter table part_abc attach partition part_bac for values in(1); +alter table part_bac attach partition part_cab for values in(2); +alter table part_cab attach partition part_abc_p1 for values in(3); +prepare part_abc_q1 (int, int, int) as +select * from part_abc where a = $1 and b = $2 and c = $3; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute part_abc_q1 (1, 2, 3); + a | b | c +---+---+--- +(0 rows) + +execute part_abc_q1 (1, 2, 3); + a | b | c +---+---+--- +(0 rows) + +execute part_abc_q1 (1, 2, 3); + a | b | c +---+---+--- +(0 rows) + +execute part_abc_q1 (1, 2, 3); + a | b | c +---+---+--- +(0 rows) + +execute part_abc_q1 (1, 2, 3); + a | b | c +---+---+--- +(0 rows) + +-- single partition should be scanned. 
+explain (analyze, costs off, summary off, timing off) execute part_abc_q1 (1, 2, 3); + QUERY PLAN +------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on part_abc_p1 (actual rows=0 loops=1) + Filter: ((a = $1) AND (b = $2) AND (c = $3)) +(3 rows) + +deallocate part_abc_q1; +drop table part_abc; +-- ensure that an Append node properly handles a sub-partitioned table +-- matching without any of its leaf partitions matching the clause. +create table listp (a int, b int) partition by list (a); +create table listp_1 partition of listp for values in(1) partition by list (b); +create table listp_1_1 partition of listp_1 for values in(1); +create table listp_2 partition of listp for values in(2) partition by list (b); +create table listp_2_1 partition of listp_2 for values in(2); +select * from listp where b = 1; + a | b +---+--- +(0 rows) + +-- Ensure that an Append node properly can handle selection of all first level +-- partitions before finally detecting the correct set of 2nd level partitions +-- which match the given parameter. 
+prepare q1 (int,int) as select * from listp where b in ($1,$2); +execute q1 (1,2); + a | b +---+--- +(0 rows) + +execute q1 (1,2); + a | b +---+--- +(0 rows) + +execute q1 (1,2); + a | b +---+--- +(0 rows) + +execute q1 (1,2); + a | b +---+--- +(0 rows) + +execute q1 (1,2); + a | b +---+--- +(0 rows) + +explain (analyze, costs off, summary off, timing off) execute q1 (1,1); + QUERY PLAN +----------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on listp_1_1 (actual rows=0 loops=1) + Filter: (b = ANY (ARRAY[$1, $2])) + -> Seq Scan on listp_2_1 (never executed) + Filter: (b = ANY (ARRAY[$1, $2])) +(5 rows) + +explain (analyze, costs off, summary off, timing off) execute q1 (2,2); + QUERY PLAN +----------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on listp_1_1 (never executed) + Filter: (b = ANY (ARRAY[$1, $2])) + -> Seq Scan on listp_2_1 (actual rows=0 loops=1) + Filter: (b = ANY (ARRAY[$1, $2])) +(5 rows) + +-- try with no matching partitions +explain (analyze, costs off, summary off, timing off) execute q1 (0,0); + QUERY PLAN +---------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on listp_1_1 (never executed) + Filter: (b = ANY (ARRAY[$1, $2])) + -> Seq Scan on listp_2_1 (never executed) + Filter: (b = ANY (ARRAY[$1, $2])) +(5 rows) + +deallocate q1; +-- test more complex cases where a not-equal condition further eliminates partitions. 
+prepare q1 (int,int,int,int) as select * from listp where b in($1,$2) and $3 <> b and $4 <> b; +execute q1 (1,2,3,4); + a | b +---+--- +(0 rows) + +execute q1 (1,2,3,4); + a | b +---+--- +(0 rows) + +execute q1 (1,2,3,4); + a | b +---+--- +(0 rows) + +execute q1 (1,2,3,4); + a | b +---+--- +(0 rows) + +execute q1 (1,2,3,4); + a | b +---+--- +(0 rows) + +-- both partitions allowed by IN clause, but one disallowed by <> clause +explain (analyze, costs off, summary off, timing off) execute q1 (1,2,2,0); + QUERY PLAN +------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on listp_1_1 (actual rows=0 loops=1) + Filter: ((b = ANY (ARRAY[$1, $2])) AND ($3 <> b) AND ($4 <> b)) + -> Seq Scan on listp_2_1 (never executed) + Filter: ((b = ANY (ARRAY[$1, $2])) AND ($3 <> b) AND ($4 <> b)) +(5 rows) + +-- both partitions allowed by IN clause, then both excluded again by <> clauses +explain (analyze, costs off, summary off, timing off) execute q1 (1,2,2,1); + QUERY PLAN +------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Seq Scan on listp_1_1 (never executed) + Filter: ((b = ANY (ARRAY[$1, $2])) AND ($3 <> b) AND ($4 <> b)) + -> Seq Scan on listp_2_1 (never executed) + Filter: ((b = ANY (ARRAY[$1, $2])) AND ($3 <> b) AND ($4 <> b)) +(5 rows) + +drop table listp; +-- Ensure runtime pruning works with initplans params with boolean types +create table boolvalues (value bool not null); +insert into boolvalues values('t'),('f'); +create table boolp (a bool) partition by list (a); +create table boolp_t partition of boolp for values in('t'); +create table boolp_f partition of boolp for values in('f'); +explain (analyze, costs off, summary off, timing off) +select * from boolp where a = (select value from boolvalues where value); + QUERY PLAN +-------------------------------------------------------- + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) + 
-> Seq Scan on boolvalues (actual rows=1 loops=1) + Filter: value + Rows Removed by Filter: 1 + -> Seq Scan on boolp_f (never executed) + Filter: (a = $0) + -> Seq Scan on boolp_t (actual rows=0 loops=1) + Filter: (a = $0) +(9 rows) + +explain (analyze, costs off, summary off, timing off) +select * from boolp where a = (select value from boolvalues where not value); + QUERY PLAN +-------------------------------------------------------- + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) + -> Seq Scan on boolvalues (actual rows=1 loops=1) + Filter: (NOT value) + Rows Removed by Filter: 1 + -> Seq Scan on boolp_f (actual rows=0 loops=1) + Filter: (a = $0) + -> Seq Scan on boolp_t (never executed) + Filter: (a = $0) +(9 rows) + diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 08fc2dbc21..73b4d109a5 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -253,3 +253,315 @@ create table like_op_noprune2 partition of like_op_noprune for values in ('BCD') explain (costs off) select * from like_op_noprune where a like '%BC'; drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, hp, rp, coll_pruning_multi, like_op_noprune; + +-- +-- Test runtime partition pruning +-- +create table ab (a int not null, b int not null) partition by list (a); +create table ab_a2 partition of ab for values in(2) partition by list (b); +create table ab_a2_b1 partition of ab_a2 for values in (1); +create table ab_a2_b2 partition of ab_a2 for values in (2); +create table ab_a2_b3 partition of ab_a2 for values in (3); +create table ab_a1 partition of ab for values in(1) partition by list (b); +create table ab_a1_b1 partition of ab_a1 for values in (1); +create table ab_a1_b2 partition of ab_a1 for values in (2); +create table ab_a1_b3 partition of ab_a1 for values in (3); +create table ab_a3 partition of ab for values in(3) partition by list (b); +create table ab_a3_b1 partition of ab_a3 
for values in (1); +create table ab_a3_b2 partition of ab_a3 for values in (2); +create table ab_a3_b3 partition of ab_a3 for values in (3); + +prepare ab_q1 (int, int, int) as +select * from ab where a between $1 and $2 and b <= $3; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute ab_q1 (1, 8, 3); +execute ab_q1 (1, 8, 3); +execute ab_q1 (1, 8, 3); +execute ab_q1 (1, 8, 3); +execute ab_q1 (1, 8, 3); + +explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 2, 3); +explain (analyze, costs off, summary off, timing off) execute ab_q1 (1, 2, 3); + +deallocate ab_q1; + +-- runtime pruning after optimizer pruning +prepare ab_q1 (int, int) as +select a from ab where a between $1 and $2 and b < 3; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute ab_q1 (1, 8); +execute ab_q1 (1, 8); +execute ab_q1 (1, 8); +execute ab_q1 (1, 8); +execute ab_q1 (1, 8); + +explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 2); +explain (analyze, costs off, summary off, timing off) execute ab_q1 (2, 4); + +-- parallel append +prepare ab_q2 (int, int) as +select avg(a) from ab where a between $1 and $2 and b < 4; + +-- encourage use of parallel plans +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set min_parallel_table_scan_size = 0; +set max_parallel_workers_per_gather = 2; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute ab_q2 (1, 8); +execute ab_q2 (1, 8); +execute ab_q2 (1, 8); +execute ab_q2 (1, 8); +execute ab_q2 (1, 8); + +explain (analyze, costs off, summary off, timing off) execute ab_q2 (2, 2); + +-- Test run-time pruning with IN lists. +prepare ab_q3 (int, int, int) as +select avg(a) from ab where a in($1,$2,$3) and b < 4; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. 
+execute ab_q3 (1, 2, 3); +execute ab_q3 (1, 2, 3); +execute ab_q3 (1, 2, 3); +execute ab_q3 (1, 2, 3); +execute ab_q3 (1, 2, 3); + +explain (analyze, costs off, summary off, timing off) execute ab_q3 (1, 1, 1); +explain (analyze, costs off, summary off, timing off) execute ab_q3 (2, 3, 3); + +-- try some params whose values do not belong to any partition +explain (analyze, costs off, summary off, timing off) execute ab_q3 (33, 44, 55); + +-- test parallel Append with IN list and parameterized nested loops +create table lprt_a (a int not null); +-- insert some values we won't find in ab +insert into lprt_a select 0 from generate_series(1,100); + +-- and insert some values that we should find. +insert into lprt_a values(1),(1); + +analyze lprt_a; + +create index ab_a2_b1_a_idx on ab_a2_b1 (a); +create index ab_a2_b2_a_idx on ab_a2_b2 (a); +create index ab_a2_b3_a_idx on ab_a2_b3 (a); +create index ab_a1_b1_a_idx on ab_a1_b1 (a); +create index ab_a1_b2_a_idx on ab_a1_b2 (a); +create index ab_a1_b3_a_idx on ab_a1_b3 (a); +create index ab_a3_b1_a_idx on ab_a3_b1 (a); +create index ab_a3_b2_a_idx on ab_a3_b2 (a); +create index ab_a3_b3_a_idx on ab_a3_b3 (a); + +set enable_hashjoin = 0; +set enable_mergejoin = 0; + +prepare ab_q4 (int, int, int) as +select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in($1,$2,$3); +execute ab_q4 (1, 2, 3); +execute ab_q4 (1, 2, 3); +execute ab_q4 (1, 2, 3); +execute ab_q4 (1, 2, 3); +execute ab_q4 (1, 2, 3); + +explain (analyze, costs off, summary off, timing off) execute ab_q4 (0, 0, 1); + +insert into lprt_a values(3),(3); + +explain (analyze, costs off, summary off, timing off) execute ab_q4 (1, 0, 3); +explain (analyze, costs off, summary off, timing off) execute ab_q4 (1, 0, 0); + +delete from lprt_a where a = 1; + +explain (analyze, costs off, summary off, timing off) execute ab_q4 (1, 0, 0); + +reset enable_hashjoin; +reset enable_mergejoin; +reset parallel_setup_cost; +reset parallel_tuple_cost; +reset 
min_parallel_table_scan_size; +reset max_parallel_workers_per_gather; + +-- Test run-time partition pruning with an initplan +explain (analyze, costs off, summary off, timing off) +select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 from lprt_a); + +deallocate ab_q1; +deallocate ab_q2; +deallocate ab_q3; +deallocate ab_q4; + +drop table ab, lprt_a; + +-- join +create table tbl1(col1 int); +insert into tbl1 values (501), (505); + +-- basic table +create table tprt (col1 int) partition by range (col1); +create table tprt_1 partition of tprt for values from (1) to (501); +create table tprt_2 partition of tprt for values from (501) to (1001); +create table tprt_3 partition of tprt for values from (1001) to (2001); +create table tprt_4 partition of tprt for values from (2001) to (3001); +create table tprt_5 partition of tprt for values from (3001) to (4001); +create table tprt_6 partition of tprt for values from (4001) to (5001); + +create index tprt1_idx on tprt_1 (col1); +create index tprt2_idx on tprt_2 (col1); +create index tprt3_idx on tprt_3 (col1); +create index tprt4_idx on tprt_4 (col1); +create index tprt5_idx on tprt_5 (col1); +create index tprt6_idx on tprt_6 (col1); + +insert into tprt values (10), (20), (501), (502), (505), (1001), (4500); + +set enable_hashjoin = off; +set enable_mergejoin = off; + +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 > tprt.col1; + +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 = tprt.col1; + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 > tprt.col1 +order by tbl1.col1, tprt.col1; + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 = tprt.col1 +order by tbl1.col1, tprt.col1; + +-- multiple partitions +insert into tbl1 values (1001), (1010), (1011); +explain (analyze, costs off, summary off, timing off) +select * from tbl1 inner join tprt on tbl1.col1 > 
tprt.col1; + +explain (analyze, costs off, summary off, timing off) +select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1; + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 > tprt.col1 +order by tbl1.col1, tprt.col1; + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 = tprt.col1 +order by tbl1.col1, tprt.col1; + +-- last partition +delete from tbl1; +insert into tbl1 values (4400); +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 < tprt.col1; + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 < tprt.col1 +order by tbl1.col1, tprt.col1; + +-- no matching partition +delete from tbl1; +insert into tbl1 values (10000); +explain (analyze, costs off, summary off, timing off) +select * from tbl1 join tprt on tbl1.col1 = tprt.col1; + +select tbl1.col1, tprt.col1 from tbl1 +inner join tprt on tbl1.col1 = tprt.col1 +order by tbl1.col1, tprt.col1; + +drop table tbl1, tprt; + +-- test with columns defined in varying orders between each level + +create table part_abc (a int not null, b int not null, c int not null) partition by list (a); +create table part_bac (b int not null, a int not null, c int not null) partition by list (b); +create table part_cab (c int not null, a int not null, b int not null) partition by list (c); +create table part_abc_p1 (a int not null, b int not null, c int not null); + +alter table part_abc attach partition part_bac for values in(1); +alter table part_bac attach partition part_cab for values in(2); +alter table part_cab attach partition part_abc_p1 for values in(3); + +prepare part_abc_q1 (int, int, int) as +select * from part_abc where a = $1 and b = $2 and c = $3; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. 
+execute part_abc_q1 (1, 2, 3); +execute part_abc_q1 (1, 2, 3); +execute part_abc_q1 (1, 2, 3); +execute part_abc_q1 (1, 2, 3); +execute part_abc_q1 (1, 2, 3); + +-- only the single leaf partition, part_abc_p1, should be scanned. +explain (analyze, costs off, summary off, timing off) execute part_abc_q1 (1, 2, 3); + +deallocate part_abc_q1; + +drop table part_abc; + +-- ensure that an Append node properly handles a sub-partitioned table +-- matching without any of its leaf partitions matching the clause. +create table listp (a int, b int) partition by list (a); +create table listp_1 partition of listp for values in(1) partition by list (b); +create table listp_1_1 partition of listp_1 for values in(1); +create table listp_2 partition of listp for values in(2) partition by list (b); +create table listp_2_1 partition of listp_2 for values in(2); +select * from listp where b = 1; + +-- Ensure that an Append node can properly handle selection of all first level +-- partitions before finally detecting the correct set of 2nd level partitions +-- which match the given parameter. +prepare q1 (int,int) as select * from listp where b in ($1,$2); + +execute q1 (1,2); +execute q1 (1,2); +execute q1 (1,2); +execute q1 (1,2); +execute q1 (1,2); + +explain (analyze, costs off, summary off, timing off) execute q1 (1,1); + +explain (analyze, costs off, summary off, timing off) execute q1 (2,2); + +-- try with no matching partitions +explain (analyze, costs off, summary off, timing off) execute q1 (0,0); + +deallocate q1; + +-- test more complex cases where a not-equal condition further eliminates partitions. 
+prepare q1 (int,int,int,int) as select * from listp where b in($1,$2) and $3 <> b and $4 <> b; + +execute q1 (1,2,3,4); +execute q1 (1,2,3,4); +execute q1 (1,2,3,4); +execute q1 (1,2,3,4); +execute q1 (1,2,3,4); + +-- both partitions allowed by IN clause, but one disallowed by <> clause +explain (analyze, costs off, summary off, timing off) execute q1 (1,2,2,0); + +-- both partitions allowed by IN clause, then both excluded again by <> clauses +explain (analyze, costs off, summary off, timing off) execute q1 (1,2,2,1); + +drop table listp; + +-- Ensure run-time pruning works with initplan params of boolean type +create table boolvalues (value bool not null); +insert into boolvalues values('t'),('f'); + +create table boolp (a bool) partition by list (a); +create table boolp_t partition of boolp for values in('t'); +create table boolp_f partition of boolp for values in('f'); + +explain (analyze, costs off, summary off, timing off) +select * from boolp where a = (select value from boolvalues where value); + +explain (analyze, costs off, summary off, timing off) +select * from boolp where a = (select value from boolvalues where not value); -- 2.11.0