From 93a0fdedf326a930a7113489250ab2ddd1f478e3 Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 19 Dec 2017 16:20:09 +0900 Subject: [PATCH v7] During tuple-routing, initialize per-partition objects lazily Those objects include ResultRelInfo, tuple conversion map, WITH CHECK OPTION quals and RETURNING projections. This means we don't allocate these objects for partitions that are never inserted into. --- src/backend/commands/copy.c | 10 +- src/backend/executor/execPartition.c | 302 ++++++++++++++++++++++++--------- src/backend/executor/nodeModifyTable.c | 131 ++------------ src/include/executor/execPartition.h | 9 +- 4 files changed, 244 insertions(+), 208 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index b3933df9af..118452b602 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2470,7 +2470,7 @@ CopyFrom(CopyState cstate) PartitionTupleRouting *proute; proute = cstate->partition_tuple_routing = - ExecSetupPartitionTupleRouting(NULL, cstate->rel, 1, estate); + ExecSetupPartitionTupleRouting(NULL, cstate->rel); /* * If we are capturing transition tuples, they may need to be @@ -2607,6 +2607,14 @@ CopyFrom(CopyState cstate) */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + if (resultRelInfo == NULL) + { + resultRelInfo = ExecInitPartitionInfo(NULL, + saved_resultRelInfo, + proute, estate, + leaf_part_index); + Assert(resultRelInfo != NULL); + } /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 4048c3ebc6..9e3910a4a6 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -44,18 +44,23 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, * * Note that all the relations in the partition tree are locked using the * RowExclusiveLock mode upon return from this function. + * + * While we allocate the arrays of pointers of ResultRelInfo and + * TupleConversionMap for all partitions here, actual objects themselves are + * lazily allocated for a given partition if a tuple is actually routed to it; + * see ExecInitPartitionInfo. However, if the function is invoked for update + * tuple routing, caller would already have initialized ResultRelInfo's for + * some of the partitions, which are reused and assigned to their respective + * slot in the aforementioned array. */ PartitionTupleRouting * -ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate) +ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) { TupleDesc tupDesc = RelationGetDescr(rel); List *leaf_parts; ListCell *cell; int i; - ResultRelInfo *leaf_part_arr = NULL, - *update_rri = NULL; + ResultRelInfo *update_rri = NULL; int num_update_rri = 0, update_rri_index = 0; bool is_update = false; @@ -76,6 +81,8 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, proute->parent_child_tupconv_maps = (TupleConversionMap **) palloc0(proute->num_partitions * sizeof(TupleConversionMap *)); + proute->partition_oids = (Oid *) palloc(proute->num_partitions * + sizeof(Oid)); /* Set up details specific to the type of tuple routing we are doing. */ if (mtstate && mtstate->operation == CMD_UPDATE) @@ -95,16 +102,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, */ proute->root_tuple_slot = MakeTupleTableSlot(); } - else - { - /* - * Since we are inserting tuples, we need to create all new result - * rels. Avoid repeated pallocs by allocating memory for all the - * result rels in bulk. - */ - leaf_part_arr = (ResultRelInfo *) palloc0(proute->num_partitions * - sizeof(ResultRelInfo)); - } /* * Initialize an empty slot that will be used to manipulate tuples of any @@ -117,11 +114,10 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, i = 0; foreach(cell, leaf_parts) { - ResultRelInfo *leaf_part_rri; - Relation partrel = NULL; - TupleDesc part_tupdesc; + ResultRelInfo *leaf_part_rri = NULL; Oid leaf_oid = lfirst_oid(cell); + proute->partition_oids[i] = leaf_oid; if (is_update) { /* @@ -136,6 +132,9 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, if (update_rri_index < num_update_rri && RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid) { + Relation partrel = NULL; + TupleDesc part_tupdesc; + leaf_part_rri = &update_rri[update_rri_index]; partrel = leaf_part_rri->ri_RelationDesc; @@ -151,73 +150,26 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, proute->subplan_partition_offsets[update_rri_index] = i; update_rri_index++; - } - else - leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); - } - else - { - /* For INSERTs, we already have an array of result rels allocated */ - leaf_part_rri = &leaf_part_arr[i]; - } - - /* - * If we didn't open the partition rel, it means we haven't - * initialized the result rel either. - */ - if (!partrel) - { - /* - * We locked all the partitions above including the leaf - * partitions. Note that each of the newly opened relations in - * proute->partitions are eventually closed by the caller. - */ - partrel = heap_open(leaf_oid, NoLock); - InitResultRelInfo(leaf_part_rri, - partrel, - resultRTindex, - rel, - estate->es_instrument); - - /* - * Since we've just initialized this ResultRelInfo, it's not in - * any list attached to the estate as yet. Add it, so that it can - * be found later. - */ - estate->es_tuple_routing_result_relations = - lappend(estate->es_tuple_routing_result_relations, - leaf_part_rri); - } - - part_tupdesc = RelationGetDescr(partrel); - /* - * Save a tuple conversion map to convert a tuple routed to this - * partition from the parent's type to the partition's. - */ - proute->parent_child_tupconv_maps[i] = - convert_tuples_by_name(tupDesc, part_tupdesc, - gettext_noop("could not convert row type")); + part_tupdesc = RelationGetDescr(partrel); - /* - * Verify result relation is a valid target for an INSERT. An UPDATE - * of a partition-key becomes a DELETE+INSERT operation, so this check - * is still required when the operation is CMD_UPDATE. - */ - CheckValidResultRel(leaf_part_rri, CMD_INSERT); + /* + * Save a tuple conversion map to convert a tuple routed to + * this partition from the parent's type to the partition's. + */ + proute->parent_child_tupconv_maps[i] = + convert_tuples_by_name(tupDesc, part_tupdesc, + gettext_noop("could not convert row type")); - /* - * Open partition indices. The user may have asked to check for - * conflicts within this leaf partition and do "nothing" instead of - * throwing an error. Be prepared in that case by initializing the - * index information needed by ExecInsert() to perform speculative - * insertions. - */ - if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex && - leaf_part_rri->ri_IndexRelationDescs == NULL) - ExecOpenIndices(leaf_part_rri, - mtstate != NULL && - mtstate->mt_onconflict != ONCONFLICT_NONE); + /* + * Verify result relation is a valid target for an INSERT. An + * UPDATE of a partition-key becomes a DELETE+INSERT operation, + * so this check is required even when the operation is + * CMD_UPDATE. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); + } + } proute->partitions[i] = leaf_part_rri; i++; @@ -352,6 +304,185 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, } /* + * ExecInitPartitionInfo + * Initialize ResultRelInfo and other information for a partition if not + * already done + * + * Returns the ResultRelInfo + */ +ResultRelInfo * +ExecInitPartitionInfo(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + PartitionTupleRouting *proute, + EState *estate, int partidx) +{ + Relation rootrel = resultRelInfo->ri_RelationDesc, + partrel; + ResultRelInfo *leaf_part_rri; + int firstVarno; + Relation firstResultRel; + ModifyTable *node = NULL; + + if (mtstate) + { + node = (ModifyTable *) mtstate->ps.plan; + firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + } + + /* + * We locked all the partitions in ExecSetupPartitionTupleRouting + * including the leaf partitions. + */ + partrel = heap_open(proute->partition_oids[partidx], NoLock); + leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); + InitResultRelInfo(leaf_part_rri, + partrel, + node ? node->nominalRelation : 1, + rootrel, + estate->es_instrument); + + /* + * Verify result relation is a valid target for an INSERT. An UPDATE + * of a partition-key becomes a DELETE+INSERT operation, so this check + * is still required when the operation is CMD_UPDATE. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); + + /* + * Since we've just initialized this ResultRelInfo, it's not in + * any list attached to the estate as yet. Add it, so that it can + * be found later. + * + * Note that the entries in this list appear in no predetermined + * order, because partition result rels are initialized as and when + * they're needed. + */ + estate->es_tuple_routing_result_relations = + lappend(estate->es_tuple_routing_result_relations, + leaf_part_rri); + + /* + * Open partition indices. The user may have asked to check for + * conflicts within this leaf partition and do "nothing" instead of + * throwing an error. Be prepared in that case by initializing the + * index information needed by ExecInsert() to perform speculative + * insertions. + */ + if (partrel->rd_rel->relhasindex && + leaf_part_rri->ri_IndexRelationDescs == NULL) + ExecOpenIndices(leaf_part_rri, + (mtstate != NULL && + mtstate->mt_onconflict != ONCONFLICT_NONE)); + + /* + * Build WITH CHECK OPTION constraints for this partition rel. Note + * that we didn't build the withCheckOptionList for each partition + * within the planner, but simple translation of the varattnos will + * suffice. This only occurs for the INSERT case or in the case of + * UPDATE for which we didn't find a result rel above to reuse. + */ + if (node && node->withCheckOptionLists != NIL) + { + List *wcoList; + List *mapped_wcoList; + List *wcoExprs = NIL; + ListCell *ll; + + /* + * In the case of INSERT on partitioned tables, there is only one + * plan. Likewise, there is only one WCO list, not one per + * partition. For UPDATE, there would be as many WCO lists as + * there are plans, but we use the first one as reference. Note + * that if there are SubPlans in there, they all end up attached + * to the one parent Plan node. + */ + Assert((node->operation == CMD_INSERT && + list_length(node->withCheckOptionLists) == 1 && + list_length(node->plans) == 1) || + (node->operation == CMD_UPDATE && + list_length(node->withCheckOptionLists) == + list_length(node->plans))); + wcoList = linitial(node->withCheckOptionLists); + + mapped_wcoList = map_partition_varattnos(wcoList, firstVarno, + partrel, firstResultRel, + NULL); + foreach(ll, mapped_wcoList) + { + WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); + ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), + mtstate->mt_plans[0]); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + leaf_part_rri->ri_WithCheckOptions = mapped_wcoList; + leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs; + } + + /* + * Build the RETURNING projection if any for the partition. Note that + * we didn't build the returningList for each partition within the + * planner, but simple translation of the varattnos will suffice. + * This only occurs for the INSERT case; in the UPDATE/DELETE cases, + * ExecInitModifyTable() would've initialized this. + */ + if (node && node->returningLists != NIL) + { + TupleTableSlot *slot; + ExprContext *econtext; + List *returningList; + List *rlist; + TupleDesc tupDesc; + + /* See the comment written above for WCO lists. */ + Assert((node->operation == CMD_INSERT && + list_length(node->returningLists) == 1 && + list_length(node->plans) == 1) || + (node->operation == CMD_UPDATE && + list_length(node->returningLists) == + list_length(node->plans))); + returningList = linitial(node->returningLists); + + /* + * Initialize result tuple slot and assign its rowtype using the first + * RETURNING list. We assume the rest will look the same. + */ + tupDesc = ExecTypeFromTL(returningList, false); + + /* Set up a slot for the output of the RETURNING projection(s) */ + ExecInitResultTupleSlot(estate, &mtstate->ps); + ExecAssignResultType(&mtstate->ps, tupDesc); + slot = mtstate->ps.ps_ResultTupleSlot; + + /* Need an econtext too */ + if (mtstate->ps.ps_ExprContext == NULL) + ExecAssignExprContext(estate, &mtstate->ps); + econtext = mtstate->ps.ps_ExprContext; + + rlist = map_partition_varattnos(returningList, firstVarno, + partrel, firstResultRel, NULL); + leaf_part_rri->ri_projectReturning = + ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, + RelationGetDescr(partrel)); + } + + Assert (proute->partitions[partidx] == NULL); + proute->partitions[partidx] = leaf_part_rri; + + /* + * Save a tuple conversion map to convert a tuple routed to this + * partition from the parent's type to the partition's. + */ + proute->parent_child_tupconv_maps[partidx] = + convert_tuples_by_name(RelationGetDescr(rootrel), + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + + return leaf_part_rri; +} + +/* * ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition * child-to-root tuple conversion map array. * @@ -495,8 +626,11 @@ ExecCleanupTupleRouting(PartitionTupleRouting *proute) continue; } - ExecCloseIndices(resultRelInfo); - heap_close(resultRelInfo->ri_RelationDesc, NoLock); + if (resultRelInfo) + { + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } } /* Release the standalone partition tuple descriptors, if any */ diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 2a8ecbd830..36e2041755 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -310,6 +310,14 @@ ExecInsert(ModifyTableState *mtstate, */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + if (resultRelInfo == NULL) + { + resultRelInfo = ExecInitPartitionInfo(mtstate, + saved_resultRelInfo, + proute, estate, + leaf_part_index); + Assert(resultRelInfo != NULL); + } /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -2098,14 +2106,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; Plan *subplan; - int firstVarno = 0; - Relation firstResultRel = NULL; ListCell *l; int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; - PartitionTupleRouting *proute = NULL; - int num_partitions = 0; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2228,20 +2232,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) - { - proute = mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, - rel, node->nominalRelation, - estate); - num_partitions = proute->num_partitions; - - /* - * Below are required as reference objects for mapping partition - * attno's in expressions such as WithCheckOptions and RETURNING. - */ - firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; - } + mtstate->mt_partition_tuple_routing = + ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a @@ -2288,77 +2280,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } /* - * Build WITH CHECK OPTION constraints for each leaf partition rel. Note - * that we didn't build the withCheckOptionList for each partition within - * the planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE row - * movement. DELETEs and local UPDATEs are handled above. - */ - if (node->withCheckOptionLists != NIL && num_partitions > 0) - { - List *first_wcoList; - - /* - * In case of INSERT on partitioned tables, there is only one plan. - * Likewise, there is only one WITH CHECK OPTIONS list, not one per - * partition. Whereas for UPDATE, there are as many WCOs as there are - * plans. So in either case, use the WCO expression of the first - * resultRelInfo as a reference to calculate attno's for the WCO - * expression of each of the partitions. We make a copy of the WCO - * qual for each partition. Note that, if there are SubPlans in there, - * they all end up attached to the one parent Plan node. - */ - Assert(update_tuple_routing_needed || - (operation == CMD_INSERT && - list_length(node->withCheckOptionLists) == 1 && - mtstate->mt_nplans == 1)); - - first_wcoList = linitial(node->withCheckOptionLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *mapped_wcoList; - List *wcoExprs = NIL; - ListCell *ll; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have - * WithCheckOptions initialized. - */ - if (resultRelInfo->ri_WithCheckOptions) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - mapped_wcoList = map_partition_varattnos(first_wcoList, - firstVarno, - partrel, firstResultRel, - NULL); - foreach(ll, mapped_wcoList) - { - WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); - ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), - &mtstate->ps); - - wcoExprs = lappend(wcoExprs, wcoExpr); - } - - resultRelInfo->ri_WithCheckOptions = mapped_wcoList; - resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - } - } - - /* * Initialize RETURNING projections if needed. */ if (node->returningLists) { TupleTableSlot *slot; ExprContext *econtext; - List *firstReturningList; /* * Initialize result tuple slot and assign its rowtype using the first @@ -2389,44 +2316,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } - - /* - * Build a projection for each leaf partition rel. Note that we - * didn't build the returningList for each partition within the - * planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE - * row movement. DELETEs and local UPDATEs are handled above. - */ - firstReturningList = linitial(node->returningLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *rlist; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have a returningList - * built. - */ - if (resultRelInfo->ri_projectReturning) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - /* - * Use the returning expression of the first resultRelInfo as a - * reference to calculate attno's for the returning expression of - * each of the partitions. - */ - rlist = map_partition_varattnos(firstReturningList, - firstVarno, - partrel, firstResultRel, NULL); - resultRelInfo->ri_projectReturning = - ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, - resultRelInfo->ri_RelationDesc->rd_att); - } } else { diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 3df9c498bb..e94718608f 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -58,6 +58,7 @@ typedef struct PartitionDispatchData *PartitionDispatch; * partition tree. * num_dispatch number of partitioned tables in the partition * tree (= length of partition_dispatch_info[]) + * partition_oids Array of leaf partitions OIDs * partitions Array of ResultRelInfo* objects with one entry * for every leaf partition in the partition tree. * num_partitions Number of leaf partitions in the partition tree @@ -91,6 +92,7 @@ typedef struct PartitionTupleRouting { PartitionDispatch *partition_dispatch_info; int num_dispatch; + Oid *partition_oids; ResultRelInfo **partitions; int num_partitions; TupleConversionMap **parent_child_tupconv_maps; @@ -103,12 +105,15 @@ typedef struct PartitionTupleRouting } PartitionTupleRouting; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate); + Relation rel); extern int ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, TupleTableSlot *slot, EState *estate); +extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + PartitionTupleRouting *proute, + EState *estate, int partidx); extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute); extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute, ResultRelInfo *rootRelInfo, int leaf_index); -- 2.11.0