From ed8469d38a0747fe1b3d1fb3bb8c45b4cb2a2b45 Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 1 Nov 2017 10:31:21 +0900 Subject: [PATCH 4/4] During tuple-routing, initialize per-partition objects lazily Those objects include ResultRelInfo, tuple conversion map, WITH CHECK OPTION quals and RETURNING projections. This means we don't allocate these objects for partitions that are never inserted into. --- src/backend/commands/copy.c | 15 +-- src/backend/executor/execPartition.c | 225 ++++++++++++++++++++++++--------- src/backend/executor/nodeModifyTable.c | 108 ++-------------- src/include/nodes/execnodes.h | 1 + 4 files changed, 180 insertions(+), 169 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index e7fe020fa7..3674aea9b3 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2479,23 +2479,14 @@ CopyFrom(CopyState cstate) * If we are capturing transition tuples, they may need to be * converted from partition format back to partitioned table format * (this is only ever necessary if a BEFORE trigger modifies the - * tuple). + * tuple). Note that we don't allocate the actual maps here; they'll + * be allocated by ExecInitPartitionResultRelInfo() if and when + * needed. 
*/ if (cstate->transition_capture != NULL) - { - int i; - mtstate->mt_transition_tupconv_maps = (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * mtstate->mt_num_partitions); - for (i = 0; i < mtstate->mt_num_partitions; ++i) - { - mtstate->mt_transition_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(mtstate->mt_partitions[i]->ri_RelationDesc), - RelationGetDescr(cstate->rel), - gettext_noop("could not convert row type")); - } - } } /* diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index a495b165bd..3e2226e5f8 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -23,6 +23,8 @@ #include "utils/rls.h" #include "utils/ruleutils.h" +static void ExecInitPartitionResultRelInfo(ModifyTableState *mtstate, + int partidx); static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel, int *num_parted, List **leaf_part_oids); static void get_partition_dispatch_recurse(Relation rel, Relation parent, @@ -47,14 +49,9 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, void ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) { - TupleDesc tupDesc = RelationGetDescr(rel); List *leaf_parts; ListCell *cell; int i; - EState *estate = mtstate->ps.state; - ModifyTable *node = (ModifyTable *) mtstate->ps.plan; - Index resultRTindex = node->nominalRelation; - ResultRelInfo *leaf_part_rri; /* * Get the information about the partition tree after locking all the @@ -66,6 +63,11 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) &mtstate->mt_num_dispatch, &leaf_parts); mtstate->mt_num_partitions = list_length(leaf_parts); + mtstate->mt_partition_oids = (Oid *) palloc0(sizeof(Oid) * + mtstate->mt_num_partitions); + i = 0; + foreach (cell, leaf_parts) + mtstate->mt_partition_oids[i++] = lfirst_oid(cell); /* * Allocate an array of ResultRelInfo pointers, but actual @@ -87,62 +89,6 @@ 
ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) * processing. */ mtstate->mt_partition_tuple_slot = MakeTupleTableSlot(); - - leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo) * - mtstate->mt_num_partitions); - i = 0; - foreach(cell, leaf_parts) - { - Relation partrel; - TupleDesc part_tupdesc; - - /* - * We locked all the partitions above including the leaf partitions. - * Note that each of the relations in *partitions are eventually - * closed by the caller. - */ - partrel = heap_open(lfirst_oid(cell), NoLock); - part_tupdesc = RelationGetDescr(partrel); - - /* - * Save a tuple conversion map to convert a tuple routed to this - * partition from the parent's type to the partition's. - */ - mtstate->mt_partition_tupconv_maps[i] = - convert_tuples_by_name(tupDesc, - part_tupdesc, - gettext_noop("could not convert row type")); - - InitResultRelInfo(leaf_part_rri, - partrel, - resultRTindex, - rel, - estate->es_instrument); - - /* - * Verify result relation is a valid target for INSERT. - */ - CheckValidResultRel(leaf_part_rri, CMD_INSERT); - - /* - * Open partition indices. The user may have asked to check for - * conflicts within this leaf partition and do "nothing" instead of - * throwing an error. Be prepared in that case by initializing the - * index information needed by ExecInsert() to perform speculative - * insertions. - */ - if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex && - leaf_part_rri->ri_IndexRelationDescs == NULL) - ExecOpenIndices(leaf_part_rri, - mtstate != NULL && - mtstate->mt_onconflict != ONCONFLICT_NONE); - - estate->es_leaf_result_relations = - lappend(estate->es_leaf_result_relations, leaf_part_rri); - - mtstate->mt_partitions[i] = leaf_part_rri++; - i++; - } } /* @@ -257,11 +203,168 @@ ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot) val_desc ? 
errdetail("Partition key of the failing row contains %s.", val_desc) : 0)); } + /* Initialize the partition result rel, if not done already. */ + ExecInitPartitionResultRelInfo(mtstate, result); ecxt->ecxt_scantuple = ecxt_scantuple_old; return result; } /* + * ExecInitPartitionResultRelInfo + * Initialize ResultRelInfo (plus tuple conversion maps, WITH CHECK OPTION quals and RETURNING projection) for the partition at index partidx, if not done already + */ +static void +ExecInitPartitionResultRelInfo(ModifyTableState *mtstate, int partidx) +{ + EState *estate = mtstate->ps.state; + Relation rootrel = mtstate->resultRelInfo->ri_RelationDesc; + Index resultRTindex = mtstate->resultRelInfo->ri_RangeTableIndex; + ModifyTable *node = (ModifyTable *) mtstate->ps.plan; + Relation partrel; + TupleDesc tupDesc = RelationGetDescr(rootrel), + part_tupdesc; + + /* Nothing to do if already set. */ + if (mtstate->mt_partitions[partidx]) + return; + + mtstate->mt_partitions[partidx] = (ResultRelInfo *) + palloc0(sizeof(ResultRelInfo)); + + /* + * We locked all the partitions in ExecSetupPartitionTupleRouting + * including the leaf partitions. + */ + partrel = heap_open(mtstate->mt_partition_oids[partidx], NoLock); + part_tupdesc = RelationGetDescr(partrel); + InitResultRelInfo(mtstate->mt_partitions[partidx], + partrel, + resultRTindex, + rootrel, + estate->es_instrument); + + /* + * Verify result relation is a valid target for INSERT. + */ + CheckValidResultRel(mtstate->mt_partitions[partidx], CMD_INSERT); + + /* + * Open partition indices. The user may have asked to check for + * conflicts within this leaf partition and do "nothing" instead of + * throwing an error. Be prepared in that case by initializing the + * index information needed by ExecInsert() to perform speculative + * insertions. 
+ */ + if (partrel->rd_rel->relhasindex && + mtstate->mt_partitions[partidx]->ri_IndexRelationDescs == NULL) + ExecOpenIndices(mtstate->mt_partitions[partidx], + mtstate->mt_onconflict != ONCONFLICT_NONE); + + /* + * Save a tuple conversion map to convert a tuple routed to this + * partition from the parent's type to the partition's. + */ + mtstate->mt_partition_tupconv_maps[partidx] = + convert_tuples_by_name(tupDesc, part_tupdesc, + gettext_noop("could not convert row type")); + + /* + * Also save, if needed, the map to convert from partition's rowtype to the + * parent's that is needed to store the partition's tuples into the + * transition tuplestore which only accepts tuples of parent's rowtype. + */ + if (mtstate->mt_transition_tupconv_maps) + mtstate->mt_transition_tupconv_maps[partidx] = + convert_tuples_by_name(part_tupdesc, tupDesc, + gettext_noop("could not convert row type")); + + /* + * Build WITH CHECK OPTION constraints for this leaf partition rel. Note + * that we didn't build the withCheckOptionList for each partition within + * the planner, but simple translation of the varattnos for each partition + * will suffice. This only occurs for the INSERT case; UPDATE/DELETE + * cases are handled in ExecInitModifyTable(). + */ + if (node && node->withCheckOptionLists != NIL) + { + List *wcoList; + List *mapped_wcoList; + List *wcoExprs = NIL; + ListCell *ll; + + /* + * In case of INSERT on partitioned tables, there is only one plan. + * Likewise, there is only one WITH CHECK OPTIONS list, not one per + * partition. We make a copy of the WCO qual for each partition; note + * that, if there are SubPlans in there, they all end up attached to + * the one parent Plan node. 
+ */ + Assert(mtstate->operation == CMD_INSERT && + list_length(node->withCheckOptionLists) == 1 && + mtstate->mt_nplans == 1); + wcoList = linitial(node->withCheckOptionLists); + mapped_wcoList = map_partition_varattnos(wcoList, + resultRTindex, + partrel, rootrel, NULL); + foreach(ll, mapped_wcoList) + { + WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); + ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), + mtstate->mt_plans[0]); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + mtstate->mt_partitions[partidx]->ri_WithCheckOptions = mapped_wcoList; + mtstate->mt_partitions[partidx]->ri_WithCheckOptionExprs = wcoExprs; + } + + /* + * Build a projection for this leaf partition rel. Note that we + * didn't build the returningList for each partition within the + * planner, but simple translation of the varattnos for each partition + * will suffice. This only occurs for the INSERT case; UPDATE/DELETE + * are handled in ExecInitModifyTable(). + */ + if (node && node->returningLists != NIL) + { + TupleTableSlot *slot; + ExprContext *econtext; + List *returningList; + List *rlist; + + returningList = linitial(node->returningLists); + + /* + * Initialize result tuple slot and assign its rowtype using the first + * RETURNING list. We assume the rest will look the same. 
+ */ + tupDesc = ExecTypeFromTL(returningList, false); + + /* Set up a slot for the output of the RETURNING projection(s). NOTE(review): this now runs once for every partition that gets initialized here, while ExecInitModifyTable() still appears to set up the result slot and rowtype itself; confirm that creating another slot per partition is intended rather than reusing mtstate->ps.ps_ResultTupleSlot. */ + ExecInitResultTupleSlot(estate, &mtstate->ps); + ExecAssignResultType(&mtstate->ps, tupDesc); + slot = mtstate->ps.ps_ResultTupleSlot; + + /* Need an econtext too */ + if (mtstate->ps.ps_ExprContext == NULL) + ExecAssignExprContext(estate, &mtstate->ps); + econtext = mtstate->ps.ps_ExprContext; + + rlist = map_partition_varattnos(returningList, + resultRTindex, + partrel, rootrel, NULL); + mtstate->mt_partitions[partidx]->ri_projectReturning = + ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, + part_tupdesc); + } + + /* Note that the entries in this list appear in no predetermined order. */ + estate->es_leaf_result_relations = + lappend(estate->es_leaf_result_relations, + mtstate->mt_partitions[partidx]); +} + +/* * RelationGetPartitionDispatchInfo * Returns information necessary to route tuples down a partition tree * diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 6a3b171587..8b45fdaeb7 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1511,23 +1511,14 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) mtstate->mt_transition_tupconv_maps = (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * numResultRelInfos); - /* Choose the right set of partitions */ + /* + * If partition tuple-routing is active, we can't have partition + * ResultRelInfo's just yet, so return in that case. Instead, + * the conversion map will be initialized in + * ExecInitPartitionResultRelInfo() if and when needed. + */ if (mtstate->mt_partition_dispatch_info != NULL) - { - /* - * For tuple routing among partitions, we need TupleDescs based on - * the partition routing table. 
- */ - ResultRelInfo **resultRelInfos = mtstate->mt_partitions; - - for (i = 0; i < numResultRelInfos; ++i) - { - mtstate->mt_transition_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(resultRelInfos[i]->ri_RelationDesc), - RelationGetDescr(targetRelInfo->ri_RelationDesc), - gettext_noop("could not convert row type")); - } - } + return; else { /* Otherwise we need the ResultRelInfo for each subplan. */ @@ -1978,65 +1969,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } /* - * Build WITH CHECK OPTION constraints for each leaf partition rel. Note - * that we didn't build the withCheckOptionList for each partition within - * the planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case; UPDATE/DELETE - * cases are handled above. - */ - if (node->withCheckOptionLists != NIL && mtstate->mt_num_partitions > 0) - { - List *wcoList; - PlanState *plan; - - /* - * In case of INSERT on partitioned tables, there is only one plan. - * Likewise, there is only one WITH CHECK OPTIONS list, not one per - * partition. We make a copy of the WCO qual for each partition; note - * that, if there are SubPlans in there, they all end up attached to - * the one parent Plan node. 
- */ - Assert(operation == CMD_INSERT && - list_length(node->withCheckOptionLists) == 1 && - mtstate->mt_nplans == 1); - wcoList = linitial(node->withCheckOptionLists); - plan = mtstate->mt_plans[0]; - for (i = 0; i < mtstate->mt_num_partitions; i++) - { - Relation partrel; - List *mapped_wcoList; - List *wcoExprs = NIL; - ListCell *ll; - - resultRelInfo = mtstate->mt_partitions[i]; - partrel = resultRelInfo->ri_RelationDesc; - - /* varno = node->nominalRelation */ - mapped_wcoList = map_partition_varattnos(wcoList, - node->nominalRelation, - partrel, rel, NULL); - foreach(ll, mapped_wcoList) - { - WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); - ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), - plan); - - wcoExprs = lappend(wcoExprs, wcoExpr); - } - - resultRelInfo->ri_WithCheckOptions = mapped_wcoList; - resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - } - } - - /* * Initialize RETURNING projections if needed. */ if (node->returningLists) { TupleTableSlot *slot; ExprContext *econtext; - List *returningList; /* * Initialize result tuple slot and assign its rowtype using the first @@ -2068,31 +2006,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } - - /* - * Build a projection for each leaf partition rel. Note that we - * didn't build the returningList for each partition within the - * planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case; UPDATE/DELETE - * are handled above. 
- */ - returningList = linitial(node->returningLists); - for (i = 0; i < mtstate->mt_num_partitions; i++) - { - Relation partrel; - List *rlist; - - resultRelInfo = mtstate->mt_partitions[i]; - partrel = resultRelInfo->ri_RelationDesc; - - /* varno = node->nominalRelation */ - rlist = map_partition_varattnos(returningList, - node->nominalRelation, - partrel, rel, NULL); - resultRelInfo->ri_projectReturning = - ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, - resultRelInfo->ri_RelationDesc->rd_att); - } } else { @@ -2367,8 +2280,11 @@ ExecEndModifyTable(ModifyTableState *node) { ResultRelInfo *resultRelInfo = node->mt_partitions[i]; - ExecCloseIndices(resultRelInfo); - heap_close(resultRelInfo->ri_RelationDesc, NoLock); + if (resultRelInfo) + { + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } } /* Release the standalone partition tuple descriptor, if any */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 1a35c5c9ad..988a374a74 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -982,6 +982,7 @@ typedef struct ModifyTableState int mt_num_dispatch; /* Number of entries in the above array */ int mt_num_partitions; /* Number of members in the following * arrays */ + Oid *mt_partition_oids; /* Per partition OIDs */ ResultRelInfo **mt_partitions; /* Per partition result relation pointers */ TupleConversionMap **mt_partition_tupconv_maps; /* Per partition tuple conversion map */ -- 2.11.0