From 793b407545b0d24715f0d44a9a546689e9a4282a Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 20 Mar 2018 10:09:38 +0900 Subject: [PATCH v7] Fix ON CONFLICT to work with partitioned tables Author: Amit Langote, Alvaro Herrera, Etsuro Fujita --- doc/src/sgml/ddl.sgml | 15 -- src/backend/catalog/heap.c | 2 +- src/backend/catalog/partition.c | 62 +++++-- src/backend/commands/tablecmds.c | 15 +- src/backend/executor/execPartition.c | 237 ++++++++++++++++++++++++-- src/backend/executor/nodeModifyTable.c | 93 ++++++++-- src/backend/parser/analyze.c | 7 - src/include/catalog/partition.h | 2 +- src/include/executor/execPartition.h | 10 ++ src/include/nodes/execnodes.h | 1 + src/test/regress/expected/insert_conflict.out | 73 ++++++-- src/test/regress/expected/triggers.out | 33 ++++ src/test/regress/sql/insert_conflict.sql | 64 +++++-- src/test/regress/sql/triggers.sql | 33 ++++ 14 files changed, 551 insertions(+), 96 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 3a54ba9d5a..8805b88d82 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -3324,21 +3324,6 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02 - Using the ON CONFLICT clause with partitioned tables - will cause an error if the conflict target is specified (see - for more details on how the clause - works). Therefore, it is not possible to specify - DO UPDATE as the alternative action, because - specifying the conflict target is mandatory in that case. On the other - hand, specifying DO NOTHING as the alternative action - works fine provided the conflict target is not specified. In that case, - unique constraints (or exclusion constraints) of the individual leaf - partitions are considered. - - - - - When an UPDATE causes a row to move from one partition to another, there is a chance that another concurrent UPDATE or DELETE misses this row. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 3d80ff9e5b..13489162df 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1776,7 +1776,7 @@ heap_drop_with_catalog(Oid relid) elog(ERROR, "cache lookup failed for relation %u", relid); if (((Form_pg_class) GETSTRUCT(tuple))->relispartition) { - parentOid = get_partition_parent(relid); + parentOid = get_partition_parent(relid, false); LockRelationOid(parentOid, AccessExclusiveLock); /* diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 786c05df73..8dc73ae092 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -192,6 +192,7 @@ static int get_partition_bound_num_indexes(PartitionBoundInfo b); static int get_greatest_modulus(PartitionBoundInfo b); static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc, Datum *values, bool *isnull); +static Oid get_partition_parent_recurse(Relation inhRel, Oid relid, bool getroot); /* * RelationBuildPartitionDesc @@ -1384,24 +1385,43 @@ check_default_allows_bound(Relation parent, Relation default_rel, /* * get_partition_parent + * Obtain direct parent or topmost ancestor of given relation * - * Returns inheritance parent of a partition by scanning pg_inherits + * Returns direct inheritance parent of a partition by scanning pg_inherits; + * or, if 'getroot' is true, the topmost parent in the inheritance hierarchy. * * Note: Because this function assumes that the relation whose OID is passed * as an argument will have precisely one parent, it should only be called * when it is known that the relation is a partition. */ Oid -get_partition_parent(Oid relid) +get_partition_parent(Oid relid, bool getroot) +{ + Relation inhRel; + Oid parentOid; + + inhRel = heap_open(InheritsRelationId, AccessShareLock); + + parentOid = get_partition_parent_recurse(inhRel, relid, getroot); + if (parentOid == InvalidOid) + elog(ERROR, "could not find parent of relation %u", relid); + + heap_close(inhRel, AccessShareLock); + + return parentOid; +} + +/* + * get_partition_parent_recurse + * Recursive part of get_partition_parent + */ +static Oid +get_partition_parent_recurse(Relation inhRel, Oid relid, bool getroot) { - Form_pg_inherits form; - Relation catalogRelation; SysScanDesc scan; ScanKeyData key[2]; HeapTuple tuple; - Oid result; - - catalogRelation = heap_open(InheritsRelationId, AccessShareLock); + Oid result = InvalidOid; ScanKeyInit(&key[0], Anum_pg_inherits_inhrelid, @@ -1412,18 +1432,26 @@ get_partition_parent(Oid relid) BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(1)); - scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, + /* Obtain the direct parent, and release resources before recursing */ + scan = systable_beginscan(inhRel, InheritsRelidSeqnoIndexId, true, NULL, 2, key); - tuple = systable_getnext(scan); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "could not find tuple for parent of relation %u", relid); - - form = (Form_pg_inherits) GETSTRUCT(tuple); - result = form->inhparent; - + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_inherits) GETSTRUCT(tuple))->inhparent; systable_endscan(scan); - heap_close(catalogRelation, AccessShareLock); + + /* + * If we were asked to recurse, do so now. Except that if we didn't get a + * valid parent, then the 'relid' argument was already the topmost parent, + * so return that. + */ + if (getroot) + { + if (OidIsValid(result)) + return get_partition_parent_recurse(inhRel, result, getroot); + else + return relid; + } return result; } @@ -2505,7 +2533,7 @@ generate_partition_qual(Relation rel) return copyObject(rel->rd_partcheck); /* Grab at least an AccessShareLock on the parent table */ - parent = heap_open(get_partition_parent(RelationGetRelid(rel)), + parent = heap_open(get_partition_parent(RelationGetRelid(rel), false), AccessShareLock); /* Get pg_class.relpartbound */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 218224a156..6003afdd03 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -1292,7 +1292,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, */ if (is_partition && relOid != oldRelOid) { - state->partParentOid = get_partition_parent(relOid); + state->partParentOid = get_partition_parent(relOid, false); if (OidIsValid(state->partParentOid)) LockRelationOid(state->partParentOid, AccessExclusiveLock); } @@ -5843,7 +5843,8 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) /* If rel is partition, shouldn't drop NOT NULL if parent has the same */ if (rel->rd_rel->relispartition) { - Oid parentId = get_partition_parent(RelationGetRelid(rel)); + Oid parentId = get_partition_parent(RelationGetRelid(rel), + false); Relation parent = heap_open(parentId, AccessShareLock); TupleDesc tupDesc = RelationGetDescr(parent); AttrNumber parent_attnum; @@ -14360,7 +14361,7 @@ ATExecDetachPartition(Relation rel, RangeVar *name) if (!has_superclass(idxid)) continue; - Assert((IndexGetRelation(get_partition_parent(idxid), false) == + Assert((IndexGetRelation(get_partition_parent(idxid, false), false) == RelationGetRelid(rel))); idx = index_open(idxid, AccessExclusiveLock); @@ -14489,7 +14490,7 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) /* Silently do nothing if already in the right state */ currParent = !has_superclass(partIdxId) ? InvalidOid : - get_partition_parent(partIdxId); + get_partition_parent(partIdxId, false); if (currParent != RelationGetRelid(parentIdx)) { IndexInfo *childInfo; @@ -14722,8 +14723,10 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl) /* make sure we see the validation we just did */ CommandCounterIncrement(); - parentIdxId = get_partition_parent(RelationGetRelid(partedIdx)); - parentTblId = get_partition_parent(RelationGetRelid(partedTbl)); + parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), + false); + parentTblId = get_partition_parent(RelationGetRelid(partedTbl), + false); parentIdx = relation_open(parentIdxId, AccessExclusiveLock); parentTbl = relation_open(parentTblId, AccessExclusiveLock); Assert(!parentIdx->rd_index->indisvalid); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index ce9a4e16cf..579cb3ddb9 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -15,10 +15,12 @@ #include "postgres.h" #include "catalog/pg_inherits_fn.h" +#include "catalog/pg_type.h" #include "executor/execPartition.h" #include "executor/executor.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "utils/lsyscache.h" #include "utils/rls.h" #include "utils/ruleutils.h" @@ -36,6 +38,7 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, Datum *values, bool *isnull, int maxfieldlen); +static List *adjust_onconflictset_tlist(List *tlist, TupleConversionMap *map); /* * ExecSetupPartitionTupleRouting - sets up information needed during @@ -64,6 +67,8 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) int num_update_rri = 0, update_rri_index = 0; PartitionTupleRouting *proute; + int nparts; + ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL; /* * Get the information about the partition tree after locking all the @@ -74,20 +79,16 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) proute->partition_dispatch_info = RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch, &leaf_parts); - proute->num_partitions = list_length(leaf_parts); - proute->partitions = (ResultRelInfo **) palloc(proute->num_partitions * - sizeof(ResultRelInfo *)); + proute->num_partitions = nparts = list_length(leaf_parts); + proute->partitions = + (ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *)); proute->parent_child_tupconv_maps = - (TupleConversionMap **) palloc0(proute->num_partitions * - sizeof(TupleConversionMap *)); - proute->partition_oids = (Oid *) palloc(proute->num_partitions * - sizeof(Oid)); + (TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *)); + proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid)); /* Set up details specific to the type of tuple routing we are doing. */ - if (mtstate && mtstate->operation == CMD_UPDATE) + if (node && node->operation == CMD_UPDATE) { - ModifyTable *node = (ModifyTable *) mtstate->ps.plan; - update_rri = mtstate->resultRelInfo; num_update_rri = list_length(node->plans); proute->subplan_partition_offsets = @@ -109,6 +110,21 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) */ proute->partition_tuple_slot = MakeTupleTableSlot(NULL); + /* + * We might need these arrays for conflict checking and handling the + * DO UPDATE action + */ + if (node && node->onConflictAction != ONCONFLICT_NONE) + { + /* Indexes are always needed. */ + proute->partition_arbiter_indexes = + (List **) palloc(nparts * sizeof(List *)); + /* Only needed for the DO UPDATE action. */ + if (node->onConflictAction == ONCONFLICT_UPDATE) + proute->partition_conflproj_tdescs = + (TupleDesc *) palloc(nparts * sizeof(TupleDesc)); + } + i = 0; foreach(cell, leaf_parts) { @@ -475,9 +491,6 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, &mtstate->ps, RelationGetDescr(partrel)); } - Assert(proute->partitions[partidx] == NULL); - proute->partitions[partidx] = leaf_part_rri; - /* * Save a tuple conversion map to convert a tuple routed to this partition * from the parent's type to the partition's. @@ -487,6 +500,141 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, RelationGetDescr(partrel), gettext_noop("could not convert row type")); + /* + * Initialize information about this partition that's needed to handle + * the ON CONFLICT clause. + */ + if (node && node->onConflictAction != ONCONFLICT_NONE) + { + TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx]; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + TupleDesc partrelDesc = RelationGetDescr(partrel); + ExprContext *econtext = mtstate->ps.ps_ExprContext; + ListCell *lc; + List *my_arbiterindexes = NIL; + + if (node->onConflictAction == ONCONFLICT_UPDATE) + { + List *onconflset; + TupleDesc tupDesc; + + Assert(node->onConflictSet != NIL); + + /* + * If partition's tuple descriptor differs from the root parent, + * we need to adjust the onConflictSet target list to account for + * differences in attribute numbers. + */ + if (map != NULL) + { + /* + * First convert Vars to contain partition's atttribute + * numbers. + */ + + /* Convert Vars referencing EXCLUDED pseudo-relation. */ + onconflset = map_partition_varattnos(node->onConflictSet, + INNER_VAR, + partrel, + firstResultRel, NULL); + /* Convert Vars referencing main target relation. */ + onconflset = map_partition_varattnos(onconflset, + firstVarno, + partrel, + firstResultRel, NULL); + + /* + * The original list wouldn't contain entries for the + * partition's dropped attributes, which we must be accounted + * for because targetlist must have all the attributes of the + * underlying table including the dropped ones. Fix that and + * reorder target list entries if their resnos change as a + * result of the adjustment. + */ + onconflset = adjust_onconflictset_tlist(onconflset, map); + } + else + /* Just reuse the original one. */ + onconflset = node->onConflictSet; + + /* + * We must set mtstate->mt_conflproj's tuple descriptor to this + * before trying to use it for projection. + */ + tupDesc = ExecTypeFromTL(onconflset, partrelDesc->tdhasoid); + PinTupleDesc(tupDesc); + proute->partition_conflproj_tdescs[partidx] = tupDesc; + + leaf_part_rri->ri_onConflictSetProj = + ExecBuildProjectionInfo(onconflset, econtext, + mtstate->mt_conflproj, + &mtstate->ps, partrelDesc); + + if (node->onConflictWhere) + { + if (map != NULL) + { + /* + * Convert the Vars to contain partition's atttribute + * numbers + */ + List *onconflwhere; + + /* Convert Vars referencing EXCLUDED pseudo-relation. */ + onconflwhere = map_partition_varattnos((List *) + node->onConflictWhere, + INNER_VAR, + partrel, + firstResultRel, NULL); + /* Convert Vars referencing main target relation. */ + onconflwhere = map_partition_varattnos((List *) + onconflwhere, + firstVarno, + partrel, + firstResultRel, NULL); + leaf_part_rri->ri_onConflictSetWhere = + ExecInitQual(onconflwhere, &mtstate->ps); + } + else + /* Just reuse the original one. */ + leaf_part_rri->ri_onConflictSetWhere = + resultRelInfo->ri_onConflictSetWhere; + } + } + + /* Initialize arbiter indexes list, if any. */ + foreach(lc, ((ModifyTable *) mtstate->ps.plan)->arbiterIndexes) + { + Oid parentArbiterIndexOid = lfirst_oid(lc); + int i; + + /* + * Find parentArbiterIndexOid's child in this partition and add it + * to my_arbiterindexes. + */ + for (i = 0; i < leaf_part_rri->ri_NumIndices; i++) + { + Relation index = leaf_part_rri->ri_IndexRelationDescs[i]; + Oid indexOid = RelationGetRelid(index); + + if (parentArbiterIndexOid == + get_partition_parent(indexOid, true)) + my_arbiterindexes = lappend_oid(my_arbiterindexes, + indexOid); + } + } + + /* + * Use this list instead of the original one containing parent's + * indexes. + */ + proute->partition_arbiter_indexes[partidx] = my_arbiterindexes; + } + + Assert(proute->partitions[partidx] == NULL); + proute->partitions[partidx] = leaf_part_rri; + MemoryContextSwitchTo(oldContext); return leaf_part_rri; @@ -946,3 +1094,66 @@ ExecBuildSlotPartitionKeyDescription(Relation rel, return buf.data; } + +/* + * Adjust the targetlist entries of an inherited ON CONFLICT DO UPDATE + * operation for a given partition + * + * The expressions have already been fixed, but we have to make sure that the + * target resnos match the partition. In some cases, this can force us to + * re-order the tlist to preserve resno ordering. + * + * Scribbles on the input tlist, so callers must make sure to make a copy + * before passing it to us. + */ +static List * +adjust_onconflictset_tlist(List *tlist, TupleConversionMap *map) +{ + List *new_tlist = NIL; + TupleDesc tupdesc = map->outdesc; + AttrNumber *attrMap = map->attrMap; + int numattrs = tupdesc->natts; + int attrno; + + for (attrno = 1; attrno <= numattrs; attrno++) + { + Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1); + TargetEntry *tle; + + if (attrMap[attrno - 1] != 0) + { + Assert(!att_tup->attisdropped); + + /* Get the corresponding tlist entry from the given tlist */ + tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1); + + /* Get the resno right */ + if (tle->resno != attrno) + tle->resno = attrno; + } + else + { + Node *expr; + + Assert(att_tup->attisdropped); + + /* Insert NULL for dropped column */ + expr = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + (Datum) 0, + true, /* isnull */ + true /* byval */ ); + + tle = makeTargetEntry((Expr *) expr, + attrno, + pstrdup(NameStr(att_tup->attname)), + false); + } + + new_tlist = lappend(new_tlist, tle); + } + + return new_tlist; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 4fa2d7265f..29f155e3a5 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -56,6 +56,7 @@ static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, + TupleDesc onConflictSetTupdesc, ItemPointer conflictTid, TupleTableSlot *planSlot, TupleTableSlot *excludedSlot, @@ -66,7 +67,8 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, ResultRelInfo *targetRelInfo, - TupleTableSlot *slot); + TupleTableSlot *slot, + int *partition_index); static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node); static void ExecSetupChildParentMapForTcs(ModifyTableState *mtstate); static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate); @@ -264,6 +266,7 @@ ExecInsert(ModifyTableState *mtstate, TupleTableSlot *slot, TupleTableSlot *planSlot, EState *estate, + int partition_index, bool canSetTag) { HeapTuple tuple; @@ -421,8 +424,18 @@ ExecInsert(ModifyTableState *mtstate, ItemPointerData conflictTid; bool specConflict; List *arbiterIndexes; + PartitionTupleRouting *proute = + mtstate->mt_partition_tuple_routing; - arbiterIndexes = node->arbiterIndexes; + /* Use the appropriate list of arbiter indexes. */ + if (mtstate->mt_partition_tuple_routing != NULL) + { + Assert(partition_index >= 0 && proute != NULL); + arbiterIndexes = + proute->partition_arbiter_indexes[partition_index]; + } + else + arbiterIndexes = node->arbiterIndexes; /* * Do a non-conclusive check for conflicts first. @@ -451,8 +464,20 @@ ExecInsert(ModifyTableState *mtstate, * tuple. */ TupleTableSlot *returning = NULL; + TupleDesc onconfl_tupdesc; + + /* Use the appropriate tuple descriptor. */ + if (mtstate->mt_partition_tuple_routing != NULL) + { + Assert(partition_index >= 0 && proute != NULL); + onconfl_tupdesc = + proute->partition_conflproj_tdescs[partition_index]; + } + else + onconfl_tupdesc = mtstate->mt_conflproj_tupdesc; if (ExecOnConflictUpdate(mtstate, resultRelInfo, + onconfl_tupdesc, &conflictTid, planSlot, slot, estate, canSetTag, &returning)) { @@ -1052,10 +1077,23 @@ lreplace:; bool tuple_deleted; TupleTableSlot *ret_slot; PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; - int map_index; + int map_index, + partition_index; TupleConversionMap *tupconv_map; /* + * Disallow an INSERT ON CONFLICT DO UPDATE that causes the + * original row to migrate to a different partition. Maybe this + * can be implemented some day, but it seems a fringe feature with + * little redeeming value. + */ + if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("invalid ON UPDATE specification"), + errdetail("The result tuple would appear in a different partition than the original tuple."))); + + /* * When an UPDATE is run on a leaf partition, we will not have * partition tuple routing set up. In that case, fail with * partition constraint violation error. @@ -1125,10 +1163,11 @@ lreplace:; */ Assert(mtstate->rootResultRelInfo != NULL); slot = ExecPrepareTupleRouting(mtstate, estate, proute, - mtstate->rootResultRelInfo, slot); + mtstate->rootResultRelInfo, slot, + &partition_index); ret_slot = ExecInsert(mtstate, slot, planSlot, - estate, canSetTag); + estate, partition_index, canSetTag); /* Revert ExecPrepareTupleRouting's node change. */ estate->es_result_relation_info = resultRelInfo; @@ -1304,6 +1343,7 @@ lreplace:; static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, + TupleDesc onConflictSetTupdesc, ItemPointer conflictTid, TupleTableSlot *planSlot, TupleTableSlot *excludedSlot, @@ -1419,6 +1459,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, ExecCheckHeapTupleVisible(estate, &tuple, buffer); /* Store target's existing tuple in the state's dedicated slot */ + ExecSetSlotDescriptor(mtstate->mt_existing, RelationGetDescr(relation)); ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false); /* @@ -1462,6 +1503,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, } /* Project the new tuple version */ + ExecSetSlotDescriptor(mtstate->mt_conflproj, onConflictSetTupdesc); ExecProject(resultRelInfo->ri_onConflictSetProj); /* @@ -1631,13 +1673,16 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) * In mtstate, transition capture changes may also need to be reverted. * * Returns a slot holding the tuple of the partition rowtype. + * *partition_index is set to the index of the partition that the input + * tuple is routed to. */ static TupleTableSlot * ExecPrepareTupleRouting(ModifyTableState *mtstate, EState *estate, PartitionTupleRouting *proute, ResultRelInfo *targetRelInfo, - TupleTableSlot *slot) + TupleTableSlot *slot, + int *partition_index) { int partidx; ResultRelInfo *partrel; @@ -1720,6 +1765,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, proute->partition_tuple_slot, &slot); + *partition_index = partidx; return slot; } @@ -2065,16 +2111,21 @@ ExecModifyTable(PlanState *pstate) switch (operation) { case CMD_INSERT: + { + int partition_index; + /* Prepare for tuple routing if needed. */ if (proute) slot = ExecPrepareTupleRouting(node, estate, proute, - resultRelInfo, slot); + resultRelInfo, slot, + &partition_index); slot = ExecInsert(node, slot, planSlot, - estate, node->canSetTag); + estate, partition_index, node->canSetTag); /* Revert ExecPrepareTupleRouting's state change. */ if (proute) estate->es_result_relation_info = resultRelInfo; break; + } case CMD_UPDATE: slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot, &node->mt_epqstate, estate, node->canSetTag); @@ -2178,8 +2229,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) subplan = (Plan *) lfirst(l); /* Initialize the usesFdwDirectModify flag */ - resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, - node->fdwDirectModifyPlans); + resultRelInfo->ri_usesFdwDirectModify = + bms_is_member(i, node->fdwDirectModifyPlans); /* * Verify result relation is a valid target for the current operation @@ -2252,7 +2303,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, rel); + ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a @@ -2368,9 +2419,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) econtext = mtstate->ps.ps_ExprContext; relationDesc = resultRelInfo->ri_RelationDesc->rd_att; - /* initialize slot for the existing tuple */ - mtstate->mt_existing = - ExecInitExtraTupleSlot(mtstate->ps.state, relationDesc); + /* + * Initialize slot for the existing tuple. We determine which + * tupleDesc to use for this after we have determined which relation + * the insert/update will be applied to, possibly after performing + * tuple routing. + */ + mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state, NULL); /* carried forward solely for the benefit of explain */ mtstate->mt_excludedtlist = node->exclRelTlist; @@ -2378,8 +2433,16 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* create target slot for UPDATE SET projection */ tupDesc = ExecTypeFromTL((List *) node->onConflictSet, relationDesc->tdhasoid); + PinTupleDesc(tupDesc); + mtstate->mt_conflproj_tupdesc = tupDesc; + + /* + * Just like the "existing tuple" slot, we'll defer deciding which + * tupleDesc to use for this slot to a point where tuple routing has + * been performed. + */ mtstate->mt_conflproj = - ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc); + ExecInitExtraTupleSlot(mtstate->ps.state, NULL); /* build UPDATE SET projection state */ resultRelInfo->ri_onConflictSetProj = diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index cf1a34e41a..a4b5aaef44 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1026,13 +1026,6 @@ transformOnConflictClause(ParseState *pstate, TargetEntry *te; int attno; - if (targetrel->rd_partdesc) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("%s cannot be applied to partitioned table \"%s\"", - "ON CONFLICT DO UPDATE", - RelationGetRelationName(targetrel)))); - /* * All INSERT expressions have been parsed, get ready for potentially * existing SET statements that need to be processed like an UPDATE. diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2faf0ca26e..70ddb225a1 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -51,7 +51,7 @@ extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, extern void check_new_partition_bound(char *relname, Relation parent, PartitionBoundSpec *spec); -extern Oid get_partition_parent(Oid relid); +extern Oid get_partition_parent(Oid relid, bool getroot); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); extern List *map_partition_varattnos(List *expr, int fromrel_varno, diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 03a599ad57..93f490233e 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -90,6 +90,14 @@ typedef struct PartitionDispatchData *PartitionDispatch; * given leaf partition's rowtype after that * partition is chosen for insertion by * tuple-routing. + * partition_conflproj_tdescs Array of TupleDescs per partition, each + * describing the record type of the ON CONFLICT + * DO UPDATE SET target list as applied to a + * given partition + * partition_arbiter_indexes Array of Lists with each slot containing the + * list of OIDs of a given partition's indexes + * that are to be used as arbiter indexes for + * ON CONFLICT checking *----------------------- */ typedef struct PartitionTupleRouting @@ -106,6 +114,8 @@ typedef struct PartitionTupleRouting int num_subplan_partition_offsets; TupleTableSlot *partition_tuple_slot; TupleTableSlot *root_tuple_slot; + TupleDesc *partition_conflproj_tdescs; + List **partition_arbiter_indexes; } PartitionTupleRouting; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index d9e591802f..88ef2b71f3 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -993,6 +993,7 @@ typedef struct ModifyTableState List *mt_excludedtlist; /* the excluded pseudo relation's tlist */ TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... projection target */ + TupleDesc mt_conflproj_tupdesc; /* tuple descriptor for it */ /* Tuple-routing support info */ struct PartitionTupleRouting *mt_partition_tuple_routing; diff --git a/src/test/regress/expected/insert_conflict.out b/src/test/regress/expected/insert_conflict.out index 2650faedee..a9677f06e6 100644 --- a/src/test/regress/expected/insert_conflict.out +++ b/src/test/regress/expected/insert_conflict.out @@ -786,16 +786,67 @@ select * from selfconflict; (3 rows) drop table selfconflict; --- check that the following works: --- insert into partitioned_table on conflict do nothing -create table parted_conflict_test (a int, b char) partition by list (a); -create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1); +-- check ON CONFLICT handling with partitioned tables +create table parted_conflict_test (a int unique, b char) partition by list (a); +create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1, 2); +-- no indexes required here insert into parted_conflict_test values (1, 'a') on conflict do nothing; -insert into parted_conflict_test values (1, 'a') on conflict do nothing; --- however, on conflict do update is not supported yet -insert into parted_conflict_test values (1) on conflict (b) do update set a = excluded.a; -ERROR: ON CONFLICT DO UPDATE cannot be applied to partitioned table "parted_conflict_test" --- but it works OK if we target the partition directly -insert into parted_conflict_test_1 values (1) on conflict (b) do -update set a = excluded.a; +-- index on a required, which does exist in parent +insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b; +-- targeting partition directly will work +insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b; +-- index on b required, which doesn't exist in parent +insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a; +ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification +-- targeting partition directly will work +insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a; +-- should see (2, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 2 | b +(1 row) + +-- now check that DO UPDATE works correctly for target partition with +-- different attribute numbers +create table parted_conflict_test_2 (b char, a int unique); +alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3); +truncate parted_conflict_test; +insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b; +-- should see (3, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 3 | b +(1 row) + +-- case where parent will have a dropped column, but the partition won't +alter table parted_conflict_test drop b, add b char; +create table parted_conflict_test_3 partition of parted_conflict_test for values in (4); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; +-- should see (4, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 4 | b +(1 row) + +-- case with multi-level partitioning +create table parted_conflict_test_4 partition of parted_conflict_test for values in (5) partition by list (a); +create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for values in (5); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; +-- should see (5, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 5 | b +(1 row) + drop table parted_conflict_test; diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out index 99be9ac6e9..f53ac6bdf1 100644 --- a/src/test/regress/expected/triggers.out +++ b/src/test/regress/expected/triggers.out @@ -2328,6 +2328,39 @@ insert into my_table values (3, 'CCC'), (4, 'DDD') NOTICE: trigger = my_table_update_trig, old table = (3,CCC), (4,DDD), new table = (3,CCC:CCC), (4,DDD:DDD) NOTICE: trigger = my_table_insert_trig, new table = -- +-- now using a partitioned table +-- +create table iocdu_tt_parted (a int primary key, b text) partition by list (a); +create table iocdu_tt_parted1 partition of iocdu_tt_parted for values in (1); +create table iocdu_tt_parted2 partition of iocdu_tt_parted for values in (2); +create table iocdu_tt_parted3 partition of iocdu_tt_parted for values in (3); +create table iocdu_tt_parted4 partition of iocdu_tt_parted for values in (4); +create trigger iocdu_tt_parted_insert_trig + after insert on iocdu_tt_parted referencing new table as new_table + for each statement execute procedure dump_insert(); +create trigger iocdu_tt_parted_update_trig + after update on iocdu_tt_parted referencing old table as old_table new table as new_table + for each statement execute procedure dump_update(); +-- inserts only +insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB') + on conflict (a) do + update set b = iocdu_tt_parted.b || ':' || excluded.b; +NOTICE: trigger = iocdu_tt_parted_update_trig, old table = , new table = +NOTICE: trigger = iocdu_tt_parted_insert_trig, new table = (1,AAA), (2,BBB) +-- mixture of inserts and updates +insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB'), (3, 'CCC'), (4, 'DDD') + on conflict (a) do + update set b = iocdu_tt_parted.b || ':' || excluded.b; +NOTICE: trigger = iocdu_tt_parted_update_trig, old table = (1,AAA), (2,BBB), new table = (1,AAA:AAA), (2,BBB:BBB) +NOTICE: trigger = iocdu_tt_parted_insert_trig, new table = (3,CCC), (4,DDD) +-- updates only +insert into iocdu_tt_parted values (3, 'CCC'), (4, 'DDD') + on conflict (a) do + update set b = iocdu_tt_parted.b || ':' || excluded.b; +NOTICE: trigger = iocdu_tt_parted_update_trig, old table = (3,CCC), (4,DDD), new table = (3,CCC:CCC), (4,DDD:DDD) +NOTICE: trigger = iocdu_tt_parted_insert_trig, new table = +drop table iocdu_tt_parted; +-- -- Verify that you can't create a trigger with transition tables for -- more than one event. -- diff --git a/src/test/regress/sql/insert_conflict.sql b/src/test/regress/sql/insert_conflict.sql index 32c647e3f8..73122479a3 100644 --- a/src/test/regress/sql/insert_conflict.sql +++ b/src/test/regress/sql/insert_conflict.sql @@ -472,15 +472,59 @@ select * from selfconflict; drop table selfconflict; --- check that the following works: --- insert into partitioned_table on conflict do nothing -create table parted_conflict_test (a int, b char) partition by list (a); -create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1); +-- check ON CONFLICT handling with partitioned tables +create table parted_conflict_test (a int unique, b char) partition by list (a); +create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1, 2); + +-- no indexes required here insert into parted_conflict_test values (1, 'a') on conflict do nothing; -insert into parted_conflict_test values (1, 'a') on conflict do nothing; --- however, on conflict do update is not supported yet -insert into parted_conflict_test values (1) on conflict (b) do update set a = excluded.a; --- but it works OK if we target the partition directly -insert into parted_conflict_test_1 values (1) on conflict (b) do -update set a = excluded.a; + +-- index on a required, which does exist in parent +insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b; + +-- targeting partition directly will work +insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b; + +-- index on b required, which doesn't exist in parent +insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a; + +-- targeting partition directly will work +insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a; + +-- should see (2, 'b') +select * from parted_conflict_test order by a; + +-- now check that DO UPDATE works correctly for target partition with +-- different attribute numbers +create table parted_conflict_test_2 (b char, a int unique); +alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3); +truncate parted_conflict_test; +insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b; + +-- should see (3, 'b') +select * from parted_conflict_test order by a; + +-- case where parent will have a dropped column, but the partition won't +alter table parted_conflict_test drop b, add b char; +create table parted_conflict_test_3 partition of parted_conflict_test for values in (4); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; + +-- should see (4, 'b') +select * from parted_conflict_test order by a; + +-- case with multi-level partitioning +create table parted_conflict_test_4 partition of parted_conflict_test for values in (5) partition by list (a); +create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for values in (5); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; + +-- should see (5, 'b') +select * from parted_conflict_test order by a; + drop table parted_conflict_test; diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql index 3354f4899f..3773c6bc98 100644 --- a/src/test/regress/sql/triggers.sql +++ b/src/test/regress/sql/triggers.sql @@ -1773,6 +1773,39 @@ insert into my_table values (3, 'CCC'), (4, 'DDD') update set b = my_table.b || ':' || excluded.b; -- +-- now using a partitioned table +-- + +create table iocdu_tt_parted (a int primary key, b text) partition by list (a); +create table iocdu_tt_parted1 partition of iocdu_tt_parted for values in (1); +create table iocdu_tt_parted2 partition of iocdu_tt_parted for values in (2); +create table iocdu_tt_parted3 partition of iocdu_tt_parted for values in (3); +create table iocdu_tt_parted4 partition of iocdu_tt_parted for values in (4); +create trigger iocdu_tt_parted_insert_trig + after insert on iocdu_tt_parted referencing new table as new_table + for each statement execute procedure dump_insert(); +create trigger iocdu_tt_parted_update_trig + after update on iocdu_tt_parted referencing old table as old_table new table as new_table + for each statement execute procedure dump_update(); + +-- inserts only +insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB') + on conflict (a) do + update set b = iocdu_tt_parted.b || ':' || excluded.b; + +-- mixture of inserts and updates +insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB'), (3, 'CCC'), (4, 'DDD') + on conflict (a) do + update set b = iocdu_tt_parted.b || ':' || excluded.b; + +-- updates only +insert into iocdu_tt_parted values (3, 'CCC'), (4, 'DDD') + on conflict (a) do + update set b = iocdu_tt_parted.b || ':' || excluded.b; + +drop table iocdu_tt_parted; + +-- -- Verify that you can't create a trigger with transition tables for -- more than one event. -- -- 2.11.0