From b61c6a375ef46ac72127f96eb8f0ca50b4230a4d Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 28 Feb 2018 17:58:00 +0900 Subject: [PATCH v4 3/3] Fix ON CONFLICT to work with partitioned tables --- doc/src/sgml/ddl.sgml | 15 --- src/backend/catalog/heap.c | 2 +- src/backend/catalog/partition.c | 62 ++++++++--- src/backend/commands/tablecmds.c | 15 +-- src/backend/executor/execPartition.c | 149 ++++++++++++++++++++++++-- src/backend/executor/nodeModifyTable.c | 58 ++++++++-- src/backend/optimizer/prep/preptlist.c | 6 +- src/backend/optimizer/prep/prepunion.c | 56 ++++++++++ src/backend/parser/analyze.c | 7 -- src/include/catalog/partition.h | 2 +- src/include/executor/execPartition.h | 16 +++ src/include/nodes/execnodes.h | 1 + src/include/optimizer/prep.h | 10 ++ src/test/regress/expected/insert_conflict.out | 73 +++++++++++-- src/test/regress/sql/insert_conflict.sql | 64 +++++++++-- 15 files changed, 443 insertions(+), 93 deletions(-) diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml index 3a54ba9d5a..8805b88d82 100644 --- a/doc/src/sgml/ddl.sgml +++ b/doc/src/sgml/ddl.sgml @@ -3324,21 +3324,6 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02 - Using the ON CONFLICT clause with partitioned tables - will cause an error if the conflict target is specified (see - for more details on how the clause - works). Therefore, it is not possible to specify - DO UPDATE as the alternative action, because - specifying the conflict target is mandatory in that case. On the other - hand, specifying DO NOTHING as the alternative action - works fine provided the conflict target is not specified. In that case, - unique constraints (or exclusion constraints) of the individual leaf - partitions are considered. - - - - - When an UPDATE causes a row to move from one partition to another, there is a chance that another concurrent UPDATE or DELETE misses this row. diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 3d80ff9e5b..13489162df 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1776,7 +1776,7 @@ heap_drop_with_catalog(Oid relid) elog(ERROR, "cache lookup failed for relation %u", relid); if (((Form_pg_class) GETSTRUCT(tuple))->relispartition) { - parentOid = get_partition_parent(relid); + parentOid = get_partition_parent(relid, false); LockRelationOid(parentOid, AccessExclusiveLock); /* diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 786c05df73..8dc73ae092 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -192,6 +192,7 @@ static int get_partition_bound_num_indexes(PartitionBoundInfo b); static int get_greatest_modulus(PartitionBoundInfo b); static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc, Datum *values, bool *isnull); +static Oid get_partition_parent_recurse(Relation inhRel, Oid relid, bool getroot); /* * RelationBuildPartitionDesc @@ -1384,24 +1385,43 @@ check_default_allows_bound(Relation parent, Relation default_rel, /* * get_partition_parent + * Obtain direct parent or topmost ancestor of given relation * - * Returns inheritance parent of a partition by scanning pg_inherits + * Returns direct inheritance parent of a partition by scanning pg_inherits; + * or, if 'getroot' is true, the topmost parent in the inheritance hierarchy. * * Note: Because this function assumes that the relation whose OID is passed * as an argument will have precisely one parent, it should only be called * when it is known that the relation is a partition. */ Oid -get_partition_parent(Oid relid) +get_partition_parent(Oid relid, bool getroot) +{ + Relation inhRel; + Oid parentOid; + + inhRel = heap_open(InheritsRelationId, AccessShareLock); + + parentOid = get_partition_parent_recurse(inhRel, relid, getroot); + if (parentOid == InvalidOid) + elog(ERROR, "could not find parent of relation %u", relid); + + heap_close(inhRel, AccessShareLock); + + return parentOid; +} + +/* + * get_partition_parent_recurse + * Recursive part of get_partition_parent + */ +static Oid +get_partition_parent_recurse(Relation inhRel, Oid relid, bool getroot) { - Form_pg_inherits form; - Relation catalogRelation; SysScanDesc scan; ScanKeyData key[2]; HeapTuple tuple; - Oid result; - - catalogRelation = heap_open(InheritsRelationId, AccessShareLock); + Oid result = InvalidOid; ScanKeyInit(&key[0], Anum_pg_inherits_inhrelid, @@ -1412,18 +1432,26 @@ get_partition_parent(Oid relid) BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(1)); - scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true, + /* Obtain the direct parent, and release resources before recursing */ + scan = systable_beginscan(inhRel, InheritsRelidSeqnoIndexId, true, NULL, 2, key); - tuple = systable_getnext(scan); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "could not find tuple for parent of relation %u", relid); - - form = (Form_pg_inherits) GETSTRUCT(tuple); - result = form->inhparent; - + if (HeapTupleIsValid(tuple)) + result = ((Form_pg_inherits) GETSTRUCT(tuple))->inhparent; systable_endscan(scan); - heap_close(catalogRelation, AccessShareLock); + + /* + * If we were asked to recurse, do so now. Except that if we didn't get a + * valid parent, then the 'relid' argument was already the topmost parent, + * so return that. + */ + if (getroot) + { + if (OidIsValid(result)) + return get_partition_parent_recurse(inhRel, result, getroot); + else + return relid; + } return result; } @@ -2505,7 +2533,7 @@ generate_partition_qual(Relation rel) return copyObject(rel->rd_partcheck); /* Grab at least an AccessShareLock on the parent table */ - parent = heap_open(get_partition_parent(RelationGetRelid(rel)), + parent = heap_open(get_partition_parent(RelationGetRelid(rel), false), AccessShareLock); /* Get pg_class.relpartbound */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 218224a156..6003afdd03 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -1292,7 +1292,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, */ if (is_partition && relOid != oldRelOid) { - state->partParentOid = get_partition_parent(relOid); + state->partParentOid = get_partition_parent(relOid, false); if (OidIsValid(state->partParentOid)) LockRelationOid(state->partParentOid, AccessExclusiveLock); } @@ -5843,7 +5843,8 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode) /* If rel is partition, shouldn't drop NOT NULL if parent has the same */ if (rel->rd_rel->relispartition) { - Oid parentId = get_partition_parent(RelationGetRelid(rel)); + Oid parentId = get_partition_parent(RelationGetRelid(rel), + false); Relation parent = heap_open(parentId, AccessShareLock); TupleDesc tupDesc = RelationGetDescr(parent); AttrNumber parent_attnum; @@ -14360,7 +14361,7 @@ ATExecDetachPartition(Relation rel, RangeVar *name) if (!has_superclass(idxid)) continue; - Assert((IndexGetRelation(get_partition_parent(idxid), false) == + Assert((IndexGetRelation(get_partition_parent(idxid, false), false) == RelationGetRelid(rel))); idx = index_open(idxid, AccessExclusiveLock); @@ -14489,7 +14490,7 @@ ATExecAttachPartitionIdx(List **wqueue, Relation parentIdx, RangeVar *name) /* Silently do nothing if already in the right state */ currParent = !has_superclass(partIdxId) ? InvalidOid : - get_partition_parent(partIdxId); + get_partition_parent(partIdxId, false); if (currParent != RelationGetRelid(parentIdx)) { IndexInfo *childInfo; @@ -14722,8 +14723,10 @@ validatePartitionedIndex(Relation partedIdx, Relation partedTbl) /* make sure we see the validation we just did */ CommandCounterIncrement(); - parentIdxId = get_partition_parent(RelationGetRelid(partedIdx)); - parentTblId = get_partition_parent(RelationGetRelid(partedTbl)); + parentIdxId = get_partition_parent(RelationGetRelid(partedIdx), + false); + parentTblId = get_partition_parent(RelationGetRelid(partedTbl), + false); parentIdx = relation_open(parentIdxId, AccessExclusiveLock); parentTbl = relation_open(parentTblId, AccessExclusiveLock); Assert(!parentIdx->rd_index->indisvalid); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index f6fe7cd61d..8876d91a44 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -19,6 +19,7 @@ #include "executor/executor.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "optimizer/prep.h" #include "utils/lsyscache.h" #include "utils/rls.h" #include "utils/ruleutils.h" @@ -64,6 +65,7 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) int num_update_rri = 0, update_rri_index = 0; PartitionTupleRouting *proute; + int nparts; /* * Get the information about the partition tree after locking all the @@ -74,14 +76,12 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) proute->partition_dispatch_info = RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch, &leaf_parts); - proute->num_partitions = list_length(leaf_parts); - proute->partitions = (ResultRelInfo **) palloc(proute->num_partitions * - sizeof(ResultRelInfo *)); + proute->num_partitions = nparts = list_length(leaf_parts); + proute->partitions = + (ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *)); proute->parent_child_tupconv_maps = - (TupleConversionMap **) palloc0(proute->num_partitions * - sizeof(TupleConversionMap *)); - proute->partition_oids = (Oid *) palloc(proute->num_partitions * - sizeof(Oid)); + (TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *)); + proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid)); /* Set up details specific to the type of tuple routing we are doing. */ if (mtstate && mtstate->operation == CMD_UPDATE) @@ -109,6 +109,18 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) */ proute->partition_tuple_slot = MakeTupleTableSlot(NULL); + /* + * We might need these arrays for conflict checking and handling the + * DO UPDATE action + */ + if (mtstate && mtstate->mt_onconflict != ONCONFLICT_NONE) + { + proute->partition_arbiter_indexes = + (List **) palloc(nparts * sizeof(List *)); + proute->partition_onconfl_tdescs = + (TupleDesc *) palloc(nparts * sizeof(TupleDesc)); + } + i = 0; foreach(cell, leaf_parts) { @@ -475,9 +487,6 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, &mtstate->ps, RelationGetDescr(partrel)); } - Assert(proute->partitions[partidx] == NULL); - proute->partitions[partidx] = leaf_part_rri; - /* * Save a tuple conversion map to convert a tuple routed to this partition * from the parent's type to the partition's. @@ -487,6 +496,126 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, RelationGetDescr(partrel), gettext_noop("could not convert row type")); + /* + * Initialize information about this partition that's needed to handle + * the ON CONFLICT clause. + */ + if (node && node->onConflictAction != ONCONFLICT_NONE) + { + TupleDesc partrelDesc = RelationGetDescr(partrel); + TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx]; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + ExprContext *econtext = mtstate->ps.ps_ExprContext; + ListCell *lc; + List *my_arbiterindexes = NIL; + + /* + * If the root parent and partition have the same tuple + * descriptor, just reuse the original projection and WHERE + * clause expressions for partition. + */ + if (map == NULL) + { + /* Use the existing slot. */ + leaf_part_rri->ri_onConflictSetProj = + resultRelInfo->ri_onConflictSetProj; + leaf_part_rri->ri_onConflictSetWhere = + resultRelInfo->ri_onConflictSetWhere; + proute->partition_onconfl_tdescs[partidx] = + mtstate->mt_conflproj_tupdesc; + PinTupleDesc(proute->partition_onconfl_tdescs[partidx]); + } + else + { + /* Convert expressions contain partition's attnos. */ + List *conv_setproj; + + /* + * First convert references to the EXCLUDED pseudo-relation, which + * was set to INNER_VAR by set_plan_references. + */ + conv_setproj = + map_partition_varattnos((List *) node->onConflictSet, + INNER_VAR, partrel, + firstResultRel, NULL); + + /* Then convert references to main target relation. */ + conv_setproj = + map_partition_varattnos((List *) conv_setproj, + firstVarno, partrel, + firstResultRel, NULL); + + conv_setproj = + adjust_and_expand_partition_tlist(RelationGetDescr(firstResultRel), + RelationGetDescr(partrel), + RelationGetRelationName(partrel), + firstVarno, + conv_setproj); + proute->partition_onconfl_tdescs[partidx] = + ExecTypeFromTL(conv_setproj, partrelDesc->tdhasoid); + PinTupleDesc(proute->partition_onconfl_tdescs[partidx]); + + leaf_part_rri->ri_onConflictSetProj = + ExecBuildProjectionInfo(conv_setproj, econtext, + mtstate->mt_conflproj, + &mtstate->ps, partrelDesc); + + /* For WHERE quals, map_partition_varattnos() suffices. */ + if (node->onConflictWhere) + { + List *conv_where; + ExprState *qualexpr; + + /* First convert references to EXCLUDED pseudo-relation. */ + conv_where = map_partition_varattnos((List *) + node->onConflictWhere, + INNER_VAR, + partrel, + firstResultRel, NULL); + /* Then convert references to main target relation. */ + conv_where = map_partition_varattnos((List *) + conv_where, + firstVarno, + partrel, firstResultRel, + NULL); + qualexpr = ExecInitQual(conv_where, &mtstate->ps); + leaf_part_rri->ri_onConflictSetWhere = qualexpr; + } + } + + /* Initialize arbiter indexes list, if any. */ + foreach(lc, ((ModifyTable *) mtstate->ps.plan)->arbiterIndexes) + { + Oid parentArbiterIndexOid = lfirst_oid(lc); + int i; + + /* + * Find parentArbiterIndexOid's child in this partition and add it + * to my_arbiterindexes. + */ + for (i = 0; i < leaf_part_rri->ri_NumIndices; i++) + { + Relation index = leaf_part_rri->ri_IndexRelationDescs[i]; + Oid indexOid = RelationGetRelid(index); + + if (parentArbiterIndexOid == + get_partition_parent(indexOid, true)) + my_arbiterindexes = lappend_oid(my_arbiterindexes, + indexOid); + } + } + + /* + * Use this list instead of the original one containing parent's + * indexes. + */ + proute->partition_arbiter_indexes[partidx] = my_arbiterindexes; + } + + Assert(proute->partitions[partidx] == NULL); + proute->partitions[partidx] = leaf_part_rri; + MemoryContextSwitchTo(oldContext); return leaf_part_rri; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index a9a48e914f..11780c0801 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -56,6 +56,7 @@ static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, + TupleDesc onConflictSetTupdesc, ItemPointer conflictTid, TupleTableSlot *planSlot, TupleTableSlot *excludedSlot, @@ -269,7 +270,9 @@ ExecInsert(ModifyTableState *mtstate, List *recheckIndexes = NIL; TupleTableSlot *result = NULL; TransitionCaptureState *ar_insert_trig_tcs; + TupleDesc partConflTupdesc = NULL; OnConflictAction onconflict = mtstate->mt_onconflict; + List *arbiterIndexes = NIL; /* * get the heap tuple out of the tuple table slot, making sure we have a @@ -285,8 +288,8 @@ ExecInsert(ModifyTableState *mtstate, /* Determine the partition to heap_insert the tuple into */ if (mtstate->mt_partition_tuple_routing) { - int leaf_part_index; PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + int leaf_part_index; /* * Away we go ... If we end up not finding a partition after all, @@ -373,6 +376,13 @@ ExecInsert(ModifyTableState *mtstate, tuple, proute->partition_tuple_slot, &slot); + + /* determine this partition's ON CONFLICT information */ + if (onconflict != ONCONFLICT_NONE) + { + arbiterIndexes = proute->partition_arbiter_indexes[leaf_part_index]; + partConflTupdesc = proute->partition_onconfl_tdescs[leaf_part_index]; + } } resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -509,9 +519,18 @@ ExecInsert(ModifyTableState *mtstate, uint32 specToken; ItemPointerData conflictTid; bool specConflict; - List *arbiterIndexes; + TupleDesc onconfl_tupdesc; - arbiterIndexes = ((ModifyTable *) mtstate->ps.plan)->arbiterIndexes; + if (!mtstate->mt_partition_tuple_routing) + { + /* in the partition case, this was already done */ + arbiterIndexes = + ((ModifyTable *) mtstate->ps.plan)->arbiterIndexes; + onconfl_tupdesc = mtstate->mt_conflproj_tupdesc; + } + + if (mtstate->mt_partition_tuple_routing) + onconfl_tupdesc = partConflTupdesc; /* * Do a non-conclusive check for conflicts first. @@ -542,6 +561,7 @@ ExecInsert(ModifyTableState *mtstate, TupleTableSlot *returning = NULL; if (ExecOnConflictUpdate(mtstate, resultRelInfo, + onconfl_tupdesc, &conflictTid, planSlot, slot, estate, canSetTag, &returning)) { @@ -1148,6 +1168,18 @@ lreplace:; TupleConversionMap *tupconv_map; /* + * Disallow an INSERT ON CONFLICT DO UPDATE that causes the + * original row to migrate to a different partition. Maybe this + * can be implemented some day, but it seems a fringe feature with + * little redeeming value. + */ + if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("invalid ON UPDATE specification"), + errdetail("The result tuple would appear in a different partition than the original tuple."))); + + /* * When an UPDATE is run on a leaf partition, we will not have * partition tuple routing set up. In that case, fail with * partition constraint violation error. @@ -1398,6 +1430,7 @@ lreplace:; static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, + TupleDesc onConflictSetTupdesc, ItemPointer conflictTid, TupleTableSlot *planSlot, TupleTableSlot *excludedSlot, @@ -1513,6 +1546,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, ExecCheckHeapTupleVisible(estate, &tuple, buffer); /* Store target's existing tuple in the state's dedicated slot */ + ExecSetSlotDescriptor(mtstate->mt_existing, RelationGetDescr(relation)); ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false); /* @@ -1556,6 +1590,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, } /* Project the new tuple version */ + ExecSetSlotDescriptor(mtstate->mt_conflproj, onConflictSetTupdesc); ExecProject(resultRelInfo->ri_onConflictSetProj); /* @@ -2160,8 +2195,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) subplan = (Plan *) lfirst(l); /* Initialize the usesFdwDirectModify flag */ - resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, - node->fdwDirectModifyPlans); + resultRelInfo->ri_usesFdwDirectModify = + bms_is_member(i, node->fdwDirectModifyPlans); /* * Verify result relation is a valid target for the current operation @@ -2233,7 +2268,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, rel); + ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a @@ -2349,9 +2384,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) econtext = mtstate->ps.ps_ExprContext; relationDesc = resultRelInfo->ri_RelationDesc->rd_att; - /* initialize slot for the existing tuple */ - mtstate->mt_existing = - ExecInitExtraTupleSlot(mtstate->ps.state, relationDesc); + /* + * initialize slot for the existing tuple. + */ + mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state, NULL); /* carried forward solely for the benefit of explain */ mtstate->mt_excludedtlist = node->exclRelTlist; @@ -2359,8 +2395,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* create target slot for UPDATE SET projection */ tupDesc = ExecTypeFromTL((List *) node->onConflictSet, relationDesc->tdhasoid); + PinTupleDesc(tupDesc); + mtstate->mt_conflproj_tupdesc = tupDesc; mtstate->mt_conflproj = - ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc); + ExecInitExtraTupleSlot(mtstate->ps.state, NULL); /* build UPDATE SET projection state */ resultRelInfo->ri_onConflictSetProj = diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index b6e658fe81..804e30c500 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -53,10 +53,6 @@ #include "utils/rel.h" -static List *expand_targetlist(List *tlist, int command_type, - Index result_relation, TupleDesc tupdesc); - - /* * preprocess_targetlist * Driver for preprocessing the parse tree targetlist. @@ -252,7 +248,7 @@ preprocess_targetlist(PlannerInfo *root) * tuple descriptor, add targetlist entries for any missing attributes, and * ensure the non-junk attributes appear in proper field order. */ -static List * +List * expand_targetlist(List *tlist, int command_type, Index result_relation, TupleDesc tupdesc) { diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index d0d9812da6..6f78b6a75a 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -2377,6 +2377,15 @@ adjust_inherited_tlist(List *tlist, AppendRelInfo *context) if (tle->resjunk) continue; /* ignore junk items */ + /* + * XXX ugly hack: must ignore dummy tlist entry added by + * expand_targetlist() for dropped columns in the parent table or we + * fail because there is no translation. Must find a better way to + * deal with this case, though. + */ + if (IsA(tle->expr, Const) && ((Const *) tle->expr)->constisnull) + continue; + /* Look up the translation of this column: it must be a Var */ if (tle->resno <= 0 || tle->resno > list_length(context->translated_vars)) @@ -2415,6 +2424,10 @@ adjust_inherited_tlist(List *tlist, AppendRelInfo *context) if (tle->resjunk) continue; /* ignore junk items */ + /* XXX ugly hack; see above */ + if (IsA(tle->expr, Const) && ((Const *) tle->expr)->constisnull) + continue; + if (tle->resno == attrno) new_tlist = lappend(new_tlist, tle); else if (tle->resno > attrno) @@ -2429,6 +2442,10 @@ adjust_inherited_tlist(List *tlist, AppendRelInfo *context) if (!tle->resjunk) continue; /* here, ignore non-junk items */ + /* XXX ugly hack; see above */ + if (IsA(tle->expr, Const) && ((Const *) tle->expr)->constisnull) + continue; + tle->resno = attrno; new_tlist = lappend(new_tlist, tle); attrno++; @@ -2438,6 +2455,45 @@ adjust_inherited_tlist(List *tlist, AppendRelInfo *context) } /* + * Given a targetlist for the parentRel of the given varno, adjust it to be in + * the correct order and to contain all the needed elements for the given + * partition. + */ +List * +adjust_and_expand_partition_tlist(TupleDesc parentDesc, + TupleDesc partitionDesc, + char *partitionRelname, + int parentVarno, + List *targetlist) +{ + AppendRelInfo appinfo; + List *result_tl; + + /* + * Fist, fix the target entries' resnos, by using inheritance translation. + */ + appinfo.type = T_AppendRelInfo; + appinfo.parent_relid = parentVarno; + appinfo.parent_reltype = InvalidOid; // parentRel->rd_rel->reltype; + appinfo.child_relid = -1; + appinfo.child_reltype = InvalidOid; // partrel->rd_rel->reltype; + appinfo.parent_reloid = 1; // dummy parentRel->rd_id; + appinfo.translated_vars = + make_inh_translation_list(parentDesc, partitionDesc, + partitionRelname, 1); + result_tl = adjust_inherited_tlist((List *) targetlist, &appinfo); + + /* + * Add any attributes that are missing in the source list, such + * as dropped columns in the partition. + */ + result_tl = expand_targetlist(result_tl, CMD_UPDATE, + parentVarno, partitionDesc); + + return result_tl; +} + +/* * adjust_appendrel_attrs_multilevel * Apply Var translations from a toplevel appendrel parent down to a child. * diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index c3a9617f67..92696f0607 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1025,13 +1025,6 @@ transformOnConflictClause(ParseState *pstate, TargetEntry *te; int attno; - if (targetrel->rd_partdesc) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("%s cannot be applied to partitioned table \"%s\"", - "ON CONFLICT DO UPDATE", - RelationGetRelationName(targetrel)))); - /* * All INSERT expressions have been parsed, get ready for potentially * existing SET statements that need to be processed like an UPDATE. diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2faf0ca26e..70ddb225a1 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -51,7 +51,7 @@ extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, extern void check_new_partition_bound(char *relname, Relation parent, PartitionBoundSpec *spec); -extern Oid get_partition_parent(Oid relid); +extern Oid get_partition_parent(Oid relid, bool getroot); extern List *get_qual_from_partbound(Relation rel, Relation parent, PartitionBoundSpec *spec); extern List *map_partition_varattnos(List *expr, int fromrel_varno, diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 03a599ad57..9bad06a8e5 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -90,6 +90,20 @@ typedef struct PartitionDispatchData *PartitionDispatch; * given leaf partition's rowtype after that * partition is chosen for insertion by * tuple-routing. + * partition_arbiter_indexes Array of Lists with each slot containing the + * list of OIDs of a given partition's indexes + * that are to be used as arbiter indexes for + * ON CONFLICT checking + * partition_conflproj_slots Array of TupleTableSlots to hold tuples of + * ON CONFLICT DO UPDATE SET projections; + * contains NULL for partitions whose tuple + * descriptor exactly matches the root parent's + * (including dropped columns) + * partition_existing_slots Array of TupleTableSlots to hold existing + * tuple during ON CONFLICT DO UPDATE handling; + * contains NULL for partitions whose tuple + * descriptor exactly matches the root parent's + * (including dropped columns) *----------------------- */ typedef struct PartitionTupleRouting @@ -106,6 +120,8 @@ typedef struct PartitionTupleRouting int num_subplan_partition_offsets; TupleTableSlot *partition_tuple_slot; TupleTableSlot *root_tuple_slot; + TupleDesc *partition_onconfl_tdescs; + List **partition_arbiter_indexes; } PartitionTupleRouting; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 1bee5ccbeb..cc2f55ee12 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -993,6 +993,7 @@ typedef struct ModifyTableState TupleTableSlot *mt_existing; /* slot to store existing target tuple in */ List *mt_excludedtlist; /* the excluded pseudo relation's tlist */ TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... projection target */ + TupleDesc mt_conflproj_tupdesc; /* tuple descriptor for it */ struct PartitionTupleRouting *mt_partition_tuple_routing; /* Tuple-routing support info */ struct TransitionCaptureState *mt_transition_capture; diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index 38608770a2..7074bae79a 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -14,6 +14,7 @@ #ifndef PREP_H #define PREP_H +#include "access/tupdesc.h" #include "nodes/plannodes.h" #include "nodes/relation.h" @@ -42,6 +43,9 @@ extern List *preprocess_targetlist(PlannerInfo *root); extern PlanRowMark *get_plan_rowmark(List *rowmarks, Index rtindex); +extern List *expand_targetlist(List *tlist, int command_type, + Index result_relation, TupleDesc tupdesc); + /* * prototypes for prepunion.c */ @@ -65,4 +69,10 @@ extern SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root, extern Relids adjust_child_relids_multilevel(PlannerInfo *root, Relids relids, Relids child_relids, Relids top_parent_relids); +extern List *adjust_and_expand_partition_tlist(TupleDesc parentDesc, + TupleDesc partitionDesc, + char *partitionRelname, + int parentVarno, + List *targetlist); + #endif /* PREP_H */ diff --git a/src/test/regress/expected/insert_conflict.out b/src/test/regress/expected/insert_conflict.out index 2650faedee..a9677f06e6 100644 --- a/src/test/regress/expected/insert_conflict.out +++ b/src/test/regress/expected/insert_conflict.out @@ -786,16 +786,67 @@ select * from selfconflict; (3 rows) drop table selfconflict; --- check that the following works: --- insert into partitioned_table on conflict do nothing -create table parted_conflict_test (a int, b char) partition by list (a); -create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1); +-- check ON CONFLICT handling with partitioned tables +create table parted_conflict_test (a int unique, b char) partition by list (a); +create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1, 2); +-- no indexes required here insert into parted_conflict_test values (1, 'a') on conflict do nothing; -insert into parted_conflict_test values (1, 'a') on conflict do nothing; --- however, on conflict do update is not supported yet -insert into parted_conflict_test values (1) on conflict (b) do update set a = excluded.a; -ERROR: ON CONFLICT DO UPDATE cannot be applied to partitioned table "parted_conflict_test" --- but it works OK if we target the partition directly -insert into parted_conflict_test_1 values (1) on conflict (b) do -update set a = excluded.a; +-- index on a required, which does exist in parent +insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b; +-- targeting partition directly will work +insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b; +-- index on b required, which doesn't exist in parent +insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a; +ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification +-- targeting partition directly will work +insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a; +-- should see (2, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 2 | b +(1 row) + +-- now check that DO UPDATE works correctly for target partition with +-- different attribute numbers +create table parted_conflict_test_2 (b char, a int unique); +alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3); +truncate parted_conflict_test; +insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b; +-- should see (3, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 3 | b +(1 row) + +-- case where parent will have a dropped column, but the partition won't +alter table parted_conflict_test drop b, add b char; +create table parted_conflict_test_3 partition of parted_conflict_test for values in (4); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; +-- should see (4, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 4 | b +(1 row) + +-- case with multi-level partitioning +create table parted_conflict_test_4 partition of parted_conflict_test for values in (5) partition by list (a); +create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for values in (5); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; +-- should see (5, 'b') +select * from parted_conflict_test order by a; + a | b +---+--- + 5 | b +(1 row) + drop table parted_conflict_test; diff --git a/src/test/regress/sql/insert_conflict.sql b/src/test/regress/sql/insert_conflict.sql index 32c647e3f8..73122479a3 100644 --- a/src/test/regress/sql/insert_conflict.sql +++ b/src/test/regress/sql/insert_conflict.sql @@ -472,15 +472,59 @@ select * from selfconflict; drop table selfconflict; --- check that the following works: --- insert into partitioned_table on conflict do nothing -create table parted_conflict_test (a int, b char) partition by list (a); -create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1); +-- check ON CONFLICT handling with partitioned tables +create table parted_conflict_test (a int unique, b char) partition by list (a); +create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1, 2); + +-- no indexes required here insert into parted_conflict_test values (1, 'a') on conflict do nothing; -insert into parted_conflict_test values (1, 'a') on conflict do nothing; --- however, on conflict do update is not supported yet -insert into parted_conflict_test values (1) on conflict (b) do update set a = excluded.a; --- but it works OK if we target the partition directly -insert into parted_conflict_test_1 values (1) on conflict (b) do -update set a = excluded.a; + +-- index on a required, which does exist in parent +insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b; + +-- targeting partition directly will work +insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing; +insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b; + +-- index on b required, which doesn't exist in parent +insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a; + +-- targeting partition directly will work +insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a; + +-- should see (2, 'b') +select * from parted_conflict_test order by a; + +-- now check that DO UPDATE works correctly for target partition with +-- different attribute numbers +create table parted_conflict_test_2 (b char, a int unique); +alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3); +truncate parted_conflict_test; +insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b; + +-- should see (3, 'b') +select * from parted_conflict_test order by a; + +-- case where parent will have a dropped column, but the partition won't +alter table parted_conflict_test drop b, add b char; +create table parted_conflict_test_3 partition of parted_conflict_test for values in (4); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; + +-- should see (4, 'b') +select * from parted_conflict_test order by a; + +-- case with multi-level partitioning +create table parted_conflict_test_4 partition of parted_conflict_test for values in (5) partition by list (a); +create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for values in (5); +truncate parted_conflict_test; +insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b; +insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a'; + +-- should see (5, 'b') +select * from parted_conflict_test order by a; + drop table parted_conflict_test; -- 2.11.0