From 36fa984f5561d671c681167e35656cb0806ff852 Mon Sep 17 00:00:00 2001 From: amitlan Date: Mon, 11 Oct 2021 14:57:19 +0900 Subject: [PATCH 2/2] Enforce foreign key correctly during cross-partition updates When an update on a partitioned table referenced in foreign keys constraint causes a row to move from one partition to another, which is implemented by deleting the old row from the source leaf partition followed by inserting the new row into the destination leaf partition, firing the foreign key triggers on that delete event can result in surprising outcomes for those keys. For example, a given foreign key's delete trigger which implements the ON DELETE CASCADE clause of that key will delete any referencing rows when triggerred for that internal DELETE, although it should not, because the referenced row is simply being moved from one partition of the referenced root partitioned table into another, not being deleted from it. This commit teaches trigger.c to skip queuing such delete trigger events on the leaf partitions in favor of an UPDATE event fired on the root target relation. Doing so makes sense because both the old and the new tuple "logically" belong to the root relation. The after trigger event queuing interface now allows passing the source and the destination partitions of a particular cross-partition update when registering the update event for the root partitioned table. Along with the 2 ctids of the old and the new tuple, an after trigger event now also stores the OIDs of those partitions. The tuples fetched from the source and the destination partitions are converted into the root table format before they are passed to the trigger function. The implementation currently has a limitation that only the foreign keys pointing into the query's target relation are considered, not those of its sub-partitioned partitions. That seems like a reasonable limitation, because it sounds rare to have distinct foreign keys pointing into sub-partitioned partitions, but not into the root table. --- doc/src/sgml/ref/update.sgml | 7 + src/backend/commands/trigger.c | 322 +++++++++++++++++++--- src/backend/executor/execMain.c | 19 +- src/backend/executor/execReplication.c | 5 +- src/backend/executor/nodeModifyTable.c | 187 ++++++++++++- src/backend/utils/adt/ri_triggers.c | 6 + src/include/commands/trigger.h | 4 + src/include/executor/executor.h | 3 +- src/include/nodes/execnodes.h | 3 + src/test/regress/expected/foreign_key.out | 204 +++++++++++++- src/test/regress/sql/foreign_key.sql | 135 ++++++++- 11 files changed, 840 insertions(+), 55 deletions(-) diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml index 3fa54e5f70..3ba13010e7 100644 --- a/doc/src/sgml/ref/update.sgml +++ b/doc/src/sgml/ref/update.sgml @@ -316,6 +316,13 @@ UPDATE count partition (provided the foreign data wrapper supports tuple routing), they cannot be moved from a foreign-table partition to another partition. + + + An attempt of moving a row from one partition to another will fail if a + foreign key is found to directly reference a non-root partitioned table + in the partition tree, unless that table is also directly mentioned + in the UPDATEquery. + diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7b12ee921e..8d50bcb83c 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -88,7 +88,11 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata, FmgrInfo *finfo, Instrumentation *instr, MemoryContext per_tuple_context); -static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, +static void AfterTriggerSaveEvent(EState *estate, + ModifyTableState *mtstate, + ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, int event, bool row_trigger, TupleTableSlot *oldtup, TupleTableSlot *newtup, List *recheckIndexes, Bitmapset *modifiedCols, @@ -2325,7 +2329,9 @@ ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo, TriggerDesc *trigdesc = relinfo->ri_TrigDesc; if (trigdesc && trigdesc->trig_insert_after_statement) - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT, + AfterTriggerSaveEvent(estate, NULL, relinfo, + NULL, NULL, + TRIGGER_EVENT_INSERT, false, NULL, NULL, NIL, NULL, transition_capture); } @@ -2414,7 +2420,9 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo, if ((trigdesc && trigdesc->trig_insert_after_row) || (transition_capture && transition_capture->tcs_insert_new_table)) - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT, + AfterTriggerSaveEvent(estate, NULL, relinfo, + NULL, NULL, + TRIGGER_EVENT_INSERT, true, NULL, slot, recheckIndexes, NULL, transition_capture); @@ -2539,7 +2547,9 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo, TriggerDesc *trigdesc = relinfo->ri_TrigDesc; if (trigdesc && trigdesc->trig_delete_after_statement) - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE, + AfterTriggerSaveEvent(estate, NULL, relinfo, + NULL, NULL, + TRIGGER_EVENT_DELETE, false, NULL, NULL, NIL, NULL, transition_capture); } @@ -2636,7 +2646,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, } void -ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, +ExecARDeleteTriggers(EState *estate, ModifyTableState *mtstate, + ResultRelInfo *relinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, TransitionCaptureState *transition_capture) @@ -2660,7 +2671,9 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo, else ExecForceStoreHeapTuple(fdw_trigtuple, slot, false); - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE, + AfterTriggerSaveEvent(estate, mtstate, relinfo, + NULL, NULL, + TRIGGER_EVENT_DELETE, true, slot, NULL, NIL, NULL, transition_capture); } @@ -2781,7 +2794,9 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, Assert(relinfo->ri_RootResultRelInfo == NULL); if (trigdesc && trigdesc->trig_update_after_statement) - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE, + AfterTriggerSaveEvent(estate, NULL, relinfo, + NULL, NULL, + TRIGGER_EVENT_UPDATE, false, NULL, NULL, NIL, ExecGetAllUpdatedCols(relinfo, estate), transition_capture); @@ -2919,8 +2934,20 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, return true; } +/* + * 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source and + * destination partitions, respectively, of a cross-partition update of the + * root partitioned table mentioned in the query, given by 'relinfo'. + * 'tupleid' in that case refers to the ctid of the "old" tuple in the source + * partition, and 'newslot' contains the "new" tuple in the destination + * partition. This interface allows to support the requirements of + * ExecCrossPartitionUpdateForeignKey(). + */ void -ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, +ExecARUpdateTriggers(EState *estate, ModifyTableState *mtstate, + ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, @@ -2940,12 +2967,15 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, * separately for DELETE and INSERT to capture transition table rows. * In such case, either old tuple or new tuple can be NULL. */ - TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo); + TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, + src_partinfo != NULL ? + src_partinfo : + relinfo); if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid)) GetTupleForTrigger(estate, NULL, - relinfo, + src_partinfo != NULL ? src_partinfo : relinfo, tupleid, LockTupleExclusive, oldslot, @@ -2955,7 +2985,9 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo, else ExecClearTuple(oldslot); - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE, + AfterTriggerSaveEvent(estate, mtstate, relinfo, + src_partinfo, dst_partinfo, + TRIGGER_EVENT_UPDATE, true, oldslot, newslot, recheckIndexes, ExecGetAllUpdatedCols(relinfo, estate), transition_capture); @@ -3081,7 +3113,9 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo) TriggerDesc *trigdesc = relinfo->ri_TrigDesc; if (trigdesc && trigdesc->trig_truncate_after_statement) - AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_TRUNCATE, + AfterTriggerSaveEvent(estate, NULL, relinfo, + NULL, NULL, + TRIGGER_EVENT_TRUNCATE, false, NULL, NULL, NIL, NULL, NULL); } @@ -3398,7 +3432,7 @@ typedef SetConstraintStateData *SetConstraintState; */ typedef uint32 TriggerFlags; -#define AFTER_TRIGGER_OFFSET 0x0FFFFFFF /* must be low-order bits */ +#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */ #define AFTER_TRIGGER_DONE 0x10000000 #define AFTER_TRIGGER_IN_PROGRESS 0x20000000 /* bits describing the size and tuple sources of this event */ @@ -3406,7 +3440,8 @@ typedef uint32 TriggerFlags; #define AFTER_TRIGGER_FDW_FETCH 0x80000000 #define AFTER_TRIGGER_1CTID 0x40000000 #define AFTER_TRIGGER_2CTID 0xC0000000 -#define AFTER_TRIGGER_TUP_BITS 0xC0000000 +#define AFTER_TRIGGER_CP_UPDATE 0x08000000 +#define AFTER_TRIGGER_TUP_BITS 0xC8000000 typedef struct AfterTriggerSharedData *AfterTriggerShared; @@ -3427,8 +3462,24 @@ typedef struct AfterTriggerEventData TriggerFlags ate_flags; /* status bits and offset to shared data */ ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */ ItemPointerData ate_ctid2; /* new updated tuple */ + + /* + * During a cross-partition update of a partitioned table, we also store + * the OIDs of source and destination partitions that are needed to + * fetch the old (ctid1) and the new tuple (ctid2) from, respectively. + */ + Oid ate_src_part; + Oid ate_dst_part; } AfterTriggerEventData; +/* AfterTriggerEventData, minus ate_src_part, ate_dst_part */ +typedef struct AfterTriggerEventDataNoOids +{ + TriggerFlags ate_flags; /* status bits and offset to shared data */ + ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */ + ItemPointerData ate_ctid2; /* new updated tuple */ +} AfterTriggerEventDataNoOids; + /* AfterTriggerEventData, minus ate_ctid2 */ typedef struct AfterTriggerEventDataOneCtid { @@ -3443,11 +3494,13 @@ typedef struct AfterTriggerEventDataZeroCtids } AfterTriggerEventDataZeroCtids; #define SizeofTriggerEvent(evt) \ - (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \ + (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \ sizeof(AfterTriggerEventData) : \ - ((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \ - sizeof(AfterTriggerEventDataOneCtid) : \ - sizeof(AfterTriggerEventDataZeroCtids)) + (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \ + sizeof(AfterTriggerEventDataNoOids) : \ + (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \ + sizeof(AfterTriggerEventDataOneCtid) : \ + sizeof(AfterTriggerEventDataZeroCtids)))) #define GetTriggerSharedData(evt) \ ((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET))) @@ -3629,6 +3682,8 @@ static AfterTriggersData afterTriggers; static void AfterTriggerExecute(EState *estate, AfterTriggerEvent event, ResultRelInfo *relInfo, + ResultRelInfo *src_relInfo, + ResultRelInfo *dst_relInfo, TriggerDesc *trigdesc, FmgrInfo *finfo, Instrumentation *instr, @@ -3953,8 +4008,16 @@ afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs) * fmgr lookup cache space at the caller level. (For triggers fired at * the end of a query, we can even piggyback on the executor's state.) * + * When fired for a cross-partition update of a partitioned table, the old + * tuple is fetched using 'src_relInfo' (the source leaf partition) and + * the new tuple using 'dst_relInfo' (the destination leaf partition), though + * both are converted into the root partitioned table's format before passing + * to the trigger function. + * * event: event currently being fired. - * rel: open relation for event. + * relInfo: result relation for event. + * src_relInfo: source partition of a cross-partition update + * dst_relInfo: its destination partition * trigdesc: working copy of rel's trigger info. * finfo: array of fmgr lookup cache entries (one per trigger in trigdesc). * instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger), @@ -3968,6 +4031,8 @@ static void AfterTriggerExecute(EState *estate, AfterTriggerEvent event, ResultRelInfo *relInfo, + ResultRelInfo *src_relInfo, + ResultRelInfo *dst_relInfo, TriggerDesc *trigdesc, FmgrInfo *finfo, Instrumentation *instr, MemoryContext per_tuple_context, @@ -3975,6 +4040,8 @@ AfterTriggerExecute(EState *estate, TupleTableSlot *trig_tuple_slot2) { Relation rel = relInfo->ri_RelationDesc; + Relation src_rel = src_relInfo->ri_RelationDesc; + Relation dst_rel = dst_relInfo->ri_RelationDesc; AfterTriggerShared evtshared = GetTriggerSharedData(event); Oid tgoid = evtshared->ats_tgoid; TriggerData LocTriggerData = {0}; @@ -4055,12 +4122,36 @@ AfterTriggerExecute(EState *estate, default: if (ItemPointerIsValid(&(event->ate_ctid1))) { - LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo); + TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate, + src_relInfo); - if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid1), + if (!table_tuple_fetch_row_version(src_rel, + &(event->ate_ctid1), SnapshotAny, - LocTriggerData.tg_trigslot)) + src_slot)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); + + /* + * Store the tuple fetched from the source partition into + * the target (root partitioned) table slot, converting if + * needed. + */ + if (src_relInfo != relInfo) + { + TupleConversionMap *map = ExecGetChildToRootMap(src_relInfo); + + LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo); + if (map) + { + execute_attr_map_slot(map->attrMap, + src_slot, + LocTriggerData.tg_trigslot); + } + else + ExecCopySlot(LocTriggerData.tg_trigslot, src_slot); + } + else + LocTriggerData.tg_trigslot = src_slot; LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig); } @@ -4070,16 +4161,42 @@ AfterTriggerExecute(EState *estate, } /* don't touch ctid2 if not there */ - if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == - AFTER_TRIGGER_2CTID && + if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == + AFTER_TRIGGER_2CTID || + (event->ate_flags & AFTER_TRIGGER_TUP_BITS) == + AFTER_TRIGGER_CP_UPDATE) && ItemPointerIsValid(&(event->ate_ctid2))) { - LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo); + TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate, + dst_relInfo); - if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid2), + if (!table_tuple_fetch_row_version(dst_rel, + &(event->ate_ctid2), SnapshotAny, - LocTriggerData.tg_newslot)) + dst_slot)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); + + /* + * Store the tuple fetched from the destination partition into + * the target (root partitioned) table slot, converting if + * needed. + */ + if (dst_relInfo != relInfo) + { + TupleConversionMap *map = ExecGetChildToRootMap(dst_relInfo); + + LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo); + if (map) + { + execute_attr_map_slot(map->attrMap, + dst_slot, + LocTriggerData.tg_newslot); + } + else + ExecCopySlot(LocTriggerData.tg_newslot, dst_slot); + } + else + LocTriggerData.tg_newslot = dst_slot; LocTriggerData.tg_newtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new); } @@ -4308,13 +4425,16 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) && evtshared->ats_firing_id == firing_id) { + ResultRelInfo *src_rInfo, + *dst_rInfo; /* * So let's fire it... but first, find the correct relation if * this is not the same relation as before. */ if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid) { - rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid); + rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid, + NULL); rel = rInfo->ri_RelationDesc; /* Catch calls with insufficient relcache refcounting */ Assert(!RelationHasReferenceCountZero(rel)); @@ -4339,12 +4459,33 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, evtshared->ats_relid); } + /* + * Look up source and destination partition result rels of a + * cross-partition update event. + */ + if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS ) == + AFTER_TRIGGER_CP_UPDATE) + { + Assert(OidIsValid(event->ate_src_part) && + OidIsValid(event->ate_dst_part)); + src_rInfo = ExecGetTriggerResultRel(estate, + event->ate_src_part, + rInfo); + dst_rInfo = ExecGetTriggerResultRel(estate, + event->ate_dst_part, + rInfo); + } + else + src_rInfo = dst_rInfo = rInfo; + /* * Fire it. Note that the AFTER_TRIGGER_IN_PROGRESS flag is * still set, so recursive examinations of the event list * won't try to re-fire it. */ - AfterTriggerExecute(estate, event, rInfo, trigdesc, finfo, instr, + AfterTriggerExecute(estate, event, rInfo, + src_rInfo, dst_rInfo, + trigdesc, finfo, instr, per_tuple_context, slot1, slot2); /* @@ -5539,16 +5680,38 @@ AfterTriggerPendingOnRel(Oid relid) * Transition tuplestores are built now, rather than when events are pulled * off of the queue because AFTER ROW triggers are allowed to select from the * transition tables for the statement. + * + * This contains special support to queue the update events for the case where + * a partitioned table undergoing a cross-partition update may have foreign + * keys pointing into it. Normally, a partitioned table's row triggers are + * not fired because the leaf partition(s) which are modified as a result of + * the operation on the partitioned table contain the same triggers which are + * fired instead. But that general scheme can cause problematic behavior with + * foreign key triggers during cross-partition updates, which are implemented + * as DELETE on the source partition followed by INSERT into the destination + * partition. Specifically, firing DELETE triggers would lead to the wrong + * foreign key action to be enforced considering that the original command is + * UPDATE. * ---------- */ static void -AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, +AfterTriggerSaveEvent(EState *estate, ModifyTableState *mtstate, + ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, int event, bool row_trigger, TupleTableSlot *oldslot, TupleTableSlot *newslot, List *recheckIndexes, Bitmapset *modifiedCols, TransitionCaptureState *transition_capture) { Relation rel = relinfo->ri_RelationDesc; + Relation rootRel = relinfo->ri_RootResultRelInfo ? + relinfo->ri_RootResultRelInfo->ri_RelationDesc: NULL; + bool maybe_crosspart_update = + (row_trigger && mtstate && mtstate->operation == CMD_UPDATE && + (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + (rootRel && rootRel->rd_rel->relkind == + RELKIND_PARTITIONED_TABLE))); TriggerDesc *trigdesc = relinfo->ri_TrigDesc; AfterTriggerEventData new_event; AfterTriggerSharedData new_shared; @@ -5655,6 +5818,19 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, return; } + /* + * We normally don't see partitioned tables here for row level triggers + * except in the special case of a cross-partitioned update. In that + * case, nodeModifyTable.c: ExecCrossPartitionUpdateForeignKey() calls to + * queue an update event on the root target partitioned table, also + * passing the source and destination partitions and their tuples. + */ + Assert(!row_trigger || + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE || + (maybe_crosspart_update && + TRIGGER_FIRED_BY_UPDATE(event) && + src_partinfo != NULL && dst_partinfo != NULL)); + /* * Validate the event code and collect the associated tuple CTIDs. * @@ -5715,6 +5891,16 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot != NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2)); + /* + * Also remember the OIDs of partitions to fetch these tuples + * out of later in AfterTriggerExecute(). + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + Assert(src_partinfo != NULL && dst_partinfo != NULL); + new_event.ate_src_part = RelationGetRelid(src_partinfo->ri_RelationDesc); + new_event.ate_dst_part = RelationGetRelid(dst_partinfo->ri_RelationDesc); + } } else { @@ -5741,11 +5927,43 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) new_event.ate_flags = (row_trigger && event == TRIGGER_EVENT_UPDATE) ? - AFTER_TRIGGER_2CTID : AFTER_TRIGGER_1CTID; + (relkind == RELKIND_PARTITIONED_TABLE ? AFTER_TRIGGER_CP_UPDATE : + AFTER_TRIGGER_2CTID) : + AFTER_TRIGGER_1CTID; + /* else, we'll initialize ate_flags for each trigger */ tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT); + /* + * Must convert/copy the source and destination partition tuples into the + * root partitioned table's format/slot, because the processing in the loop + * below expects both oldslot and newslot tuples to be in that form. + */ + if (row_trigger && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + TupleTableSlot *rootslot; + TupleConversionMap *map; + + rootslot = ExecGetTriggerOldSlot(estate, relinfo); + map = ExecGetChildToRootMap(src_partinfo); + if (map) + oldslot = execute_attr_map_slot(map->attrMap, + oldslot, + rootslot); + else + oldslot = ExecCopySlot(rootslot, oldslot); + + rootslot = ExecGetTriggerNewSlot(estate, relinfo); + map = ExecGetChildToRootMap(dst_partinfo); + if (map) + newslot = execute_attr_map_slot(map->attrMap, + newslot, + rootslot); + else + newslot = ExecCopySlot(rootslot, newslot); + } + for (i = 0; i < trigdesc->numtriggers; i++) { Trigger *trigger = &trigdesc->triggers[i]; @@ -5775,12 +5993,28 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * If the trigger is a foreign key enforcement trigger, there are * certain cases where we can skip queueing the event because we can * tell by inspection that the FK constraint will still pass. + * There are also some cases during cross-partition updates of a + * partitioned table where queuing the event can be skipped. */ if (TRIGGER_FIRED_BY_UPDATE(event) || TRIGGER_FIRED_BY_DELETE(event)) { switch (RI_FKey_trigger_type(trigger->tgfoid)) { case RI_TRIGGER_PK: + /* + * For cross-partitioned updates of partitioned PK table, + * skip the event fired by the component delete on the + * source leaf partition unless the constraint originates + * in the partition itself (!tgisclone), because the update + * event that will be fired on the root (partitioned) + * target table will be used to perform the necessary + * foreign key enforcement action. + */ + if (maybe_crosspart_update && + TRIGGER_FIRED_BY_DELETE(event) && + trigger->tgisclone) + continue; + /* Update or delete on trigger's PK table */ if (!RI_FKey_pk_upd_check_required(trigger, rel, oldslot, newslot)) @@ -5791,8 +6025,19 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, break; case RI_TRIGGER_FK: - /* Update on trigger's FK table */ - if (!RI_FKey_fk_upd_check_required(trigger, rel, + /* + * Update on trigger's FK table. We can skip the update + * event fired on a partitioned table during a + * cross-partition of that table, because the insert event + * that is fired on the destination leaf partition would + * suffice to perform the necessary foreign key check. + * Moreover, RI_FKey_fk_upd_check_required() expects to be + * passed a tuple that contains system attributes, most of + * which are not present in the virtual slot belonging to + * a partitioned table. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + !RI_FKey_fk_upd_check_required(trigger, rel, oldslot, newslot)) { /* skip queuing this event */ @@ -5801,7 +6046,16 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, break; case RI_TRIGGER_NONE: - /* Not an FK trigger */ + /* + * Not an FK trigger. No need to queue the update event + * fired during a cross-partitioned update of a partitioned + * table, because the same row trigger must be present in + * the leaf partition(s) that are affected as part of this + * update and the events fired on them are queued instead. + */ + if (row_trigger && + rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + continue; break; } } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index b3ce4bae53..40b6d924ce 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1279,7 +1279,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, * in es_trig_target_relations. */ ResultRelInfo * -ExecGetTriggerResultRel(EState *estate, Oid relid) +ExecGetTriggerResultRel(EState *estate, Oid relid, + ResultRelInfo *rootRelInfo) { ResultRelInfo *rInfo; ListCell *l; @@ -1330,7 +1331,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid) InitResultRelInfo(rInfo, rel, 0, /* dummy rangetable index */ - NULL, + rootRelInfo, estate->es_instrument); estate->es_trig_target_relations = lappend(estate->es_trig_target_relations, rInfo); @@ -1447,8 +1448,22 @@ ExecCloseResultRelations(EState *estate) foreach(l, estate->es_opened_result_relations) { ResultRelInfo *resultRelInfo = lfirst(l); + ListCell *lc; ExecCloseIndices(resultRelInfo); + foreach(lc, resultRelInfo->ri_ancestorResultRels) + { + ResultRelInfo *rInfo = lfirst(lc); + + /* + * Don't close the root ancestor relation, because that one's + * closed in ExecCloseRangeTableRelations(). + */ + if (rInfo->ri_RangeTableIndex > 0) + continue; + + table_close(rInfo->ri_RelationDesc, NoLock); + } } /* Close any relations that have been opened by ExecGetTriggerResultRel(). */ diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 1e285e0349..9ca79a8aed 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -516,7 +516,8 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, NULL, NIL); /* AFTER ROW UPDATE Triggers */ - ExecARUpdateTriggers(estate, resultRelInfo, + ExecARUpdateTriggers(estate, NULL, resultRelInfo, + NULL, NULL, tid, NULL, slot, recheckIndexes, NULL); @@ -556,7 +557,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, simple_table_tuple_delete(rel, tid, estate->es_snapshot); /* AFTER ROW DELETE Triggers */ - ExecARDeleteTriggers(estate, resultRelInfo, + ExecARDeleteTriggers(estate, NULL, resultRelInfo, tid, NULL, NULL); } } diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index d328856ae5..e9d1b3fb5d 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -38,6 +38,7 @@ #include "access/tableam.h" #include "access/xact.h" #include "catalog/catalog.h" +#include "catalog/partition.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" @@ -596,7 +597,9 @@ ExecInsert(ModifyTableState *mtstate, TupleTableSlot *slot, TupleTableSlot *planSlot, EState *estate, - bool canSetTag) + bool canSetTag, + TupleTableSlot **inserted_tuple, + ResultRelInfo **insert_destrel) { Relation resultRelationDesc; List *recheckIndexes = NIL; @@ -956,7 +959,9 @@ ExecInsert(ModifyTableState *mtstate, if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture && mtstate->mt_transition_capture->tcs_update_new_table) { - ExecARUpdateTriggers(estate, resultRelInfo, NULL, + ExecARUpdateTriggers(estate, mtstate, resultRelInfo, + NULL, NULL, + NULL, NULL, slot, NULL, @@ -994,6 +999,11 @@ ExecInsert(ModifyTableState *mtstate, if (resultRelInfo->ri_projectReturning) result = ExecProcessReturning(resultRelInfo, slot, planSlot); + if (inserted_tuple) + *inserted_tuple = slot; + if (insert_destrel) + *insert_destrel = resultRelInfo; + return result; } @@ -1346,7 +1356,8 @@ ldelete:; if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture && mtstate->mt_transition_capture->tcs_update_old_table) { - ExecARUpdateTriggers(estate, resultRelInfo, + ExecARUpdateTriggers(estate, mtstate, resultRelInfo, + NULL, NULL, tupleid, oldtuple, NULL, @@ -1361,7 +1372,7 @@ ldelete:; } /* AFTER ROW DELETE Triggers */ - ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple, + ExecARDeleteTriggers(estate, mtstate, resultRelInfo, tupleid, oldtuple, ar_delete_trig_tcs); /* Process RETURNING if present and if requested */ @@ -1433,7 +1444,9 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate, TupleTableSlot *slot, TupleTableSlot *planSlot, EPQState *epqstate, bool canSetTag, TupleTableSlot **retry_slot, - TupleTableSlot **inserted_tuple) + TupleTableSlot **returning_slot, + TupleTableSlot **inserted_tuple, + ResultRelInfo **insert_destrel) { EState *estate = mtstate->ps.state; TupleConversionMap *tupconv_map; @@ -1556,8 +1569,9 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate, mtstate->mt_root_tuple_slot); /* Tuple routing starts from the root table. */ - *inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot, - planSlot, estate, canSetTag); + *returning_slot = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot, + planSlot, estate, canSetTag, inserted_tuple, + insert_destrel); /* * Reset the transition state that may possibly have been written by @@ -1570,6 +1584,124 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate, return true; } +/* + * Return the ancestor relations of a given leaf partition result relation + * up to and including the query's root target relation. + */ +static List * +GetAncestorResultRels(ResultRelInfo *resultRelInfo) +{ + ResultRelInfo *rootRelInfo = resultRelInfo->ri_RootResultRelInfo; + Relation partRel = resultRelInfo->ri_RelationDesc; + Oid rootRelOid; + + if (!partRel->rd_rel->relispartition) + elog(ERROR, "cannot find ancestors of a non-partition result relation"); + Assert(rootRelInfo != NULL); + rootRelOid = RelationGetRelid(rootRelInfo->ri_RelationDesc); + if (resultRelInfo->ri_ancestorResultRels == NIL) + { + ListCell *lc; + List *oids = get_partition_ancestors(RelationGetRelid(partRel)); + List *ancResultRels = NIL; + + foreach(lc, oids) + { + Oid ancOid = lfirst_oid(lc); + Relation ancRel; + ResultRelInfo *rInfo; + + /* We use ri_RootResultRelInfo for the root ancestor. */ + if (ancOid == rootRelOid) + break; + + /* + * All ancestors up to the root target relation must have been + * locked by the planner or AcquireExecutorLocks(). + */ + ancRel = table_open(ancOid, NoLock); + rInfo = makeNode(ResultRelInfo); + + /* No need to make ri_RangeTableIndex valid. */ + InitResultRelInfo(rInfo, ancRel, 0, NULL, 0); + ancResultRels = lappend(ancResultRels, rInfo); + } + ancResultRels = lappend(ancResultRels, rootRelInfo); + resultRelInfo->ri_ancestorResultRels = ancResultRels; + } + + return resultRelInfo->ri_ancestorResultRels; +} + +/* + * Queues up an update event using the target root partitioned table's trigger + * to check that a cross-partition update hasn't broken any foreign keys + * pointing into it. + */ +static void +ExecCrossPartitionUpdateForeignKey(ResultRelInfo *sourcePartInfo, + ResultRelInfo *destPartInfo, + ItemPointer tupleid, + TupleTableSlot *oldslot, + TupleTableSlot *newslot, + ModifyTableState *mtstate, + EState *estate) +{ + ListCell *lc; + ResultRelInfo *rootRelInfo = sourcePartInfo->ri_RootResultRelInfo; + List *ancestorRels = GetAncestorResultRels(sourcePartInfo); + + /* + * For any foreign keys that point directly into a non-root ancestors of + * the source partition, we can in theory fire an update event to enforce + * those constraints using their triggers, if we could tell if both the + * source and the destination partitions are under the same ancestor. But + * for now, we simply report an error that those cannot be enforced. + */ + foreach(lc, ancestorRels) + { + ResultRelInfo *rInfo = lfirst(lc); + TriggerDesc *trigdesc = rInfo->ri_TrigDesc; + bool has_noncloned_fkey = false; + + if (rInfo == rootRelInfo) + break; + + if (trigdesc && trigdesc->trig_update_after_row) + { + int i; + + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trig = &trigdesc->triggers[i]; + + if (!trig->tgisclone && + RI_FKey_trigger_type(trig->tgfoid) == RI_TRIGGER_PK) + { + has_noncloned_fkey = true; + break; + } + } + } + + if (has_noncloned_fkey) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot move tuple across partitions when a non-root ancestor of the source partition is directly referenced in a foreign key"), + errdetail("A foreign key points to ancestor \"%s\", but not the root ancestor \"%s\".", + RelationGetRelationName(rInfo->ri_RelationDesc), + RelationGetRelationName(rootRelInfo->ri_RelationDesc)), + errhint("Consider defining the foreign key on \"%s\".", + RelationGetRelationName(rootRelInfo->ri_RelationDesc)))); + } + + /* Perform the root table's triggers. */ + ExecARUpdateTriggers(estate, mtstate, rootRelInfo, + sourcePartInfo, destPartInfo, + tupleid, NULL, + newslot, NIL, NULL); +} + /* ---------------------------------------------------------------- * ExecUpdate * @@ -1742,9 +1874,12 @@ lreplace:; */ if (partition_constraint_failed) { - TupleTableSlot *inserted_tuple, + TupleTableSlot *oldslot = slot, + *inserted_tuple, + *returning_slot = NULL, *retry_slot; bool retry; + ResultRelInfo *insert_destrel = NULL; /* * ExecCrossPartitionUpdate will first DELETE the row from the @@ -1756,14 +1891,39 @@ lreplace:; retry = !ExecCrossPartitionUpdate(mtstate, resultRelInfo, tupleid, oldtuple, slot, planSlot, epqstate, canSetTag, - &retry_slot, &inserted_tuple); + &retry_slot, &returning_slot, + &inserted_tuple, + &insert_destrel); if (retry) { slot = retry_slot; goto lreplace; } - return inserted_tuple; + /* + * If the partitioned table being updated is referenced in foreign + * keys, queue up trigger events to check that none of them were + * violated. No special treatment is needed in non-cross-partition + * update situations, because the leaf partition's AR update + * triggers will take care of that. During cross-partition + * updates implemented as delete on the source partition followed + * by insert on the destination partition, AR update triggers of + * the root table (that is, the table mentioned in the query) must + * be fired. + * + * NULL insert_destrel means that the move failed to occur, that + * is, the update failed, so no need to anything in that case. + */ + if (insert_destrel && + resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_update_after_row) + ExecCrossPartitionUpdateForeignKey(resultRelInfo, + insert_destrel, + tupleid, oldslot, + inserted_tuple, + mtstate, estate); + + return returning_slot; } /* @@ -1942,7 +2102,10 @@ lreplace:; (estate->es_processed)++; /* AFTER ROW UPDATE Triggers */ - ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot, + ExecARUpdateTriggers(estate, mtstate, resultRelInfo, + NULL, NULL, + tupleid, oldtuple, + slot, recheckIndexes, mtstate->operation == CMD_INSERT ? mtstate->mt_oc_transition_capture : @@ -2559,7 +2722,7 @@ ExecModifyTable(PlanState *pstate) ExecInitInsertProjection(node, resultRelInfo); slot = ExecGetInsertNewTuple(resultRelInfo, planSlot); slot = ExecInsert(node, resultRelInfo, slot, planSlot, - estate, node->canSetTag); + estate, node->canSetTag, NULL, NULL); break; case CMD_UPDATE: /* Initialize projection info if first time for this table */ diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 96269fc2ad..38b3b9e53f 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -1208,6 +1208,12 @@ RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel, TransactionId xmin; bool isnull; + /* + * AfterTriggerSaveEvent() handles things such that this function is never + * called for partitioned tables. + */ + Assert(fk_rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE); + riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false); ri_nullcheck = ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false); diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index 489c93de92..cbbf7449da 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -211,6 +211,7 @@ extern bool ExecBRDeleteTriggers(EState *estate, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot); extern void ExecARDeleteTriggers(EState *estate, + ModifyTableState *mtstate, ResultRelInfo *relinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, @@ -230,7 +231,10 @@ extern bool ExecBRUpdateTriggers(EState *estate, HeapTuple fdw_trigtuple, TupleTableSlot *slot); extern void ExecARUpdateTriggers(EState *estate, + ModifyTableState *mtstate, ResultRelInfo *relinfo, + ResultRelInfo *src_partinfo, + ResultRelInfo *dst_partinfo, ItemPointer tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *slot, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 3dc03c913e..acf2b9b240 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -203,7 +203,8 @@ extern void InitResultRelInfo(ResultRelInfo *resultRelInfo, Index resultRelationIndex, ResultRelInfo *partition_root_rri, int instrument_options); -extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid); +extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid, + ResultRelInfo *rootRelInfo); extern void ExecConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate); extern bool ExecPartitionCheck(ResultRelInfo *resultRelInfo, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3dfac5bd5f..30785e4b55 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -524,6 +524,9 @@ typedef struct ResultRelInfo /* for use by copyfrom.c when performing multi-inserts */ struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer; + + /* Used during cross-partition updates on partitioned tables. */ + List *ri_ancestorResultRels; } ResultRelInfo; /* ---------------- diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index bf794dce9d..6a0f499fcb 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -2485,7 +2485,7 @@ DELETE FROM pk WHERE a = 20; ERROR: update or delete on table "pk11" violates foreign key constraint "fk_a_fkey2" on table "fk" DETAIL: Key (a)=(20) is still referenced from table "fk". UPDATE pk SET a = 90 WHERE a = 30; -ERROR: update or delete on table "pk11" violates foreign key constraint "fk_a_fkey2" on table "fk" +ERROR: update or delete on table "pk" violates foreign key constraint "fk_a_fkey" on table "fk" DETAIL: Key (a)=(30) is still referenced from table "fk". SELECT tableoid::regclass, * FROM fk; tableoid | a @@ -2554,15 +2554,213 @@ CREATE SCHEMA fkpart10 CREATE TABLE tbl1(f1 int PRIMARY KEY) PARTITION BY RANGE(f1) CREATE TABLE tbl1_p1 PARTITION OF tbl1 FOR VALUES FROM (minvalue) TO (1) CREATE TABLE tbl1_p2 PARTITION OF tbl1 FOR VALUES FROM (1) TO (maxvalue) - CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED); + CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED) + CREATE TABLE tbl3(f1 int PRIMARY KEY) PARTITION BY RANGE(f1) + CREATE TABLE tbl3_p1 PARTITION OF tbl3 FOR VALUES FROM (minvalue) TO (1) + CREATE TABLE tbl3_p2 PARTITION OF tbl3 FOR VALUES FROM (1) TO (maxvalue) + CREATE TABLE tbl4(f1 int REFERENCES tbl3 DEFERRABLE INITIALLY DEFERRED); INSERT INTO fkpart10.tbl1 VALUES (0), (1); INSERT INTO fkpart10.tbl2 VALUES (0), (1); +INSERT INTO fkpart10.tbl3 VALUES (-2), (-1), (0); +INSERT INTO fkpart10.tbl4 VALUES (-2), (-1); BEGIN; DELETE FROM fkpart10.tbl1 WHERE f1 = 0; UPDATE fkpart10.tbl1 SET f1 = 2 WHERE f1 = 1; INSERT INTO fkpart10.tbl1 VALUES (0), (1); COMMIT; +-- test that cross-partition updates correctly enforces the foreign key +-- restriction (specifically testing INITIAILLY DEFERRED) +BEGIN; +UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0; +UPDATE fkpart10.tbl3 SET f1 = f1 * -1; +INSERT INTO fkpart10.tbl1 VALUES (4); +COMMIT; +ERROR: update or delete on table "tbl1" violates foreign key constraint "tbl2_f1_fkey" on table "tbl2" +DETAIL: Key (f1)=(0) is still referenced from table "tbl2". +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -1; +UPDATE fkpart10.tbl3 SET f1 = f1 + 3; +UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0; +INSERT INTO fkpart10.tbl1 VALUES (0); +COMMIT; +ERROR: update or delete on table "tbl3" violates foreign key constraint "tbl4_f1_fkey" on table "tbl4" +DETAIL: Key (f1)=(-2) is still referenced from table "tbl4". +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -1; +UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0; +INSERT INTO fkpart10.tbl1 VALUES (0); +INSERT INTO fkpart10.tbl3 VALUES (-2), (-1); +COMMIT; +-- test where the updated table now has both an IMMEDIATE and a DEFERRED +-- constraint pointing into it +CREATE TABLE fkpart10.tbl5(f1 int REFERENCES fkpart10.tbl3); +INSERT INTO fkpart10.tbl5 VALUES (-2), (-1); +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -3; +ERROR: update or delete on table "tbl3" violates foreign key constraint "tbl5_f1_fkey" on table "tbl5" +DETAIL: Key (f1)=(-2) is still referenced from table "tbl5". +COMMIT; +-- Now test where the row referenced from the table with an IMMEDIATE +-- constraint stays in place, while those referenced from the table with a +-- DEFERRED constraint don't. +DELETE FROM fkpart10.tbl5; +INSERT INTO fkpart10.tbl5 VALUES (0); +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -3; +COMMIT; +ERROR: update or delete on table "tbl3" violates foreign key constraint "tbl4_f1_fkey" on table "tbl4" +DETAIL: Key (f1)=(-2) is still referenced from table "tbl4". DROP SCHEMA fkpart10 CASCADE; -NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to table fkpart10.tbl1 drop cascades to table fkpart10.tbl2 +drop cascades to table fkpart10.tbl3 +drop cascades to table fkpart10.tbl4 +drop cascades to table fkpart10.tbl5 +-- verify foreign keys are enforced during cross-partition updates, +-- especially on the PK side +CREATE SCHEMA fkpart11 + CREATE TABLE pk (a INT PRIMARY KEY, b text) PARTITION BY LIST (a) + CREATE TABLE fk ( + a INT, + CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE + ) + CREATE TABLE fk_parted ( + a INT PRIMARY KEY, + CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE + ) PARTITION BY LIST (a) + CREATE TABLE fk_another ( + a INT, + CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fk_parted (a) ON UPDATE CASCADE ON DELETE CASCADE + ) + CREATE TABLE pk1 PARTITION OF pk FOR VALUES IN (1, 2) PARTITION BY LIST (a) + CREATE TABLE pk2 PARTITION OF pk FOR VALUES IN (3) + CREATE TABLE pk3 PARTITION OF pk FOR VALUES IN (4) + CREATE TABLE fk1 PARTITION OF fk_parted FOR VALUES IN (1, 2) + CREATE TABLE fk2 PARTITION OF fk_parted FOR VALUES IN (3) + CREATE TABLE fk3 PARTITION OF fk_parted FOR VALUES IN (4); +CREATE TABLE fkpart11.pk11 (b text, a int NOT NULL); +ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk11 FOR VALUES IN (1); +CREATE TABLE fkpart11.pk12 (b text, c int, a int NOT NULL); +ALTER TABLE fkpart11.pk12 DROP c; +ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk12 FOR VALUES IN (2); +INSERT INTO fkpart11.pk VALUES (1, 'xxx'), (3, 'yyy'); +INSERT INTO fkpart11.fk VALUES (1), (3); +INSERT INTO fkpart11.fk_parted VALUES (1), (3); +INSERT INTO fkpart11.fk_another VALUES (1), (3); +-- moves 2 rows from one leaf partition to another, with both updates being +-- cascaded to fk and fk_parted. Updates of fk_parted, of which one is +-- cross-partition (3 -> 4), are further cascaded to fk_another. +UPDATE fkpart11.pk SET a = a + 1 RETURNING tableoid::pg_catalog.regclass, *; + tableoid | a | b +---------------+---+----- + fkpart11.pk12 | 2 | xxx + fkpart11.pk3 | 4 | yyy +(2 rows) + +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk; + tableoid | a +-------------+--- + fkpart11.fk | 2 + fkpart11.fk | 4 +(2 rows) + +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted; + tableoid | a +--------------+--- + fkpart11.fk1 | 2 + fkpart11.fk3 | 4 +(2 rows) + +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another; + tableoid | a +---------------------+--- + fkpart11.fk_another | 2 + fkpart11.fk_another | 4 +(2 rows) + +-- let's try with the foreign key pointing at tables in the partition tree +-- that are not the same as the query's target table +-- 1. foreign key pointing into a non-root ancestor +-- +-- A cross-partition update on the root table will fail, because we currently +-- can't enforce the foreign keys pointing into a non-leaf partition +ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey; +DELETE FROM fkpart11.fk WHERE a = 4; +ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk1 (a) ON UPDATE CASCADE ON DELETE CASCADE; +UPDATE fkpart11.pk SET a = a - 1; +ERROR: cannot move tuple across partitions when a non-root ancestor of the source partition is directly referenced in a foreign key +DETAIL: A foreign key points to ancestor "pk1", but not the root ancestor "pk". +HINT: Consider defining the foreign key on "pk". +-- it's okay though if the non-leaf partition is updated directly +UPDATE fkpart11.pk1 SET a = a - 1; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.pk; + tableoid | a | b +---------------+---+----- + fkpart11.pk11 | 1 | xxx + fkpart11.pk3 | 4 | yyy +(2 rows) + +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk; + tableoid | a +-------------+--- + fkpart11.fk | 1 +(1 row) + +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted; + tableoid | a +--------------+--- + fkpart11.fk1 | 1 + fkpart11.fk3 | 4 +(2 rows) + +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another; + tableoid | a +---------------------+--- + fkpart11.fk_another | 4 + fkpart11.fk_another | 1 +(2 rows) + +-- 2. foreign key pointing into a single leaf partition +-- +-- A cross-partition update that deletes from the pointed-to leaf partition +-- is allowed to succeed +ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey; +ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk11 (a) ON UPDATE CASCADE ON DELETE CASCADE; +-- will delete (1) from p11 which is cascaded to fk +UPDATE fkpart11.pk SET a = a + 1 WHERE a = 1; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk; + tableoid | a +----------+--- +(0 rows) + +DROP TABLE fkpart11.fk; +-- check that regular and deferrable AR triggers on the PK tables +-- still work as expected +CREATE FUNCTION fkpart11.print_row () RETURNS TRIGGER LANGUAGE plpgsql AS $$ + BEGIN + RAISE NOTICE 'TABLE: %, OP: %, OLD: %, NEW: %', TG_RELNAME, TG_OP, OLD, NEW; + RETURN NULL; + END; +$$; +CREATE TRIGGER trig_upd_pk AFTER UPDATE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE TRIGGER trig_del_pk AFTER DELETE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE TRIGGER trig_ins_pk AFTER INSERT ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE CONSTRAINT TRIGGER trig_upd_fk_parted AFTER UPDATE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE CONSTRAINT TRIGGER trig_del_fk_parted AFTER DELETE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE CONSTRAINT TRIGGER trig_ins_fk_parted AFTER INSERT ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +UPDATE fkpart11.pk SET a = 3 WHERE a = 4; +NOTICE: TABLE: pk3, OP: DELETE, OLD: (4,yyy), NEW: +NOTICE: TABLE: pk2, OP: INSERT, OLD: , NEW: (3,yyy) +NOTICE: TABLE: fk3, OP: DELETE, OLD: (4), NEW: +NOTICE: TABLE: fk2, OP: INSERT, OLD: , NEW: (3) +UPDATE fkpart11.pk SET a = 1 WHERE a = 2; +NOTICE: TABLE: pk12, OP: DELETE, OLD: (xxx,2), NEW: +NOTICE: TABLE: pk11, OP: INSERT, OLD: , NEW: (xxx,1) +NOTICE: TABLE: fk1, OP: UPDATE, OLD: (2), NEW: (1) +DROP SCHEMA fkpart11 CASCADE; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table fkpart11.pk +drop cascades to table fkpart11.fk_parted +drop cascades to table fkpart11.fk_another +drop cascades to function fkpart11.print_row() diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index de417b62b6..6c69a69c7a 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1820,12 +1820,145 @@ CREATE SCHEMA fkpart10 CREATE TABLE tbl1(f1 int PRIMARY KEY) PARTITION BY RANGE(f1) CREATE TABLE tbl1_p1 PARTITION OF tbl1 FOR VALUES FROM (minvalue) TO (1) CREATE TABLE tbl1_p2 PARTITION OF tbl1 FOR VALUES FROM (1) TO (maxvalue) - CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED); + CREATE TABLE tbl2(f1 int REFERENCES tbl1 DEFERRABLE INITIALLY DEFERRED) + CREATE TABLE tbl3(f1 int PRIMARY KEY) PARTITION BY RANGE(f1) + CREATE TABLE tbl3_p1 PARTITION OF tbl3 FOR VALUES FROM (minvalue) TO (1) + CREATE TABLE tbl3_p2 PARTITION OF tbl3 FOR VALUES FROM (1) TO (maxvalue) + CREATE TABLE tbl4(f1 int REFERENCES tbl3 DEFERRABLE INITIALLY DEFERRED); INSERT INTO fkpart10.tbl1 VALUES (0), (1); INSERT INTO fkpart10.tbl2 VALUES (0), (1); +INSERT INTO fkpart10.tbl3 VALUES (-2), (-1), (0); +INSERT INTO fkpart10.tbl4 VALUES (-2), (-1); BEGIN; DELETE FROM fkpart10.tbl1 WHERE f1 = 0; UPDATE fkpart10.tbl1 SET f1 = 2 WHERE f1 = 1; INSERT INTO fkpart10.tbl1 VALUES (0), (1); COMMIT; + +-- test that cross-partition updates correctly enforces the foreign key +-- restriction (specifically testing INITIAILLY DEFERRED) +BEGIN; +UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0; +UPDATE fkpart10.tbl3 SET f1 = f1 * -1; +INSERT INTO fkpart10.tbl1 VALUES (4); +COMMIT; + +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -1; +UPDATE fkpart10.tbl3 SET f1 = f1 + 3; +UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0; +INSERT INTO fkpart10.tbl1 VALUES (0); +COMMIT; + +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -1; +UPDATE fkpart10.tbl1 SET f1 = 3 WHERE f1 = 0; +INSERT INTO fkpart10.tbl1 VALUES (0); +INSERT INTO fkpart10.tbl3 VALUES (-2), (-1); +COMMIT; + +-- test where the updated table now has both an IMMEDIATE and a DEFERRED +-- constraint pointing into it +CREATE TABLE fkpart10.tbl5(f1 int REFERENCES fkpart10.tbl3); +INSERT INTO fkpart10.tbl5 VALUES (-2), (-1); +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -3; +COMMIT; + +-- Now test where the row referenced from the table with an IMMEDIATE +-- constraint stays in place, while those referenced from the table with a +-- DEFERRED constraint don't. +DELETE FROM fkpart10.tbl5; +INSERT INTO fkpart10.tbl5 VALUES (0); +BEGIN; +UPDATE fkpart10.tbl3 SET f1 = f1 * -3; +COMMIT; + DROP SCHEMA fkpart10 CASCADE; + +-- verify foreign keys are enforced during cross-partition updates, +-- especially on the PK side +CREATE SCHEMA fkpart11 + CREATE TABLE pk (a INT PRIMARY KEY, b text) PARTITION BY LIST (a) + CREATE TABLE fk ( + a INT, + CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE + ) + CREATE TABLE fk_parted ( + a INT PRIMARY KEY, + CONSTRAINT fkey FOREIGN KEY (a) REFERENCES pk(a) ON UPDATE CASCADE ON DELETE CASCADE + ) PARTITION BY LIST (a) + CREATE TABLE fk_another ( + a INT, + CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fk_parted (a) ON UPDATE CASCADE ON DELETE CASCADE + ) + CREATE TABLE pk1 PARTITION OF pk FOR VALUES IN (1, 2) PARTITION BY LIST (a) + CREATE TABLE pk2 PARTITION OF pk FOR VALUES IN (3) + CREATE TABLE pk3 PARTITION OF pk FOR VALUES IN (4) + CREATE TABLE fk1 PARTITION OF fk_parted FOR VALUES IN (1, 2) + CREATE TABLE fk2 PARTITION OF fk_parted FOR VALUES IN (3) + CREATE TABLE fk3 PARTITION OF fk_parted FOR VALUES IN (4); +CREATE TABLE fkpart11.pk11 (b text, a int NOT NULL); +ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk11 FOR VALUES IN (1); +CREATE TABLE fkpart11.pk12 (b text, c int, a int NOT NULL); +ALTER TABLE fkpart11.pk12 DROP c; +ALTER TABLE fkpart11.pk1 ATTACH PARTITION fkpart11.pk12 FOR VALUES IN (2); +INSERT INTO fkpart11.pk VALUES (1, 'xxx'), (3, 'yyy'); +INSERT INTO fkpart11.fk VALUES (1), (3); +INSERT INTO fkpart11.fk_parted VALUES (1), (3); +INSERT INTO fkpart11.fk_another VALUES (1), (3); +-- moves 2 rows from one leaf partition to another, with both updates being +-- cascaded to fk and fk_parted. Updates of fk_parted, of which one is +-- cross-partition (3 -> 4), are further cascaded to fk_another. +UPDATE fkpart11.pk SET a = a + 1 RETURNING tableoid::pg_catalog.regclass, *; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another; + +-- let's try with the foreign key pointing at tables in the partition tree +-- that are not the same as the query's target table + +-- 1. foreign key pointing into a non-root ancestor +-- +-- A cross-partition update on the root table will fail, because we currently +-- can't enforce the foreign keys pointing into a non-leaf partition +ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey; +DELETE FROM fkpart11.fk WHERE a = 4; +ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk1 (a) ON UPDATE CASCADE ON DELETE CASCADE; +UPDATE fkpart11.pk SET a = a - 1; +-- it's okay though if the non-leaf partition is updated directly +UPDATE fkpart11.pk1 SET a = a - 1; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.pk; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_parted; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk_another; + +-- 2. foreign key pointing into a single leaf partition +-- +-- A cross-partition update that deletes from the pointed-to leaf partition +-- is allowed to succeed +ALTER TABLE fkpart11.fk DROP CONSTRAINT fkey; +ALTER TABLE fkpart11.fk ADD CONSTRAINT fkey FOREIGN KEY (a) REFERENCES fkpart11.pk11 (a) ON UPDATE CASCADE ON DELETE CASCADE; +-- will delete (1) from p11 which is cascaded to fk +UPDATE fkpart11.pk SET a = a + 1 WHERE a = 1; +SELECT tableoid::pg_catalog.regclass, * FROM fkpart11.fk; +DROP TABLE fkpart11.fk; + +-- check that regular and deferrable AR triggers on the PK tables +-- still work as expected +CREATE FUNCTION fkpart11.print_row () RETURNS TRIGGER LANGUAGE plpgsql AS $$ + BEGIN + RAISE NOTICE 'TABLE: %, OP: %, OLD: %, NEW: %', TG_RELNAME, TG_OP, OLD, NEW; + RETURN NULL; + END; +$$; +CREATE TRIGGER trig_upd_pk AFTER UPDATE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE TRIGGER trig_del_pk AFTER DELETE ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE TRIGGER trig_ins_pk AFTER INSERT ON fkpart11.pk FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE CONSTRAINT TRIGGER trig_upd_fk_parted AFTER UPDATE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE CONSTRAINT TRIGGER trig_del_fk_parted AFTER DELETE ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +CREATE CONSTRAINT TRIGGER trig_ins_fk_parted AFTER INSERT ON fkpart11.fk_parted INITIALLY DEFERRED FOR EACH ROW EXECUTE FUNCTION fkpart11.print_row(); +UPDATE fkpart11.pk SET a = 3 WHERE a = 4; +UPDATE fkpart11.pk SET a = 1 WHERE a = 2; + +DROP SCHEMA fkpart11 CASCADE; -- 2.24.1