From 365409b9d7cf723a65b832804bc5002d83ae15d5 Mon Sep 17 00:00:00 2001 From: amit Date: Wed, 16 Aug 2017 11:36:14 +0900 Subject: [PATCH 1/3] Relieve RelationGetPartitionDispatchInfo() of doing any locking Anyone who wants to call RelationGetPartitionDispatchInfo() must first acquire locks using find_all_inheritors. Doing it this way gets rid of the possibility of a deadlock when partitions are concurrently locked, because RelationGetPartitionDispatchInfo would lock the partitions in one order and find_all_inheritors would in another. Reported-by: Amit Khandekar, Robert Haas Reports: https://postgr.es/m/CAJ3gD9fdjk2O8aPMXidCeYeB-mFB%3DwY9ZLfe8cQOfG4bTqVGyQ%40mail.gmail.com https://postgr.es/m/CA%2BTgmobwbh12OJerqAGyPEjb_%2B2y7T0nqRKTcjed6L4NTET6Fg%40mail.gmail.com --- src/backend/catalog/partition.c | 55 ++++++++++++++++++++++------------------- src/backend/executor/execMain.c | 10 +++++--- src/include/catalog/partition.h | 3 +-- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index c1a307c8d3..96a64ce6b2 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -999,12 +999,16 @@ get_partition_qual_relid(Oid relid) * RelationGetPartitionDispatchInfo * Returns information necessary to route tuples down a partition tree * - * All the partitions will be locked with lockmode, unless it is NoLock. - * A list of the OIDs of all the leaf partitions of rel is returned in - * *leaf_part_oids. + * The number of elements in the returned array (that is, the number of + * PartitionDispatch objects for the partitioned tables in the partition tree) + * is returned in *num_parted and a list of the OIDs of all the leaf + * partitions of rel is returned in *leaf_part_oids. + * + * All the relations in the partition tree (including 'rel') must have been + * locked (using at least the AccessShareLock) by the caller. */ PartitionDispatch * -RelationGetPartitionDispatchInfo(Relation rel, int lockmode, +RelationGetPartitionDispatchInfo(Relation rel, int *num_parted, List **leaf_part_oids) { PartitionDispatchData **pd; @@ -1019,14 +1023,18 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode, offset; /* - * Lock partitions and make a list of the partitioned ones to prepare - * their PartitionDispatch objects below. + * We rely on the relcache to traverse the partition tree to build both + * the leaf partition OIDs list and the array of PartitionDispatch objects + * for the partitioned tables in the tree. That means every partitioned + * table in the tree must be locked, which is fine since we require the + * caller to lock all the partitions anyway. * - * Cannot use find_all_inheritors() here, because then the order of OIDs - * in parted_rels list would be unknown, which does not help, because we - * assign indexes within individual PartitionDispatch in an order that is - * predetermined (determined by the order of OIDs in individual partition - * descriptors). + * For every partitioned table in the tree, starting with the root + * partitioned table, add its relcache entry to parted_rels, while also + * queuing its partitions (in the order in which they appear in the + * partition descriptor) to be looked at later in the same loop. This is + * a bit tricky but works because the foreach() macro doesn't fetch the + * next list element until the bottom of the loop. */ *num_parted = 1; parted_rels = list_make1(rel); @@ -1035,29 +1043,24 @@ RelationGetPartitionDispatchInfo(Relation rel, int lockmode, APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents); forboth(lc1, all_parts, lc2, all_parents) { - Relation partrel = heap_open(lfirst_oid(lc1), lockmode); + Oid partrelid = lfirst_oid(lc1); Relation parent = lfirst(lc2); - PartitionDesc partdesc = RelationGetPartitionDesc(partrel); - /* - * If this partition is a partitioned table, add its children to the - * end of the list, so that they are processed as well. - */ - if (partdesc) + if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE) { + /* + * Already locked by the caller. Note that it is the + * responsibility of the caller to close the below relcache entry, + * once done using the information being collected here (for + * example, in ExecEndModifyTable). + */ + Relation partrel = heap_open(partrelid, NoLock); + (*num_parted)++; parted_rels = lappend(parted_rels, partrel); parted_rel_parents = lappend(parted_rel_parents, parent); APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents); } - else - heap_close(partrel, NoLock); - - /* - * We keep the partitioned ones open until we're done using the - * information being collected here (for example, see - * ExecEndModifyTable). - */ } /* diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 6671a25ffb..74071eede6 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -43,6 +43,7 @@ #include "access/xact.h" #include "catalog/namespace.h" #include "catalog/partition.h" +#include "catalog/pg_inherits_fn.h" #include "catalog/pg_publication.h" #include "commands/matview.h" #include "commands/trigger.h" @@ -3249,9 +3250,12 @@ ExecSetupPartitionTupleRouting(Relation rel, int i; ResultRelInfo *leaf_part_rri; - /* Get the tuple-routing information and lock partitions */ - *pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, num_parted, - &leaf_parts); + /* + * Get the information about the partition tree after locking all the + * partitions. + */ + (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL); + *pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts); *num_partitions = list_length(leaf_parts); *partitions = (ResultRelInfo *) palloc(*num_partitions * sizeof(ResultRelInfo)); diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index bef7a0f5fb..2283c675e9 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -88,8 +88,7 @@ extern Expr *get_partition_qual_relid(Oid relid); /* For tuple routing */ extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel, - int lockmode, int *num_parted, - List **leaf_part_oids); + int *num_parted, List **leaf_part_oids); extern void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, -- 2.11.0