From e9850f583ad1088d0c36f68b48ed9129ac3fde58 Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Fri, 18 Mar 2016 19:48:49 +0900
Subject: [PATCH 6/7] Introduce tuple routing for partitioned tables.

A tuple inserted into a partitioned table using insert or copy is now
routed to the appropriate leaf partition, recursively.

State required for the same is built in advance by BeginCopy() and
ExecInitModifyTable().  It consists of N ResultRelInfos and pointers
to TupleConversionMap structs, where N is the number of leaf partitions
in the multi-level partition tree. One needs only to get a pointer to the
root PartitionDescNode node for the tree. A tuple conversion map is only
allocated if the tuple descriptor of a leaf partition differs from that
of the root table.  Partition check expression is not initialized for
leaf partition result relations, as they are redundant in this case.
Whether or not to build one is now determined by new parameter
load_partition_check of InitResultRelInfo().

CopyFrom() or ExecInsert() passes the root node (set up initially, as
just mentioned) along with the tuple slot to get_partition_for_tuple()
which traverses the tree to determine the appropriate leaf partition for
the tuple.  It returns the index (0..N) of the leaf partition.  Caller
then switches the result relation (ResultRelInfo) appropriately and
performs the tuple conversion, if required, before following up with
the row triggers, constraints, actual heap and index insertion.

* Following requires further consideration:
Both CopyFrom() and ExecInsert() switch to the partition before invoking
BR triggers and back after AR trigger are processed. That ignores parent's
triggers completely.  Note that those triggers are not required to be present
in each partition.  OTOH, constraints are fine (both check and not null),
since parent's constraints are present in partitions.
---
 src/backend/catalog/partition.c        |  324 ++++++++++++++++++++++++++++++++
 src/backend/commands/copy.c            |  113 +++++++++++-
 src/backend/commands/tablecmds.c       |    1 +
 src/backend/executor/execMain.c        |   44 +++++-
 src/backend/executor/nodeModifyTable.c |   96 ++++++++++
 src/backend/parser/analyze.c           |    8 +
 src/include/catalog/partition.h        |   24 +++
 src/include/executor/executor.h        |    6 +
 src/include/nodes/execnodes.h          |   10 +
 9 files changed, 621 insertions(+), 5 deletions(-)

diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index eb27937..f669932 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -96,6 +96,21 @@ static Node *generate_partition_check_expr(Relation rel);
 static PartitionDescNode GetPartitionDescNodeRecurse(Relation rel, int offset);
 static int get_leaf_partition_count(PartitionDescNode pdnode);
 
+static PartitionKeyExecInfo *BuildPartitionKeyExecInfo(Relation rel);
+static void FormPartitionKeyDatum(PartitionKeyExecInfo *pkinfo,
+							TupleTableSlot *slot,
+							EState *estate,
+							Datum *values,
+							bool *isnull);
+static int list_partition_for_tuple(PartitionKey key, PartitionDesc pdesc,
+							Datum value, bool isnull);
+static int range_partition_for_tuple(PartitionKey key, PartitionDesc pdesc,
+							Datum *values);
+static int range_partition_bsearch(PartitionKey key, PartitionDesc pdesc,
+						Datum *values);
+static bool rightof(PartitionKey key, Datum *values, PartitionRangeBound *bound);
+static bool leftof(PartitionKey key, Datum *values, PartitionRangeBound *bound);
+
 /*
  * StorePartitionKey
  *		Store the partition keys of rel into pg_partitioned catalog
@@ -1618,6 +1633,7 @@ GetPartitionDescNodeRecurse(Relation rel, int offset)
 
 	/* First build our own node */
 	root = (PartitionDescNode) palloc0(sizeof(PartitionDescNodeData));
+	root->pkinfo = BuildPartitionKeyExecInfo(rel);
 	root->pdesc = pdesc;
 	root->relid = RelationGetRelid(rel);
 	root->offset = offset;
@@ -1735,3 +1751,311 @@ get_leaf_partition_oids(PartitionDescNode pdnode)
 
 	return result;
 }
+
+/* ----------------
+ *	BuildPartitionKeyExecInfo
+ *		Construct a list of PartitionKeyExecInfo records for an open
+ *		relation
+ *
+ * PartitionKeyExecInfo stores the information about the partition key
+ * that's needed when inserting tuples into a partitioned table; especially,
+ * partition key expression state if there are any expression columns in
+ * the partition key.  Normally we build a PartitionKeyExecInfo for a
+ * partitioned table just once per command, and then use it for (potentially)
+ * many tuples.
+ * ----------------
+ */
+static PartitionKeyExecInfo *
+BuildPartitionKeyExecInfo(Relation rel)
+{
+	PartitionKeyExecInfo   *pkinfo;
+
+	pkinfo = (PartitionKeyExecInfo *) palloc0(sizeof(PartitionKeyExecInfo));
+	pkinfo->pi_Key = CopyPartitionKey(rel->rd_partkey);
+	pkinfo->pi_ExpressionState = NIL;
+
+	return pkinfo;
+}
+
+/*
+ * get_partition_for_tuple
+ *		Recursively finds the "leaf" partition for tuple (slot)
+ *
+ * Returns -1 if no partition is found and sets *failed_at to the OID of
+ * the partitioned table whose partition was not found.
+ */
+int
+get_partition_for_tuple(PartitionDescNode pdnode,
+						TupleTableSlot *slot,
+						EState *estate,
+						Oid *failed_at)
+{
+	PartitionKeyExecInfo   *pkinfo = pdnode->pkinfo;
+	PartitionDescNode		node;
+	Datum	values[PARTITION_MAX_KEYS];
+	bool	isnull[PARTITION_MAX_KEYS];
+	int		i;
+	int		index;
+
+	/* Guard against stack overflow due to overly deep partition tree */
+	check_stack_depth();		
+
+	if (pdnode->pdesc->nparts == 0)
+	{
+		*failed_at = pdnode->relid;
+		return -1;
+	}
+
+	/* Extract partition key from tuple */
+	Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+	FormPartitionKeyDatum(pkinfo, slot, estate, values, isnull);
+
+	/* Disallow nulls, if range partition key */
+	for (i = 0; i < pkinfo->pi_Key->partnatts; i++)
+		if (isnull[i] && pkinfo->pi_Key->strategy == PARTITION_STRAT_RANGE)
+			ereport(ERROR,
+					(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+					 errmsg("range partition key contains null")));
+
+	switch (pkinfo->pi_Key->strategy)
+	{
+		case PARTITION_STRAT_LIST:
+			index = list_partition_for_tuple(pkinfo->pi_Key, pdnode->pdesc,
+											 values[0], isnull[0]);
+			break;
+
+		case PARTITION_STRAT_RANGE:
+			index = range_partition_for_tuple(pkinfo->pi_Key, pdnode->pdesc,
+											  values);
+			break;
+	}
+
+	/* Don't recurse, if no partition exists for the key at this level... */
+	if (index < 0)
+	{
+		*failed_at = pdnode->relid;
+		return index;
+	}
+
+	/* ... or, if the index'th partition is a leaf partition. */
+	if (!relid_is_partitioned(pdnode->pdesc->oids[index]))
+	{
+		PartitionDescNode	prev;
+
+		/* Patiently determine own leaf partition index */
+		prev = node = pdnode->downlink;
+		if (node && node->index < index)
+		{
+			while (node)
+			{
+				if (node->index > index)
+				{
+					node = prev;
+					break;
+				}
+
+				prev = node;
+				node = node->next;
+			}
+
+			if (!node)
+				node = prev;
+
+			/*
+			 * Account for siblings on left that are partition trees and those
+			 * that are leaf partitions themsleves.
+			 */
+			return node->offset + node->num_leaf_partitions +
+										(index - node->index - 1);
+		}
+		else
+			/* Only leaf partitions in pdnode->offset..index */
+			return pdnode->offset + index;
+
+	}
+
+	/* Recurse, by locating the index'th partition's node */
+	node = pdnode->downlink;
+	while (node->next != NULL && node->index != index)
+		node = node->next;
+	Assert (node != NULL);
+
+	return get_partition_for_tuple(node, slot, estate, failed_at);
+}
+
+/*
+ * FormPartitionKeyDatum
+ *		Construct values[] and isnull[] arrays for partition key columns
+ */
+static void
+FormPartitionKeyDatum(PartitionKeyExecInfo *pkinfo,
+					  TupleTableSlot *slot,
+					  EState *estate,
+					  Datum *values,
+					  bool *isnull)
+{
+	ListCell   *partexpr_item;
+	int			i;
+
+	if (pkinfo->pi_Key->partexprs != NIL && pkinfo->pi_ExpressionState == NIL)
+	{
+		/* First time through, set up expression evaluation state */
+		pkinfo->pi_ExpressionState = (List *)
+			ExecPrepareExpr((Expr *) pkinfo->pi_Key->partexprs,
+							estate);
+		/* Check caller has set up context correctly */
+		Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+	}
+
+	partexpr_item = list_head(pkinfo->pi_ExpressionState);
+	for (i = 0; i < pkinfo->pi_Key->partnatts; i++)
+	{
+		AttrNumber	keycol = pkinfo->pi_Key->partattrs[i];
+		Datum		pkDatum;
+		bool		isNull;
+
+		if (keycol != 0)
+		{
+			/* Plain column; get the value directly from the heap tuple */
+			pkDatum = slot_getattr(slot, keycol, &isNull);
+		}
+		else
+		{
+			/* Expression; need to evaluate it */
+			if (partexpr_item == NULL)
+				elog(ERROR, "wrong number of partition key expressions");
+			pkDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
+											   GetPerTupleExprContext(estate),
+											   &isNull,
+											   NULL);
+			partexpr_item = lnext(partexpr_item);
+		}
+		values[i] = pkDatum;
+		isnull[i] = isNull;
+	}
+
+	if (partexpr_item != NULL)
+		elog(ERROR, "wrong number of partition key expressions");
+}
+
+/*
+ * list_partition_for_tuple
+ *		Find the list partition for a tuple
+ *
+ * Returns -1 if none found.
+ */
+static int
+list_partition_for_tuple(PartitionKey key, PartitionDesc pdesc,
+						 Datum value, bool isnull)
+{
+	int			i;
+
+	Assert(pdesc->nparts > 0);
+
+	for (i = 0; i < pdesc->nparts; i++)
+	{
+		int		j;
+
+		if (isnull)
+		{
+			if (pdesc->lists[i]->contains_null)
+				return i;
+
+			continue;
+		}
+
+		for (j = 0; j < pdesc->lists[i]->nvalues; j++)
+		{
+			int32	cmpval;
+
+			cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+								   key->tcinfo->typcoll[0],
+								   pdesc->lists[i]->values[j],
+								   value));
+			if (!cmpval)
+				return i;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * range_partition_for_tuple
+ *		Search the range partition for a range key ('values')
+ *
+ * Returns -1 if none found.
+ */
+static int
+range_partition_for_tuple(PartitionKey key, PartitionDesc pdesc, Datum *values)
+{
+	Assert(pdesc->nparts > 0);
+
+	return range_partition_bsearch(key, pdesc, values);
+}
+
+/*
+ * range_partition_bsearch
+ *		Workhorse of range_partition_for_tuple
+ */
+static int
+range_partition_bsearch(PartitionKey key, PartitionDesc pdesc,
+						Datum *values)
+{
+	int		low, high;
+
+	/* Good ol' bsearch */
+	low = 0;
+	high = pdesc->nparts - 1;
+	while (low <= high)
+	{
+		int		idx = (low + high) / 2;
+
+		if (pdesc->rangeuppers[idx]->infinite)
+		{
+			if (rightof(key, values, pdesc->rangelowers[idx]))
+				return idx;
+
+			break;
+		}
+		else if (leftof(key, values, pdesc->rangeuppers[idx]))
+		{
+			if (pdesc->rangelowers[idx]->infinite)
+				return idx;
+
+			if (rightof(key, values, pdesc->rangelowers[idx]))
+				return idx;
+
+			high = idx - 1;
+			continue;
+		}
+
+		low = idx + 1;
+	}
+
+	return -1;
+}
+
+/* Does range key lie to the right of partition bound */
+static bool
+rightof(PartitionKey key, Datum *values, PartitionRangeBound *bound)
+{
+	int32	cmpval = compare_range_keys(key, values, bound->val);
+
+	if (!cmpval)
+		return bound->lower ? bound->inclusive : !bound->inclusive;
+
+	return cmpval > 0;
+}
+
+/* Does range key lie to the left of partition bound */
+static bool
+leftof(PartitionKey key, Datum *values, PartitionRangeBound *bound)
+{
+	int32	cmpval = compare_range_keys(key, values, bound->val);
+
+	if (!cmpval)
+		return !bound->lower ? bound->inclusive : !bound->inclusive;
+
+	return cmpval < 0;
+}
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 14e81de..0b788b1 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -161,6 +161,10 @@ typedef struct CopyStateData
 	ExprState **defexprs;		/* array of default att expressions */
 	bool		volatile_defexprs;		/* is any of defexprs volatile? */
 	List	   *range_table;
+	PartitionDescNode		pdnode;	/* partition descriptor node tree */
+	ResultRelInfo		   *partitions;
+	TupleConversionMap	  **partition_tupconv_maps;
+	int						num_partitions;
 
 	/*
 	 * These variables are used to reduce overhead in textual COPY FROM.
@@ -1364,6 +1368,52 @@ BeginCopy(bool is_from,
 					(errcode(ERRCODE_UNDEFINED_COLUMN),
 					 errmsg("table \"%s\" does not have OIDs",
 							RelationGetRelationName(cstate->rel))));
+
+		/* Initialize state for CopyFrom tuple routing */
+		if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_REL)
+		{
+			List		   *leaf_part_oids;
+			ListCell	   *cell;
+			int				i;
+			int				num_leaf_parts;
+			ResultRelInfo  *leaf_rel_rri;
+
+			cstate->pdnode = RelationGetPartitionDescNode(rel);
+			leaf_part_oids = get_leaf_partition_oids(cstate->pdnode);
+			num_leaf_parts = list_length(leaf_part_oids);
+
+			cstate->num_partitions = num_leaf_parts;
+			cstate->partitions = (ResultRelInfo *)
+								palloc0(num_leaf_parts * sizeof(ResultRelInfo));
+			cstate->partition_tupconv_maps = (TupleConversionMap **)
+						palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+			leaf_rel_rri = cstate->partitions;
+			i = 0;
+			foreach(cell, leaf_part_oids)
+			{
+				Relation	leaf_rel;
+
+				leaf_rel = heap_open(lfirst_oid(cell), RowExclusiveLock);
+				InitResultRelInfo(leaf_rel_rri,
+								  leaf_rel,
+								  1,		/* dummy */
+								  false,	/* no need for partition check */
+								  0);
+
+				/* Open partition indices */
+				ExecOpenIndices(leaf_rel_rri, false);
+
+				if (!equalTupleDescs(tupDesc, RelationGetDescr(leaf_rel)))
+					cstate->partition_tupconv_maps[i] =
+								convert_tuples_by_name(tupDesc,
+									RelationGetDescr(leaf_rel),
+									gettext_noop("could not convert row type"));
+
+				leaf_rel_rri++;
+				i++;
+			}
+		}
 	}
 	else
 	{
@@ -1659,6 +1709,8 @@ ClosePipeToProgram(CopyState cstate)
 static void
 EndCopy(CopyState cstate)
 {
+	int		i;
+
 	if (cstate->is_program)
 	{
 		ClosePipeToProgram(cstate);
@@ -1672,6 +1724,18 @@ EndCopy(CopyState cstate)
 							cstate->filename)));
 	}
 
+	/* Close all partitions and indices thereof */
+	for (i = 0; i < cstate->num_partitions; i++)
+	{
+		ResultRelInfo *resultRelInfo = cstate->partitions + i;
+
+		ExecCloseIndices(resultRelInfo);
+		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+
+		if (cstate->partition_tupconv_maps[i])
+			pfree(cstate->partition_tupconv_maps[i]);
+	}
+
 	MemoryContextDelete(cstate->copycontext);
 	pfree(cstate);
 }
@@ -2216,6 +2280,7 @@ CopyFrom(CopyState cstate)
 	Datum	   *values;
 	bool	   *nulls;
 	ResultRelInfo *resultRelInfo;
+	ResultRelInfo *saved_resultRelInfo = NULL;
 	EState	   *estate = CreateExecutorState(); /* for ExecConstraints() */
 	ExprContext *econtext;
 	TupleTableSlot *myslot;
@@ -2236,7 +2301,8 @@ CopyFrom(CopyState cstate)
 
 	Assert(cstate->rel);
 
-	if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
+	if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+		cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_REL)
 	{
 		if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
 			ereport(ERROR,
@@ -2344,6 +2410,7 @@ CopyFrom(CopyState cstate)
 	InitResultRelInfo(resultRelInfo,
 					  cstate->rel,
 					  1,		/* dummy rangetable index */
+					  true,		/* do load partition check expression */
 					  0);
 
 	ExecOpenIndices(resultRelInfo, false);
@@ -2371,6 +2438,7 @@ CopyFrom(CopyState cstate)
 	if ((resultRelInfo->ri_TrigDesc != NULL &&
 		 (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
 		  resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+		cstate->pdnode != NULL ||
 		cstate->volatile_defexprs)
 	{
 		useHeapMultiInsert = false;
@@ -2395,7 +2463,11 @@ CopyFrom(CopyState cstate)
 	values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
 	nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
 
-	bistate = GetBulkInsertState();
+	if (useHeapMultiInsert)
+		bistate = GetBulkInsertState();
+	else
+		bistate = NULL;
+
 	econtext = GetPerTupleExprContext(estate);
 
 	/* Set up callback to identify error line number */
@@ -2447,6 +2519,31 @@ CopyFrom(CopyState cstate)
 		slot = myslot;
 		ExecStoreTuple(tuple, slot, InvalidBuffer, false);
 
+		/* Determine the partition */
+		saved_resultRelInfo = resultRelInfo;
+		if (cstate->pdnode)
+		{
+			int		i_leaf_partition;
+			TupleConversionMap *map;
+
+			econtext->ecxt_scantuple = slot;
+			i_leaf_partition = ExecFindPartition(resultRelInfo,
+												 cstate->pdnode,
+												 slot,
+												 estate);
+			Assert(i_leaf_partition >= 0 &&
+				   i_leaf_partition < cstate->num_partitions);
+
+			resultRelInfo = cstate->partitions + i_leaf_partition;
+			estate->es_result_relation_info = resultRelInfo;
+
+			map = cstate->partition_tupconv_maps[i_leaf_partition];
+			if (map)
+				tuple = do_convert_tuple(tuple, map);
+
+			tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+		}
+
 		skip_tuple = false;
 
 		/* BEFORE ROW INSERT Triggers */
@@ -2497,7 +2594,8 @@ CopyFrom(CopyState cstate)
 				List	   *recheckIndexes = NIL;
 
 				/* OK, store the tuple and create index entries for it */
-				heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
+				heap_insert(resultRelInfo->ri_RelationDesc,
+							tuple, mycid, hi_options, bistate);
 
 				if (resultRelInfo->ri_NumIndices > 0)
 					recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
@@ -2517,6 +2615,12 @@ CopyFrom(CopyState cstate)
 			 * tuples inserted by an INSERT command.
 			 */
 			processed++;
+
+			if (saved_resultRelInfo)
+			{
+				resultRelInfo = saved_resultRelInfo;
+				estate->es_result_relation_info = resultRelInfo;
+			}
 		}
 	}
 
@@ -2530,7 +2634,8 @@ CopyFrom(CopyState cstate)
 	/* Done, clean up */
 	error_context_stack = errcallback.previous;
 
-	FreeBulkInsertState(bistate);
+	if (bistate)
+		FreeBulkInsertState(bistate);
 
 	MemoryContextSwitchTo(oldcontext);
 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b5a1136..bf76013 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1242,6 +1242,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 		InitResultRelInfo(resultRelInfo,
 						  rel,
 						  0,	/* dummy rangetable index */
+						  false,
 						  0);
 		resultRelInfo++;
 	}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index f7a23d9..0bb3fb7 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -826,6 +826,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
 			InitResultRelInfo(resultRelInfo,
 							  resultRelation,
 							  resultRelationIndex,
+							  true,
 							  estate->es_instrument);
 			resultRelInfo++;
 		}
@@ -1215,6 +1216,7 @@ void
 InitResultRelInfo(ResultRelInfo *resultRelInfo,
 				  Relation resultRelationDesc,
 				  Index resultRelationIndex,
+				  bool load_partition_check,
 				  int instrument_options)
 {
 	MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
@@ -1252,7 +1254,8 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 	resultRelInfo->ri_ConstraintExprs = NULL;
 	resultRelInfo->ri_junkFilter = NULL;
 	resultRelInfo->ri_projectReturning = NULL;
-	resultRelInfo->ri_PartitionCheck = (Expr *)
+	if (load_partition_check)
+		resultRelInfo->ri_PartitionCheck = (Expr *)
 						RelationGetPartitionCheckExpr(resultRelationDesc);
 }
 
@@ -1316,6 +1319,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
 	InitResultRelInfo(rInfo,
 					  rel,
 					  0,		/* dummy rangetable index */
+					  true,
 					  estate->es_instrument);
 	estate->es_trig_target_relations =
 		lappend(estate->es_trig_target_relations, rInfo);
@@ -2988,3 +2992,41 @@ EvalPlanQualEnd(EPQState *epqstate)
 	epqstate->planstate = NULL;
 	epqstate->origslot = NULL;
 }
+
+int
+ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDescNode pdnode,
+				  TupleTableSlot *slot, EState *estate)
+{
+	int		i_leaf_partition;
+	Oid		failed_at;
+
+	i_leaf_partition = get_partition_for_tuple(pdnode, slot, estate,
+											   &failed_at);
+
+	if (i_leaf_partition < 0)
+	{
+		Relation	rel = resultRelInfo->ri_RelationDesc;
+		char	   *val_desc;
+		Bitmapset  *insertedCols,
+				   *updatedCols,
+				   *modifiedCols;
+		TupleDesc	tupDesc = RelationGetDescr(rel);
+
+		insertedCols = GetInsertedColumns(resultRelInfo, estate);
+		updatedCols = GetUpdatedColumns(resultRelInfo, estate);
+		modifiedCols = bms_union(insertedCols, updatedCols);
+		val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
+												 slot,
+												 tupDesc,
+												 modifiedCols,
+												 64);
+		Assert(OidIsValid(failed_at));
+		ereport(ERROR,
+				(errcode(ERRCODE_CHECK_VIOLATION),
+				 errmsg("no partition of relation \"%s\" found for row",
+						get_rel_name(failed_at)),
+		  val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
+	}
+
+	return i_leaf_partition;
+}
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 6ecb6be..581112d 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -243,6 +243,7 @@ ExecInsert(ModifyTableState *mtstate,
 {
 	HeapTuple	tuple;
 	ResultRelInfo *resultRelInfo;
+	ResultRelInfo *saved_resultRelInfo = NULL;
 	Relation	resultRelationDesc;
 	Oid			newId;
 	List	   *recheckIndexes = NIL;
@@ -257,6 +258,31 @@ ExecInsert(ModifyTableState *mtstate,
 	 * get information on the (current) result relation
 	 */
 	resultRelInfo = estate->es_result_relation_info;
+
+	saved_resultRelInfo = resultRelInfo;
+
+	if (mtstate->mt_partition_node)
+	{
+		int		i_leaf_partition;
+		ExprContext *econtext = GetPerTupleExprContext(estate);
+		TupleConversionMap *map;
+
+		econtext->ecxt_scantuple = slot;
+		i_leaf_partition = ExecFindPartition(resultRelInfo,
+											 mtstate->mt_partition_node,
+											 slot,
+											 estate);
+		Assert(i_leaf_partition >= 0 &&
+			   i_leaf_partition < mtstate->mt_num_partitions);
+
+		resultRelInfo = mtstate->mt_partitions + i_leaf_partition;
+		estate->es_result_relation_info = resultRelInfo;
+
+		map = mtstate->mt_partition_tupconv_maps[i_leaf_partition];
+		if (map)
+			tuple = do_convert_tuple(tuple, map);
+	}
+
 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
 
 	/*
@@ -496,6 +522,12 @@ ExecInsert(ModifyTableState *mtstate,
 
 	list_free(recheckIndexes);
 
+	if (saved_resultRelInfo)
+	{
+		resultRelInfo = saved_resultRelInfo;
+		estate->es_result_relation_info = resultRelInfo;
+	}
+
 	/*
 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
 	 * required to do this after testing all constraints and uniqueness
@@ -1550,6 +1582,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	Plan	   *subplan;
 	ListCell   *l;
 	int			i;
+	Relation	rel;
 
 	/* check for unsupported flags */
 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -1640,6 +1673,57 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 
 	estate->es_result_relation_info = saved_resultRelInfo;
 
+	/* Build state for INSERT tuple routing */
+	rel = mtstate->resultRelInfo->ri_RelationDesc;
+	if (operation == CMD_INSERT && rel->rd_rel->relkind == RELKIND_PARTITIONED_REL)
+	{
+		int					i,
+							num_leaf_parts;
+		List			   *leaf_part_oids;
+		ListCell		   *cell;
+		ResultRelInfo	   *leaf_rel_rri;
+
+		mtstate->mt_partition_node = RelationGetPartitionDescNode(rel);
+		leaf_part_oids = get_leaf_partition_oids(mtstate->mt_partition_node);
+		num_leaf_parts = list_length(leaf_part_oids);
+
+		mtstate->mt_num_partitions = num_leaf_parts;
+		mtstate->mt_partitions = (ResultRelInfo *)
+						palloc0(num_leaf_parts * sizeof(ResultRelInfo));
+		mtstate->mt_partition_tupconv_maps = (TupleConversionMap **)
+					palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+		leaf_rel_rri = mtstate->mt_partitions;
+		i = 0;
+		foreach(cell, leaf_part_oids)
+		{
+			Relation	leaf_rel;
+
+			leaf_rel = heap_open(lfirst_oid(cell), RowExclusiveLock);
+			InitResultRelInfo(leaf_rel_rri,
+							  leaf_rel,
+							  1,		/* dummy */
+							  false,	/* no need for partition checks */
+							  eflags);
+
+			/* Open partition indices (note: ON CONFLICT unsupported)*/
+			if (leaf_rel_rri->ri_RelationDesc->rd_rel->relhasindex &&
+				operation != CMD_DELETE &&
+				leaf_rel_rri->ri_IndexRelationDescs == NULL)
+				ExecOpenIndices(leaf_rel_rri, false);
+
+			if (!equalTupleDescs(RelationGetDescr(rel),
+								 RelationGetDescr(leaf_rel)))
+				mtstate->mt_partition_tupconv_maps[i] =
+							convert_tuples_by_name(RelationGetDescr(rel),
+												   RelationGetDescr(leaf_rel),
+								  gettext_noop("could not convert row type"));
+
+			leaf_rel_rri++;
+			i++;
+		}
+	}
+
 	/*
 	 * Initialize any WITH CHECK OPTION constraints if needed.
 	 */
@@ -1957,6 +2041,18 @@ ExecEndModifyTable(ModifyTableState *node)
 														   resultRelInfo);
 	}
 
+	/* Close all partitions and indices thereof */
+	for (i = 0; i < node->mt_num_partitions; i++)
+	{
+		ResultRelInfo *resultRelInfo = node->mt_partitions + i;
+
+		ExecCloseIndices(resultRelInfo);
+		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+
+		if (node->mt_partition_tupconv_maps[i])
+			pfree(node->mt_partition_tupconv_maps[i]);
+	}
+
 	/*
 	 * Free the exprcontext
 	 */
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index f55129a..802cd68 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -755,8 +755,16 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 
 	/* Process ON CONFLICT, if any. */
 	if (stmt->onConflictClause)
+	{
+		/* Bail out if target relation is partitioned table */
+		if (relid_is_partitioned(pstate->p_target_rangetblentry->relid))
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("ON CONFLICT clause is not supported with partitioned tables")));
+
 		qry->onConflict = transformOnConflictClause(pstate,
 													stmt->onConflictClause);
+	}
 
 	/*
 	 * If we have a RETURNING clause, we need to add the target relation to
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index 7d4bf2f..c987d63 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -14,6 +14,8 @@
 #define PARTITION_H
 
 #include "fmgr.h"
+#include "executor/tuptable.h"
+#include "nodes/execnodes.h"
 #include "parser/parse_node.h"
 #include "utils/relcache.h"
 
@@ -85,6 +87,22 @@ typedef struct PartitionDescData
 typedef struct PartitionDescData *PartitionDesc;
 
 /*
+ * PartitionKeyExecInfo
+ *
+ *		This struct holds the information needed to extract partition
+ *		column values from a heap tuple.
+ *
+ *		Key					copy of the rd_partkey of rel
+ *		ExpressionState		exec state for expressions, or NIL if none
+ */
+typedef struct PartitionKeyExecInfo
+{
+	NodeTag			type;
+	PartitionKey	pi_Key;
+	List		   *pi_ExpressionState;	/* list of ExprState */
+} PartitionKeyExecInfo;
+
+/*
  * Partition tree node
  *
  *	pkinfo					PartitionKey executor state
@@ -100,6 +118,7 @@ typedef struct PartitionDescData *PartitionDesc;
  */
 typedef struct PartitionDescNodeData
 {
+	PartitionKeyExecInfo *pkinfo;
 	PartitionDesc		pdesc;
 	Oid					relid;
 	int					index;
@@ -138,4 +157,9 @@ extern Expr *get_check_expr_from_partbound(Relation rel, Relation parent,
 										   Node *bound);
 extern List *get_leaf_partition_oids(PartitionDescNode pdnode);
 extern PartitionDescNode RelationGetPartitionDescNode(Relation rel);
+
+extern int get_partition_for_tuple(PartitionDescNode pdnode,
+					TupleTableSlot *slot,
+					EState *estate,
+					Oid *failed_at);
 #endif   /* PARTITION_H */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 44fac27..d4e8b35 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -14,6 +14,7 @@
 #ifndef EXECUTOR_H
 #define EXECUTOR_H
 
+#include "catalog/partition.h"
 #include "executor/execdesc.h"
 #include "nodes/parsenodes.h"
 
@@ -188,6 +189,7 @@ extern void CheckValidResultRel(Relation resultRel, CmdType operation);
 extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
 				  Relation resultRelationDesc,
 				  Index resultRelationIndex,
+				  bool load_partition_check,
 				  int instrument_options);
 extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid);
 extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
@@ -211,6 +213,10 @@ extern void EvalPlanQualSetPlan(EPQState *epqstate,
 extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
 					 HeapTuple tuple);
 extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
+extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
+				  PartitionDescNode pdnode,
+				  TupleTableSlot *slot,
+				  EState *estate);
 
 #define EvalPlanQualSetSlot(epqstate, slot)  ((epqstate)->origslot = (slot))
 extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 74288e2..f2f535e 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -16,6 +16,7 @@
 
 #include "access/genam.h"
 #include "access/heapam.h"
+#include "access/tupconvert.h"
 #include "executor/instrument.h"
 #include "lib/pairingheap.h"
 #include "nodes/params.h"
@@ -1140,6 +1141,15 @@ typedef struct ModifyTableState
 										 * tlist  */
 	TupleTableSlot *mt_conflproj;		/* CONFLICT ... SET ... projection
 										 * target */
+	struct PartitionDescNodeData *mt_partition_node;
+										/* Partition descriptor node tree */
+	ResultRelInfo  *mt_partitions;		/* Per leaf partition target
+										 * relations */
+	TupleConversionMap **mt_partition_tupconv_maps;
+										/* Per leaf partition
+										 * tuple conversion map */
+	int				mt_num_partitions;	/* Number of leaf partition target
+										 * relations in the above array */
 } ModifyTableState;
 
 /* ----------------
-- 
1.7.1

