From db212397d755dbcdc7d5394b42a1d7b3cbdcb3aa Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Fri, 12 Feb 2016 19:25:58 +0900
Subject: [PATCH 10/10] Embarrassingly small optimizer patch to work with partitioned tables.

Basically, this creates an expand_inherited_rtentry clone that works
for partitioned tables instead of inheritance sets. AppendRelInfos
are created for leaf partitions and added to PlannerInfo.append_rel_list.
Then, wherever a RangeTblEntry.inh check is used to switch to append rel
processing, a check for relid_is_partitioned(relid) is added.

Note that it is not the intention of this commit to make constraint
exclusion also work for partitioned tables. A subsequent commit will
teach the partition module to install an equivalent CHECK constraint
whenever a new partition is created, which will enable constraint
exclusion based partition pruning.

One ugliness: EXPLAIN's set_deparse_planstate() uses appendplans[0] or
mergeplans[0] to initialize deparse_namespace.outer_planstate. That
gives the expected behavior for Append plans built for inheritance sets,
because the parent is the 0th member. The same is not true of an Append
built for a partitioned table (which this patch creates), so a user
will see the first leaf partition's name where they would expect to see
the name of the table they used in the query (IOW, the parent table).
---
 src/backend/optimizer/path/allpaths.c  |    7 +-
 src/backend/optimizer/plan/planner.c   |    3 +
 src/backend/optimizer/prep/prepunion.c |  231 ++++++++++++++++++++++++++++++++
 src/backend/optimizer/util/plancat.c   |   14 ++
 src/backend/optimizer/util/relnode.c   |    4 +-
 src/include/optimizer/prep.h           |    2 +
 6 files changed, 258 insertions(+), 3 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 380b15e..81c2323 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -19,6 +19,7 @@
 
 #include "access/sysattr.h"
 #include "access/tsmapi.h"
+#include "catalog/partition.h"
 #include "catalog/pg_class.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
@@ -324,7 +325,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		 */
 		set_dummy_rel_pathlist(rel);
 	}
-	else if (rte->inh)
+	else if (rte->inh || (rte->rtekind == RTE_RELATION &&
+							relid_is_partitioned(rte->relid)))
 	{
 		/* It's an "append relation", process accordingly */
 		set_append_rel_size(root, rel, rti, rte);
@@ -401,7 +403,8 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	{
 		/* We already proved the relation empty, so nothing more to do */
 	}
-	else if (rte->inh)
+	else if (rte->inh || (rte->rtekind == RTE_RELATION &&
+							relid_is_partitioned(rte->relid)))
 	{
 		/* It's an "append relation", process accordingly */
 		set_append_rel_pathlist(root, rel, rti, rte);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index f77c804..581924b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -538,6 +538,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	 */
 	expand_inherited_tables(root);
 
+	/* ... and partitioned tables. */
+	expand_partitioned_tables(root);
+
 	/*
 	 * Set hasHavingQual to remember if HAVING clause is present.  Needed
 	 * because preprocess_expression will reduce a constant-true condition to
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index e509a1a..54fae4b 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -33,6 +33,7 @@
 #include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/sysattr.h"
+#include "catalog/partition.h"
 #include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
@@ -112,6 +113,11 @@ static Node *adjust_appendrel_attrs_mutator(Node *node,
 static Relids adjust_relid_set(Relids relids, Index oldrelid, Index newrelid);
 static List *adjust_inherited_tlist(List *tlist,
 					   AppendRelInfo *context);
+static void expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *rte,
+					Index rti);
+static void make_var_translation_list(Relation oldrelation,
+							  Index newvarno,
+							  List **translated_vars);
 
 
 /*
@@ -1217,6 +1223,29 @@ expand_inherited_tables(PlannerInfo *root)
 }
 
 /*
+ * expand_partitioned_tables
+ *		Expand each rangetable entry that is a partitioned table into an
+ *		"append relation".
+ */
+void
+expand_partitioned_tables(PlannerInfo *root)
+{
+	ListCell   *cell = list_head(root->parse->rtable);
+	Index		orig_len = list_length(root->parse->rtable);
+	Index		rti = 1;
+
+	/*
+	 * expand_partitioned_rtentry() appends partition RTEs to the rangetable,
+	 * so stop at the entry count observed before the scan began.
+	 */
+	while (rti <= orig_len)
+	{
+		expand_partitioned_rtentry(root, (RangeTblEntry *) lfirst(cell), rti++);
+		cell = lnext(cell);
+	}
+}
+
+/*
  * expand_inherited_rtentry
  *		Check whether a rangetable entry represents an inheritance set.
  *		If so, add entries for all the child tables to the query's
@@ -1433,6 +1462,161 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
 }
 
 /*
+ * expand_partitioned_rtentry
+ *		Check whether a rangetable entry is a partitioned table.
+ *		If so, add entries for all the leaf partitions to the query's
+ *		rangetable, and build AppendRelInfo nodes for all the leaf partitions
+ *		and add them to root->append_rel_list.
+ *
+ * Unlike expand_inherited_rtentry(), the parent is not itself made a member
+ * of the append relation; only the leaf partitions get AppendRelInfos.
+ */
+static void
+expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
+{
+	Query	   *parse = root->parse;
+	Oid			parentOID;
+	PlanRowMark *oldrc;
+	Relation	oldrelation;
+	LOCKMODE	lockmode;
+	List	   *partOIDs;
+	List	   *appinfos;
+	ListCell   *l;
+
+	/* Quick exit: nothing to do unless rte is a partitioned plain relation */
+	if (rte->rtekind != RTE_RELATION || !relid_is_partitioned(rte->relid))
+		return;
+
+	parentOID = rte->relid;
+
+	/*
+	 * Pick lockmode using the policy described in expand_inherited_rtentry().
+	 */
+	oldrc = get_plan_rowmark(root->rowMarks, rti);
+	if (rti == parse->resultRelation)
+		lockmode = RowExclusiveLock;
+	else if (oldrc && RowMarkRequiresRowShareLock(oldrc->markType))
+		lockmode = RowShareLock;
+	else
+		lockmode = AccessShareLock;
+
+	/* Find all leaf partitions of the parent, acquire needed locks */
+	partOIDs = get_leaf_partitions(parentOID, lockmode);
+
+	/*
+	 * If parent relation is selected FOR UPDATE/SHARE, we need to mark its
+	 * PlanRowMark as isParent = true, and generate a new PlanRowMark for each
+	 * partition (the latter happens inside the loop below).
+	 */
+	if (oldrc)
+		oldrc->isParent = true;
+
+	/*
+	 * Must open the parent relation to examine its tupdesc.  We need not lock
+	 * it; we assume the rewriter already did.
+	 */
+	oldrelation = heap_open(parentOID, NoLock);
+
+	/* Build an RTE and an AppendRelInfo for each leaf partition */
+	appinfos = NIL;
+	foreach(l, partOIDs)
+	{
+		Oid			partOID = lfirst_oid(l);
+		Relation	newrelation;
+		RangeTblEntry *partrte;
+		Index		partRTindex;
+		AppendRelInfo *appinfo;
+
+		newrelation = heap_open(partOID, NoLock);
+
+		/*
+		 * It is possible that the parent table has partitions that are temp
+		 * tables of other backends (yes, it's possible - imagine another
+		 * backend creating a leaf partition of a temp partitioned table).
+		 * We cannot safely access such partitions (because of buffering issues),
+		 * and the best thing to do seems to be to silently ignore them.
+		 */
+		if (RELATION_IS_OTHER_TEMP(newrelation))
+		{
+			heap_close(newrelation, lockmode);
+			continue;
+		}
+
+		/*
+		 * Build an RTE for the partition, and attach to query's rangetable list.
+		 * We copy most fields of the parent's RTE, but replace relation OID
+		 * and relkind, and clear inh.  Also, set requiredPerms to zero since all
+		 * required permissions checks are done on the original RTE.
+		 */
+		partrte = copyObject(rte);
+		partrte->relid = partOID;
+		partrte->relkind = newrelation->rd_rel->relkind;
+		partrte->inh = false;
+		partrte->requiredPerms = 0;
+		parse->rtable = lappend(parse->rtable, partrte);
+		partRTindex = list_length(parse->rtable);
+
+		/*
+		 * Build an AppendRelInfo linking the parent RTE to this partition's RTE.
+		 */
+		appinfo = makeNode(AppendRelInfo);
+		appinfo->parent_relid = rti;
+		appinfo->child_relid = partRTindex;
+		appinfo->parent_reltype = oldrelation->rd_rel->reltype;
+		appinfo->child_reltype = newrelation->rd_rel->reltype;
+		make_var_translation_list(oldrelation, partRTindex, &appinfo->translated_vars);
+		appinfo->parent_reloid = parentOID;
+		appinfos = lappend(appinfos, appinfo);
+
+		/*
+		 * Translate the column permissions bitmaps to the partition's attnums (we
+		 * have to build the translated_vars list before we can do this).
+		 *
+		 * Note: we need to do this even though the executor won't run any
+		 * permissions checks on the child RTE.  The insertedCols/updatedCols
+		 * bitmaps may be examined for trigger-firing purposes.
+		 */
+		partrte->selectedCols = translate_col_privs(rte->selectedCols,
+												   appinfo->translated_vars);
+		partrte->insertedCols = translate_col_privs(rte->insertedCols,
+												   appinfo->translated_vars);
+		partrte->updatedCols = translate_col_privs(rte->updatedCols,
+												   appinfo->translated_vars);
+
+		/*
+		 * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE.
+		 */
+		if (oldrc)
+		{
+			PlanRowMark *newrc = makeNode(PlanRowMark);
+
+			newrc->rti = partRTindex;
+			newrc->prti = rti;
+			newrc->rowmarkId = oldrc->rowmarkId;
+			/* Reselect rowmark type, because relkind might not match parent */
+			newrc->markType = select_rowmark_type(partrte, oldrc->strength);
+			newrc->allMarkTypes = (1 << newrc->markType);
+			newrc->strength = oldrc->strength;
+			newrc->waitPolicy = oldrc->waitPolicy;
+			newrc->isParent = false;
+
+			/* Include partition's rowmark type in parent's allMarkTypes */
+			oldrc->allMarkTypes |= newrc->allMarkTypes;
+
+			root->rowMarks = lappend(root->rowMarks, newrc);
+		}
+
+		/* Close the partition relation, but keep its lock */
+			heap_close(newrelation, NoLock);
+	}
+
+	heap_close(oldrelation, NoLock);
+
+	/* Add the new AppendRelInfos, if any, to root->append_rel_list */
+	root->append_rel_list = list_concat(root->append_rel_list, appinfos);
+}
+
+/*
  * make_inh_translation_list
  *	  Build the list of translations from parent Vars to child Vars for
  *	  an inheritance child.
@@ -1536,6 +1720,53 @@ make_inh_translation_list(Relation oldrelation, Relation newrelation,
 }
 
 /*
+ * make_var_translation_list
+ *	  Build the list of translations from parent Vars to partition Vars.
+ *	  For a partition this amounts to changing each Var's varno to refer to
+ *	  the partition's RT entry.
+ */
+static void
+make_var_translation_list(Relation oldrelation,
+							  Index newvarno,
+							  List **translated_vars)
+{
+	List	   *vars = NIL;
+	TupleDesc	old_tupdesc = RelationGetDescr(oldrelation);
+	int			oldnatts = old_tupdesc->natts;
+	int			old_attno;
+
+	/*
+	 * Emit one list entry per parent attribute, in attnum order, so that the
+	 * entry for attnum N sits at list position N - 1.  Each Var keeps the
+	 * parent's attnum, type, typmod and collation; only varno changes.  That
+	 * presumes a partition's columns are numbered and typed exactly like
+	 * its parent's.
+	 */
+	for (old_attno = 0; old_attno < oldnatts; old_attno++)
+	{
+		Form_pg_attribute att = old_tupdesc->attrs[old_attno];
+
+		if (att->attisdropped)
+		{
+			/* Dropped column: keep the list position, but with no Var */
+			vars = lappend(vars, NULL);
+			continue;
+		}
+
+		/* Same column, re-pointed at the partition's rangetable entry */
+		vars = lappend(vars, makeVar(newvarno,
+									 (AttrNumber) (old_attno + 1),
+									 att->atttypid,
+									 att->atttypmod,
+									 att->attcollation,
+									 0));
+	}
+
+	/* Hand the finished list back through the output parameter */
+	*translated_vars = vars;
+}
+
+/*
  * translate_col_privs
  *	  Translate a bitmapset representing per-column privileges from the
  *	  parent rel's attribute numbering to the child's.
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 0ea9fcf..4453aae 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -805,6 +805,14 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 	switch (rel->rd_rel->relkind)
 	{
 		case RELKIND_RELATION:
+			if (relid_is_internal_partition(RelationGetRelid(rel)))
+			{
+				/* No storage */
+				*pages = 0;
+				*tuples = 0;
+				*allvisfrac = 0;
+				break;
+			}
 		case RELKIND_INDEX:
 		case RELKIND_MATVIEW:
 		case RELKIND_TOASTVALUE:
@@ -929,6 +937,12 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 			*tuples = rel->rd_rel->reltuples;
 			*allvisfrac = 0;
 			break;
+		case RELKIND_PARTITIONED_REL:
+			/* No storage */
+			*pages = 0;
+			*tuples = 0;
+			*allvisfrac = 0;
+			break;
 		default:
 			/* else it has no disk storage; probably shouldn't get here? */
 			*pages = 0;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 420692f..394021f 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "miscadmin.h"
+#include "catalog/partition.h"
 #include "catalog/pg_class.h"
 #include "foreign/foreign.h"
 #include "optimizer/clauses.h"
@@ -199,7 +200,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
 	 * not in the main join tree, but we will need RelOptInfos to plan access
 	 * to them.
 	 */
-	if (rte->inh)
+	if (rte->inh || (rte->rtekind == RTE_RELATION &&
+							relid_is_partitioned(rte->relid)))
 	{
 		ListCell   *l;
 
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index cebd8b6..611ed49 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -64,4 +64,6 @@ extern Node *adjust_appendrel_attrs(PlannerInfo *root, Node *node,
 extern Node *adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node,
 								  RelOptInfo *child_rel);
 
+extern void expand_partitioned_tables(PlannerInfo *root);
+
 #endif   /* PREP_H */
-- 
1.7.1

