From cdbb6d854fc59b576603c25f4567aab831e3d5b3 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tv@fuzzy.cz>
Date: Sun, 11 Jan 2015 19:51:48 +0100
Subject: [PATCH 2/7] shared infrastructure and functional dependencies

Basic infrastructure shared by all kinds of multivariate
stats, most importantly:

- adds a new system catalog (pg_mv_statistic)
- CREATE STATISTICS name ON table (columns) WITH (options)
- DROP STATISTICS name
- implementation of functional dependencies (the simplest
  type of multivariate statistics)
- building functional dependencies in ANALYZE
- updates regression tests (new catalog etc.)

This does not include any changes to the optimizer, i.e.
it does not influence query planning; that is left to the
follow-up patches.
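
For illustration, a minimal usage sketch of the new syntax
(the table, column and statistics names are made up):

    CREATE TABLE t (a INT, b INT);

    -- 'dependencies' is the only statistics type so far
    CREATE STATISTICS s ON t (a, b) WITH (dependencies);

    -- ANALYZE builds the dependencies and stores them in
    -- pg_mv_statistic (as a bytea value)
    ANALYZE t;

    DROP STATISTICS s;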

The current implementation requires a valid 'ltopr' for the
columns, so that the sample rows can be sorted in various
ways, both in this patch and in the other kinds of
statistics. Maybe this restriction could be relaxed in the
future, requiring just 'eqopr' for stats that do not sort
the data (e.g. functional dependencies and MCV lists).

Some of the stats (functional dependencies, and MCV lists
with limited functionality) might be made to work with
hashes of the values, which are sufficient for equality
comparisons. But the queries would require the equality
operator anyway, so it's not really a weaker requirement.
The hashes might reduce space requirements, though.

The algorithm detecting the dependencies is rather simple
and probably needs improvement, both to detect more
complicated dependencies and to validate the math.

The name 'functional dependencies' is more correct (than
'association rules') as it's exactly the name used in
relational theory (esp. Normal Forms) for tracking
column-level dependencies.

The multivariate statistics are automatically removed in
two situations:

 (a) after a DROP TABLE (obviously)

 (b) after ALTER TABLE ... DROP COLUMN, if the statistics
     would be left defined on fewer than 2 remaining columns

If at least 2 columns remain, we keep the statistics but
perform a cleanup on the next ANALYZE. The dropped columns
are removed from stakeys, and the new statistics are built
on the smaller set of columns.

We can't do this right at DROP COLUMN, because that would
leave us with invalid statistics, or we'd have to throw the
statistics away even though they are still usable. This lazy
approach lets us keep using the statistics even though some
of the columns are dropped.
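
For illustration, a hypothetical session showing the
intended behavior (names are made up):

    CREATE TABLE t (a INT, b INT, c INT);
    CREATE STATISTICS s ON t (a, b, c) WITH (dependencies);

    ALTER TABLE t DROP COLUMN c;  -- 2 columns remain, stats kept
    ANALYZE t;                    -- stakeys reduced to (a, b)

    ALTER TABLE t DROP COLUMN b;  -- 1 column remains, stats dropped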

This also adds a simple list of statistics to \d in psql.
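
There is also a new pg_mv_stats system view (defined in
system_views.sql below); the defined statistics can be
inspected with a query such as:

    SELECT schemaname, tablename, staname, attnums, depsinfo
      FROM pg_mv_stats;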
---
 src/backend/catalog/Makefile               |   1 +
 src/backend/catalog/dependency.c           |  11 +-
 src/backend/catalog/heap.c                 | 102 +++++
 src/backend/catalog/namespace.c            |  49 +++
 src/backend/catalog/objectaddress.c        |  22 +
 src/backend/catalog/system_views.sql       |  11 +
 src/backend/commands/Makefile              |   6 +-
 src/backend/commands/analyze.c             |  21 +
 src/backend/commands/dropcmds.c            |   4 +
 src/backend/commands/event_trigger.c       |   3 +
 src/backend/commands/statscmds.c           | 299 ++++++++++++++
 src/backend/commands/tablecmds.c           |   8 +-
 src/backend/nodes/copyfuncs.c              |  16 +
 src/backend/nodes/outfuncs.c               |  18 +
 src/backend/optimizer/util/plancat.c       |  63 +++
 src/backend/parser/gram.y                  |  71 +++-
 src/backend/tcop/utility.c                 |  11 +
 src/backend/utils/Makefile                 |   2 +-
 src/backend/utils/cache/relcache.c         |  59 +++
 src/backend/utils/cache/syscache.c         |  23 ++
 src/backend/utils/mvstats/Makefile         |  17 +
 src/backend/utils/mvstats/common.c         | 356 ++++++++++++++++
 src/backend/utils/mvstats/common.h         |  75 ++++
 src/backend/utils/mvstats/dependencies.c   | 638 +++++++++++++++++++++++++++++
 src/bin/psql/describe.c                    |  42 ++
 src/include/catalog/dependency.h           |   5 +-
 src/include/catalog/heap.h                 |   1 +
 src/include/catalog/indexing.h             |   7 +
 src/include/catalog/namespace.h            |   2 +
 src/include/catalog/pg_mv_statistic.h      |  71 ++++
 src/include/catalog/pg_proc.h              |   5 +
 src/include/catalog/toasting.h             |   1 +
 src/include/commands/defrem.h              |   4 +
 src/include/nodes/nodes.h                  |   2 +
 src/include/nodes/parsenodes.h             |  11 +
 src/include/nodes/relation.h               |  28 ++
 src/include/parser/kwlist.h                |   2 +-
 src/include/utils/mvstats.h                |  69 ++++
 src/include/utils/rel.h                    |   4 +
 src/include/utils/relcache.h               |   1 +
 src/include/utils/syscache.h               |   2 +
 src/test/regress/expected/rules.out        |   8 +
 src/test/regress/expected/sanity_check.out |   1 +
 43 files changed, 2139 insertions(+), 13 deletions(-)
 create mode 100644 src/backend/commands/statscmds.c
 create mode 100644 src/backend/utils/mvstats/Makefile
 create mode 100644 src/backend/utils/mvstats/common.c
 create mode 100644 src/backend/utils/mvstats/common.h
 create mode 100644 src/backend/utils/mvstats/dependencies.c
 create mode 100644 src/include/catalog/pg_mv_statistic.h
 create mode 100644 src/include/utils/mvstats.h

diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 25130ec..058b8a9 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -32,6 +32,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
 	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
 	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
 	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
+	pg_mv_statistic.h \
 	pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
 	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
 	pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index efca34c..32a9ee3 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -39,6 +39,7 @@
 #include "catalog/pg_foreign_server.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_largeobject.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
@@ -159,7 +160,8 @@ static const Oid object_classes[] = {
 	ExtensionRelationId,		/* OCLASS_EXTENSION */
 	EventTriggerRelationId,		/* OCLASS_EVENT_TRIGGER */
 	PolicyRelationId,			/* OCLASS_POLICY */
-	TransformRelationId			/* OCLASS_TRANSFORM */
+	TransformRelationId,		/* OCLASS_TRANSFORM */
+	MvStatisticRelationId		/* OCLASS_STATISTICS */
 };
 
 
@@ -1271,6 +1273,10 @@ doDeletion(const ObjectAddress *object, int flags)
 			DropTransformById(object->objectId);
 			break;
 
+		case OCLASS_STATISTICS:
+			RemoveStatisticsById(object->objectId);
+			break;
+
 		default:
 			elog(ERROR, "unrecognized object class: %u",
 				 object->classId);
@@ -2414,6 +2420,9 @@ getObjectClass(const ObjectAddress *object)
 
 		case TransformRelationId:
 			return OCLASS_TRANSFORM;
+
+		case MvStatisticRelationId:
+			return OCLASS_STATISTICS;
 	}
 
 	/* shouldn't get here */
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 04c4f8f..5176f86 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -46,6 +46,7 @@
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_inherits.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_tablespace.h"
@@ -1612,7 +1613,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
 	heap_close(attr_rel, RowExclusiveLock);
 
 	if (attnum > 0)
+	{
 		RemoveStatistics(relid, attnum);
+		RemoveMVStatistics(relid, attnum);
+	}
 
 	relation_close(rel, NoLock);
 }
@@ -1840,6 +1844,11 @@ heap_drop_with_catalog(Oid relid)
 	RemoveStatistics(relid, 0);
 
 	/*
+	 * delete multi-variate statistics
+	 */
+	RemoveMVStatistics(relid, 0);
+
+	/*
 	 * delete attribute tuples
 	 */
 	DeleteAttributeTuples(relid);
@@ -2695,6 +2704,99 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
 
 
 /*
+ * RemoveMVStatistics --- remove entries in pg_mv_statistic for a rel
+ *
+ * If attnum is zero, remove all entries for rel; else remove only the one(s)
+ * for that column.
+ */
+void
+RemoveMVStatistics(Oid relid, AttrNumber attnum)
+{
+	Relation	pgmvstatistic;
+	TupleDesc	tupdesc = NULL;
+	SysScanDesc scan;
+	ScanKeyData key;
+	HeapTuple	tuple;
+
+	/*
+	 * When dropping a column, we drop statistics that would be left
+	 * with fewer than two remaining (undropped) columns. To check
+	 * that, we need the tuple descriptor.
+	 *
+	 * We already have the relation locked (as we're running ALTER
+	 * TABLE ... DROP COLUMN), so we'll just get the descriptor here.
+	 */
+	if (attnum != 0)
+	{
+		Relation rel = relation_open(relid, NoLock);
+
+		/* multivariate stats are supported on tables and matviews */
+		if (rel->rd_rel->relkind == RELKIND_RELATION ||
+			rel->rd_rel->relkind == RELKIND_MATVIEW)
+			tupdesc = RelationGetDescr(rel);
+
+		relation_close(rel, NoLock);
+	}
+
+	if ((attnum != 0) && (tupdesc == NULL))
+		return;
+
+	pgmvstatistic = heap_open(MvStatisticRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&key,
+				Anum_pg_mv_statistic_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+
+	scan = systable_beginscan(pgmvstatistic,
+							  MvStatisticRelidIndexId,
+							  true, NULL, 1, &key);
+
+	/* we must loop even when attnum != 0, in case of inherited stats */
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		bool delete = true;
+
+		if (attnum != 0)
+		{
+			Datum		adatum;
+			bool		isnull;
+			int			i;
+			int			ncolumns = 0;
+			ArrayType  *arr;
+			int16	   *attnums;
+
+			/* get the columns */
+			adatum = SysCacheGetAttr(MVSTATOID, tuple,
+									 Anum_pg_mv_statistic_stakeys, &isnull);
+			Assert(!isnull);
+
+			arr = DatumGetArrayTypeP(adatum);
+			attnums = (int16*)ARR_DATA_PTR(arr);
+
+			for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+			{
+				/* count the column unless it has been / is being dropped */
+				if ((! tupdesc->attrs[attnums[i]-1]->attisdropped) &&
+					(attnums[i] != attnum))
+					ncolumns += 1;
+			}
+
+			/* delete if there are fewer than two attributes remaining */
+			delete = (ncolumns < 2);
+		}
+
+		if (delete)
+			simple_heap_delete(pgmvstatistic, &tuple->t_self);
+	}
+
+	systable_endscan(scan);
+
+	heap_close(pgmvstatistic, RowExclusiveLock);
+}
+
+
+/*
  * RelationTruncateIndexes - truncate all indexes associated
  * with the heap relation to zero tuples.
  *
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 6644c6f..178f565 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -4201,3 +4201,52 @@ pg_is_other_temp_schema(PG_FUNCTION_ARGS)
 
 	PG_RETURN_BOOL(isOtherTempNamespace(oid));
 }
+
+Oid
+get_statistics_oid(List *names, bool missing_ok)
+{
+	char	   *schemaname;
+	char	   *stats_name;
+	Oid			namespaceId;
+	Oid			stats_oid = InvalidOid;
+	ListCell   *l;
+
+	/* deconstruct the name list */
+	DeconstructQualifiedName(names, &schemaname, &stats_name);
+
+	if (schemaname)
+	{
+		/* use exact schema given */
+		namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
+		if (missing_ok && !OidIsValid(namespaceId))
+			stats_oid = InvalidOid;
+		else
+			stats_oid = GetSysCacheOid1(MVSTATNAME,
+										PointerGetDatum(stats_name));
+	}
+	else
+	{
+		/* search for it in search path */
+		recomputeNamespacePath();
+
+		foreach(l, activeSearchPath)
+		{
+			namespaceId = lfirst_oid(l);
+
+			if (namespaceId == myTempNamespace)
+				continue;		/* do not look in temp namespace */
+			stats_oid = GetSysCacheOid1(MVSTATNAME,
+									  PointerGetDatum(stats_name));
+			if (OidIsValid(stats_oid))
+				break;
+		}
+	}
+
+	if (!OidIsValid(stats_oid) && !missing_ok)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("statistics \"%s\" does not exist",
+						NameListToString(names))));
+
+	return stats_oid;
+}
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index e44d7d0..b2bcf1f 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -37,6 +37,7 @@
 #include "catalog/pg_language.h"
 #include "catalog/pg_largeobject.h"
 #include "catalog/pg_largeobject_metadata.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_opfamily.h"
@@ -436,9 +437,22 @@ static const ObjectPropertyType ObjectProperty[] =
 		Anum_pg_type_typacl,
 		ACL_KIND_TYPE,
 		true
+	},
+	{
+		MvStatisticRelationId,
+		MvStatisticOidIndexId,
+		MVSTATOID,
+		MVSTATNAME,
+		Anum_pg_mv_statistic_staname,
+		InvalidAttrNumber,		/* FIXME probably should have namespace */
+		InvalidAttrNumber,		/* XXX same owner as relation */
+		InvalidAttrNumber,		/* no ACL (same as relation) */
+		-1,						/* no ACL */
+		true
 	}
 };
 
+
 /*
  * This struct maps the string object types as returned by
  * getObjectTypeDescription into ObjType enum values.  Note that some enum
@@ -911,6 +925,11 @@ get_object_address(ObjectType objtype, List *objname, List *objargs,
 				address = get_object_address_defacl(objname, objargs,
 													missing_ok);
 				break;
+			case OBJECT_STATISTICS:
+				address.classId = MvStatisticRelationId;
+				address.objectId = get_statistics_oid(objname, missing_ok);
+				address.objectSubId = 0;
+				break;
 			default:
 				elog(ERROR, "unrecognized objtype: %d", (int) objtype);
 				/* placate compiler, in case it thinks elog might return */
@@ -2183,6 +2202,9 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
 						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 						 errmsg("must be superuser")));
 			break;
+		case OBJECT_STATISTICS:
+			/* FIXME do the right owner checks here */
+			break;
 		default:
 			elog(ERROR, "unrecognized object type: %d",
 				 (int) objtype);
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 536c805..e3f3387 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -158,6 +158,17 @@ CREATE VIEW pg_indexes AS
          LEFT JOIN pg_tablespace T ON (T.oid = I.reltablespace)
     WHERE C.relkind IN ('r', 'm') AND I.relkind = 'i';
 
+CREATE VIEW pg_mv_stats AS
+    SELECT
+        N.nspname AS schemaname,
+        C.relname AS tablename,
+        S.staname AS staname,
+        S.stakeys AS attnums,
+        length(S.stadeps) as depsbytes,
+        pg_mv_stats_dependencies_info(S.stadeps) as depsinfo
+    FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid))
+        LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
+
 CREATE VIEW pg_stats WITH (security_barrier) AS
     SELECT
         nspname AS schemaname,
diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile
index b1ac704..5151001 100644
--- a/src/backend/commands/Makefile
+++ b/src/backend/commands/Makefile
@@ -18,8 +18,8 @@ OBJS = aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o  \
 	event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
 	indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
 	policy.o portalcmds.o prepare.o proclang.o \
-	schemacmds.o seclabel.o sequence.o tablecmds.o tablespace.o trigger.o \
-	tsearchcmds.o typecmds.o user.o vacuum.o vacuumlazy.o \
-	variable.o view.o
+	schemacmds.o seclabel.o sequence.o statscmds.o \
+	tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o \
+	user.o vacuum.o vacuumlazy.o variable.o view.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index ddb68ab..fa18903 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -27,6 +27,7 @@
 #include "catalog/indexing.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "commands/dbcommands.h"
 #include "commands/tablecmds.h"
@@ -55,7 +56,11 @@
 #include "utils/syscache.h"
 #include "utils/timestamp.h"
 #include "utils/tqual.h"
+#include "utils/fmgroids.h"
+#include "utils/builtins.h"
 
+#include "utils/mvstats.h"
+#include "access/sysattr.h"
 
 /* Per-index data for ANALYZE */
 typedef struct AnlIndexData
@@ -460,6 +465,19 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
 	 * possible overflow in Vitter's algorithm.  (Note: that will also be the
 	 * target in the corner case where there are no analyzable columns.)
+	 *
+	 * FIXME This sample sizing is mostly OK when computing stats for
+	 *       individual columns, but when computing multivariate stats
+	 *       (histograms, MCV lists, ...) it's rather insufficient. For
+	 *       stats on multiple columns / complex stats we need larger
+	 *       sample sizes, because we need to build more detailed stats
+	 *       (more MCV items / histogram buckets) to get good accuracy.
+	 *       Maybe a sample proportional to the table size (say,
+	 *       0.5% - 1%) would be more appropriate than a fixed size.
+	 *       Also, this should be bound to the requested statistics
+	 *       size - e.g. the number of MCV items or histogram buckets
+	 *       should require several sample rows per item/bucket (so
+	 *       the sample should be k*size).
 	 */
 	targrows = 100;
 	for (i = 0; i < attr_cnt; i++)
@@ -562,6 +580,9 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 			update_attstats(RelationGetRelid(Irel[ind]), false,
 							thisdata->attr_cnt, thisdata->vacattrstats);
 		}
+
+		/* Build multivariate stats (if there are any). */
+		build_mv_stats(onerel, numrows, rows, attr_cnt, vacattrstats);
 	}
 
 	/*
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
index f04f4f5..7d6318d 100644
--- a/src/backend/commands/dropcmds.c
+++ b/src/backend/commands/dropcmds.c
@@ -292,6 +292,10 @@ does_not_exist_skipping(ObjectType objtype, List *objname, List *objargs)
 			msg = gettext_noop("schema \"%s\" does not exist, skipping");
 			name = NameListToString(objname);
 			break;
+		case OBJECT_STATISTICS:
+			msg = gettext_noop("statistics \"%s\" does not exist, skipping");
+			name = NameListToString(objname);
+			break;
 		case OBJECT_TSPARSER:
 			if (!schema_does_not_exist_skipping(objname, &msg, &name))
 			{
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
index 3d1cb0b..baea9dd 100644
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -110,6 +110,7 @@ static event_trigger_support_data event_trigger_support[] = {
 	{"SCHEMA", true},
 	{"SEQUENCE", true},
 	{"SERVER", true},
+	{"STATISTICS", true},
 	{"TABLE", true},
 	{"TABLESPACE", false},
 	{"TRANSFORM", true},
@@ -1106,6 +1107,7 @@ EventTriggerSupportsObjectType(ObjectType obtype)
 		case OBJECT_RULE:
 		case OBJECT_SCHEMA:
 		case OBJECT_SEQUENCE:
+		case OBJECT_STATISTICS:
 		case OBJECT_TABCONSTRAINT:
 		case OBJECT_TABLE:
 		case OBJECT_TRANSFORM:
@@ -1167,6 +1169,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
 		case OCLASS_DEFACL:
 		case OCLASS_EXTENSION:
 		case OCLASS_POLICY:
+		case OCLASS_STATISTICS:
 			return true;
 	}
 
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
new file mode 100644
index 0000000..3790082
--- /dev/null
+++ b/src/backend/commands/statscmds.c
@@ -0,0 +1,299 @@
+/*-------------------------------------------------------------------------
+ *
+ * statscmds.c
+ *	  Commands for creating and altering multivariate statistics
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/commands/statscmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/multixact.h"
+#include "access/reloptions.h"
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/heap.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_foreign_table.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_mv_statistic.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_tablespace.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_type_fn.h"
+#include "catalog/storage.h"
+#include "catalog/toasting.h"
+#include "commands/cluster.h"
+#include "commands/comment.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/policy.h"
+#include "commands/sequence.h"
+#include "commands/tablecmds.h"
+#include "commands/tablespace.h"
+#include "commands/trigger.h"
+#include "commands/typecmds.h"
+#include "commands/user.h"
+#include "executor/executor.h"
+#include "foreign/foreign.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/planner.h"
+#include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_relation.h"
+#include "parser/parse_type.h"
+#include "parser/parse_utilcmd.h"
+#include "parser/parser.h"
+#include "pgstat.h"
+#include "rewrite/rewriteDefine.h"
+#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "storage/lock.h"
+#include "storage/predicate.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/relcache.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+#include "utils/typcache.h"
+#include "utils/mvstats.h"
+
+
+/* used for sorting the attnums in ExecCreateStatistics */
+static int compare_int16(const void *a, const void *b)
+{
+	return memcmp(a, b, sizeof(int16));
+}
+
+/*
+ * Implements CREATE STATISTICS name ON table (columns) WITH (options)
+ *
+ * TODO Check that the types support sort, although maybe we can live
+ *      without it (and only build MCV list / association rules).
+ *
+ * TODO This should probably check for duplicate stats (i.e. same
+ *      keys, same options). Although maybe it's useful to have
+ *      multiple stats on the same columns with different options
+ *      (say, a detailed MCV-only stats for some queries, histogram
+ *      for others, etc.)
+ */
+ObjectAddress
+CreateStatistics(CreateStatsStmt *stmt)
+{
+	int			i, j;
+	ListCell   *l;
+	int16		attnums[INDEX_MAX_KEYS];
+	int			numcols = 0;
+	ObjectAddress	address = InvalidObjectAddress;
+	NameData	staname;
+	Oid			statoid;
+
+	HeapTuple	htup;
+	Datum		values[Natts_pg_mv_statistic];
+	bool		nulls[Natts_pg_mv_statistic];
+	int2vector *stakeys;
+	Relation	mvstatrel;
+	Relation	rel;
+	ObjectAddress parentobject, childobject;
+
+	/* by default build nothing */
+	bool 	build_dependencies = false;
+
+	Assert(IsA(stmt, CreateStatsStmt));
+
+	rel = heap_openrv(stmt->relation, AccessExclusiveLock);
+
+	/* transform the column names to attnum values */
+
+	foreach(l, stmt->keys)
+	{
+		char	   *attname = strVal(lfirst(l));
+		HeapTuple	atttuple;
+
+		atttuple = SearchSysCacheAttName(RelationGetRelid(rel), attname);
+
+		if (!HeapTupleIsValid(atttuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" referenced in statistics does not exist",
+							attname)));
+
+		/* more than MVSTATS_MAX_DIMENSIONS columns not allowed */
+		if (numcols >= MVSTATS_MAX_DIMENSIONS)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("cannot have more than %d keys in a statistics",
+							MVSTATS_MAX_DIMENSIONS)));
+
+		attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
+		ReleaseSysCache(atttuple);
+		numcols++;
+	}
+
+	/*
+	 * Check the lower bound (at least 2 columns), the upper bound was
+	 * already checked in the loop.
+	 */
+	if (numcols < 2)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("multivariate stats require 2 or more columns")));
+
+	/* look for duplicate columns */
+	for (i = 0; i < numcols; i++)
+		for (j = 0; j < numcols; j++)
+			if ((i != j) && (attnums[i] == attnums[j]))
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_COLUMN),
+						 errmsg("duplicate column name in statistics definition")));
+
+	/* parse the statistics options */
+	foreach (l, stmt->options)
+	{
+		DefElem *opt = (DefElem*)lfirst(l);
+
+		if (strcmp(opt->defname, "dependencies") == 0)
+			build_dependencies = defGetBoolean(opt);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized STATISTICS option \"%s\"",
+							opt->defname)));
+	}
+
+	/* check that at least some statistics were requested */
+	if (! build_dependencies)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("no statistics type (dependencies) was requested")));
+
+	/* sort the attnums and build int2vector */
+	qsort(attnums, numcols, sizeof(int16), compare_int16);
+	stakeys = buildint2vector(attnums, numcols);
+
+	namestrcpy(&staname, stmt->statsname);
+
+	/*
+	 * Okay, let's create the pg_mv_statistic entry.
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	/* no stats collected yet, so just the keys */
+	values[Anum_pg_mv_statistic_starelid-1] = ObjectIdGetDatum(RelationGetRelid(rel));
+	values[Anum_pg_mv_statistic_staname -1] = NameGetDatum(&staname);
+
+	values[Anum_pg_mv_statistic_stakeys -1] = PointerGetDatum(stakeys);
+
+	values[Anum_pg_mv_statistic_deps_enabled -1] = BoolGetDatum(build_dependencies);
+
+	nulls[Anum_pg_mv_statistic_stadeps  -1] = true;
+
+	/* insert the tuple into pg_mv_statistic */
+	mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock);
+
+	htup = heap_form_tuple(mvstatrel->rd_att, values, nulls);
+
+	simple_heap_insert(mvstatrel, htup);
+
+	CatalogUpdateIndexes(mvstatrel, htup);
+
+	statoid = HeapTupleGetOid(htup);
+
+	heap_freetuple(htup);
+
+
+	/*
+	 * Store a dependency too, so that statistics are dropped on DROP TABLE
+	 */
+	parentobject.classId = RelationRelationId;
+	parentobject.objectId = ObjectIdGetDatum(RelationGetRelid(rel));
+	parentobject.objectSubId = 0;
+	childobject.classId = MvStatisticRelationId;
+	childobject.objectId = statoid;
+	childobject.objectSubId = 0;
+
+	recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+
+	heap_close(mvstatrel, RowExclusiveLock);
+
+	/*
+	 * Invalidate relcache so that others see the new statistics.
+	 */
+	CacheInvalidateRelcache(rel);
+
+	relation_close(rel, NoLock);
+
+	ObjectAddressSet(address, MvStatisticRelationId, statoid);
+
+	return address;
+}
+
+
+/*
+ * Implements DROP STATISTICS.
+ *
+ *     DROP STATISTICS stats_name
+ *
+ * Removes the pg_mv_statistic entry with the given OID; the OID is
+ * resolved from the statistics name by the generic DROP machinery.
+ */
+void
+RemoveStatisticsById(Oid statsOid)
+{
+	Relation	relation;
+	HeapTuple	tup;
+
+	/*
+	 * Delete the pg_mv_statistic tuple.
+	 */
+	relation = heap_open(MvStatisticRelationId, RowExclusiveLock);
+
+	tup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(statsOid));
+	if (!HeapTupleIsValid(tup)) /* should not happen */
+		elog(ERROR, "cache lookup failed for statistics %u", statsOid);
+
+	simple_heap_delete(relation, &tup->t_self);
+
+	ReleaseSysCache(tup);
+
+	heap_close(relation, RowExclusiveLock);
+}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 56fed4d..f86d716 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -35,6 +35,7 @@
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_tablespace.h"
@@ -93,7 +94,7 @@
 #include "utils/syscache.h"
 #include "utils/tqual.h"
 #include "utils/typcache.h"
-
+#include "utils/mvstats.h"
 
 /*
  * ON COMMIT action list
@@ -141,8 +142,9 @@ static List *on_commits = NIL;
 #define AT_PASS_ADD_COL			5		/* ADD COLUMN */
 #define AT_PASS_ADD_INDEX		6		/* ADD indexes */
 #define AT_PASS_ADD_CONSTR		7		/* ADD constraints, defaults */
-#define AT_PASS_MISC			8		/* other stuff */
-#define AT_NUM_PASSES			9
+#define AT_PASS_ADD_STATS		8		/* ADD statistics */
+#define AT_PASS_MISC			9		/* other stuff */
+#define AT_NUM_PASSES			10
 
 typedef struct AlteredTableInfo
 {
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ba04b72..0ca2d35 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -4118,6 +4118,19 @@ _copyAlterPolicyStmt(const AlterPolicyStmt *from)
 	return newnode;
 }
 
+static CreateStatsStmt *
+_copyCreateStatsStmt(const CreateStatsStmt *from)
+{
+	CreateStatsStmt  *newnode = makeNode(CreateStatsStmt);
+
+	COPY_STRING_FIELD(statsname);
+	COPY_NODE_FIELD(relation);
+	COPY_NODE_FIELD(keys);
+	COPY_NODE_FIELD(options);
+
+	return newnode;
+}
+
 /* ****************************************************************
  *					pg_list.h copy functions
  * ****************************************************************
@@ -4965,6 +4978,9 @@ copyObject(const void *from)
 		case T_CommonTableExpr:
 			retval = _copyCommonTableExpr(from);
 			break;
+		case T_CreateStatsStmt:
+			retval = _copyCreateStatsStmt(from);
+			break;
 		case T_FuncWithArgs:
 			retval = _copyFuncWithArgs(from);
 			break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 63fae82..cae21d0 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1939,6 +1939,21 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node)
 }
 
 static void
+_outMVStatisticInfo(StringInfo str, const MVStatisticInfo *node)
+{
+	WRITE_NODE_TYPE("MVSTATISTICINFO");
+
+	/* NB: this isn't a complete set of fields */
+	WRITE_OID_FIELD(mvoid);
+
+	/* enabled statistics */
+	WRITE_BOOL_FIELD(deps_enabled);
+
+	/* built/available statistics */
+	WRITE_BOOL_FIELD(deps_built);
+}
+
+static void
 _outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
 {
 	/*
@@ -3358,6 +3373,9 @@ _outNode(StringInfo str, const void *obj)
 			case T_PlannerParamItem:
 				_outPlannerParamItem(str, obj);
 				break;
+			case T_MVStatisticInfo:
+				_outMVStatisticInfo(str, obj);
+				break;
 
 			case T_CreateStmt:
 				_outCreateStmt(str, obj);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 9442e5f..60fd57f 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -27,6 +27,7 @@
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/heap.h"
+#include "catalog/pg_mv_statistic.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -39,7 +40,9 @@
 #include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
 #include "storage/bufmgr.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
 
@@ -93,6 +96,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	Relation	relation;
 	bool		hasindex;
 	List	   *indexinfos = NIL;
+	List	   *stainfos = NIL;
 
 	/*
 	 * We need not lock the relation since it was already locked, either by
@@ -381,6 +385,65 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 	rel->indexlist = indexinfos;
 
+	if (true)
+	{
+		List	   *mvstatoidlist;
+		ListCell   *l;
+
+		mvstatoidlist = RelationGetMVStatList(relation);
+
+		foreach(l, mvstatoidlist)
+		{
+			ArrayType  *arr;
+			Datum		adatum;
+			bool		isnull;
+			Oid			mvoid = lfirst_oid(l);
+			Form_pg_mv_statistic mvstat;
+			MVStatisticInfo *info;
+
+			HeapTuple	htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid));
+
+			/* XXX syscache contains OIDs of deleted stats (not invalidated) */
+			if (! HeapTupleIsValid(htup))
+				continue;
+
+			mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup);
+
+			/* unavailable stats are not interesting for the planner */
+			if (mvstat->deps_built)
+			{
+				info = makeNode(MVStatisticInfo);
+
+				info->mvoid = mvoid;
+				info->rel = rel;
+
+				/* enabled statistics */
+				info->deps_enabled = mvstat->deps_enabled;
+
+				/* built/available statistics */
+				info->deps_built = mvstat->deps_built;
+
+				/* stakeys */
+				adatum = SysCacheGetAttr(MVSTATOID, htup,
+										 Anum_pg_mv_statistic_stakeys, &isnull);
+				Assert(!isnull);
+
+				arr = DatumGetArrayTypeP(adatum);
+
+				info->stakeys = buildint2vector((int16 *) ARR_DATA_PTR(arr),
+												ARR_DIMS(arr)[0]);
+
+				stainfos = lcons(info, stainfos);
+			}
+
+			ReleaseSysCache(htup);
+		}
+
+		list_free(mvstatoidlist);
+	}
+
+	rel->mvstatlist = stainfos;
+
 	/* Grab foreign-table info using the relcache, while we have it */
 	if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
 	{
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c4bed8a..5446870 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -241,7 +241,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 		ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt
 		CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt
 		CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt
-		CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
+		CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt
 		CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
 		CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt
 		CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt
@@ -375,6 +375,12 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>	group_by_item empty_grouping_set rollup_clause cube_clause
 %type <node>	grouping_sets_clause
 
+%type <list>	OptStatsOptions
+%type <str>		opt_stats_name stats_name stats_options_name
+%type <node>	stats_options_arg
+%type <defelt>	stats_options_elem
+%type <list>	stats_options_list
+
 %type <list>	opt_fdw_options fdw_options
 %type <defelt>	fdw_option
 
@@ -809,6 +815,7 @@ stmt :
 			| CreateSchemaStmt
 			| CreateSeqStmt
 			| CreateStmt
+			| CreateStatsStmt
 			| CreateTableSpaceStmt
 			| CreateTransformStmt
 			| CreateTrigStmt
@@ -3436,6 +3443,65 @@ OptConsTableSpace:   USING INDEX TABLESPACE name	{ $$ = $4; }
 ExistingIndex:   USING INDEX index_name				{ $$ = $3; }
 		;
 
+/*****************************************************************************
+ *
+ *		QUERY :
+ *				CREATE STATISTICS stats_name ON relname (columns) WITH (options)
+ *
+ *****************************************************************************/
+
+
+CreateStatsStmt:	CREATE STATISTICS opt_stats_name ON qualified_name '(' columnList ')' OptStatsOptions
+					{
+						CreateStatsStmt *n = makeNode(CreateStatsStmt);
+						n->statsname = $3;
+						n->relation = $5;
+						n->keys = $7;
+						n->options = $9;
+						$$ = (Node *)n;
+					}
+			;
+
+opt_stats_name:
+			stats_name						{ $$ = $1; }
+			| /*EMPTY*/						{ $$ = NULL; }
+		;
+
+stats_name: ColId							{ $$ = $1; };
+
+OptStatsOptions:
+			WITH '(' stats_options_list ')'	{ $$ = $3; }
+			| /*EMPTY*/						{ $$ = NIL; }
+		;
+
+stats_options_list:
+			stats_options_elem
+				{
+					$$ = list_make1($1);
+				}
+			| stats_options_list ',' stats_options_elem
+				{
+					$$ = lappend($1, $3);
+				}
+		;
+
+stats_options_elem:
+			stats_options_name stats_options_arg
+				{
+					$$ = makeDefElem($1, $2);
+				}
+		;
+
+stats_options_name:
+			NonReservedWord			{ $$ = $1; }
+		;
+
+stats_options_arg:
+			opt_boolean_or_string	{ $$ = (Node *) makeString($1); }
+			| NumericOnly			{ $$ = (Node *) $1; }
+			| /* EMPTY */			{ $$ = NULL; }
+		;
+
 
 /*****************************************************************************
  *
@@ -5621,6 +5687,7 @@ drop_type:	TABLE									{ $$ = OBJECT_TABLE; }
 			| TEXT_P SEARCH DICTIONARY				{ $$ = OBJECT_TSDICTIONARY; }
 			| TEXT_P SEARCH TEMPLATE				{ $$ = OBJECT_TSTEMPLATE; }
 			| TEXT_P SEARCH CONFIGURATION			{ $$ = OBJECT_TSCONFIGURATION; }
+			| STATISTICS							{ $$ = OBJECT_STATISTICS; }
 		;
 
 any_name_list:
@@ -13860,7 +13927,6 @@ unreserved_keyword:
 			| STANDALONE_P
 			| START
 			| STATEMENT
-			| STATISTICS
 			| STDIN
 			| STDOUT
 			| STORAGE
@@ -14077,6 +14143,7 @@ reserved_keyword:
 			| SELECT
 			| SESSION_USER
 			| SOME
+			| STATISTICS
 			| SYMMETRIC
 			| TABLE
 			| THEN
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index e81bbc6..7029278 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1520,6 +1520,10 @@ ProcessUtilitySlow(Node *parsetree,
 				address = ExecSecLabelStmt((SecLabelStmt *) parsetree);
 				break;
 
+			case T_CreateStatsStmt:	/* CREATE STATISTICS */
+				address = CreateStatistics((CreateStatsStmt *) parsetree);
+				break;
+
 			default:
 				elog(ERROR, "unrecognized node type: %d",
 					 (int) nodeTag(parsetree));
@@ -2160,6 +2164,9 @@ CreateCommandTag(Node *parsetree)
 				case OBJECT_TRANSFORM:
 					tag = "DROP TRANSFORM";
 					break;
+				case OBJECT_STATISTICS:
+					tag = "DROP STATISTICS";
+					break;
 				default:
 					tag = "???";
 			}
@@ -2527,6 +2534,10 @@ CreateCommandTag(Node *parsetree)
 			tag = "EXECUTE";
 			break;
 
+		case T_CreateStatsStmt:
+			tag = "CREATE STATISTICS";
+			break;
+
 		case T_DeallocateStmt:
 			{
 				DeallocateStmt *stmt = (DeallocateStmt *) parsetree;
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index 8374533..eba0352 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -9,7 +9,7 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS        = fmgrtab.o
-SUBDIRS     = adt cache error fmgr hash init mb misc mmgr resowner sort time
+SUBDIRS     = adt cache error fmgr hash init mb misc mmgr mvstats resowner sort time
 
 # location of Catalog.pm
 catalogdir  = $(top_srcdir)/src/backend/catalog
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6b0c0b7..b6473bb 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -47,6 +47,7 @@
 #include "catalog/pg_auth_members.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_database.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_proc.h"
@@ -3922,6 +3923,62 @@ RelationGetIndexList(Relation relation)
 	return result;
 }
 
+
+List *
+RelationGetMVStatList(Relation relation)
+{
+	Relation	indrel;
+	SysScanDesc indscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	List	   *result;
+	List	   *oldlist;
+	MemoryContext oldcxt;
+
+	/* Quick exit if we already computed the list. */
+	if (relation->rd_mvstatvalid != 0)
+		return list_copy(relation->rd_mvstatlist);
+
+	/*
+	 * We build the list we intend to return (in the caller's context) while
+	 * doing the scan.  After successfully completing the scan, we copy that
+	 * list into the relcache entry.  This avoids cache-context memory leakage
+	 * if we get some sort of error partway through.
+	 */
+	result = NIL;
+
+	/* Prepare to scan pg_mv_statistic for entries having starelid = this rel. */
+	ScanKeyInit(&skey,
+				Anum_pg_mv_statistic_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(relation)));
+
+	indrel = heap_open(MvStatisticRelationId, AccessShareLock);
+	indscan = systable_beginscan(indrel, MvStatisticRelidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+		/* TODO maybe include only already built statistics? */
+		result = insert_ordered_oid(result, HeapTupleGetOid(htup));
+
+	systable_endscan(indscan);
+
+	heap_close(indrel, AccessShareLock);
+
+	/* Now save a copy of the completed list in the relcache entry. */
+	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+	oldlist = relation->rd_mvstatlist;
+	relation->rd_mvstatlist = list_copy(result);
+
+	relation->rd_mvstatvalid = true;
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Don't leak the old list, if there is one */
+	list_free(oldlist);
+
+	return result;
+}
+
 /*
  * insert_ordered_oid
  *		Insert a new Oid into a sorted list of Oids, preserving ordering
@@ -4891,6 +4948,8 @@ load_relcache_init_file(bool shared)
 		rel->rd_indexattr = NULL;
 		rel->rd_keyattr = NULL;
 		rel->rd_idattr = NULL;
+		rel->rd_mvstatvalid = false;
+		rel->rd_mvstatlist = NIL;
 		rel->rd_createSubid = InvalidSubTransactionId;
 		rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
 		rel->rd_amcache = NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index efce7b9..ced92cd 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -43,6 +43,7 @@
 #include "catalog/pg_foreign_server.h"
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_language.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
@@ -501,6 +502,28 @@ static const struct cachedesc cacheinfo[] = {
 		},
 		4
 	},
+	{MvStatisticRelationId,		/* MVSTATNAME */
+		MvStatisticNameIndexId,
+		1,
+		{
+			Anum_pg_mv_statistic_staname,
+			0,
+			0,
+			0
+		},
+		4
+	},
+	{MvStatisticRelationId,		/* MVSTATOID */
+		MvStatisticOidIndexId,
+		1,
+		{
+			ObjectIdAttributeNumber,
+			0,
+			0,
+			0
+		},
+		4
+	},
 	{NamespaceRelationId,		/* NAMESPACENAME */
 		NamespaceNameIndexId,
 		1,
diff --git a/src/backend/utils/mvstats/Makefile b/src/backend/utils/mvstats/Makefile
new file mode 100644
index 0000000..099f1ed
--- /dev/null
+++ b/src/backend/utils/mvstats/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for utils/mvstats
+#
+# IDENTIFICATION
+#    src/backend/utils/mvstats/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/utils/mvstats
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = common.o dependencies.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c
new file mode 100644
index 0000000..a755c49
--- /dev/null
+++ b/src/backend/utils/mvstats/common.c
@@ -0,0 +1,356 @@
+/*-------------------------------------------------------------------------
+ *
+ * common.c
+ *	  POSTGRES multivariate statistics
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mvstats/common.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "common.h"
+
+static VacAttrStats ** lookup_var_attr_stats(int2vector *attrs,
+									  int natts, VacAttrStats **vacattrstats);
+
+static List* list_mv_stats(Oid relid);
+
+
+/*
+ * Compute requested multivariate stats, using the rows sampled for the
+ * plain (single-column) stats.
+ *
+ * This fetches a list of stats from pg_mv_statistic, computes the stats
+ * and serializes them back into the catalog (as bytea values).
+ */
+void
+build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
+			   int natts, VacAttrStats **vacattrstats)
+{
+	ListCell *lc;
+	List *mvstats;
+
+	TupleDesc tupdesc = RelationGetDescr(onerel);
+
+	/*
+	 * Fetch defined MV groups from pg_mv_statistic, and then compute
+	 * the MV statistics (functional dependencies for now).
+	 */
+	mvstats = list_mv_stats(RelationGetRelid(onerel));
+
+	foreach (lc, mvstats)
+	{
+		int				j;
+		MVStatisticInfo *stat = (MVStatisticInfo *)lfirst(lc);
+		MVDependencies	deps  = NULL;
+
+		VacAttrStats  **stats  = NULL;
+		int				numatts   = 0;
+
+		/* int2 vector of attnums the stats should be computed on */
+		int2vector * attrs = stat->stakeys;
+
+		/* see how many of the columns are not dropped */
+		for (j = 0; j < attrs->dim1; j++)
+			if (! tupdesc->attrs[attrs->values[j]-1]->attisdropped)
+				numatts += 1;
+
+		/* if there are dropped attributes, build a filtered int2vector */
+		if (numatts != attrs->dim1)
+		{
+			int16 *tmp = palloc0(numatts * sizeof(int16));
+			int attnum = 0;
+
+			for (j = 0; j < attrs->dim1; j++)
+				if (! tupdesc->attrs[attrs->values[j]-1]->attisdropped)
+					tmp[attnum++] = attrs->values[j];
+
+			pfree(attrs);
+			attrs = buildint2vector(tmp, numatts);
+		}
+
+		/* filter only the interesting vacattrstats records */
+		stats = lookup_var_attr_stats(attrs, natts, vacattrstats);
+
+		/* check allowed number of dimensions */
+		Assert((attrs->dim1 >= 2) && (attrs->dim1 <= MVSTATS_MAX_DIMENSIONS));
+
+		/*
+		 * Analyze functional dependencies of columns.
+		 */
+		deps = build_mv_dependencies(numrows, rows, attrs, stats);
+
+		/* store the functional dependencies in the catalog */
+		update_mv_stats(stat->mvoid, deps, attrs);
+	}
+}
+
+/*
+ * Lookup the VacAttrStats info for the selected columns, with indexes
+ * matching the attrs vector (to make it easy to work with when
+ * computing multivariate stats).
+ */
+static VacAttrStats **
+lookup_var_attr_stats(int2vector *attrs, int natts, VacAttrStats **vacattrstats)
+{
+	int i, j;
+	int numattrs = attrs->dim1;
+	VacAttrStats **stats = (VacAttrStats**)palloc0(numattrs * sizeof(VacAttrStats*));
+
+	/* lookup VacAttrStats info for the requested columns (same attnum) */
+	for (i = 0; i < numattrs; i++)
+	{
+		stats[i] = NULL;
+		for (j = 0; j < natts; j++)
+		{
+			if (attrs->values[i] == vacattrstats[j]->tupattnum)
+			{
+				stats[i] = vacattrstats[j];
+				break;
+			}
+		}
+
+		/*
+		 * Check that we found the info, that the attnum matches, and
+		 * that the requested 'lt' operator (needed to sort the sample
+		 * rows) is available for the type.
+		 */
+		Assert(stats[i] != NULL);
+		Assert(stats[i]->tupattnum == attrs->values[i]);
+
+		/* FIXME This is a rather ugly way to check for 'ltopr' (which
+		 *       is defined only for 'scalar' attributes).
+		 */
+		Assert(((StdAnalyzeData *)stats[i]->extra_data)->ltopr != InvalidOid);
+	}
+
+	return stats;
+}
+
+/*
+ * Fetch list of MV stats defined on a table, without the actual data
+ * for histograms, MCV lists etc.
+ */
+static List*
+list_mv_stats(Oid relid)
+{
+	Relation	indrel;
+	SysScanDesc indscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	List	   *result = NIL;
+
+	/* Prepare to scan pg_mv_statistic for entries having starelid = this rel. */
+	ScanKeyInit(&skey,
+				Anum_pg_mv_statistic_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+
+	indrel = heap_open(MvStatisticRelationId, AccessShareLock);
+	indscan = systable_beginscan(indrel, MvStatisticRelidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+	{
+		MVStatisticInfo *info = makeNode(MVStatisticInfo);
+		Form_pg_mv_statistic stats = (Form_pg_mv_statistic) GETSTRUCT(htup);
+
+		info->mvoid = HeapTupleGetOid(htup);
+		info->stakeys = buildint2vector(stats->stakeys.values, stats->stakeys.dim1);
+		info->deps_built = stats->deps_built;
+
+		result = lappend(result, info);
+	}
+
+	systable_endscan(indscan);
+
+	heap_close(indrel, AccessShareLock);
+
+	/* TODO maybe save the list into the relcache, as RelationGetIndexList
+	 *      (which was used as an inspiration for this one) does? */
+
+	return result;
+}
+
+void
+update_mv_stats(Oid mvoid, MVDependencies dependencies, int2vector *attrs)
+{
+	HeapTuple	stup,
+				oldtup;
+	Datum		values[Natts_pg_mv_statistic];
+	bool		nulls[Natts_pg_mv_statistic];
+	bool		replaces[Natts_pg_mv_statistic];
+
+	Relation	sd = heap_open(MvStatisticRelationId, RowExclusiveLock);
+
+	memset(nulls,    1, Natts_pg_mv_statistic * sizeof(bool));
+	memset(replaces, 0, Natts_pg_mv_statistic * sizeof(bool));
+	memset(values,   0, Natts_pg_mv_statistic * sizeof(Datum));
+
+	/*
+	 * Construct a new pg_mv_statistic tuple - replace only the functional
+	 * dependencies, depending on whether they were actually computed.
+	 */
+	if (dependencies != NULL)
+	{
+		nulls[Anum_pg_mv_statistic_stadeps -1]    = false;
+		values[Anum_pg_mv_statistic_stadeps  - 1]
+			= PointerGetDatum(serialize_mv_dependencies(dependencies));
+	}
+
+	/* always replace the value (either by bytea or NULL) */
+	replaces[Anum_pg_mv_statistic_stadeps -1] = true;
+
+	/* always change the availability flags */
+	nulls[Anum_pg_mv_statistic_deps_built -1] = false;
+	nulls[Anum_pg_mv_statistic_stakeys-1]     = false;
+
+	/* use the new attnums, in case we removed some dropped ones */
+	replaces[Anum_pg_mv_statistic_deps_built-1] = true;
+	replaces[Anum_pg_mv_statistic_stakeys -1]    = true;
+
+	values[Anum_pg_mv_statistic_deps_built-1] = BoolGetDatum(dependencies != NULL);
+	values[Anum_pg_mv_statistic_stakeys -1]    = PointerGetDatum(attrs);
+
+	/* Is there already a pg_mv_statistic tuple for this attribute? */
+	oldtup = SearchSysCache1(MVSTATOID,
+							 ObjectIdGetDatum(mvoid));
+
+	if (HeapTupleIsValid(oldtup))
+	{
+		/* Yes, replace it */
+		stup = heap_modify_tuple(oldtup,
+								 RelationGetDescr(sd),
+								 values,
+								 nulls,
+								 replaces);
+		ReleaseSysCache(oldtup);
+		simple_heap_update(sd, &stup->t_self, stup);
+	}
+	else
+		elog(ERROR, "invalid pg_mv_statistic record (oid=%u)", mvoid);
+
+	/* update indexes too */
+	CatalogUpdateIndexes(sd, stup);
+
+	heap_freetuple(stup);
+
+	heap_close(sd, RowExclusiveLock);
+}
+
+/* multi-variate stats comparator */
+
+/*
+ * qsort_arg comparator for sorting Datums (MV stats)
+ *
+ * This does not maintain the tupnoLink array.
+ */
+int
+compare_scalars_simple(const void *a, const void *b, void *arg)
+{
+	Datum		da = *(Datum*)a;
+	Datum		db = *(Datum*)b;
+	SortSupport ssup= (SortSupport) arg;
+
+	return ApplySortComparator(da, false, db, false, ssup);
+}
+
+/*
+ * qsort_arg comparator for sorting data when partitioning a MV bucket
+ */
+int
+compare_scalars_partition(const void *a, const void *b, void *arg)
+{
+	Datum		da = ((ScalarItem*)a)->value;
+	Datum		db = ((ScalarItem*)b)->value;
+	SortSupport ssup= (SortSupport) arg;
+
+	return ApplySortComparator(da, false, db, false, ssup);
+}
+
+/* initialize multi-dimensional sort */
+MultiSortSupport
+multi_sort_init(int ndims)
+{
+	MultiSortSupport mss;
+
+	Assert(ndims >= 2);
+
+	mss = (MultiSortSupport)palloc0(offsetof(MultiSortSupportData, ssup)
+									+ sizeof(SortSupportData)*ndims);
+
+	mss->ndims = ndims;
+
+	return mss;
+}
+
+/*
+ * add sort support info for dimension 'dim' (index into vacattrstats)
+ * to mss, at the position 'sortdim'
+ */
+void
+multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
+						int dim, VacAttrStats **vacattrstats)
+{
+	/* first, lookup StdAnalyzeData for the dimension (attribute) */
+	SortSupportData ssup;
+	StdAnalyzeData *tmp = (StdAnalyzeData *)vacattrstats[dim]->extra_data;
+
+	Assert(mss != NULL);
+	Assert(sortdim < mss->ndims);
+
+	/* initialize sort support, etc. */
+	memset(&ssup, 0, sizeof(ssup));
+	ssup.ssup_cxt = CurrentMemoryContext;
+
+	/* We always use the default collation for statistics */
+	ssup.ssup_collation = DEFAULT_COLLATION_OID;
+	ssup.ssup_nulls_first = false;
+
+	PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup);
+
+	mss->ssup[sortdim] = ssup;
+}
+
+/* compare all the dimensions in the selected order */
+int
+multi_sort_compare(const void *a, const void *b, void *arg)
+{
+	int i;
+	SortItem *ia = (SortItem*)a;
+	SortItem *ib = (SortItem*)b;
+
+	MultiSortSupport mss = (MultiSortSupport)arg;
+
+	for (i = 0; i < mss->ndims; i++)
+	{
+		int	compare;
+
+		compare = ApplySortComparator(ia->values[i], ia->isnull[i],
+									  ib->values[i], ib->isnull[i],
+									  &mss->ssup[i]);
+
+		if (compare != 0)
+			return compare;
+
+	}
+
+	/* equal by default */
+	return 0;
+}
+
+/* compare selected dimension */
+int
+multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
+					   MultiSortSupport mss)
+{
+	return ApplySortComparator(a->values[dim], a->isnull[dim],
+							   b->values[dim], b->isnull[dim],
+							   &mss->ssup[dim]);
+}
diff --git a/src/backend/utils/mvstats/common.h b/src/backend/utils/mvstats/common.h
new file mode 100644
index 0000000..6d5465b
--- /dev/null
+++ b/src/backend/utils/mvstats/common.h
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * common.h
+ *	  POSTGRES multivariate statistics
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mvstats/common.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/tuptoaster.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_mv_statistic.h"
+#include "foreign/fdwapi.h"
+#include "postmaster/autovacuum.h"
+#include "storage/lmgr.h"
+#include "utils/datum.h"
+#include "utils/sortsupport.h"
+#include "utils/syscache.h"
+#include "utils/fmgroids.h"
+#include "utils/builtins.h"
+#include "access/sysattr.h"
+
+#include "utils/mvstats.h"
+
+/* FIXME private structure copied from analyze.c */
+
+typedef struct
+{
+	Oid			eqopr;			/* '=' operator for datatype, if any */
+	Oid			eqfunc;			/* and associated function */
+	Oid			ltopr;			/* '<' operator for datatype, if any */
+} StdAnalyzeData;
+
+typedef struct
+{
+	Datum		value;			/* a data value */
+	int			tupno;			/* position index for tuple it came from */
+} ScalarItem;
+
+/* multi-sort */
+typedef struct MultiSortSupportData {
+	int				ndims;		/* number of dimensions supported by the sort */
+	SortSupportData	ssup[1];	/* sort support data for each dimension */
+} MultiSortSupportData;
+
+typedef MultiSortSupportData* MultiSortSupport;
+
+typedef struct SortItem {
+	Datum  *values;
+	bool   *isnull;
+} SortItem;
+
+MultiSortSupport multi_sort_init(int ndims);
+
+void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
+							  int dim, VacAttrStats **vacattrstats);
+
+int multi_sort_compare(const void *a, const void *b, void *arg);
+
+int multi_sort_compare_dim(int dim, const SortItem *a,
+						   const SortItem *b, MultiSortSupport mss);
+
+/* comparators, used when constructing multivariate stats */
+int compare_scalars_simple(const void *a, const void *b, void *arg);
+int compare_scalars_partition(const void *a, const void *b, void *arg);
diff --git a/src/backend/utils/mvstats/dependencies.c b/src/backend/utils/mvstats/dependencies.c
new file mode 100644
index 0000000..84b6561
--- /dev/null
+++ b/src/backend/utils/mvstats/dependencies.c
@@ -0,0 +1,638 @@
+/*-------------------------------------------------------------------------
+ *
+ * dependencies.c
+ *	  POSTGRES multivariate functional dependencies
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mvstats/dependencies.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "common.h"
+#include "utils/lsyscache.h"
+
+/*
+ * Mine functional dependencies between columns, in the form (A => B),
+ * meaning that a value in column 'A' determines value in 'B'. A simple
+ * artificial example may be a table created like this
+ *
+ *     CREATE TABLE deptest (a INT, b INT)
+ *        AS SELECT i, i/10 FROM generate_series(1,100000) s(i);
+ *
+ * Clearly, once we know the value for 'A' we can easily determine the
+ * value of 'B' by dividing (A/10). A more practical example may be
+ * addresses, where (ZIP code => city name), i.e. once we know the ZIP,
+ * we probably know which city it belongs to. Larger cities usually have
+ * multiple ZIP codes, so the dependency can't be reversed.
+ *
+ * Functional dependencies are a concept well described in relational
+ * theory, especially in definition of normalization and "normal forms".
+ * Wikipedia has a nice definition of a functional dependency [1]:
+ *
+ *     In a given table, an attribute Y is said to have a functional
+ *     dependency on a set of attributes X (written X -> Y) if and only
+ *     if each X value is associated with precisely one Y value. For
+ *     example, in an "Employee" table that includes the attributes
+ *     "Employee ID" and "Employee Date of Birth", the functional
+ *     dependency {Employee ID} -> {Employee Date of Birth} would hold.
+ *     It follows from the previous two sentences that each {Employee ID}
+ *     is associated with precisely one {Employee Date of Birth}.
+ *
+ * [1] http://en.wikipedia.org/wiki/Database_normalization
+ *
+ * Ideally, datasets would be normalized so that no such functional
+ * dependencies remain, but that's not always practical. Sometimes it's
+ * a conscious choice to model the dataset in a denormalized way, either
+ * for performance or to make querying easier.
+ *
+ * The current implementation supports only dependencies between two
+ * columns, but this is merely a simplification of the initial patch.
+ * It's certainly useful to mine for dependencies with multiple columns
+ * on the 'left' side (the condition of the dependency), i.e.
+ * dependencies of the form [A,B] => C and so on.
+ *
+ * TODO The implementation may/should be smart enough not to mine both
+ *      [A => B] and [A,C => B], because the second dependency is a
+ *      consequence of the first one (if values of A determine values
+ *      of B, adding another column won't change that). The ANALYZE
+ *      should first analyze 1:1 dependencies, then 2:1 dependencies
+ *      (and skip the already identified ones), etc.
+ *
+ * For example the dependency [city name => zip code] is much weaker
+ * than [city name, state name => zip code], because there may be
+ * multiple cities with the same name in various states. It's not
+ * perfect though - there are probably cities with the same name within
+ * the same state, but hopefully that's a relatively rare occurrence.
+ * More about this in the section about dependency mining.
+ *
+ * Handling multiple columns on the right side is not necessary, as such
+ * dependencies may be decomposed into a set of dependencies with
+ * the same meaning, one for each column on the right side. For example
+ *
+ *     A => [B,C]
+ *
+ * is exactly the same as
+ *
+ *     (A => B) & (A => C).
+ *
+ * Of course, storing (A => [B, C]) may be more efficient than storing
+ * the two dependencies (A => B) and (A => C) separately.
+ *
+ *
+ * Dependency mining (ANALYZE)
+ * ---------------------------
+ *
+ * The current build algorithm is rather simple - for each pair [A,B] of
+ * columns, the data are sorted lexicographically (first by A, then B),
+ * and then a number of metrics is computed by walking the sorted data.
+ *
+ * In general the algorithm counts distinct values of A (forming groups
+ * thanks to the sorting), and classifies each group as supporting or
+ * contradicting the hypothesis that A => B (i.e. that values of B are
+ * determined by A). If there are multiple values of B for a single
+ * value of A, the group is counted as contradicting.
+ *
+ * A group may be neither supporting nor contradicting. To be counted as
+ * supporting, the group has to have at least min_group_size(=3) rows.
+ * Smaller 'supporting' groups are counted as neutral.
+ *
+ * Finally, the number of rows in supporting and contradicting groups is
+ * compared, and if there is at least 10x more supporting rows, the
+ * dependency is considered valid.
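+ *
+ * A small worked example (the numbers are made up purely for
+ * illustration): suppose 9500 sample rows fall into single-value
+ * groups of at least min_group_size rows, and 300 rows fall into
+ * groups with multiple B values. Then 9500 > 10 * 300, so the
+ * dependency is accepted; with 1000 contradicting rows it would be
+ * rejected (9500 < 10 * 1000).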
+ *
+ *
+ * Real-world datasets are imperfect - there may be errors (e.g. due to
+ * data-entry mistakes), or factually correct records, yet contradicting
+ * the dependency (e.g. when a city splits into two, but both keep the
+ * same ZIP code). A strict ANALYZE implementation (where the functional
+ * dependencies are identified) would fail to detect dependencies in
+ * such noisy data, making the approach unusable in practice.
+ *
+ * The proposed implementation attempts to handle such noisy cases
+ * gracefully, by tolerating a small number of contradicting rows.
+ *
+ * In the future this might also perform some sort of test and decide
+ * whether it's worth building any other kind of multivariate stats,
+ * or whether the dependencies sufficiently describe the data. Or at
+ * least not build the MCV list / histogram on the implied columns.
+ * Such reduction would however make the 'verification' (see the next
+ * section) impossible.
+ *
+ *
+ * Clause reduction (planner/optimizer)
+ * ------------------------------------
+ *
+ * Applying the dependencies is quite simple - given a list of clauses,
+ * try to apply all the dependencies. For example, given the clause list
+ *
+ *    (a = 1) AND (b = 1) AND (c = 1) AND (d < 100)
+ *
+ * and dependencies [a=>b] and [a=>d], this may be reduced to
+ *
+ *    (a = 1) AND (c = 1) AND (d < 100)
+ *
+ * The (d<100) can't be reduced as it's not an equality clause, so the
+ * dependency [a=>d] can't be applied.
+ *
+ * See clauselist_apply_dependencies() for more details.
+ *
+ * The problem with the reduction is that the query may use conditions
+ * that are not redundant, but in fact contradictory - e.g. the user
+ * may search for a ZIP code and a city name not matching the ZIP code.
+ *
+ * In such cases the condition on the city name is not redundant but
+ * contradictory (making the result empty), and removing it while
+ * estimating the cardinality will make the estimate worse.
+ *
+ * The current estimation, assuming independence (and multiplying the
+ * selectivities), happens to work better in this case, but only by
+ * sheer luck.
+ *
+ * In some cases this might be verified using the other multivariate
+ * statistics - MCV lists and histograms. For MCV lists the verification
+ * might be very simple - peek into the list to see whether any items
+ * match the clause on the 'A' column (e.g. ZIP code), and if such an
+ * item is found, check that the 'B' column matches the other clause.
+ * If it does not, the clauses are contradictory. If no such item is
+ * found we can't really conclude anything, except maybe restricting the
+ * selectivity using the MCV data (e.g. its min/max selectivity).
+ *
+ * With histograms, it might work similarly - we can't check the values
+ * directly (because histograms use buckets, unlike MCV lists, which
+ * store the actual values). So we can only look at the buckets matching
+ * the clauses - if those buckets have very low frequency, it probably
+ * means the two clauses are incompatible.
+ *
+ * It's unclear what 'low frequency' is, but if one of the clauses is
+ * implied (automatically true because of the other clause), then
+ *
+ *     selectivity[clause(A)] = selectivity[clause(A) & clause(B)]
+ *
+ * So we might compute selectivity of the first clause (on the column
+ * A in dependency [A=>B]) - for example using regular statistics.
+ * And then check if the selectivity computed from the histogram is
+ * about the same (or significantly lower).
+ *
+ * The problem is that histograms work well only when the data ordering
+ * matches the natural meaning. For values that serve as labels - like
+ * city names or ZIP codes, or even generated IDs, histograms really
+ * don't work all that well. For example sorting cities by name won't
+ * match the sorting of ZIP codes, rendering the histogram unusable.
+ *
+ * MCV lists are probably going to work much better, because they don't
+ * really assume any sort of ordering, and they're probably more
+ * appropriate for label-like data.
+ *
+ * TODO Support dependencies with multiple columns on left/right.
+ *
+ * TODO Investigate using histogram and MCV list to confirm the
+ *      functional dependencies.
+ *
+ * TODO Investigate statistical testing of the distribution (to decide
+ *      whether it makes sense to build the histogram/MCV list).
+ *
+ * TODO Using a min/max of selectivities would probably make more sense
+ *      for the associated columns.
+ *
+ * TODO Consider eliminating the implied columns from the histogram and
+ *      MCV lists (but maybe that's not a good idea, because that'd make
+ *      it impossible to use these stats for non-equality clauses and
+ *      also it wouldn't be possible to use the stats for verification
+ *      of the dependencies as proposed in another TODO).
+ *
+ * TODO This builds a complete set of dependencies, i.e. including
+ *      transitive dependencies - if we identify [A => B] and [B => C],
+ *      we're likely to identify [A => C] too. It might be better to
+ *      keep only the minimal set of dependencies, i.e. prune all the
+ *      dependencies that we can recreate by transitivity.
+ *
+ *      There are two conceptual ways to do that:
+ *
+ *      (a) generate all the rules, and then prune the rules that may
+ *          be recreated by combining other dependencies, or
+ *
+ *      (b) perform the 'is this a combination of other dependencies'
+ *          check before actually doing the work
+ *
+ *      The second option has the advantage that we don't really need
+ *      to perform the sort/count. It's not sufficient alone, though,
+ *      because we may discover the dependencies in the wrong order.
+ *      For example [A => B], [A => C] and then [B => C]. None of those
+ *      dependencies is a combination of the already known ones, yet
+ *      [A => C] is a combination of [A => B] and [B => C].
+ *
+ * FIXME Not sure the current NULL handling makes much sense. We assume
+ *       that NULL is 0, so it's handled like a regular value
+ *       (NULL == NULL), so all NULLs in a single column form a single
+ *       group. Maybe that's not the right thing to do, especially with
+ *       equality conditions - in that case NULLs are irrelevant. So
+ *       maybe the right solution would be to just ignore NULL values?
+ *
+ *       However simply "ignoring" the NULL values does not seem like
+ *       a good idea - imagine columns A and B, where for each value of
+ *       A, values in B are constant (same for the whole group) or NULL.
+ *       Let's say only 10% of the B values in each group are not NULL.
+ *       Then ignoring the NULL values will result in a 10x misestimate
+ *       (and it's trivial to construct arbitrary errors). So maybe handling
+ *       NULL values just like a regular value is the right thing here.
+ *
+ *       Or maybe NULL values should be treated differently on each side
+ *       of the dependency? E.g. as ignored on the left (condition) and
+ *       as regular values on the right - this seems consistent with how
+ *       equality clauses work, as an equality clause implies 'NOT NULL'.
+ *       So if we say [A => B] then it may also imply "NOT NULL" on the
+ *       right side.
+ */
+MVDependencies
+build_mv_dependencies(int numrows, HeapTuple *rows, int2vector *attrs,
+					  VacAttrStats **stats)
+{
+	int i;
+	int numattrs = attrs->dim1;
+
+	/* result */
+	int ndeps = 0;
+	MVDependencies	dependencies = NULL;
+	MultiSortSupport mss = multi_sort_init(2);	/* 2 dimensions for now */
+
+	/* TODO Maybe this should be somehow related to the number of
+	 *      distinct values in the two columns we're currently analyzing.
+	 *      Assuming the distribution is uniform, we can estimate the
+	 *      average group size and use it as a threshold. Or something
+	 *      like that. Seems better than a static approach.
+	 */
+	int min_group_size = 3;
+
+	/* dimension indexes we'll check for associations [a => b] */
+	int dima, dimb;
+
+	/*
+	 * We'll reuse the same array for all the 2-column combinations.
+	 *
+	 * It's possible to sort the sample rows directly, but this seemed
+	 * somewhat simpler / less error prone. Another option would be to
+	 * allocate the arrays for each SortItem separately, but that'd be
+	 * significant overhead (not just CPU, but especially memory bloat).
+	 */
+	SortItem * items = (SortItem*)palloc0(numrows * sizeof(SortItem));
+
+	Datum *values = (Datum*)palloc0(sizeof(Datum) * numrows * 2);
+	bool  *isnull = (bool*)palloc0(sizeof(bool) * numrows * 2);
+
+	for (i = 0; i < numrows; i++)
+	{
+		items[i].values = &values[i * 2];
+		items[i].isnull = &isnull[i * 2];
+	}
+
+	Assert(numattrs >= 2);
+
+	/*
+	 * Evaluate all possible combinations of [A => B], using a simple algorithm:
+	 *
+	 * (a) sort the data by [A,B]
+	 * (b) split the data into groups by A (new group whenever a value changes)
+	 * (c) count different values in the B column (again, value changes)
+	 *
+	 * TODO It should be rather simple to merge [A => B] and [A => C] into
+	 *      [A => B,C]. Just keep A constant, collect all the "implied" columns
+	 *      and you're done.
+	 */
+	for (dima = 0; dima < numattrs; dima++)
+	{
+		/* prepare the sort function for the first dimension */
+		multi_sort_add_dimension(mss, 0, dima, stats);
+
+		for (dimb = 0; dimb < numattrs; dimb++)
+		{
+			SortItem current;
+
+			/* number of groups supporting / contradicting the dependency */
+			int n_supporting = 0;
+			int n_contradicting = 0;
+
+			/* counters valid within a group */
+			int group_size = 0;
+			int n_violations = 0;
+
+			int n_supporting_rows = 0;
+			int n_contradicting_rows = 0;
+
+			/* make sure the columns are different (skip the A => A case) */
+			if (dima == dimb)
+				continue;
+
+			/* prepare the sort function for the second dimension */
+			multi_sort_add_dimension(mss, 1, dimb, stats);
+
+			/* reset the values and isnull flags */
+			memset(values, 0, sizeof(Datum) * numrows * 2);
+			memset(isnull, 0, sizeof(bool)  * numrows * 2);
+
+			/* accumulate all the data for both columns into an array and sort it */
+			for (i = 0; i < numrows; i++)
+			{
+				items[i].values[0]
+					= heap_getattr(rows[i], attrs->values[dima],
+									stats[dima]->tupDesc, &items[i].isnull[0]);
+
+				items[i].values[1]
+					= heap_getattr(rows[i], attrs->values[dimb],
+									stats[dimb]->tupDesc, &items[i].isnull[1]);
+			}
+
+			qsort_arg((void *) items, numrows, sizeof(SortItem),
+					  multi_sort_compare, mss);
+
+			/*
+			 * Walk through the array, split it into groups according to
+			 * the A value, and count distinct B values within each group.
+			 * If there's a single B value for the whole group, we count
+			 * it as supporting the association, otherwise we count it
+			 * as contradicting.
+			 *
+			 * Furthermore we require a group to have at least a certain
+			 * number of rows to be counted as supporting the dependency,
+			 * whereas a contradicting group counts regardless of its size.
+			 */
+
+			/* start with values from the first row */
+			current = items[0];
+			group_size  = 1;
+
+			for (i = 1; i < numrows; i++)
+			{
+				/* end of the group */
+				if (multi_sort_compare_dim(0, &items[i], &current, mss) != 0)
+				{
+					/*
+					 * If there are no contradicting rows, count it as
+					 * supporting (otherwise contradicting), but only if
+					 * the group is large enough.
+					 *
+					 * The requirement of a minimum group size makes it
+					 * impossible to identify [unique,unique] cases, but
+					 * that's probably a different case. This is more
+					 * about [zip => city] associations etc.
+					 *
+					 * If there are violations, count the group/rows as
+					 * a violation.
+					 *
+					 * It may be neither, if the group is too small (does
+					 * not contain at least min_group_size rows).
+					 */
+					if ((n_violations == 0) && (group_size >= min_group_size))
+					{
+						n_supporting +=  1;
+						n_supporting_rows += group_size;
+					}
+					else if (n_violations > 0)
+					{
+						n_contradicting +=  1;
+						n_contradicting_rows += group_size;
+					}
+
+					/* current values start a new group */
+					n_violations = 0;
+					group_size = 0;
+				}
+				/* mismatch of a B value is contradicting */
+				else if (multi_sort_compare_dim(1, &items[i], &current, mss) != 0)
+				{
+					n_violations += 1;
+				}
+
+				current = items[i];
+				group_size += 1;
+			}
+
+			/* handle the last group (just like above) */
+			if ((n_violations == 0) && (group_size >= min_group_size))
+			{
+				n_supporting += 1;
+				n_supporting_rows += group_size;
+			}
+			else if (n_violations)
+			{
+				n_contradicting += 1;
+				n_contradicting_rows += group_size;
+			}
+
+			/*
+			 * See if the number of rows supporting the association is at least
+			 * 10x the number of rows violating the hypothetical dependency.
+			 *
+			 * TODO This is a rather arbitrary limit - I guess it's possible to do
+			 *      some math to come up with a better rule (e.g. testing a hypothesis
+			 *      'this is due to randomness'). We can create a contingency table
+			 *      from the values and use it for testing. Possibly only when
+			 *      there are no contradicting rows?
+			 *
+			 * TODO Also, if (a => b) and (b => a) at the same time, it pretty much
+			 *      means there's a 1:1 relation (or one is a 'label'), making the
+			 *      conditions rather redundant. Although it's possible that the
+			 *      query uses an incompatible combination of values.
+			 */
+			if (n_supporting_rows > (n_contradicting_rows * 10))
+			{
+				if (dependencies == NULL)
+				{
+					dependencies = (MVDependencies)palloc0(sizeof(MVDependenciesData));
+					dependencies->magic = MVSTAT_DEPS_MAGIC;
+				}
+				else
+					dependencies = repalloc(dependencies, offsetof(MVDependenciesData, deps)
+											+ sizeof(MVDependency) * (dependencies->ndeps + 1));
+
+				/* add the new dependency to the list */
+				dependencies->deps[ndeps] = (MVDependency)palloc0(sizeof(MVDependencyData));
+				dependencies->deps[ndeps]->a = attrs->values[dima];
+				dependencies->deps[ndeps]->b = attrs->values[dimb];
+
+				dependencies->ndeps = (++ndeps);
+			}
+		}
+	}
+
+	pfree(items);
+	pfree(values);
+	pfree(isnull);
+	pfree(stats);
+	pfree(mss);
+
+	return dependencies;
+}
+
+/*
+ * Store the dependencies into a bytea, so that it can be stored in the
+ * pg_mv_statistic catalog.
+ *
+ * Currently this only supports simple two-column rules, and stores them
+ * as a sequence of attnum pairs. In the future, this needs to be made
+ * more complex to support multiple columns on both sides of the
+ * implication (using AND on left, OR on right).
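+ *
+ * As a rough illustration (assuming two dependencies are stored), the
+ * resulting bytea looks like this:
+ *
+ *     [varlena header] [uint32 magic] [int32 ndeps] [a1 b1] [a2 b2]
+ *
+ * where each a/b is an int16 attnum.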
+ */
+bytea *
+serialize_mv_dependencies(MVDependencies dependencies)
+{
+	int i;
+
+	/* we need to store the header (magic, ndeps), plus 2 * int16 per dependency */
+	Size len = VARHDRSZ + offsetof(MVDependenciesData, deps)
+				+ dependencies->ndeps * (sizeof(int16) * 2);
+
+	bytea * output = (bytea*)palloc0(len);
+
+	char * tmp = VARDATA(output);
+
+	SET_VARSIZE(output, len);
+
+	/* first, store the number of dimensions / items */
+	memcpy(tmp, dependencies, offsetof(MVDependenciesData, deps));
+	tmp += offsetof(MVDependenciesData, deps);
+
+	/* walk through the dependencies and copy both columns into the bytea */
+	for (i = 0; i < dependencies->ndeps; i++)
+	{
+		memcpy(tmp, &(dependencies->deps[i]->a), sizeof(int16));
+		tmp += sizeof(int16);
+
+		memcpy(tmp, &(dependencies->deps[i]->b), sizeof(int16));
+		tmp += sizeof(int16);
+	}
+
+	return output;
+}
+
+/*
+ * Reads serialized dependencies into an MVDependencies structure.
+ */
+MVDependencies
+deserialize_mv_dependencies(bytea * data)
+{
+	int		i;
+	Size	expected_size;
+	MVDependencies	dependencies;
+	char   *tmp;
+
+	if (data == NULL)
+		return NULL;
+
+	if (VARSIZE_ANY_EXHDR(data) < offsetof(MVDependenciesData,deps))
+		elog(ERROR, "invalid MVDependencies size %ld (expected at least %ld)",
+			 VARSIZE_ANY_EXHDR(data), offsetof(MVDependenciesData,deps));
+
+	/* read the MVDependencies header */
+	dependencies = (MVDependencies)palloc0(sizeof(MVDependenciesData));
+
+	/* initialize pointer to the data part (skip the varlena header) */
+	tmp = VARDATA(data);
+
+	/* get the header and perform basic sanity checks */
+	memcpy(dependencies, tmp, offsetof(MVDependenciesData, deps));
+	tmp += offsetof(MVDependenciesData, deps);
+
+	if (dependencies->magic != MVSTAT_DEPS_MAGIC)
+	{
+		pfree(dependencies);
+		elog(WARNING, "not a MV Dependencies (magic number mismatch)");
+		return NULL;
+	}
+
+	Assert(dependencies->ndeps > 0);
+
+	/* what bytea size do we expect for those parameters */
+	expected_size = offsetof(MVDependenciesData,deps) +
+					dependencies->ndeps * sizeof(int16) * 2;
+
+	if (VARSIZE_ANY_EXHDR(data) != expected_size)
+		elog(ERROR, "invalid dependencies size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	/* allocate space for the dependencies */
+	dependencies = repalloc(dependencies, offsetof(MVDependenciesData,deps)
+							+ (dependencies->ndeps * sizeof(MVDependency)));
+
+	for (i = 0; i < dependencies->ndeps; i++)
+	{
+		dependencies->deps[i] = (MVDependency)palloc0(sizeof(MVDependencyData));
+
+		memcpy(&(dependencies->deps[i]->a), tmp, sizeof(int16));
+		tmp += sizeof(int16);
+
+		memcpy(&(dependencies->deps[i]->b), tmp, sizeof(int16));
+		tmp += sizeof(int16);
+	}
+
+	return dependencies;
+}
+
+/* print some basic info about dependencies (number of dependencies) */
+Datum
+pg_mv_stats_dependencies_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+	char	   *result;
+
+	MVDependencies dependencies = deserialize_mv_dependencies(data);
+
+	if (dependencies == NULL)
+		PG_RETURN_NULL();
+
+	result = palloc0(128);
+	snprintf(result, 128, "dependencies=%d", dependencies->ndeps);
+
+	/* FIXME free the deserialized data (pfree is not enough) */
+
+	PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+/* print the dependencies
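+ *
+ * The output is a list of attnum pairs, e.g. "2 => 3, 4 => 3" (the
+ * values shown here are just an illustration).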
+ *
+ * TODO  Would be nice if this knew the actual column names (instead of
+ *       the attnums).
+ *
+ * FIXME This is really ugly and does not really check the lengths and
+ *       strcpy/snprintf return values properly. Needs to be fixed.
+ */
+Datum
+pg_mv_stats_dependencies_show(PG_FUNCTION_ARGS)
+{
+	int			i = 0;
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+	char	   *result = NULL;
+	int			len = 0;
+
+	MVDependencies dependencies = deserialize_mv_dependencies(data);
+
+	if (dependencies == NULL)
+		PG_RETURN_NULL();
+
+	for (i = 0; i < dependencies->ndeps; i++)
+	{
+		MVDependency dependency = dependencies->deps[i];
+		char	buffer[128];
+
+		int		tmp = snprintf(buffer, 128, "%s%d => %d",
+				((i == 0) ? "" : ", "), dependency->a, dependency->b);
+
+		if (tmp < 127)
+		{
+			if (result == NULL)
+				result = palloc0(len + tmp + 1);
+			else
+				result = repalloc(result, len + tmp + 1);
+
+			strcpy(result + len, buffer);
+			len += tmp;
+		}
+	}
+
+	PG_RETURN_TEXT_P(cstring_to_text(result));
+}
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index bb59bc2..f6d60ad 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2104,6 +2104,48 @@ describeOneTableDetails(const char *schemaname,
 			PQclear(result);
 		}
 
+		/* print any multivariate statistics */
+		if (pset.sversion >= 90500)
+		{
+			printfPQExpBuffer(&buf,
+						   "SELECT oid, staname, stakeys,\n"
+						   "  deps_enabled,\n"
+						   "  deps_built,\n"
+						   "  (SELECT string_agg(attname::text,', ')\n"
+						   "    FROM ((SELECT unnest(stakeys) AS attnum) s\n"
+						   "         JOIN pg_attribute a ON (starelid = a.attrelid and a.attnum = s.attnum))) AS attnums\n"
+						   "FROM pg_mv_statistic stat WHERE starelid  = '%s' ORDER BY 1;",
+							  oid);
+
+			result = PSQLexec(buf.data);
+			if (!result)
+				goto error_return;
+			else
+				tuples = PQntuples(result);
+
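+			/*
+			 * Each statistics entry gets one footer line, roughly of the
+			 * form "    name (dependencies) ON (a, b)" - the name and the
+			 * column list here are just an illustration.
+			 */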
+			if (tuples > 0)
+			{
+				printTableAddFooter(&cont, _("Statistics:"));
+				for (i = 0; i < tuples; i++)
+				{
+					printfPQExpBuffer(&buf, "    ");
+
+					/* statistics name */
+					appendPQExpBuffer(&buf, "%s ", PQgetvalue(result, i, 1));
+
+					/*  options */
+					if (!strcmp(PQgetvalue(result, i, 3), "t"))
+						appendPQExpBuffer(&buf, "(dependencies)");
+
+					appendPQExpBuffer(&buf, " ON (%s)",
+							PQgetvalue(result, i, 5));
+
+					printTableAddFooter(&cont, buf.data);
+				}
+			}
+			PQclear(result);
+		}
+
 		/* print rules */
 		if (tableinfo.hasrules && tableinfo.relkind != 'm')
 		{
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index fbcf904..9a5c397 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -153,10 +153,11 @@ typedef enum ObjectClass
 	OCLASS_EXTENSION,			/* pg_extension */
 	OCLASS_EVENT_TRIGGER,		/* pg_event_trigger */
 	OCLASS_POLICY,				/* pg_policy */
-	OCLASS_TRANSFORM			/* pg_transform */
+	OCLASS_TRANSFORM,			/* pg_transform */
+	OCLASS_STATISTICS			/* pg_mv_statistic */
 } ObjectClass;
 
-#define LAST_OCLASS		OCLASS_TRANSFORM
+#define LAST_OCLASS		OCLASS_STATISTICS
 
 
 /* in dependency.c */
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index e6ac394..36debeb 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum,
 				  DropBehavior behavior, bool complain, bool internal);
 extern void RemoveAttrDefaultById(Oid attrdefId);
 extern void RemoveStatistics(Oid relid, AttrNumber attnum);
+extern void RemoveMVStatistics(Oid relid, AttrNumber attnum);
 
 extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno,
 						  bool relhasoids);
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index c38958d..e171ae6 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -173,6 +173,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using
 DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops));
 #define LargeObjectMetadataOidIndexId	2996
 
+DECLARE_UNIQUE_INDEX(pg_mv_statistic_oid_index, 3380, on pg_mv_statistic using btree(oid oid_ops));
+#define MvStatisticOidIndexId  3380
+DECLARE_UNIQUE_INDEX(pg_mv_statistic_name_index, 3997, on pg_mv_statistic using btree(staname name_ops));
+#define MvStatisticNameIndexId  3997
+DECLARE_INDEX(pg_mv_statistic_relid_index, 3379, on pg_mv_statistic using btree(starelid oid_ops));
+#define MvStatisticRelidIndexId	3379
+
 DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops));
 #define NamespaceNameIndexId  2684
 DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops));
diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h
index b6ad934..9bb59f9 100644
--- a/src/include/catalog/namespace.h
+++ b/src/include/catalog/namespace.h
@@ -137,6 +137,8 @@ extern Oid	get_collation_oid(List *collname, bool missing_ok);
 extern Oid	get_conversion_oid(List *conname, bool missing_ok);
 extern Oid	FindDefaultConversionProc(int32 for_encoding, int32 to_encoding);
 
+extern Oid	get_statistics_oid(List *names, bool missing_ok);
+
 /* initialization & transaction cleanup code */
 extern void InitializeSearchPath(void);
 extern void AtEOXact_Namespace(bool isCommit, bool parallel);
diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h
new file mode 100644
index 0000000..8c33a92
--- /dev/null
+++ b/src/include/catalog/pg_mv_statistic.h
@@ -0,0 +1,71 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_mv_statistic.h
+ *	  definition of the system "multivariate statistic" relation (pg_mv_statistic)
+ *	  along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_mv_statistic.h
+ *
+ * NOTES
+ *	  the genbki.pl script reads this file and generates .bki
+ *	  information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_MV_STATISTIC_H
+#define PG_MV_STATISTIC_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ *		pg_mv_statistic definition.  cpp turns this into
+ *		typedef struct FormData_pg_mv_statistic
+ * ----------------
+ */
+#define MvStatisticRelationId  3381
+
+CATALOG(pg_mv_statistic,3381)
+{
+	/* These fields form the unique key for the entry: */
+	Oid			starelid;			/* relation containing attributes */
+	NameData	staname;			/* statistics name */
+
+	/* statistics requested to build */
+	bool		deps_enabled;		/* analyze dependencies? */
+
+	/* statistics that are available (if requested) */
+	bool		deps_built;			/* dependencies were built */
+
+	/* variable-length fields start here, but we allow direct access to stakeys */
+	int2vector	stakeys;			/* array of column keys */
+
+#ifdef CATALOG_VARLEN
+	bytea		stadeps;			/* dependencies (serialized) */
+#endif
+
+} FormData_pg_mv_statistic;
+
+/* ----------------
+ *		Form_pg_mv_statistic corresponds to a pointer to a tuple with
+ *		the format of pg_mv_statistic relation.
+ * ----------------
+ */
+typedef FormData_pg_mv_statistic *Form_pg_mv_statistic;
+
+/* ----------------
+ *		compiler constants for pg_mv_statistic
+ * ----------------
+ */
+#define Natts_pg_mv_statistic					6
+#define Anum_pg_mv_statistic_starelid			1
+#define Anum_pg_mv_statistic_staname			2
+#define Anum_pg_mv_statistic_deps_enabled		3
+#define Anum_pg_mv_statistic_deps_built			4
+#define Anum_pg_mv_statistic_stakeys			5
+#define Anum_pg_mv_statistic_stadeps			6
+
+#endif   /* PG_MV_STATISTIC_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index d8640db..85c638d 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2739,6 +2739,11 @@ DESCR("current user privilege on any column by rel name");
 DATA(insert OID = 3029 (  has_any_column_privilege	   PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
 DESCR("current user privilege on any column by rel oid");
 
+DATA(insert OID = 3998 (  pg_mv_stats_dependencies_info     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_info _null_ _null_ _null_ ));
+DESCR("multivariate stats: functional dependencies info");
+DATA(insert OID = 3999 (  pg_mv_stats_dependencies_show     PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_dependencies_show _null_ _null_ _null_ ));
+DESCR("multivariate stats: functional dependencies show");
+
 DATA(insert OID = 1928 (  pg_stat_get_numscans			PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
 DATA(insert OID = 1929 (  pg_stat_get_tuples_returned	PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));
diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h
index fb2f035..b7c878d 100644
--- a/src/include/catalog/toasting.h
+++ b/src/include/catalog/toasting.h
@@ -49,6 +49,7 @@ extern void BootstrapToastTable(char *relName,
 DECLARE_TOAST(pg_attrdef, 2830, 2831);
 DECLARE_TOAST(pg_constraint, 2832, 2833);
 DECLARE_TOAST(pg_description, 2834, 2835);
+DECLARE_TOAST(pg_mv_statistic, 3577, 3578);
 DECLARE_TOAST(pg_proc, 2836, 2837);
 DECLARE_TOAST(pg_rewrite, 2838, 2839);
 DECLARE_TOAST(pg_seclabel, 3598, 3599);
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index adae296..3adb956 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -75,6 +75,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters);
 extern void RemoveOperatorById(Oid operOid);
 extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
 
+/* commands/statscmds.c */
+extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
+extern void RemoveStatisticsById(Oid statsOid);
+
 /* commands/aggregatecmds.c */
 extern ObjectAddress DefineAggregate(List *name, List *args, bool oldstyle,
 				List *parameters, const char *queryString);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 603edd3..ece0776 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -251,6 +251,7 @@ typedef enum NodeTag
 	T_PlaceHolderInfo,
 	T_MinMaxAggInfo,
 	T_PlannerParamItem,
+	T_MVStatisticInfo,
 
 	/*
 	 * TAGS FOR MEMORY NODES (memnodes.h)
@@ -381,6 +382,7 @@ typedef enum NodeTag
 	T_CreatePolicyStmt,
 	T_AlterPolicyStmt,
 	T_CreateTransformStmt,
+	T_CreateStatsStmt,
 
 	/*
 	 * TAGS FOR PARSE TREE NODES (parsenodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 9142e94..3650897 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -596,6 +596,16 @@ typedef struct ColumnDef
 	int			location;		/* parse location, or -1 if none/unknown */
 } ColumnDef;
 
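+/*
+ * CreateStatsStmt - a CREATE STATISTICS statement
+ */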
+typedef struct CreateStatsStmt
+{
+	NodeTag		type;
+	char	   *statsname;		/* name of new statistics, or NULL for default */
+	RangeVar   *relation;		/* relation to build statistics on */
+	List	   *keys;			/* String nodes naming referenced column(s) */
+	List	   *options;		/* list of DefElem nodes */
+} CreateStatsStmt;
+
+
 /*
  * TableLikeClause - CREATE TABLE ( ... LIKE ... ) clause
  */
@@ -1405,6 +1415,7 @@ typedef enum ObjectType
 	OBJECT_RULE,
 	OBJECT_SCHEMA,
 	OBJECT_SEQUENCE,
+	OBJECT_STATISTICS,
 	OBJECT_TABCONSTRAINT,
 	OBJECT_TABLE,
 	OBJECT_TABLESPACE,
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 5393005..baa0c88 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -479,6 +479,7 @@ typedef struct RelOptInfo
 	List	   *lateral_vars;	/* LATERAL Vars and PHVs referenced by rel */
 	Relids		lateral_referencers;	/* rels that reference me laterally */
 	List	   *indexlist;		/* list of IndexOptInfo */
+	List	   *mvstatlist;		/* list of MVStatisticInfo */
 	BlockNumber pages;			/* size estimates derived from pg_class */
 	double		tuples;
 	double		allvisfrac;
@@ -573,6 +574,33 @@ typedef struct IndexOptInfo
 	bool		amhasgetbitmap; /* does AM have amgetbitmap interface? */
 } IndexOptInfo;
 
+/*
+ * MVStatisticInfo
+ *		Information about multivariate stats for planning/optimization
+ *
+ * This contains information about which columns are covered by the
+ * statistics (stakeys), which options were requested while adding the
+ * statistics (*_enabled), and which kinds of statistics were actually
+ * built and are available for the optimizer (*_built).
+ */
+typedef struct MVStatisticInfo
+{
+	NodeTag		type;
+
+	Oid			mvoid;			/* OID of the statistics row */
+	RelOptInfo *rel;			/* back-link to statistics' table */
+
+	/* enabled statistics */
+	bool		deps_enabled;	/* functional dependencies enabled */
+
+	/* built/available statistics */
+	bool		deps_built;		/* functional dependencies built */
+
+	/* columns in the statistics (attnums) */
+	int2vector *stakeys;		/* attnums of the columns covered */
+
+} MVStatisticInfo;
+
 
 /*
  * EquivalenceClasses
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 812ca83..daefcef 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -361,7 +361,7 @@ PG_KEYWORD("stable", STABLE, UNRESERVED_KEYWORD)
 PG_KEYWORD("standalone", STANDALONE_P, UNRESERVED_KEYWORD)
 PG_KEYWORD("start", START, UNRESERVED_KEYWORD)
 PG_KEYWORD("statement", STATEMENT, UNRESERVED_KEYWORD)
-PG_KEYWORD("statistics", STATISTICS, UNRESERVED_KEYWORD)
+PG_KEYWORD("statistics", STATISTICS, RESERVED_KEYWORD)
 PG_KEYWORD("stdin", STDIN, UNRESERVED_KEYWORD)
 PG_KEYWORD("stdout", STDOUT, UNRESERVED_KEYWORD)
 PG_KEYWORD("storage", STORAGE, UNRESERVED_KEYWORD)
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
new file mode 100644
index 0000000..411cd16
--- /dev/null
+++ b/src/include/utils/mvstats.h
@@ -0,0 +1,69 @@
+/*-------------------------------------------------------------------------
+ *
+ * mvstats.h
+ *	  Multivariate statistics and selectivity estimation functions.
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/mvstats.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef MVSTATS_H
+#define MVSTATS_H
+
+#include "commands/vacuum.h"
+
+
+#define MVSTATS_MAX_DIMENSIONS	8		/* max number of attributes */
+
+/*
+ * A functional dependency, tracking an [a => b] relationship (the value
+ * of column 'a' determines the value of column 'b').
+ *
+ * TODO Make this work with multiple columns on both sides.
+ */
+typedef struct MVDependencyData {
+	int16	a;
+	int16	b;
+} MVDependencyData;
+
+typedef MVDependencyData* MVDependency;
+
+typedef struct MVDependenciesData {
+	uint32			magic;		/* magic constant marker */
+	int32			ndeps;		/* number of dependencies */
+	MVDependency	deps[1];	/* XXX why not a pointer? */
+} MVDependenciesData;
+
+typedef MVDependenciesData* MVDependencies;
+
+#define MVSTAT_DEPS_MAGIC		0xB4549A2C	/* marks serialized bytea */
+#define MVSTAT_DEPS_TYPE_BASIC	1			/* basic dependencies type */
+
+/*
+ * TODO Maybe fetching the histogram/MCV list separately is inefficient?
+ *      Consider adding a single `fetch_stats` method, fetching all
+ *      stats specified using flags (or something like that).
+ */
+
+/* serialization of the dependencies into a bytea (stored in pg_mv_statistic) */
+bytea * serialize_mv_dependencies(MVDependencies dependencies);
+
+/* deserialization of the dependencies from a bytea */
+MVDependencies	deserialize_mv_dependencies(bytea * data);
+
+/* FIXME this probably belongs somewhere else (not to operations stats) */
+extern Datum pg_mv_stats_dependencies_info(PG_FUNCTION_ARGS);
+extern Datum pg_mv_stats_dependencies_show(PG_FUNCTION_ARGS);
+
+MVDependencies
+build_mv_dependencies(int numrows, HeapTuple *rows,
+								  int2vector *attrs,
+								  VacAttrStats **stats);
+
+void build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
+						   int natts, VacAttrStats **vacattrstats);
+
+void update_mv_stats(Oid relid, MVDependencies dependencies, int2vector *attrs);
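+
+/*
+ * Rough sketch of how these pieces fit together during ANALYZE
+ * (illustrative only - the exact wiring lives in the mvstats code):
+ *
+ *     deps = build_mv_dependencies(numrows, rows, attrs, stats);
+ *     update_mv_stats(relid, deps, attrs);
+ */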
+
+#endif
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 8a55a09..4d6edb6 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -79,6 +79,7 @@ typedef struct RelationData
 	bool		rd_isvalid;		/* relcache entry is valid */
 	char		rd_indexvalid;	/* state of rd_indexlist: 0 = not valid, 1 =
 								 * valid, 2 = temporarily forced */
+	bool		rd_mvstatvalid;	/* state of rd_mvstatlist: true = valid */
 
 	/*
 	 * rd_createSubid is the ID of the highest subtransaction the rel has
@@ -111,6 +112,9 @@ typedef struct RelationData
 	List	   *rd_indexlist;	/* list of OIDs of indexes on relation */
 	Oid			rd_oidindex;	/* OID of unique index on OID, if any */
 	Oid			rd_replidindex; /* OID of replica identity index, if any */
+
+	/* data managed by RelationGetMVStatList: */
+	List	   *rd_mvstatlist;	/* list of OIDs of multivariate stats */
 
 	/* data managed by RelationGetIndexAttrBitmap: */
 	Bitmapset  *rd_indexattr;	/* identifies columns used in indexes */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 6953281..77efeff 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -38,6 +38,7 @@ extern void RelationClose(Relation relation);
  * Routines to compute/retrieve additional cached information
  */
 extern List *RelationGetIndexList(Relation relation);
+extern List *RelationGetMVStatList(Relation relation);
 extern Oid	RelationGetOidIndex(Relation relation);
 extern Oid	RelationGetReplicaIndex(Relation relation);
 extern List *RelationGetIndexExpressions(Relation relation);
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 18404e2..bff702e 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -66,6 +66,8 @@ enum SysCacheIdentifier
 	INDEXRELID,
 	LANGNAME,
 	LANGOID,
+	MVSTATNAME,
+	MVSTATOID,
 	NAMESPACENAME,
 	NAMESPACEOID,
 	OPERNAMENSP,
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 80374e4..428b1e8 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1365,6 +1365,14 @@ pg_matviews| SELECT n.nspname AS schemaname,
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
      LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace)))
   WHERE (c.relkind = 'm'::"char");
+pg_mv_stats| SELECT n.nspname AS schemaname,
+    c.relname AS tablename,
+    s.stakeys AS attnums,
+    length(s.stadeps) AS depsbytes,
+    pg_mv_stats_dependencies_info(s.stadeps) AS depsinfo
+   FROM ((pg_mv_statistic s
+     JOIN pg_class c ON ((c.oid = s.starelid)))
+     LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
 pg_policies| SELECT n.nspname AS schemaname,
     c.relname AS tablename,
     pol.polname AS policyname,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index eb0bc88..92a0d8a 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -113,6 +113,7 @@ pg_inherits|t
 pg_language|t
 pg_largeobject|t
 pg_largeobject_metadata|t
+pg_mv_statistic|t
 pg_namespace|t
 pg_opclass|t
 pg_operator|t
-- 
2.1.0

