diff -dcrpN postgresql.orig/src/backend/catalog/Makefile postgresql/src/backend/catalog/Makefile
*** postgresql.orig/src/backend/catalog/Makefile	2011-02-22 18:51:42.675518441 +0100
--- postgresql/src/backend/catalog/Makefile	2011-04-28 14:21:14.694179328 +0200
*************** POSTGRES_BKI_SRCS = $(addprefix $(top_sr
*** 31,38 ****
  	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
  	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
  	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
! 	pg_statistic.h pg_rewrite.h pg_trigger.h pg_description.h \
! 	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
  	pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
  	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
  	pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \
--- 31,38 ----
  	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
  	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
  	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
! 	pg_statistic.h pg_statistic2.h pg_statistic3.h pg_rewrite.h pg_trigger.h \
! 	pg_description.h pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
  	pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
  	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
  	pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \
diff -dcrpN postgresql.orig/src/backend/commands/indexcmds.c postgresql/src/backend/commands/indexcmds.c
*** postgresql.orig/src/backend/commands/indexcmds.c	2011-04-26 09:54:04.012362009 +0200
--- postgresql/src/backend/commands/indexcmds.c	2011-04-28 14:21:14.697179127 +0200
***************
*** 26,32 ****
--- 26,35 ----
  #include "catalog/indexing.h"
  #include "catalog/pg_opclass.h"
  #include "catalog/pg_opfamily.h"
+ #include "catalog/pg_statistic2.h"
+ #include "catalog/pg_statistic3.h"
  #include "catalog/pg_tablespace.h"
+ #include "catalog/pg_type.h"
  #include "commands/dbcommands.h"
  #include "commands/defrem.h"
  #include "commands/tablecmds.h"
***************
*** 36,41 ****
--- 39,45 ----
  #include "nodes/nodeFuncs.h"
  #include "optimizer/clauses.h"
  #include "optimizer/planner.h"
+ #include "optimizer/var.h"
  #include "parser/parse_coerce.h"
  #include "parser/parse_func.h"
  #include "parser/parse_oper.h"
*************** ReindexDatabase(const char *databaseName
*** 1693,1695 ****
--- 1697,1961 ----
  
  	MemoryContextDelete(private_context);
  }
+ 
+ /*
+  * DoCrossColStat
+  *	Add (stmt->create) or remove the pg_statistic2 row keyed by stmt->relation and the sorted attnums of stmt->columns
+  */
+ static void
+ DoCrossColStat(ExtraStatStmt *stmt)
+ {
+ 	Oid			relId;
+ 	Relation		rel;
+ 	ListCell	   *l;
+ 	int			len, i, j;
+ 	bool			differ = false;
+ 	AttrNumber	   *attnums;
+ 	AttrNumber	   *sorted_attnums;
+ 	int16			typlen;
+ 	bool			typbyval;
+ 	char			typalign;
+ 	Datum		   *datum_attnums;
+ 	ArrayType	   *arr_attnums;
+ 	ScanKeyData		scanKey[2];
+ 	SysScanDesc		scan;
+ 	HeapTuple		tuple;
+ 	TupleDesc		tupDesc;
+ 	Datum			values[Natts_pg_statistic2];
+ 	bool			nulls[Natts_pg_statistic2];
+ 
+ 	relId = RangeVarGetRelid(stmt->relation, false);
+ 
+ 	len = list_length(stmt->columns);
+ 	if (len < 2)
+ 		elog(ERROR, "cross column statistics need at least two columns");
+ 
+ 	attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ 	sorted_attnums = (int2 *)palloc(len * sizeof(AttrNumber));
+ 	datum_attnums = (Datum *)palloc(len * sizeof(Datum));
+ 
+ 	i = 0;	/* attnums[] keeps the column numbers in the order they were written */
+ 	foreach(l, stmt->columns)
+ 	{
+ 		Node	   *node = (Node *) lfirst(l);
+ 		Var	   *var;
+ 
+ 		if (!IsA(node, Var))
+ 			elog(ERROR, "not a column reference");
+ 
+ 		var = (Var *) node;
+ 
+ 		if (var->varattno == 0)
+ 			elog(ERROR, "row expansion via \"*\" is not supported here");
+ 
+ 		attnums[i++] = var->varattno;
+ 	}
+ 	/* sort a copy ascending, so the stored key does not depend on the order given */
+ 	for (i = 0; i < len; i++)
+ 		sorted_attnums[i] = attnums[i];
+ 	for (i = 0;  i < len - 1; i++)
+ 		for (j = i+1; j < len; j++)
+ 			if (sorted_attnums[i] > sorted_attnums[j])
+ 			{
+ 				AttrNumber	tmp = sorted_attnums[i];
+ 
+ 				sorted_attnums[i] = sorted_attnums[j];
+ 				sorted_attnums[j] = tmp;
+ 			}
+ 	/* note any reordering, reject duplicates (adjacent after sorting), build the Datums */
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		if (!differ && attnums[i] != sorted_attnums[i])
+ 			differ = true;
+ 
+ 		if ((i < len - 1) && sorted_attnums[i] == sorted_attnums[i+1])
+ 			elog(ERROR, "column list must contain every column exactly once");
+ 
+ 		datum_attnums[i] = Int16GetDatum(sorted_attnums[i]);
+ 	}
+ 
+ 	if (differ)
+ 		elog(WARNING, "the column list was reordered in the order of table attributes");
+ 	/* the int2[] value below serves both as scan key and as the stored column value */
+ 	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+ 	arr_attnums = construct_array(datum_attnums, len,
+ 						INT2OID, typlen, typbyval, typalign);
+ 
+ 	rel = heap_open(Statistic2RelationId, RowExclusiveLock);
+ 
+ 	/*
+ 	 * There's no syscache for pg_statistic2, because arrays
+ 	 * aren't supported there as search keys.  We need to do it
+ 	 * the hard way: a systable scan keyed on (sta2relid, sta2attnums).
+ 	 */
+ 	ScanKeyInit(&scanKey[0],
+ 					Anum_pg_statistic2_sta2relid,
+ 					BTEqualStrategyNumber, F_OIDEQ,
+ 					ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 					Anum_pg_statistic2_sta2attnums,
+ 					BTEqualStrategyNumber, F_ARRAY_EQ,
+ 					PointerGetDatum(arr_attnums));
+ 
+ 	scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true,
+ 									SnapshotNow, 2, scanKey);
+ 
+ 	tuple = systable_getnext(scan);
+ 
+ 	if (stmt->create)
+ 	{
+ 		if (HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic2 entry already exists for this table and set of columns");
+ 		}
+ 		systable_endscan(scan);
+ 
+ 		for (i = 0; i < Natts_pg_statistic2; i++)
+ 			nulls[i] = TRUE;	/* every column not filled in below is stored as NULL */
+ 
+ 		values[Anum_pg_statistic2_sta2relid - 1] = ObjectIdGetDatum(relId);
+ 		nulls[Anum_pg_statistic2_sta2relid - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic2_sta2attnums - 1] = PointerGetDatum(arr_attnums);
+ 		nulls[Anum_pg_statistic2_sta2attnums - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic2_sta2inherit - 1] = BoolGetDatum(FALSE);
+ 		nulls[Anum_pg_statistic2_sta2inherit - 1] = FALSE;
+ 
+ 		tupDesc = RelationGetDescr(rel);
+ 
+ 		tuple = heap_form_tuple(tupDesc, values, nulls);
+ 
+ 		simple_heap_insert(rel, tuple);
+ 
+ 		CatalogUpdateIndexes(rel, tuple);
+ 	}
+ 	else
+ 	{
+ 		if (!HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic2 entry doesn't exist for this table and set of columns");
+ 		}
+ 		/* tuple came from the scan, so it is deleted before systable_endscan() */
+ 		simple_heap_delete(rel, &tuple->t_self);
+ 
+ 		systable_endscan(scan);
+ 	}
+ 
+ 	relation_close(rel, NoLock);	/* NoLock: the RowExclusiveLock is not released here */
+ }
+ 
+ /*
+  * DoExprStat
+  *	Add (stmt->create) or remove the pg_statistic3 row keyed by stmt->relation and the nodeToString() form of stmt->expr
+  */
+ static void
+ DoExprStat(ExtraStatStmt *stmt)
+ {
+ 	Oid			relId;
+ 	Relation		rel;
+ 	int			i;
+ 	char			*exprbin;
+ 	Datum			exprbindatum;
+ 	ScanKeyData		scanKey[2];
+ 	SysScanDesc		scan;
+ 	HeapTuple		tuple;
+ 	TupleDesc		tupDesc;
+ 	Datum			values[Natts_pg_statistic3];
+ 	bool			nulls[Natts_pg_statistic3];
+ 
+ 	if (IsA(stmt->expr, Var) || IsA(stmt->expr, ColumnRef))
+ 		elog(ERROR, "single column are covered by basic statistics");
+ 
+ 	relId = RangeVarGetRelid(stmt->relation, false);
+ 
+ 	rel = heap_open(Statistic3RelationId, RowExclusiveLock);
+ 
+ 	exprbin = nodeToString(stmt->expr);	/* serialized node tree, used as the key */
+ 	exprbindatum = CStringGetTextDatum(exprbin);
+ 
+ 	/*
+ 	 * There's no syscache for pg_statistic3, so the row is looked
+ 	 * up the hard way: a systable scan keyed on (sta3relid, sta3expr),
+ 	 * comparing the serialized expression text for equality.
+ 	 */
+ 	ScanKeyInit(&scanKey[0],
+ 					Anum_pg_statistic3_sta3relid,
+ 					BTEqualStrategyNumber, F_OIDEQ,
+ 					ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 					Anum_pg_statistic3_sta3expr,
+ 					BTEqualStrategyNumber, F_TEXTEQ,
+ 					exprbindatum);
+ 
+ 	scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true,
+ 									SnapshotNow, 2, scanKey);
+ 
+ 	tuple = systable_getnext(scan);
+ 
+ 	if (stmt->create)
+ 	{
+ 		if (HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic3 entry already exists for this table and expression");
+ 		}
+ 		systable_endscan(scan);
+ 
+ 		if (!contain_var_clause(stmt->expr))	/* checked on the create path only */
+ 			elog(ERROR, "constant expressions are not interesting");
+ 
+ 		for (i = 0; i < Natts_pg_statistic3; i++)
+ 			nulls[i] = TRUE;	/* every column not filled in below is stored as NULL */
+ 
+ 		values[Anum_pg_statistic3_sta3relid - 1] = ObjectIdGetDatum(relId);
+ 		nulls[Anum_pg_statistic3_sta3relid - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic3_sta3expr - 1] = exprbindatum;
+ 		nulls[Anum_pg_statistic3_sta3expr - 1] = FALSE;
+ 
+ 		values[Anum_pg_statistic3_sta3inherit - 1] = BoolGetDatum(FALSE);
+ 		nulls[Anum_pg_statistic3_sta3inherit - 1] = FALSE;
+ 
+ 		tupDesc = RelationGetDescr(rel);
+ 
+ 		tuple = heap_form_tuple(tupDesc, values, nulls);
+ 
+ 		simple_heap_insert(rel, tuple);
+ 
+ 		CatalogUpdateIndexes(rel, tuple);
+ 	}
+ 	else
+ 	{
+ 		if (!HeapTupleIsValid(tuple))
+ 		{
+ 			systable_endscan(scan);
+ 			elog(ERROR, "pg_statistic3 entry doesn't exist for this table and expression");
+ 		}
+ 		/* tuple came from the scan, so it is deleted before systable_endscan() */
+ 		simple_heap_delete(rel, &tuple->t_self);
+ 
+ 		systable_endscan(scan);
+ 	}
+ 
+ 	pfree(exprbin);
+ 	pfree(DatumGetPointer(exprbindatum));
+ 
+ 	relation_close(rel, NoLock);	/* NoLock: the RowExclusiveLock is not released here */
+ }
+ 
+ /*
+  * ExtraStatistics
+  *	Add or remove one entry in pg_statistic2 (column list given) or pg_statistic3 (expression given)
+  */
+ void ExtraStatistics(ExtraStatStmt *stmt)
+ {
+ 	if (stmt->columns != NIL)
+ 		DoCrossColStat(stmt);
+ 	else if (stmt->expr)
+ 		DoExprStat(stmt);
+ 	else
+ 		elog(ERROR, "internal error in ExtraStatistics");
+ }
diff -dcrpN postgresql.orig/src/backend/executor/nodeHash.c postgresql/src/backend/executor/nodeHash.c
*** postgresql.orig/src/backend/executor/nodeHash.c	2011-04-11 15:36:27.096816773 +0200
--- postgresql/src/backend/executor/nodeHash.c	2011-04-28 14:21:14.700178924 +0200
*************** ExecHashBuildSkewHash(HashJoinTable hash
*** 1144,1150 ****
  	if (!HeapTupleIsValid(statsTuple))
  		return;
  
! 	if (get_attstatsslot(statsTuple, node->skewColType, node->skewColTypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
  						 &values, &nvalues,
--- 1144,1150 ----
  	if (!HeapTupleIsValid(statsTuple))
  		return;
  
! 	if (get_attstatsslot(statsTuple, STAT_VARIABLE, node->skewColType, node->skewColTypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
  						 &values, &nvalues,
diff -dcrpN postgresql.orig/src/backend/nodes/copyfuncs.c postgresql/src/backend/nodes/copyfuncs.c
*** postgresql.orig/src/backend/nodes/copyfuncs.c	2011-04-19 09:37:54.828715621 +0200
--- postgresql/src/backend/nodes/copyfuncs.c	2011-04-28 14:21:14.704178653 +0200
*************** _copyCreateForeignTableStmt(CreateForeig
*** 3458,3463 ****
--- 3458,3476 ----
  	return newnode;
  }
  
+ static ExtraStatStmt *
+ _copyExtraStatStmt(ExtraStatStmt *from)
+ {
+ 	ExtraStatStmt *newnode = makeNode(ExtraStatStmt);
+ 
+ 	COPY_SCALAR_FIELD(create);
+ 	COPY_NODE_FIELD(relation);	/* goes through copyObject(), so a NULL relation is copied as NULL */
+ 	COPY_NODE_FIELD(columns);
+ 	COPY_NODE_FIELD(expr);
+ 
+ 	return newnode;
+ }
+ 
  static CreateTrigStmt *
  _copyCreateTrigStmt(CreateTrigStmt *from)
  {
*************** copyObject(void *from)
*** 4377,4382 ****
--- 4390,4398 ----
  		case T_CreateForeignTableStmt:
  			retval = _copyCreateForeignTableStmt(from);
  			break;
+ 		case T_ExtraStatStmt:
+ 			retval = _copyExtraStatStmt(from);
+ 			break;
  		case T_CreateTrigStmt:
  			retval = _copyCreateTrigStmt(from);
  			break;
diff -dcrpN postgresql.orig/src/backend/nodes/equalfuncs.c postgresql/src/backend/nodes/equalfuncs.c
*** postgresql.orig/src/backend/nodes/equalfuncs.c	2011-04-19 09:37:54.829715550 +0200
--- postgresql/src/backend/nodes/equalfuncs.c	2011-04-28 14:21:14.707178453 +0200
*************** _equalCreateForeignTableStmt(CreateForei
*** 1795,1800 ****
--- 1795,1812 ----
  }
  
  static bool
+ _equalExtraStatStmt(ExtraStatStmt *a, ExtraStatStmt *b)
+ {
+ 	COMPARE_SCALAR_FIELD(create);
+ 	/* compare via equal(), which copes with NULL pointers on either side */
+ 	COMPARE_NODE_FIELD(relation);
+ 	COMPARE_NODE_FIELD(columns);
+ 	COMPARE_NODE_FIELD(expr);
+ 
+ 	return true;
+ }
+ 
+ static bool
  _equalCreateTrigStmt(CreateTrigStmt *a, CreateTrigStmt *b)
  {
  	COMPARE_STRING_FIELD(trigname);
*************** equal(void *a, void *b)
*** 2930,2935 ****
--- 2942,2950 ----
  		case T_CreateForeignTableStmt:
  			retval = _equalCreateForeignTableStmt(a, b);
  			break;
+ 		case T_ExtraStatStmt:
+ 			retval = _equalExtraStatStmt(a, b);
+ 			break;
  		case T_CreateTrigStmt:
  			retval = _equalCreateTrigStmt(a, b);
  			break;
diff -dcrpN postgresql.orig/src/backend/optimizer/path/clausesel.c postgresql/src/backend/optimizer/path/clausesel.c
*** postgresql.orig/src/backend/optimizer/path/clausesel.c	2011-01-04 15:13:15.940560845 +0100
--- postgresql/src/backend/optimizer/path/clausesel.c	2011-04-28 14:21:14.720177575 +0200
***************
*** 13,29 ****
--- 13,40 ----
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
+ #include "postgres_ext.h"
  
+ #include "access/skey.h"
+ #include "access/relscan.h"
+ #include "catalog/indexing.h"
  #include "catalog/pg_operator.h"
+ #include "catalog/pg_statistic2.h"
+ #include "catalog/pg_statistic3.h"
+ #include "catalog/pg_type.h"
  #include "nodes/makefuncs.h"
+ #include "nodes/pg_list.h"
  #include "optimizer/clauses.h"
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/plancat.h"
+ #include "optimizer/var.h"
  #include "parser/parsetree.h"
+ #include "utils/array.h" 
  #include "utils/fmgroids.h"
  #include "utils/lsyscache.h"
  #include "utils/selfuncs.h"
+ #include "utils/tqual.h"
  
  
  /*
*************** typedef struct RangeQueryClause
*** 34,39 ****
--- 45,51 ----
  {
  	struct RangeQueryClause *next;		/* next in linked list */
  	Node	   *var;			/* The common variable of the clauses */
+ 	AttrNumber	varattno;	/* for finding cross-column statistics */
  	bool		have_lobound;	/* found a low-bound clause yet? */
  	bool		have_hibound;	/* found a high-bound clause yet? */
  	Selectivity lobound;		/* Selectivity of a var > something clause */
*************** typedef struct RangeQueryClause
*** 43,48 ****
--- 55,75 ----
  static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
  			   bool varonleft, bool isLTsel, Selectivity s2);
  
+ typedef struct CrossColumnClause
+ {
+ 	struct CrossColumnClause *next;	/* next in linked list */
+ 	Node	   *var;	/* the variable-side operand of the clause */
+ 	AttrNumber	varattno;	/* var_get_attno(var): 0 unless var is a Var */
+ 	Node	   *expr;	/* the other operand of the clause */
+ 	Selectivity	sel;	/* selectivity passed to addXCClause() for this clause */
+ } CrossColumnClause;
+ 
+ static void addXCClause(CrossColumnClause **xclist, Node *clause,
+ 			   bool varonleft, Selectivity s2);
+ 
+ static bool crosscolumn_selectivity(Oid relId,
+ 			   CrossColumnClause **xcnext, RangeQueryClause **rqlist,
+ 			   Selectivity *result_sel);
  
  /****************************************************************************
   *		ROUTINES TO COMPUTE SELECTIVITIES
*************** clauselist_selectivity(PlannerInfo *root
*** 99,106 ****
--- 126,138 ----
  {
  	Selectivity s1 = 1.0;
  	RangeQueryClause *rqlist = NULL;
+ 	CrossColumnClause *xclist = NULL;
+ 	Oid		relId = InvalidOid;
+ 	bool		onerel = false;
  	ListCell   *l;
  
+ //	elog(NOTICE, "clauselist_selectivity varRelid %d, list length %d", varRelid, list_length(clauses));
+ 
  	/*
  	 * If there's exactly one clause, then no use in trying to match up pairs,
  	 * so just go directly to clause_selectivity().
*************** clauselist_selectivity(PlannerInfo *root
*** 162,167 ****
--- 194,215 ----
  					 (varonleft = false,
  					  is_pseudo_constant_clause_relids(linitial(expr->args),
  													   rinfo->left_relids)));
+ 				if (ok)
+ 				{
+ 					int	relid;
+ 					Oid	tmprelId;
+ 
+ 					relid = bms_singleton_member(rinfo->clause_relids);
+ 					tmprelId = root->simple_rte_array[relid]->relid;
+ 
+ 					if (!OidIsValid(relId))
+ 					{
+ 						onerel = true;
+ 						relId = tmprelId;
+ 					}
+ 					else if (relId != tmprelId)
+ 						onerel = false;
+ 				}
  			}
  			else
  			{
*************** clauselist_selectivity(PlannerInfo *root
*** 169,174 ****
--- 217,241 ----
  					(is_pseudo_constant_clause(lsecond(expr->args)) ||
  					 (varonleft = false,
  					  is_pseudo_constant_clause(linitial(expr->args))));
+ 				if (ok)
+ 				{
+ 					Relids	relids;
+ 					int	relid;
+ 					Oid	tmprelId;
+ 
+ 					relids = pull_varnos(clause);
+ 					relid = bms_singleton_member(relids);
+ 					tmprelId = root->simple_rte_array[relid]->relid;
+ 					bms_free(relids);
+ 
+ 					if (!OidIsValid(relId))
+ 					{
+ 						onerel = true;
+ 						relId = tmprelId;
+ 					}
+ 					else if (relId != tmprelId)
+ 						onerel = false;
+ 				}
  			}
  
  			if (ok)
*************** clauselist_selectivity(PlannerInfo *root
*** 188,193 ****
--- 255,264 ----
  						addRangeClause(&rqlist, clause,
  									   varonleft, false, s2);
  						break;
+ 					case F_EQSEL:
+ 						addXCClause(&xclist, clause,
+ 									   varonleft, s2);
+ 						break;
  					default:
  						/* Just merge the selectivity in generically */
  						s1 = s1 * s2;
*************** clauselist_selectivity(PlannerInfo *root
*** 202,207 ****
--- 273,299 ----
  	}
  
  	/*
+ 	 * Scan xclist and rqlist recursively and filter out
+ 	 * all possible cross-column selectivities.
+ 	 */
+ 	if (onerel)
+ 		crosscolumn_selectivity(relId, &xclist, &rqlist, &s1);
+ 
+ 	/*
+ 	 * Free the cross-column clauses
+ 	 */
+ 	while (xclist != NULL)
+ 	{
+ 		CrossColumnClause *xcnext;
+ 
+ 		s1 = s1 * xclist->sel;
+ 
+ 		xcnext = xclist->next;
+ 		pfree(xclist);
+ 		xclist = xcnext;
+ 	}
+ 
+ 	/*
  	 * Now scan the rangequery pair list.
  	 */
  	while (rqlist != NULL)
*************** clauselist_selectivity(PlannerInfo *root
*** 279,284 ****
--- 371,392 ----
  	return s1;
  }
  
+ /*
+  * var_get_attno --- attribute number of a Var node, or 0 for any other node
+  */
+ static AttrNumber
+ var_get_attno(Node *clause)
+ {
+ 	/*
+ 	 * Anything that is not a Var has no attribute number of its own;
+ 	 * it is reported as 0, which collect_xcol_lists() skips over.
+ 	 */
+ 	if (!IsA(clause, Var))
+ 		return 0;
+ 
+ 	return ((Var *) clause)->varattno;
+ }
+ 
  /*
   * addRangeClause --- add a new range clause for clauselist_selectivity
   *
*************** addRangeClause(RangeQueryClause **rqlist
*** 358,363 ****
--- 466,473 ----
  	/* No matching var found, so make a new clause-pair data structure */
  	rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
  	rqelem->var = var;
+ 	rqelem->varattno = var_get_attno(var);
+ 
  	if (is_lobound)
  	{
  		rqelem->have_lobound = true;
*************** addRangeClause(RangeQueryClause **rqlist
*** 375,380 ****
--- 485,522 ----
  }
  
  /*
+  * addXCClause - add a new clause to the list of clauses for cross-column stats inspection
+  *
+  */
+ static void
+ addXCClause(CrossColumnClause **xclist, Node *clause,
+ 					bool varonleft, Selectivity s)
+ {
+ 	CrossColumnClause *xcelem;
+ 	Node	   *var;
+ 	Node	   *expr;
+ 
+ 	if (varonleft)
+ 	{
+ 		var = get_leftop((Expr *) clause);
+ 		expr = get_rightop((Expr *) clause);
+ 	}
+ 	else
+ 	{
+ 		var = get_rightop((Expr *) clause);
+ 		expr = get_leftop((Expr *) clause);
+ 	}
+ 
+ 	xcelem = (CrossColumnClause *) palloc(sizeof(CrossColumnClause));
+ 	xcelem->var = var;
+ 	xcelem->varattno = var_get_attno(var);
+ 	xcelem->expr = expr;
+ 	xcelem->sel = s;
+ 	xcelem->next = *xclist;
+ 	*xclist = xcelem;
+ }
+ 
+ /*
   * bms_is_subset_singleton
   *
   * Same result as bms_is_subset(s, bms_make_singleton(x)),
*************** clause_selectivity(PlannerInfo *root,
*** 499,504 ****
--- 641,648 ----
  	{
  		rinfo = (RestrictInfo *) clause;
  
+ //		elog(NOTICE, "RestrictInfo, %s", nodeToString(rinfo->clause));
+ 
  		/*
  		 * If the clause is marked pseudoconstant, then it will be used as a
  		 * gating qual and should not affect selectivity estimates; hence
*************** clause_selectivity(PlannerInfo *root,
*** 779,781 ****
--- 923,1211 ----
  
  	return s1;
  }
+ 
+ /*
+  * has_xcol_selectivity --- is there a pg_statistic2 row for this column set?
+  *
+  * Looks up the row keyed by relId and the int2 array built from the natts
+  * entries of attnums (used in the order given).  When a row exists, a fixed
+  * placeholder selectivity is stored in *result_sel and true is returned;
+  * otherwise *result_sel is left untouched and false is returned.
+  */
+ static bool
+ has_xcol_selectivity(Oid relId, int natts, AttrNumber *attnums, Selectivity *result_sel)
+ {
+ 	Relation	rel;
+ 	Datum		*datums = (Datum *)palloc(natts * sizeof(Datum));
+ 	ArrayType	*arr_attnums;
+ 	int		i;
+ 	int16		typlen;
+ 	bool		typbyval;
+ 	char		typalign;
+ 	ScanKeyData	scanKey[2];
+ 	SysScanDesc	scan;
+ 	bool		result;
+ 	Selectivity	sel = 1e-5; /* fixed selectivity for now */
+ 
+ 	for (i = 0; i < natts; i++)
+ 		datums[i] = Int16GetDatum(attnums[i]);
+ 
+ 	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+ 	arr_attnums = construct_array(datums, natts,
+ 							INT2OID, typlen, typbyval, typalign);
+ 
+ 	rel = heap_open(Statistic2RelationId, AccessShareLock);
+ 	ScanKeyInit(&scanKey[0],
+ 					Anum_pg_statistic2_sta2relid,
+ 					BTEqualStrategyNumber, F_OIDEQ,
+ 					ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 					Anum_pg_statistic2_sta2attnums,
+ 					BTEqualStrategyNumber, F_ARRAY_EQ,
+ 					PointerGetDatum(arr_attnums));
+ 	scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true,
+ 								SnapshotNow, 2, scanKey);
+ 	result = HeapTupleIsValid(systable_getnext(scan));
+ 	systable_endscan(scan);
+ 	/* read-only lookup: no reason to hold the catalog lock any longer */
+ 	heap_close(rel, AccessShareLock);
+ 	/* the scan is over, so both the Datum array and the key array can go */
+ 	pfree(datums);
+ 	pfree(arr_attnums);
+ 	if (result)
+ 		*result_sel = sel;
+ 	return result;
+ }
+ 
+ typedef struct {
+ 	CrossColumnClause	*xc;	/* eqsel-type clause in this slot, or NULL */
+ 	RangeQueryClause	*rq;	/* range clause in this slot; read when xc is NULL */
+ } reclist;
+ 
+ typedef struct {
+ 	int	len;	/* number of entries in rclist[] and in attnums[] */
+ 	reclist	*rclist;	/* the clauses forming this candidate combination */
+ 	AttrNumber *attnums;	/* their varattnos, as arranged by add_reclist() */
+ } reclist2;
+ 
+ 
+ /*
+  * add_reclist --- remember a candidate clause combination, once per column set
+  *
+  * The first "len" slots of rclist are reduced to their attribute numbers and
+  * sorted ascending, the order DoCrossColStat stores in sta2attnums.  Nothing
+  * is added if *results already has that array; else a copy is appended.
+  */
+ static void
+ add_reclist(int len, reclist *rclist, List **results)
+ {
+ 	ListCell   *lc;
+ 	int		i, j;
+ 	reclist2	*rclist2;
+ 	AttrNumber	*attnums = (AttrNumber *) palloc(len * sizeof(AttrNumber));
+ 
+ 	/* collect the varattnos from the Vars; an xc entry wins over an rq entry */
+ 	for (i = 0; i < len; i++)
+ 		attnums[i] = rclist[i].xc ? rclist[i].xc->varattno : rclist[i].rq->varattno;
+ 
+ 	/* exchange sort: only out-of-order pairs may be swapped */
+ 	for (i = 0; i < len - 1; i++)
+ 		for (j = i + 1; j < len; j++)
+ 			if (attnums[i] > attnums[j])
+ 			{
+ 				AttrNumber tmp = attnums[i];
+ 				attnums[i] = attnums[j];
+ 				attnums[j] = tmp;
+ 			}
+ 
+ 	/* match this ordered attnum list against the current list of attnum arrays */
+ 	foreach(lc, *results)
+ 	{
+ 		reclist2   *rc2 = (reclist2 *) lfirst(lc);
+ 
+ 		if (len != rc2->len)
+ 			continue;
+ 		for (i = 0; i < len; i++)
+ 			if (attnums[i] != rc2->attnums[i])
+ 				break;
+ 		if (i < len)
+ 			continue;
+ 
+ 		/* found: this column set is already listed, drop our scratch array */
+ 		pfree(attnums);
+ 		return;
+ 	}
+ 
+ 	/* not found, add it to the list */
+ 	rclist2 = (reclist2 *) palloc(sizeof(reclist2));
+ 	rclist2->len = len;
+ 	rclist2->rclist = (reclist *) palloc(len * sizeof(reclist));
+ 	for (i = 0; i < len; i++)
+ 		rclist2->rclist[i] = rclist[i];
+ 	rclist2->attnums = attnums;
+ 	*results = lappend(*results, rclist2);
+ }
+ 
+ static int
+ compare_reclist2(reclist2 *a, reclist2 *b)
+ {
+ 	int	pos;
+ 
+ 	/* order by length first: the shorter attnum array compares lower */
+ 	if (a->len != b->len)
+ 		return (a->len < b->len) ? -1 : 1;
+ 
+ 	/* same length: the first differing attribute number decides */
+ 	for (pos = 0; pos < a->len; pos++)
+ 	{
+ 		AttrNumber	lhs = a->attnums[pos];
+ 		AttrNumber	rhs = b->attnums[pos];
+ 		if (lhs != rhs)
+ 			return (lhs < rhs) ? -1 : 1;
+ 	}
+ 
+ 	return 0;	/* identical arrays */
+ }
+ 
+ /*
+  * add_reclist2 --- insert rclist2 into the array p_reclist2 (*len entries used)
+  *
+  * The entry is placed in front of the first existing element it compares
+  * greater than, and *len is incremented.  An empty array simply takes it in
+  * slot 0.  Returns false, leaving the array untouched, when the array is not
+  * empty and no existing element compares lower; rclist2 stays with the caller.
+  */
+ static bool
+ add_reclist2(int *len, reclist2 **p_reclist2, reclist2 *rclist2)
+ {
+ 	int	curr_len = *len;
+ 	int	pos = 0;	/* slot 0 is the right place in an empty array */
+ 	int	j;
+ 
+ 	if (curr_len > 0)
+ 	{
+ 		while (pos < curr_len && compare_reclist2(rclist2, p_reclist2[pos]) <= 0)
+ 			pos++;
+ 		if (pos == curr_len)
+ 			return false;
+ 	}
+ 
+ 	for (j = curr_len; j > pos; j--)	/* open a gap at pos */
+ 		p_reclist2[j] = p_reclist2[j - 1];
+ 	p_reclist2[pos] = rclist2;
+ 	*len = curr_len + 1;
+ 	return true;
+ }
+ 
+ static void
+ collect_xcol_lists(int curr_depth, CrossColumnClause *xclist, RangeQueryClause *rqlist, reclist *rclist, List **results)
+ {
+ 	CrossColumnClause	*xc_tmp;
+ 	RangeQueryClause	*rq_tmp;
+ 	/* Slot curr_depth of rclist takes each candidate in turn; the recursive call fills deeper slots; each prefix built goes to add_reclist(). */
+ 	for (xc_tmp = xclist; xc_tmp; xc_tmp = xc_tmp->next)
+ 	{
+ 		if (xc_tmp->varattno == 0)
+ 			continue;	/* no usable attribute number recorded for this clause */
+ 
+ 		rclist[curr_depth].xc = xc_tmp;
+ 		collect_xcol_lists(curr_depth + 1, xc_tmp->next, rqlist, rclist, results);
+ 		add_reclist(curr_depth + 1, rclist, results);
+ 		rclist[curr_depth].xc = NULL;	/* clear the slot before trying the next candidate */
+ 	}
+ 
+ 	for (rq_tmp = rqlist; rq_tmp; rq_tmp = rq_tmp->next)
+ 	{
+ 		if (rq_tmp->varattno == 0)
+ 			continue;	/* no usable attribute number recorded for this clause */
+ 		/* NOTE(review): the recursion below drops the head of xclist rather than anything tied to rq_tmp -- confirm this is intended */
+ 		rclist[curr_depth].rq = rq_tmp;
+ 		collect_xcol_lists(curr_depth + 1, (xclist ? xclist->next : xclist), rq_tmp->next, rclist, results);
+ 		add_reclist(curr_depth + 1, rclist, results);
+ 		rclist[curr_depth].rq = NULL;	/* clear the slot before trying the next candidate */
+ 	}
+ }
+ 
+ static bool
+ crosscolumn_selectivity(Oid relId, CrossColumnClause **xclist, RangeQueryClause **rqlist, Selectivity *result_sel)
+ {
+ 	CrossColumnClause *xc;
+ 	RangeQueryClause *rq;
+ 	List	   *resultlist = NIL;
+ 	ListCell   *lc;
+ 	reclist	   *rclist;
+ 	reclist2   **p_rclist2;
+ 	int		max_len, i;
+ 	Selectivity	sel = 1.0;
+ 	bool		found_xc_sel = false;
+ 	/* Multiplies into *result_sel the selectivity of each kept candidate combination that has a pg_statistic2 row; returns whether any did. */
+ 	max_len = 0;
+ 	for (rq = *rqlist; rq; max_len++, rq = rq->next)
+ 		;
+ 	for (xc = *xclist; xc; max_len++, xc = xc->next)
+ 		;
+ 	/* max_len now counts the entries of both lists; it sizes the scratch array */
+ //	elog(NOTICE, "crosscolumn_selectivity max length of array %d", max_len);
+ 
+ 	rclist = (reclist *) palloc(max_len * sizeof(reclist));
+ 	for (i = 0; i < max_len; i++)
+ 	{
+ 		rclist[i].xc = NULL;
+ 		rclist[i].rq = NULL;
+ 	}
+ 	/* enumerate the candidate clause combinations into resultlist */
+ 	collect_xcol_lists(0, *xclist, *rqlist, rclist, &resultlist);
+ 
+ 	pfree(rclist);
+ 
+ 	max_len = list_length(resultlist);
+ //	elog(NOTICE, "crosscolumn_selectivity list length of arrays %d", max_len);
+ 	p_rclist2 = (reclist2 **) palloc(max_len * sizeof(reclist2 *));
+ 	/* move the candidates into p_rclist2; those add_reclist2() rejects are freed here */
+ 	max_len = 0;
+ 	foreach (lc, resultlist)
+ 	{
+ 		reclist2 *rclist2 = (reclist2 *) lfirst(lc);
+ 
+ 		if (!add_reclist2(&max_len, p_rclist2, rclist2))
+ 		{
+ 			pfree(rclist2->rclist);
+ 			pfree(rclist2->attnums);
+ 			pfree(rclist2);
+ 		}
+ 	}
+ //	elog(NOTICE, "crosscolumn_selectivity length of ordered/unique array of previous list %d", max_len);
+ 
+ 	list_free(resultlist);
+ 	/* probe pg_statistic2 for each remaining candidate, in array order */
+ 	for (i = 0; i < max_len; i++)
+ 	{
+ 		if (p_rclist2[i] == NULL)
+ 			continue;
+ 
+ 		if (has_xcol_selectivity(relId, p_rclist2[i]->len, p_rclist2[i]->attnums, &sel))
+ 		{
+ 			int	j;
+ 
+ 			/* remove the xclist and rqlist members found in p_rclist2[i] */
+ 			for (j = 0; j < p_rclist2[i]->len; j++)
+ 			{
+ 				/* TODO ... */
+ 			}
+ 
+ 			/* also, remove later elements in p_rclist2 that has any of the removed elements */
+ 			/* TODO ... */
+ 			/* NOTE(review): until the TODOs are done, matched clauses stay in *xclist / *rqlist, so the caller still folds in their own selectivities */
+ //			elog(NOTICE, "crosscolumn_selectivity found xc selectivity %lf", sel);
+ 			found_xc_sel = true;
+ 			*result_sel *= sel;
+ 		}
+ 
+ 		pfree(p_rclist2[i]->rclist);
+ 		pfree(p_rclist2[i]->attnums);
+ 		pfree(p_rclist2[i]);
+ 	}
+ 	pfree(p_rclist2);
+ 
+ 	return found_xc_sel;
+ }
diff -dcrpN postgresql.orig/src/backend/parser/gram.y postgresql/src/backend/parser/gram.y
*** postgresql.orig/src/backend/parser/gram.y	2011-04-26 09:54:04.055359065 +0200
--- postgresql/src/backend/parser/gram.y	2011-04-28 14:21:14.739176296 +0200
*************** static void SplitColQualList(List *qualL
*** 199,209 ****
  		CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
  		CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
  		CreateAssertStmt CreateTrigStmt
! 		CreateUserStmt CreateUserMappingStmt CreateRoleStmt
! 		CreatedbStmt DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt
  		DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
! 		DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt
! 		DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
  		DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt
  		GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
  		LockStmt NotifyStmt ExplainableStmt PreparableStmt
--- 199,210 ----
  		CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
  		CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
  		CreateAssertStmt CreateTrigStmt
! 		CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatedbStmt
! 		CreateCCStmt CreateESStmt
! 		DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt
  		DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt
! 		DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropCCStmt DropESStmt
! 		DropRoleStmt DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt
  		DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt
  		GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt
  		LockStmt NotifyStmt ExplainableStmt PreparableStmt
*************** static void SplitColQualList(List *qualL
*** 315,320 ****
--- 316,323 ----
  %type <list>	opt_fdw_options fdw_options
  %type <defelt>	fdw_option
  
+ %type <list>	cc_column_list
+ 
  %type <range>	OptTempTableName
  %type <into>	into_clause create_as_target
  
*************** static void SplitColQualList(List *qualL
*** 499,505 ****
  	DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
  
  	EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT
! 	EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN
  	EXTENSION EXTERNAL EXTRACT
  
  	FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD
--- 502,508 ----
  	DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
  
  	EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT
! 	EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXPRESSION
  	EXTENSION EXTERNAL EXTRACT
  
  	FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD
*************** stmt :
*** 700,707 ****
--- 703,712 ----
  			| CreateAsStmt
  			| CreateAssertStmt
  			| CreateCastStmt
+ 			| CreateCCStmt
  			| CreateConversionStmt
  			| CreateDomainStmt
+ 			| CreateESStmt
  			| CreateExtensionStmt
  			| CreateFdwStmt
  			| CreateForeignServerStmt
*************** stmt :
*** 729,734 ****
--- 734,741 ----
  			| DoStmt
  			| DropAssertStmt
  			| DropCastStmt
+ 			| DropCCStmt
+ 			| DropESStmt
  			| DropFdwStmt
  			| DropForeignServerStmt
  			| DropGroupStmt
*************** schema_stmt:
*** 1190,1195 ****
--- 1197,1267 ----
  
  /*****************************************************************************
   *
+  * Create cross column / expression statistics
+  *
+  *****************************************************************************/
+ 
+ CreateCCStmt:
+ 			CREATE CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = TRUE;	/* add the entry */
+ 					n->relation = $6;	/* qualified_name */
+ 					n->columns = $8;	/* cc_column_list */
+ 					n->expr = NULL;	/* not used by the column-list form */
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ DropCCStmt:
+ 			DROP CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = FALSE;	/* remove the entry */
+ 					n->relation = $6;	/* qualified_name */
+ 					n->columns = $8;	/* cc_column_list */
+ 					n->expr = NULL;	/* not used by the column-list form */
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ cc_column_list:
+ 			columnref
+ 				{
+ 					$$ = list_make1($1);	/* first column starts the list */
+ 				}
+ 			| cc_column_list ',' columnref
+ 				{
+ 					$$ = lappend($1, $3);	/* later columns are appended in the order written */
+ 				}
+ 		;
+ 
+ CreateESStmt:
+ 			CREATE EXPRESSION STATISTICS ON qualified_name '(' a_expr ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = TRUE;	/* add the entry */
+ 					n->relation = $5;	/* qualified_name */
+ 					n->columns = NIL;	/* empty list selects the expression form */
+ 					n->expr = $7;	/* a_expr */
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ DropESStmt:
+ 			DROP EXPRESSION STATISTICS ON qualified_name '(' a_expr ')'
+ 				{
+ 					ExtraStatStmt *n = makeNode(ExtraStatStmt);
+ 					n->create = FALSE;	/* remove the entry */
+ 					n->relation = $5;	/* qualified_name */
+ 					n->columns = NIL;	/* empty list selects the expression form */
+ 					n->expr = $7;	/* a_expr */
+ 					$$ = (Node *)n;
+ 				}
+ 		;
+ 
+ /*****************************************************************************
+  *
   * Set PG internal variable
   *	  SET name TO 'var_value'
   * Include SQL92 syntax (thomas 1997-10-22):
*************** unreserved_keyword:
*** 11898,11903 ****
--- 11970,11976 ----
  			| EXCLUSIVE
  			| EXECUTE
  			| EXPLAIN
+ 			| EXPRESSION
  			| EXTENSION
  			| EXTERNAL
  			| FAMILY
diff -dcrpN postgresql.orig/src/backend/parser/parse_utilcmd.c postgresql/src/backend/parser/parse_utilcmd.c
*** postgresql.orig/src/backend/parser/parse_utilcmd.c	2011-04-26 09:54:04.062358585 +0200
--- postgresql/src/backend/parser/parse_utilcmd.c	2011-04-28 14:21:14.745175892 +0200
*************** setSchemaName(char *context_schema, char
*** 2700,2702 ****
--- 2700,2878 ----
  						"different from the one being created (%s)",
  						*stmt_schema_name, context_schema)));
  }
+ 
+ /*
+  * set_location_unknown_walker: tree walker that sets the location field of the node types listed below to -1; "dummy" is unused.
+  */
+ bool
+ set_location_unknown_walker(Node *node, void *dummy)
+ {
+ 	if (node == NULL)
+ 		return false;	/* empty subtree: nothing to reset */
+ 
+ 	switch (node->type)
+ 	{
+ 		case T_TypeName:
+ 			{
+ 				TypeName *n = (TypeName *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ColumnRef:
+ 			{
+ 				ColumnRef *n = (ColumnRef *)node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ParamRef:
+ 			{
+ 				ParamRef *n = (ParamRef *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_A_Expr:
+ 			{
+ 				A_Expr *n = (A_Expr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_A_Const:
+ 			{
+ 				A_Const *n = (A_Const *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_TypeCast:
+ 			{
+ 				TypeCast *n = (TypeCast *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_FuncCall:
+ 			{
+ 				FuncCall *n = (FuncCall *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_A_ArrayExpr:
+ 			{
+ 				A_ArrayExpr *n = (A_ArrayExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_Var:
+ 			{
+ 				Var *n = (Var *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_Const:
+ 			{
+ 				Const *n = (Const *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_FuncExpr:
+ 			{
+ 				FuncExpr *n = (FuncExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_OpExpr:
+ 			{
+ 				OpExpr *n = (OpExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_DistinctExpr:
+ 			{
+ 				DistinctExpr *n = (DistinctExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ScalarArrayOpExpr:
+ 			{
+ 				ScalarArrayOpExpr *n = (ScalarArrayOpExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_BoolExpr:
+ 			{
+ 				BoolExpr *n = (BoolExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CaseExpr:
+ 			{
+ 				CaseExpr *n = (CaseExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CaseWhen:
+ 			{
+ 				CaseWhen *n = (CaseWhen *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_ArrayExpr:
+ 			{
+ 				ArrayExpr *n = (ArrayExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CoalesceExpr:
+ 			{
+ 				CoalesceExpr *n = (CoalesceExpr *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		case T_CoerceToDomain:
+ 			{
+ 				CoerceToDomain *n = (CoerceToDomain *) node;
+ 				n->location = -1;
+ 			}
+ 			break;
+ 		default:
+ 			break;	/* NOTE(review): node types not listed above keep their location -- confirm the list is complete */
+ 	}
+ 
+ 	return expression_tree_walker(node, set_location_unknown_walker, NULL);	/* recurse into children; no context is passed */
+ }
+ 
+ /*
+  * transformExtraStatistics
+  *		Run transformExpr() over the statement's column list and expression,
+  *		with stmt->relation added to the parse state; returns a new ExtraStatStmt.
+  */
+ ExtraStatStmt *
+ transformExtraStatistics(ExtraStatStmt *stmt, const char *queryString)
+ {
+ 	ParseState *pstate;
+ 	RangeTblEntry *rte;
+ 	ExtraStatStmt *newstmt;
+ 	List	   *columns = NIL;
+ 	ListCell   *cell;
+ 
+ 	pstate = make_parsestate(NULL);
+ 	pstate->p_sourcetext = queryString;	/* hand the statement's query text to the parse state */
+ 
+ 	rte = addRangeTableEntry(pstate, stmt->relation, NULL, false, true);
+ 	addRTEtoQuery(pstate, rte, true, true, true);	/* presumably makes the relation's columns resolvable below -- confirm flag meanings */
+ 
+ 	newstmt = makeNode(ExtraStatStmt);
+ 	newstmt->create = stmt->create;
+ 	newstmt->relation = copyObject(stmt->relation);
+ 
+ 	foreach(cell, stmt->columns)	/* stmt->columns is NIL for the EXPRESSION forms, so this loop is then skipped */
+ 	{
+ 		Node *col = lfirst(cell);
+ 
+ 		columns = lappend(columns, transformExpr(pstate, col));
+ 	}
+ 
+ 	newstmt->columns = columns;
+ 	newstmt->expr = transformExpr(pstate, stmt->expr);	/* stmt->expr is NULL for the CROSS COLUMN forms */
+ 	query_or_expression_tree_walker(newstmt->expr, set_location_unknown_walker, NULL, 0);	/* only the expression has its locations reset, not the columns */
+ 
+ 	return newstmt;	/* NOTE(review): pstate is not released here (no free_parsestate call) -- confirm intended */
+ }
diff -dcrpN postgresql.orig/src/backend/tcop/utility.c postgresql/src/backend/tcop/utility.c
*** postgresql.orig/src/backend/tcop/utility.c	2011-04-26 09:54:04.075357697 +0200
--- postgresql/src/backend/tcop/utility.c	2011-04-28 14:21:14.748175689 +0200
*************** check_xact_readonly(Node *parsetree)
*** 229,234 ****
--- 229,235 ----
  		case T_AlterTableSpaceOptionsStmt:
  		case T_CreateForeignTableStmt:
  		case T_SecLabelStmt:
+ 		case T_ExtraStatStmt:
  			PreventCommandIfReadOnly(CreateCommandTag(parsetree));
  			break;
  		default:
*************** standard_ProcessUtility(Node *parsetree,
*** 573,578 ****
--- 574,587 ----
  			}
  			break;
  
+ 		case T_ExtraStatStmt:
+ 			{
+ 				ExtraStatStmt *newstmt = transformExtraStatistics((ExtraStatStmt *)parsetree, queryString);
+ 
+ 				ExtraStatistics(newstmt);
+ 			}
+ 			break;
+ 
  		case T_CreateTableSpaceStmt:
  			PreventTransactionChain(isTopLevel, "CREATE TABLESPACE");
  			CreateTableSpace((CreateTableSpaceStmt *) parsetree);
*************** CreateCommandTag(Node *parsetree)
*** 1734,1739 ****
--- 1743,1771 ----
  			tag = "CREATE FOREIGN TABLE";
  			break;
  
+ 		case T_ExtraStatStmt:
+ 			{
+ 				ExtraStatStmt *stmt = (ExtraStatStmt *)parsetree;
+ 
+ 				if (list_length(stmt->columns) > 0)
+ 				{
+ 					if (stmt->create)
+ 						tag = "CREATE CROSS COLUMN STATISTICS";
+ 					else
+ 						tag = "DROP CROSS COLUMN STATISTICS";
+ 				}
+ 				else if (stmt->expr != NULL)
+ 				{
+ 					if (stmt->create)
+ 						tag = "CREATE EXPRESSION STATISTICS";
+ 					else
+ 						tag = "DROP EXPRESSION STATISTICS";
+ 				}
+ 				else
+ 					tag = "???";
+ 			}
+ 			break;
+ 
  		case T_DropStmt:
  			switch (((DropStmt *) parsetree)->removeType)
  			{
diff -dcrpN postgresql.orig/src/backend/tsearch/ts_selfuncs.c postgresql/src/backend/tsearch/ts_selfuncs.c
*** postgresql.orig/src/backend/tsearch/ts_selfuncs.c	2011-04-11 15:36:27.150812982 +0200
--- postgresql/src/backend/tsearch/ts_selfuncs.c	2011-04-28 14:21:14.749175621 +0200
*************** tsquerysel(VariableStatData *vardata, Da
*** 169,175 ****
  		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
  
  		/* MCELEM will be an array of TEXT elements for a tsvector column */
! 		if (get_attstatsslot(vardata->statsTuple,
  							 TEXTOID, -1,
  							 STATISTIC_KIND_MCELEM, InvalidOid,
  							 NULL,
--- 169,175 ----
  		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
  
  		/* MCELEM will be an array of TEXT elements for a tsvector column */
! 		if (get_attstatsslot(vardata->statsTuple, STAT_VARIABLE,
  							 TEXTOID, -1,
  							 STATISTIC_KIND_MCELEM, InvalidOid,
  							 NULL,
diff -dcrpN postgresql.orig/src/backend/utils/adt/selfuncs.c postgresql/src/backend/utils/adt/selfuncs.c
*** postgresql.orig/src/backend/utils/adt/selfuncs.c	2011-04-26 09:54:04.094356395 +0200
--- postgresql/src/backend/utils/adt/selfuncs.c	2011-04-28 15:53:46.195302017 +0200
***************
*** 94,102 ****
--- 94,104 ----
  #include "access/gin.h"
  #include "access/sysattr.h"
  #include "catalog/index.h"
+ #include "catalog/indexing.h"
  #include "catalog/pg_collation.h"
  #include "catalog/pg_opfamily.h"
  #include "catalog/pg_statistic.h"
+ #include "catalog/pg_statistic3.h"
  #include "catalog/pg_type.h"
  #include "executor/executor.h"
  #include "mb/pg_wchar.h"
***************
*** 111,116 ****
--- 113,119 ----
  #include "optimizer/restrictinfo.h"
  #include "optimizer/var.h"
  #include "parser/parse_coerce.h"
+ #include "parser/parse_utilcmd.h"
  #include "parser/parsetree.h"
  #include "utils/builtins.h"
  #include "utils/bytea.h"
*************** var_eq_const(VariableStatData *vardata, 
*** 275,281 ****
  		 * don't like this, maybe you shouldn't be using eqsel for your
  		 * operator...)
  		 */
! 		if (get_attstatsslot(vardata->statsTuple,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 278,284 ----
  		 * don't like this, maybe you shouldn't be using eqsel for your
  		 * operator...)
  		 */
! 		if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** var_eq_non_const(VariableStatData *varda
*** 417,423 ****
  		 * Cross-check: selectivity should never be estimated as more than the
  		 * most common value's.
  		 */
! 		if (get_attstatsslot(vardata->statsTuple,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 420,426 ----
  		 * Cross-check: selectivity should never be estimated as more than the
  		 * most common value's.
  		 */
! 		if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  							 vardata->atttype, vardata->atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** mcv_selectivity(VariableStatData *vardat
*** 588,594 ****
  	sumcommon = 0.0;
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
--- 591,597 ----
  	sumcommon = 0.0;
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
*************** histogram_selectivity(VariableStatData *
*** 664,670 ****
  	Assert(min_hist_size > 2 * n_skip);
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 NULL,
--- 667,673 ----
  	Assert(min_hist_size > 2 * n_skip);
  
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 NULL,
*************** ineq_histogram_selectivity(PlannerInfo *
*** 741,747 ****
  	 * the reverse way if isgt is TRUE.
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 &hist_op,
--- 744,750 ----
  	 * the reverse way if isgt is TRUE.
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple) &&
! 		get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
  						 &hist_op,
*************** booltestsel(PlannerInfo *root, BoolTestT
*** 1434,1440 ****
  		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
  		freq_null = stats->stanullfrac;
  
! 		if (get_attstatsslot(vardata.statsTuple,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 1437,1443 ----
  		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
  		freq_null = stats->stanullfrac;
  
! 		if (get_attstatsslot(vardata.statsTuple, vardata.stats_type,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** eqjoinsel_inner(Oid operator,
*** 2074,2080 ****
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2077,2083 ----
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->stats_type,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** eqjoinsel_inner(Oid operator,
*** 2087,2093 ****
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
  		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2090,2096 ----
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
  		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->stats_type,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** eqjoinsel_semi(Oid operator,
*** 2309,2315 ****
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2312,2318 ----
  	if (HeapTupleIsValid(vardata1->statsTuple))
  	{
  		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
! 		have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->stats_type,
  									  vardata1->atttype,
  									  vardata1->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** eqjoinsel_semi(Oid operator,
*** 2321,2327 ****
  
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
--- 2324,2330 ----
  
  	if (HeapTupleIsValid(vardata2->statsTuple))
  	{
! 		have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->stats_type,
  									  vardata2->atttype,
  									  vardata2->atttypmod,
  									  STATISTIC_KIND_MCV,
*************** estimate_hash_bucketsize(PlannerInfo *ro
*** 3322,3328 ****
  
  	if (HeapTupleIsValid(vardata.statsTuple))
  	{
! 		if (get_attstatsslot(vardata.statsTuple,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
--- 3325,3331 ----
  
  	if (HeapTupleIsValid(vardata.statsTuple))
  	{
! 		if (get_attstatsslot(vardata.statsTuple, vardata.stats_type,
  							 vardata.atttype, vardata.atttypmod,
  							 STATISTIC_KIND_MCV, InvalidOid,
  							 NULL,
*************** examine_variable(PlannerInfo *root, Node
*** 4103,4108 ****
--- 4106,4112 ----
  {
  	Node	   *basenode;
  	Relids		varnos;
+ 	int		onerelid = 0;
  	RelOptInfo *onerel;
  
  	/* Make sure we don't return dangling pointers in vardata */
*************** examine_variable(PlannerInfo *root, Node
*** 4147,4152 ****
--- 4151,4157 ----
  		}
  		else if (rte->rtekind == RTE_RELATION)
  		{
+ 			vardata->stats_type = STAT_VARIABLE;
  			vardata->statsTuple = SearchSysCache3(STATRELATTINH,
  												ObjectIdGetDatum(rte->relid),
  												Int16GetDatum(var->varattno),
*************** examine_variable(PlannerInfo *root, Node
*** 4185,4192 ****
  		case BMS_SINGLETON:
  			if (varRelid == 0 || bms_is_member(varRelid, varnos))
  			{
! 				onerel = find_base_rel(root,
! 					   (varRelid ? varRelid : bms_singleton_member(varnos)));
  				vardata->rel = onerel;
  				node = basenode;	/* strip any relabeling */
  			}
--- 4190,4197 ----
  		case BMS_SINGLETON:
  			if (varRelid == 0 || bms_is_member(varRelid, varnos))
  			{
! 				onerelid = (varRelid ? varRelid : bms_singleton_member(varnos));
! 				onerel = find_base_rel(root, onerelid);
  				vardata->rel = onerel;
  				node = basenode;	/* strip any relabeling */
  			}
*************** examine_variable(PlannerInfo *root, Node
*** 4220,4233 ****
  	{
  		/*
  		 * We have an expression in vars of a single relation.	Try to match
! 		 * it to expressional index columns, in hopes of finding some
! 		 * statistics.
  		 *
  		 * XXX it's conceivable that there are multiple matches with different
  		 * index opfamilies; if so, we need to pick one that matches the
  		 * operator we are estimating for.	FIXME later.
  		 */
  		ListCell   *ilist;
  
  		foreach(ilist, onerel->indexlist)
  		{
--- 4225,4301 ----
  	{
  		/*
  		 * We have an expression in vars of a single relation.	Try to match
! 		 * it to expression statistics first then to expressional index columns,
! 		 * in hopes of finding some statistics.
  		 *
  		 * XXX it's conceivable that there are multiple matches with different
  		 * index opfamilies; if so, we need to pick one that matches the
  		 * operator we are estimating for.	FIXME later.
  		 */
  		ListCell   *ilist;
+ 		Node	   *expr = copyObject(node);
+ 		char	   *exprbin;
+ 		Datum		exprbindatum;
+ 
+ #define USE_SYSCACHE_FOR_SEARCH	0
+ #if !USE_SYSCACHE_FOR_SEARCH
+ 		Relation	rel;
+ 		ScanKeyData	scanKey[2];  
+ 		SysScanDesc	scan;
+ 		HeapTuple	tuple;
+ #endif
+ 
+ 		query_or_expression_tree_walker(expr, set_location_unknown_walker, NULL, 0);
+ 		exprbin = nodeToString(expr);
+ 		exprbindatum = CStringGetTextDatum(exprbin);
+ 
+ #if USE_SYSCACHE_FOR_SEARCH
+ 		vardata->statsTuple = SearchSysCache3(STAT3RELEXPRINH,
+ 								ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid),
+ 								exprbindatum,
+ 								BoolGetDatum(false));
+ 		if (HeapTupleIsValid(vardata->statsTuple))
+ 		{
+ 			vardata->stats_type = STAT_EXPRESSION;
+ 			vardata->freefunc = ReleaseSysCache;
+ 			return;
+ 		}
+ 
+ #else
+ 
+ 		rel = heap_open(Statistic3RelationId, RowShareLock);
+ 
+ 		ScanKeyInit(&scanKey[0],
+ 							Anum_pg_statistic3_sta3relid,
+ 							BTEqualStrategyNumber, F_OIDEQ,
+ 							ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid));
+ 		ScanKeyInit(&scanKey[1],
+ 							Anum_pg_statistic3_sta3expr,
+ 							BTEqualStrategyNumber, F_TEXTEQ,
+ 							exprbindatum);
+ 
+ 		scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true,
+ 							SnapshotNow, 2, scanKey);
+ 
+ 		tuple = systable_getnext(scan);
+ 		if (HeapTupleIsValid(tuple))
+ 		{
+ //			elog(NOTICE, "examine_variable expression found");
+ 			vardata->stats_type = STAT_EXPRESSION;
+ 			vardata->statsTuple = heap_copytuple(tuple);
+ 			vardata->freefunc = heap_freetuple;
+ 		}
+ 
+ 		systable_endscan(scan);
+ 
+ 		pfree(exprbin);
+ 		pfree(DatumGetPointer(exprbindatum));
+ 
+ 		relation_close(rel, RowShareLock);
+ 
+ 		if (vardata->statsTuple)
+ 			return;
+ #endif
  
  		foreach(ilist, onerel->indexlist)
  		{
*************** examine_variable(PlannerInfo *root, Node
*** 4286,4291 ****
--- 4354,4360 ----
  						}
  						else if (index->indpred == NIL)
  						{
+ 							vardata->stats_type = STAT_VARIABLE;
  							vardata->statsTuple =
  								SearchSysCache3(STATRELATTINH,
  										   ObjectIdGetDatum(index->indexoid),
*************** get_variable_numdistinct(VariableStatDat
*** 4327,4337 ****
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple))
  	{
! 		/* Use the pg_statistic entry */
! 		Form_pg_statistic stats;
  
! 		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
! 		stadistinct = stats->stadistinct;
  	}
  	else if (vardata->vartype == BOOLOID)
  	{
--- 4396,4425 ----
  	 */
  	if (HeapTupleIsValid(vardata->statsTuple))
  	{
! 		switch (vardata->stats_type)
! 		{
! 			case STAT_VARIABLE:
! 			{
! 				/* Use the pg_statistic entry */
! 				Form_pg_statistic stats;
  
! 				stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
! 				stadistinct = stats->stadistinct;
! 				break;
! 			}
! 			case STAT_EXPRESSION:
! 			{
! 				/* Use the pg_statistic3 entry */
! 				Form_pg_statistic3 stats3;
! 
! 				stats3 = (Form_pg_statistic3) GETSTRUCT(vardata->statsTuple);
! 				stadistinct = stats3->sta3distinct;
! 				break;
! 			}
! 			default:
! 				elog(ERROR, "internal error");
! 				return 0.0;
! 		}
  	}
  	else if (vardata->vartype == BOOLOID)
  	{
*************** get_variable_range(PlannerInfo *root, Va
*** 4462,4468 ****
  	 * the one we want, fail --- this suggests that there is data we can't
  	 * use.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, sortop,
  						 NULL,
--- 4550,4556 ----
  	 * the one we want, fail --- this suggests that there is data we can't
  	 * use.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_HISTOGRAM, sortop,
  						 NULL,
*************** get_variable_range(PlannerInfo *root, Va
*** 4477,4483 ****
  		}
  		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
  	}
! 	else if (get_attstatsslot(vardata->statsTuple,
  							  vardata->atttype, vardata->atttypmod,
  							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
  							  NULL,
--- 4565,4571 ----
  		}
  		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);
  	}
! 	else if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  							  vardata->atttype, vardata->atttypmod,
  							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
  							  NULL,
*************** get_variable_range(PlannerInfo *root, Va
*** 4494,4500 ****
  	 * the MCVs.  However, usually the MCVs will not be the extreme values, so
  	 * avoid unnecessary data copying.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
--- 4582,4588 ----
  	 * the MCVs.  However, usually the MCVs will not be the extreme values, so
  	 * avoid unnecessary data copying.
  	 */
! 	if (get_attstatsslot(vardata->statsTuple, vardata->stats_type,
  						 vardata->atttype, vardata->atttypmod,
  						 STATISTIC_KIND_MCV, InvalidOid,
  						 NULL,
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6255,6260 ****
--- 6343,6349 ----
  		}
  		else
  		{
+ 			vardata.stats_type = STAT_VARIABLE;
  			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
  												 ObjectIdGetDatum(relid),
  												 Int16GetDatum(colnum),
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6281,6286 ****
--- 6370,6376 ----
  		}
  		else
  		{
+ 			vardata.stats_type = STAT_VARIABLE;
  			vardata.statsTuple = SearchSysCache3(STATRELATTINH,
  												 ObjectIdGetDatum(relid),
  												 Int16GetDatum(colnum),
*************** btcostestimate(PG_FUNCTION_ARGS)
*** 6300,6306 ****
  									 index->opcintype[0],
  									 BTLessStrategyNumber);
  		if (OidIsValid(sortop) &&
! 			get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
  							 STATISTIC_KIND_CORRELATION,
  							 sortop,
  							 NULL,
--- 6390,6396 ----
  									 index->opcintype[0],
  									 BTLessStrategyNumber);
  		if (OidIsValid(sortop) &&
! 			get_attstatsslot(vardata.statsTuple, vardata.stats_type, InvalidOid, 0,
  							 STATISTIC_KIND_CORRELATION,
  							 sortop,
  							 NULL,
diff -dcrpN postgresql.orig/src/backend/utils/cache/catcache.c postgresql/src/backend/utils/cache/catcache.c
*** postgresql.orig/src/backend/utils/cache/catcache.c	2011-04-13 10:11:05.021216766 +0200
--- postgresql/src/backend/utils/cache/catcache.c	2011-04-28 14:21:14.766174476 +0200
*************** GetCCHashEqFuncs(Oid keytype, PGFunction
*** 135,140 ****
--- 135,141 ----
  			*eqfunc = F_INT4EQ;
  			break;
  		case TEXTOID:
+ 		case PGNODETREEOID:
  			*hashfunc = hashtext;
  
  			*eqfunc = F_TEXTEQ;
diff -dcrpN postgresql.orig/src/backend/utils/cache/lsyscache.c postgresql/src/backend/utils/cache/lsyscache.c
*** postgresql.orig/src/backend/utils/cache/lsyscache.c	2011-04-11 15:36:27.175811226 +0200
--- postgresql/src/backend/utils/cache/lsyscache.c	2011-04-28 14:21:14.769174273 +0200
***************
*** 27,32 ****
--- 27,33 ----
  #include "catalog/pg_operator.h"
  #include "catalog/pg_proc.h"
  #include "catalog/pg_statistic.h"
+ #include "catalog/pg_statistic3.h" 
  #include "catalog/pg_type.h"
  #include "miscadmin.h"
  #include "nodes/makefuncs.h"
*************** get_attavgwidth(Oid relid, AttrNumber at
*** 2667,2680 ****
   * type ID to pass to free_attstatsslot later.
   */
  bool
! get_attstatsslot(HeapTuple statstuple,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
  				 Datum **values, int *nvalues,
  				 float4 **numbers, int *nnumbers)
  {
! 	Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
  	int			i,
  				j;
  	Datum		val;
--- 2668,2682 ----
   * type ID to pass to free_attstatsslot later.
   */
  bool
! get_attstatsslot(HeapTuple statstuple, StatType stat_type,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
  				 Datum **values, int *nvalues,
  				 float4 **numbers, int *nnumbers)
  {
! 	Form_pg_statistic stats;
! 	Form_pg_statistic3 stats3;
  	int			i,
  				j;
  	Datum		val;
*************** get_attstatsslot(HeapTuple statstuple,
*** 2685,2707 ****
  	HeapTuple	typeTuple;
  	Form_pg_type typeForm;
  
! 	for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
  	{
! 		if ((&stats->stakind1)[i] == reqkind &&
! 			(reqop == InvalidOid || (&stats->staop1)[i] == reqop))
  			break;
  	}
  	if (i >= STATISTIC_NUM_SLOTS)
  		return false;			/* not there */
  
  	if (actualop)
! 		*actualop = (&stats->staop1)[i];
  
  	if (values)
  	{
! 		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stavalues1 + i,
  							  &isnull);
  		if (isnull)
  			elog(ERROR, "stavalues is null");
  		statarray = DatumGetArrayTypeP(val);
--- 2687,2755 ----
  	HeapTuple	typeTuple;
  	Form_pg_type typeForm;
  
! 	switch (stat_type)
  	{
! 		case STAT_VARIABLE:
! 			stats = (Form_pg_statistic) GETSTRUCT(statstuple);
! 			stats3 = NULL;
! 
! 			for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
! 			{
! 				if ((&stats->stakind1)[i] == reqkind &&
! 					(reqop == InvalidOid || (&stats->staop1)[i] == reqop))
! 					break;
! 			}
  			break;
+ 		case STAT_EXPRESSION:
+ 			stats = NULL;
+ 			stats3 = (Form_pg_statistic3) GETSTRUCT(statstuple);
+ 
+ 			for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+ 			{
+ 				if ((&stats3->sta3kind1)[i] == reqkind &&
+ 					(reqop == InvalidOid || (&stats3->sta3op1)[i] == reqop))
+ 					break;
+ 			}
+ 			break;
+ 		default:
+ 			elog(ERROR, "internal error");
+ 			return false; /* make compiler quiet */
  	}
+ 
  	if (i >= STATISTIC_NUM_SLOTS)
  		return false;			/* not there */
  
  	if (actualop)
! 	{
! 		switch (stat_type)
! 		{
! 			case STAT_VARIABLE:
! 				*actualop = (&stats->staop1)[i];
! 				break;
! 			case STAT_EXPRESSION:
! 				*actualop = (&stats3->sta3op1)[i];
! 				break;
! 		}
! 	}
  
  	if (values)
  	{
! 		switch (stat_type)
! 		{
! 			case STAT_VARIABLE:
! 				val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stavalues1 + i,
  							  &isnull);
+ 				break;
+ 			case STAT_EXPRESSION:
+ 				val = SysCacheGetAttr(STAT3RELEXPRINH, statstuple,
+ 							  Anum_pg_statistic3_sta3values1 + i,
+ 							  &isnull);
+ 				break;
+ 			default:
+ 				elog(ERROR, "internal error");
+ 				return false; /* silence compiler */
+ 		}
  		if (isnull)
  			elog(ERROR, "stavalues is null");
  		statarray = DatumGetArrayTypeP(val);
*************** get_attstatsslot(HeapTuple statstuple,
*** 2753,2761 ****
  
  	if (numbers)
  	{
! 		val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stanumbers1 + i,
  							  &isnull);
  		if (isnull)
  			elog(ERROR, "stanumbers is null");
  		statarray = DatumGetArrayTypeP(val);
--- 2801,2821 ----
  
  	if (numbers)
  	{
! 		switch (stat_type)
! 		{
! 			case STAT_VARIABLE:
! 				val = SysCacheGetAttr(STATRELATTINH, statstuple,
  							  Anum_pg_statistic_stanumbers1 + i,
  							  &isnull);
+ 				break;
+ 			case STAT_EXPRESSION:
+ 				val = SysCacheGetAttr(STAT3RELEXPRINH, statstuple,
+ 							  Anum_pg_statistic3_sta3numbers1 + i,
+ 							  &isnull);
+ 				break;
+ 			default:
+ 				return false; /* silence compiler */
+ 		}
  		if (isnull)
  			elog(ERROR, "stanumbers is null");
  		statarray = DatumGetArrayTypeP(val);
diff -dcrpN postgresql.orig/src/backend/utils/cache/syscache.c postgresql/src/backend/utils/cache/syscache.c
*** postgresql.orig/src/backend/utils/cache/syscache.c	2011-04-26 09:54:04.095356326 +0200
--- postgresql/src/backend/utils/cache/syscache.c	2011-04-28 14:21:14.775173869 +0200
***************
*** 45,50 ****
--- 45,51 ----
  #include "catalog/pg_proc.h"
  #include "catalog/pg_rewrite.h"
  #include "catalog/pg_statistic.h"
+ #include "catalog/pg_statistic3.h"
  #include "catalog/pg_tablespace.h"
  #include "catalog/pg_ts_config.h"
  #include "catalog/pg_ts_config_map.h"
*************** static const struct cachedesc cacheinfo[
*** 587,592 ****
--- 588,604 ----
  		},
  		1024
  	},
+ 	{Statistic3RelationId,		/* STAT3RELEXPRINH */
+ 		Statistic3RelidExprInhIndexId,
+ 		3,
+ 		{
+ 			Anum_pg_statistic3_sta3relid,
+ 			Anum_pg_statistic3_sta3expr,
+ 			Anum_pg_statistic3_sta3inherit,
+ 			0
+ 		},
+ 		1024
+ 	},
  	{StatisticRelationId,		/* STATRELATTINH */
  		StatisticRelidAttnumInhIndexId,
  		3,
diff -dcrpN postgresql.orig/src/include/catalog/indexing.h postgresql/src/include/catalog/indexing.h
*** postgresql.orig/src/include/catalog/indexing.h	2011-02-10 10:36:32.320680534 +0100
--- postgresql/src/include/catalog/indexing.h	2011-04-28 14:21:14.777173734 +0200
*************** DECLARE_UNIQUE_INDEX(pg_extension_oid_in
*** 300,305 ****
--- 300,312 ----
  DECLARE_UNIQUE_INDEX(pg_extension_name_index, 3081, on pg_extension using btree(extname name_ops));
  #define ExtensionNameIndexId 3081
  
+ DECLARE_UNIQUE_INDEX(pg_statistic2_relid_att_inh_index, 3072, on pg_statistic2 using btree(sta2relid oid_ops, sta2attnums array_ops, sta2inherit bool_ops));
+ #define Statistic2RelidAttnumsInhIndexId	3072
+ 
+ DECLARE_UNIQUE_INDEX(pg_statistic3_relid_expr_inh_index, 3074, on pg_statistic3 using btree(sta3relid oid_ops, sta3expr text_ops, sta3inherit bool_ops));
+ #define Statistic3RelidExprInhIndexId	3074
+ 
+ 
  /* last step of initialization script: build the indexes declared above */
  BUILD_INDICES
  
diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic2.h postgresql/src/include/catalog/pg_statistic2.h
*** postgresql.orig/src/include/catalog/pg_statistic2.h	1970-01-01 01:00:00.000000000 +0100
--- postgresql/src/include/catalog/pg_statistic2.h	2011-04-28 14:21:14.779173600 +0200
***************
*** 0 ****
--- 1,265 ----
+ /*-------------------------------------------------------------------------
+  *
+  * pg_statistic2.h
+  *	  definition of the system "cross-column statistic" relation (pg_statistic2)
+  *	  along with the relation's initial contents.
+  *
+  *
+  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/catalog/pg_statistic2.h
+  *
+  * NOTES
+  *	  the genbki.pl script reads this file and generates .bki
+  *	  information from the DATA() statements.
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef PG_STATISTIC2_H
+ #define PG_STATISTIC2_H
+ 
+ #include "catalog/genbki.h"
+ 
+ /*
+  * The CATALOG definition has to refer to the type of stavaluesN as
+  * "anyarray" so that bootstrap mode recognizes it.  There is no real
+  * typedef for that, however.  Since the fields are potentially-null and
+  * therefore can't be accessed directly from C code, there is no particular
+  * need for the C struct definition to show a valid field type --- instead
+  * we just make it int.
+  */
+ #define anyarray int
+ 
+ /* ----------------
+  *		pg_statistic2 definition.  cpp turns this into
+  *		typedef struct FormData_pg_statistic2
+  * ----------------
+  */
+ #define Statistic2RelationId  3071
+ 
+ CATALOG(pg_statistic2,3071) BKI_WITHOUT_OIDS
+ {
+ 	/* These fields form the unique key for the entry: */
+ 	Oid			sta2relid;		/* relation containing attribute */
+ 	int2		sta2attnums[1];		/* attribute (column) stats are for */
+ 	bool		sta2inherit;		/* true if inheritance children are included */
+ 
+ 	/* the fraction of the column's entries that are NULL: */
+ 	float4		sta2nullfrac;
+ 
+ 	/*
+ 	 * sta2width is the average width in bytes of non-null entries.	For
+ 	 * fixed-width datatypes this is of course the same as the typlen, but for
+ 	 * var-width types it is more useful.  Note that this is the average width
+ 	 * of the data as actually stored, post-TOASTing (eg, for a
+ 	 * moved-out-of-line value, only the size of the pointer object is
+ 	 * counted).  This is the appropriate definition for the primary use of
+ 	 * the statistic, which is to estimate sizes of in-memory hash tables of
+ 	 * tuples.
+ 	 */
+ 	int4		sta2width;
+ 
+ 	/* ----------------
+ 	 * stadistinct indicates the (approximate) number of distinct non-null
+ 	 * data values in the column.  The interpretation is:
+ 	 *		0		unknown or not computed
+ 	 *		> 0		actual number of distinct values
+ 	 *		< 0		negative of multiplier for number of rows
+ 	 * The special negative case allows us to cope with columns that are
+ 	 * unique (stadistinct = -1) or nearly so (for example, a column in
+ 	 * which values appear about twice on the average could be represented
+ 	 * by stadistinct = -0.5).	Because the number-of-rows statistic in
+ 	 * pg_class may be updated more frequently than pg_statistic2 is, it's
+ 	 * important to be able to describe such situations as a multiple of
+ 	 * the number of rows, rather than a fixed number of distinct values.
+ 	 * But in other cases a fixed number is correct (eg, a boolean column).
+ 	 * ----------------
+ 	 */
+ 	float4		sta2distinct;
+ 
+ 	/* ----------------
+ 	 * To allow keeping statistics on different kinds of datatypes,
+ 	 * we do not hard-wire any particular meaning for the remaining
+ 	 * statistical fields.	Instead, we provide several "slots" in which
+ 	 * statistical data can be placed.	Each slot includes:
+ 	 *		kind			integer code identifying kind of data
+ 	 *		op				OID of associated operator, if needed
+ 	 *		numbers			float4 array (for statistical values)
+ 	 *		values			anyarray (for representations of data values)
+ 	 * The ID and operator fields are never NULL; they are zeroes in an
+ 	 * unused slot.  The numbers and values fields are NULL in an unused
+ 	 * slot, and might also be NULL in a used slot if the slot kind has
+ 	 * no need for one or the other.
+ 	 * ----------------
+ 	 */
+ 
+ 	int2		sta2kind1;
+ 	int2		sta2kind2;
+ 	int2		sta2kind3;
+ 	int2		sta2kind4;
+ 
+ 	Oid			sta2op1;
+ 	Oid			sta2op2;
+ 	Oid			sta2op3;
+ 	Oid			sta2op4;
+ 
+ 	/*
+ 	 * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ 	 * (NULL). They cannot be accessed as C struct entries; you have to use
+ 	 * the full field access machinery (heap_getattr) for them.  We declare
+ 	 * them here for the catalog machinery.
+ 	 */
+ 
+ 	float4		sta2numbers1[1];
+ 	float4		sta2numbers2[1];
+ 	float4		sta2numbers3[1];
+ 	float4		sta2numbers4[1];
+ 
+ 	/*
+ 	 * Values in these arrays are values of the column's data type.  We
+ 	 * presently have to cheat quite a bit to allow polymorphic arrays of this
+ 	 * kind, but perhaps someday it'll be a less bogus facility.
+ 	 */
+ 	anyarray	sta2values1;
+ 	anyarray	sta2values2;
+ 	anyarray	sta2values3;
+ 	anyarray	sta2values4;
+ } FormData_pg_statistic2;
+ 
+ #define STATISTIC_NUM_SLOTS  4
+ 
+ #undef anyarray
+ 
+ 
+ /* ----------------
+  *		Form_pg_statistic2 corresponds to a pointer to a tuple with
+  *		the format of pg_statistic2 relation.
+  * ----------------
+  */
+ typedef FormData_pg_statistic2 *Form_pg_statistic2;
+ 
+ /* ----------------
+  *		compiler constants for pg_statistic2
+  * ----------------
+  */
+ #define Natts_pg_statistic2				22
+ #define Anum_pg_statistic2_sta2relid		1
+ #define Anum_pg_statistic2_sta2attnums		2
+ #define Anum_pg_statistic2_sta2inherit	3
+ #define Anum_pg_statistic2_sta2nullfrac	4
+ #define Anum_pg_statistic2_sta2width		5
+ #define Anum_pg_statistic2_sta2distinct	6
+ #define Anum_pg_statistic2_sta2kind1		7
+ #define Anum_pg_statistic2_sta2kind2		8
+ #define Anum_pg_statistic2_sta2kind3		9
+ #define Anum_pg_statistic2_sta2kind4		10
+ #define Anum_pg_statistic2_sta2op1		11
+ #define Anum_pg_statistic2_sta2op2		12
+ #define Anum_pg_statistic2_sta2op3		13
+ #define Anum_pg_statistic2_sta2op4		14
+ #define Anum_pg_statistic2_sta2numbers1	15
+ #define Anum_pg_statistic2_sta2numbers2	16
+ #define Anum_pg_statistic2_sta2numbers3	17
+ #define Anum_pg_statistic2_sta2numbers4	18
+ #define Anum_pg_statistic2_sta2values1	19
+ #define Anum_pg_statistic2_sta2values2	20
+ #define Anum_pg_statistic2_sta2values3	21
+ #define Anum_pg_statistic2_sta2values4	22
+ 
+ #if 0
+ 
+ /*
+  * Currently, four statistical slot "kinds" are defined: most common values,
+  * histogram, correlation, and most common elements.  More "kinds" will likely
+  * appear to help cope with non-scalar datatypes.  Also, custom data types
+  * can define their own "kind" codes by mutual agreement between a custom
+  * typanalyze routine and the selectivity estimation functions of the type's
+  * operators.
+  *
+  * Code reading the pg_statistic2 relation should not assume that a particular
+  * data "kind" will appear in any particular slot.	Instead, search the
+  * stakind fields to see if the desired data is available.	(The standard
+  * function get_attstatsslot() may be used for this.)
+  */
+ 
+ /*
+  * The present allocation of "kind" codes is:
+  *
+  *	1-99:		reserved for assignment by the core PostgreSQL project
+  *				(values in this range will be documented in this file)
+  *	100-199:	reserved for assignment by the PostGIS project
+  *				(values to be documented in PostGIS documentation)
+  *	200-299:	reserved for assignment by the ESRI ST_Geometry project
+  *				(values to be documented in ESRI ST_Geometry documentation)
+  *	300-9999:	reserved for future public assignments
+  *
+  * For private use you may choose a "kind" code at random in the range
+  * 10000-30000.  However, for code that is to be widely disseminated it is
+  * better to obtain a publicly defined "kind" code by request from the
+  * PostgreSQL Global Development Group.
+  */
+ 
+ /*
+  * In a "most common values" slot, staop is the OID of the "=" operator
+  * used to decide whether values are the same or not.  stavalues contains
+  * the K most common non-null values appearing in the column, and stanumbers
+  * contains their frequencies (fractions of total row count).  The values
+  * shall be ordered in decreasing frequency.  Note that since the arrays are
+  * variable-size, K may be chosen by the statistics collector.	Values should
+  * not appear in MCV unless they have been observed to occur more than once;
+  * a unique column will have no MCV slot.
+  */
+ #define STATISTIC_KIND_MCV	1
+ 
+ /*
+  * A "histogram" slot describes the distribution of scalar data.  staop is
+  * the OID of the "<" operator that describes the sort ordering.  (In theory,
+  * more than one histogram could appear, if a datatype has more than one
+  * useful sort operator.)  stavalues contains M (>=2) non-null values that
+  * divide the non-null column data values into M-1 bins of approximately equal
+  * population.	The first stavalues item is the MIN and the last is the MAX.
+  * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+  * slot is also provided, then the histogram describes the data distribution
+  * *after removing the values listed in MCV* (thus, it's a "compressed
+  * histogram" in the technical parlance).  This allows a more accurate
+  * representation of the distribution of a column with some very-common
+  * values.	In a column with only a few distinct values, it's possible that
+  * the MCV list describes the entire data population; in this case the
+  * histogram reduces to empty and should be omitted.
+  */
+ #define STATISTIC_KIND_HISTOGRAM  2
+ 
+ /*
+  * A "correlation" slot describes the correlation between the physical order
+  * of table tuples and the ordering of data values of this column, as seen
+  * by the "<" operator identified by staop.  (As with the histogram, more
+  * than one entry could theoretically appear.)	stavalues is not used and
+  * should be NULL.	stanumbers contains a single entry, the correlation
+  * coefficient between the sequence of data values and the sequence of
+  * their actual tuple positions.  The coefficient ranges from +1 to -1.
+  */
+ #define STATISTIC_KIND_CORRELATION	3
+ 
+ /*
+  * A "most common elements" slot is similar to a "most common values" slot,
+  * except that it stores the most common non-null *elements* of the column
+  * values.	This is useful when the column datatype is an array or some other
+  * type with identifiable elements (for instance, tsvector).  staop contains
+  * the equality operator appropriate to the element type.  stavalues contains
+  * the most common element values, and stanumbers their frequencies.  Unlike
+  * MCV slots, the values are sorted into order (to support binary search
+  * for a particular value).  Since this puts the minimum and maximum
+  * frequencies at unpredictable spots in stanumbers, there are two extra
+  * members of stanumbers, holding copies of the minimum and maximum
+  * frequencies.
+  *
+  * Note: in current usage for tsvector columns, the stavalues elements are of
+  * type text, even though their representation within tsvector is not
+  * exactly text.
+  */
+ #define STATISTIC_KIND_MCELEM  4
+ 
+ #endif
+ 
+ #endif   /* PG_STATISTIC2_H */
diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic3.h postgresql/src/include/catalog/pg_statistic3.h
*** postgresql.orig/src/include/catalog/pg_statistic3.h	1970-01-01 01:00:00.000000000 +0100
--- postgresql/src/include/catalog/pg_statistic3.h	2011-04-28 14:21:14.780173533 +0200
***************
*** 0 ****
--- 1,265 ----
+ /*-------------------------------------------------------------------------
+  *
+  * pg_statistic3.h
+  *	  definition of the system "expression statistic" relation (pg_statistic3)
+  *	  along with the relation's initial contents.
+  *
+  *
+  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/catalog/pg_statistic3.h
+  *
+  * NOTES
+  *	  the genbki.pl script reads this file and generates .bki
+  *	  information from the DATA() statements.
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef PG_STATISTIC3_H
+ #define PG_STATISTIC3_H
+ 
+ #include "catalog/genbki.h"
+ 
+ /*
+  * The CATALOG definition has to refer to the type of sta3valuesN as
+  * "anyarray" so that bootstrap mode recognizes it.  There is no real
+  * typedef for that, however.  Since the fields are potentially-null and
+  * therefore can't be accessed directly from C code, there is no particular
+  * need for the C struct definition to show a valid field type --- instead
+  * we just make it int.
+  */
+ #define anyarray int
+ 
+ /* ----------------
+  *		pg_statistic3 definition.  cpp turns this into
+  *		typedef struct FormData_pg_statistic3
+  * ----------------
+  */
+ #define Statistic3RelationId  3073
+ 
+ CATALOG(pg_statistic3,3073) BKI_WITHOUT_OIDS
+ {
+ 	/* These fields form the unique key for the entry: */
+ 	Oid			sta3relid;		/* relation the statistics entry belongs to */
+ 	pg_node_tree		sta3expr;		/* expression stats are for */
+ 	bool		sta3inherit;		/* true if inheritance children are included */
+ 
+ 	/* the fraction of the entries that are NULL: */
+ 	float4		sta3nullfrac;
+ 
+ 	/*
+ 	 * sta3width is the average width in bytes of non-null entries.  For
+ 	 * fixed-width datatypes this is of course the same as the typlen, but for
+ 	 * var-width types it is more useful.  Note that this is the average width
+ 	 * of the data as actually stored, post-TOASTing (eg, for a
+ 	 * moved-out-of-line value, only the size of the pointer object is
+ 	 * counted).  This is the appropriate definition for the primary use of
+ 	 * the statistic, which is to estimate sizes of in-memory hash tables of
+ 	 * tuples.
+ 	 */
+ 	int4		sta3width;
+ 
+ 	/* ----------------
+ 	 * sta3distinct indicates the (approximate) number of distinct non-null
+ 	 * data values in the column.  The interpretation is:
+ 	 *		0		unknown or not computed
+ 	 *		> 0		actual number of distinct values
+ 	 *		< 0		negative of multiplier for number of rows
+ 	 * The special negative case allows us to cope with columns that are
+ 	 * unique (sta3distinct = -1) or nearly so (for example, a column in
+ 	 * which values appear about twice on the average could be represented
+ 	 * by sta3distinct = -0.5).  Because the number-of-rows statistic in
+ 	 * pg_class may be updated more frequently than pg_statistic3 is, it's
+ 	 * important to be able to describe such situations as a multiple of
+ 	 * the number of rows, rather than a fixed number of distinct values.
+ 	 * But in other cases a fixed number is correct (eg, a boolean column).
+ 	 * ----------------
+ 	 */
+ 	float4		sta3distinct;
+ 
+ 	/* ----------------
+ 	 * To allow keeping statistics on different kinds of datatypes,
+ 	 * we do not hard-wire any particular meaning for the remaining
+ 	 * statistical fields.  Instead, we provide several "slots" in which
+ 	 * statistical data can be placed.  Each slot includes:
+ 	 *		kind			integer code identifying kind of data
+ 	 *		op				OID of associated operator, if needed
+ 	 *		numbers			float4 array (for statistical values)
+ 	 *		values			anyarray (for representations of data values)
+ 	 * The kind and operator fields are never NULL; they are zeroes in an
+ 	 * unused slot.  The numbers and values fields are NULL in an unused
+ 	 * slot, and might also be NULL in a used slot if the slot kind has
+ 	 * no need for one or the other.
+ 	 * ----------------
+ 	 */
+ 
+ 	int2		sta3kind1;
+ 	int2		sta3kind2;
+ 	int2		sta3kind3;
+ 	int2		sta3kind4;
+ 
+ 	Oid			sta3op1;
+ 	Oid			sta3op2;
+ 	Oid			sta3op3;
+ 	Oid			sta3op4;
+ 
+ 	/*
+ 	 * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ 	 * (NULL).  Read them with heap_getattr, not as C struct entries; they are
+ 	 * declared here for the catalog machinery.  NOTE(review): sta3expr looks
+ 	 * variable-length too; confirm later fields aren't read via this struct.
+ 	 */
+ 
+ 	float4		sta3numbers1[1];
+ 	float4		sta3numbers2[1];
+ 	float4		sta3numbers3[1];
+ 	float4		sta3numbers4[1];
+ 
+ 	/*
+ 	 * Values in these arrays are values of the expression's data type.  We
+ 	 * presently have to cheat quite a bit to allow polymorphic arrays of this
+ 	 * kind, but perhaps someday it'll be a less bogus facility.
+ 	 */
+ 	anyarray	sta3values1;
+ 	anyarray	sta3values2;
+ 	anyarray	sta3values3;
+ 	anyarray	sta3values4;
+ } FormData_pg_statistic3;
+ 
+ #define STATISTIC_NUM_SLOTS  4
+ 
+ #undef anyarray
+ 
+ 
+ /* ----------------
+  *		Form_pg_statistic3 corresponds to a pointer to a tuple with
+  *		the format of pg_statistic3 relation.
+  * ----------------
+  */
+ typedef FormData_pg_statistic3 *Form_pg_statistic3;
+ 
+ /* ----------------
+  *		compiler constants for pg_statistic3
+  * ----------------
+  */
+ #define Natts_pg_statistic3				22
+ #define Anum_pg_statistic3_sta3relid		1
+ #define Anum_pg_statistic3_sta3expr		2
+ #define Anum_pg_statistic3_sta3inherit	3
+ #define Anum_pg_statistic3_sta3nullfrac	4
+ #define Anum_pg_statistic3_sta3width		5
+ #define Anum_pg_statistic3_sta3distinct	6
+ #define Anum_pg_statistic3_sta3kind1		7
+ #define Anum_pg_statistic3_sta3kind2		8
+ #define Anum_pg_statistic3_sta3kind3		9
+ #define Anum_pg_statistic3_sta3kind4		10
+ #define Anum_pg_statistic3_sta3op1		11
+ #define Anum_pg_statistic3_sta3op2		12
+ #define Anum_pg_statistic3_sta3op3		13
+ #define Anum_pg_statistic3_sta3op4		14
+ #define Anum_pg_statistic3_sta3numbers1	15
+ #define Anum_pg_statistic3_sta3numbers2	16
+ #define Anum_pg_statistic3_sta3numbers3	17
+ #define Anum_pg_statistic3_sta3numbers4	18
+ #define Anum_pg_statistic3_sta3values1	19
+ #define Anum_pg_statistic3_sta3values2	20
+ #define Anum_pg_statistic3_sta3values3	21
+ #define Anum_pg_statistic3_sta3values4	22
+ 
+ #if 0
+ 
+ /*
+  * Currently, four statistical slot "kinds" are defined: most common values,
+  * histogram, correlation, and most common elements.  More "kinds" will likely
+  * appear to help cope with non-scalar datatypes.  Also, custom data types
+  * can define their own "kind" codes by mutual agreement between a custom
+  * typanalyze routine and the selectivity estimation functions of the type's
+  * operators.
+  *
+  * Code reading the pg_statistic3 relation should not assume that a particular
+  * data "kind" will appear in any particular slot.	Instead, search the
+  * stakind fields to see if the desired data is available.	(The standard
+  * function get_attstatsslot() may be used for this.)
+  */
+ 
+ /*
+  * The present allocation of "kind" codes is:
+  *
+  *	1-99:		reserved for assignment by the core PostgreSQL project
+  *				(values in this range will be documented in this file)
+  *	100-199:	reserved for assignment by the PostGIS project
+  *				(values to be documented in PostGIS documentation)
+  *	200-299:	reserved for assignment by the ESRI ST_Geometry project
+  *				(values to be documented in ESRI ST_Geometry documentation)
+  *	300-9999:	reserved for future public assignments
+  *
+  * For private use you may choose a "kind" code at random in the range
+  * 10000-30000.  However, for code that is to be widely disseminated it is
+  * better to obtain a publicly defined "kind" code by request from the
+  * PostgreSQL Global Development Group.
+  */
+ 
+ /*
+  * In a "most common values" slot, staop is the OID of the "=" operator
+  * used to decide whether values are the same or not.  stavalues contains
+  * the K most common non-null values appearing in the column, and stanumbers
+  * contains their frequencies (fractions of total row count).  The values
+  * shall be ordered in decreasing frequency.  Note that since the arrays are
+  * variable-size, K may be chosen by the statistics collector.	Values should
+  * not appear in MCV unless they have been observed to occur more than once;
+  * a unique column will have no MCV slot.
+  */
+ #define STATISTIC_KIND_MCV	1
+ 
+ /*
+  * A "histogram" slot describes the distribution of scalar data.  staop is
+  * the OID of the "<" operator that describes the sort ordering.  (In theory,
+  * more than one histogram could appear, if a datatype has more than one
+  * useful sort operator.)  stavalues contains M (>=2) non-null values that
+  * divide the non-null column data values into M-1 bins of approximately equal
+  * population.	The first stavalues item is the MIN and the last is the MAX.
+  * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+  * slot is also provided, then the histogram describes the data distribution
+  * *after removing the values listed in MCV* (thus, it's a "compressed
+  * histogram" in the technical parlance).  This allows a more accurate
+  * representation of the distribution of a column with some very-common
+  * values.	In a column with only a few distinct values, it's possible that
+  * the MCV list describes the entire data population; in this case the
+  * histogram reduces to empty and should be omitted.
+  */
+ #define STATISTIC_KIND_HISTOGRAM  2
+ 
+ /*
+  * A "correlation" slot describes the correlation between the physical order
+  * of table tuples and the ordering of data values of this column, as seen
+  * by the "<" operator identified by staop.  (As with the histogram, more
+  * than one entry could theoretically appear.)	stavalues is not used and
+  * should be NULL.	stanumbers contains a single entry, the correlation
+  * coefficient between the sequence of data values and the sequence of
+  * their actual tuple positions.  The coefficient ranges from +1 to -1.
+  */
+ #define STATISTIC_KIND_CORRELATION	3
+ 
+ /*
+  * A "most common elements" slot is similar to a "most common values" slot,
+  * except that it stores the most common non-null *elements* of the column
+  * values.	This is useful when the column datatype is an array or some other
+  * type with identifiable elements (for instance, tsvector).  staop contains
+  * the equality operator appropriate to the element type.  stavalues contains
+  * the most common element values, and stanumbers their frequencies.  Unlike
+  * MCV slots, the values are sorted into order (to support binary search
+  * for a particular value).  Since this puts the minimum and maximum
+  * frequencies at unpredictable spots in stanumbers, there are two extra
+  * members of stanumbers, holding copies of the minimum and maximum
+  * frequencies.
+  *
+  * Note: in current usage for tsvector columns, the stavalues elements are of
+  * type text, even though their representation within tsvector is not
+  * exactly text.
+  */
+ #define STATISTIC_KIND_MCELEM  4
+ 
+ #endif
+ 
+ #endif   /* PG_STATISTIC3_H */
diff -dcrpN postgresql.orig/src/include/commands/defrem.h postgresql/src/include/commands/defrem.h
*** postgresql.orig/src/include/commands/defrem.h	2011-04-11 15:36:27.243806451 +0200
--- postgresql/src/include/commands/defrem.h	2011-04-28 14:21:14.782173399 +0200
*************** extern char *ChooseIndexName(const char 
*** 50,55 ****
--- 50,56 ----
  				bool primary, bool isconstraint);
  extern List *ChooseIndexColumnNames(List *indexElems);
  extern Oid	GetDefaultOpClass(Oid type_id, Oid am_id);
+ extern void ExtraStatistics(ExtraStatStmt *stmt);
  
  /* commands/functioncmds.c */
  extern void CreateFunction(CreateFunctionStmt *stmt, const char *queryString);
diff -dcrpN postgresql.orig/src/include/nodes/nodes.h postgresql/src/include/nodes/nodes.h
*** postgresql.orig/src/include/nodes/nodes.h	2011-03-22 17:53:48.045903422 +0100
--- postgresql/src/include/nodes/nodes.h	2011-04-28 14:21:14.784173265 +0200
*************** typedef enum NodeTag
*** 362,367 ****
--- 362,368 ----
  	T_CreateExtensionStmt,
  	T_AlterExtensionStmt,
  	T_AlterExtensionContentsStmt,
+ 	T_ExtraStatStmt,
  
  	/*
  	 * TAGS FOR PARSE TREE NODES (parsenodes.h)
diff -dcrpN postgresql.orig/src/include/nodes/parsenodes.h postgresql/src/include/nodes/parsenodes.h
*** postgresql.orig/src/include/nodes/parsenodes.h	2011-04-26 09:54:04.106355573 +0200
--- postgresql/src/include/nodes/parsenodes.h	2011-04-28 14:21:14.789172925 +0200
*************** typedef enum DropBehavior
*** 1160,1165 ****
--- 1160,1178 ----
  } DropBehavior;
  
  /* ----------------------
+  *	Extra Statistics (cross-column or expression)
+  * ----------------------
+  */
+ typedef struct ExtraStatStmt
+ {
+ 	NodeTag		type;
+ 	bool		create;			/* presumably true = create, false = drop; confirm in gram.y */
+ 	RangeVar   *relation;		/* presumably the relation the statistics are for */
+ 	List	   *columns;		/* presumably column list for cross-column stats; TODO confirm */
+ 	Node	   *expr;			/* presumably expression for expression stats; TODO confirm */
+ } ExtraStatStmt;
+ 
+ /* ----------------------
   *	Alter Table
   * ----------------------
   */
diff -dcrpN postgresql.orig/src/include/parser/kwlist.h postgresql/src/include/parser/kwlist.h
*** postgresql.orig/src/include/parser/kwlist.h	2011-03-18 13:11:36.826637445 +0100
--- postgresql/src/include/parser/kwlist.h	2011-04-28 14:21:14.790172858 +0200
*************** PG_KEYWORD("exclusive", EXCLUSIVE, UNRES
*** 148,153 ****
--- 148,154 ----
  PG_KEYWORD("execute", EXECUTE, UNRESERVED_KEYWORD)
  PG_KEYWORD("exists", EXISTS, COL_NAME_KEYWORD)
  PG_KEYWORD("explain", EXPLAIN, UNRESERVED_KEYWORD)
+ PG_KEYWORD("expression", EXPRESSION, UNRESERVED_KEYWORD)
  PG_KEYWORD("extension", EXTENSION, UNRESERVED_KEYWORD)
  PG_KEYWORD("external", EXTERNAL, UNRESERVED_KEYWORD)
  PG_KEYWORD("extract", EXTRACT, COL_NAME_KEYWORD)
diff -dcrpN postgresql.orig/src/include/parser/parse_utilcmd.h postgresql/src/include/parser/parse_utilcmd.h
*** postgresql.orig/src/include/parser/parse_utilcmd.h	2011-01-04 15:13:16.163549374 +0100
--- postgresql/src/include/parser/parse_utilcmd.h	2011-04-28 14:21:14.792172725 +0200
*************** extern void transformRuleStmt(RuleStmt *
*** 25,28 ****
--- 25,33 ----
  				  List **actions, Node **whereClause);
  extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
  
+ extern ExtraStatStmt *transformExtraStatistics(ExtraStatStmt *stmt,
+ 						const char *queryString);
+ 
+ extern bool set_location_unknown_walker(Node *node, void *context);
+ 
  #endif   /* PARSE_UTILCMD_H */
diff -dcrpN postgresql.orig/src/include/utils/lsyscache.h postgresql/src/include/utils/lsyscache.h
*** postgresql.orig/src/include/utils/lsyscache.h	2011-04-11 15:36:27.256805539 +0200
--- postgresql/src/include/utils/lsyscache.h	2011-04-28 14:21:14.793172658 +0200
***************
*** 16,21 ****
--- 16,22 ----
  #include "access/attnum.h"
  #include "access/htup.h"
  #include "nodes/pg_list.h"
+ #include "utils/selfuncs.h"
  
  /* I/O function selector for get_type_io_data */
  typedef enum IOFuncSelector
*************** extern Oid	getBaseType(Oid typid);
*** 131,137 ****
  extern Oid	getBaseTypeAndTypmod(Oid typid, int32 *typmod);
  extern int32 get_typavgwidth(Oid typid, int32 typmod);
  extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
! extern bool get_attstatsslot(HeapTuple statstuple,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
--- 132,139 ----
  extern Oid	getBaseTypeAndTypmod(Oid typid, int32 *typmod);
  extern int32 get_typavgwidth(Oid typid, int32 typmod);
  extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
! 
! extern bool get_attstatsslot(HeapTuple statstuple, StatType stat_type,
  				 Oid atttype, int32 atttypmod,
  				 int reqkind, Oid reqop,
  				 Oid *actualop,
diff -dcrpN postgresql.orig/src/include/utils/selfuncs.h postgresql/src/include/utils/selfuncs.h
*** postgresql.orig/src/include/utils/selfuncs.h	2011-04-13 10:11:05.060214051 +0200
--- postgresql/src/include/utils/selfuncs.h	2011-04-28 14:21:14.795172522 +0200
***************
*** 62,75 ****
  			p = 1.0; \
  	} while (0)
  
  
  /* Return data from examine_variable and friends */
  typedef struct VariableStatData
  {
  	Node	   *var;			/* the Var or expression tree */
  	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
! 	HeapTuple	statsTuple;		/* pg_statistic tuple, or NULL if none */
! 	/* NB: if statsTuple!=NULL, it must be freed when caller is done */
  	void		(*freefunc) (HeapTuple tuple);	/* how to free statsTuple */
  	Oid			vartype;		/* exposed type of expression */
  	Oid			atttype;		/* type to pass to get_attstatsslot */
--- 62,81 ----
  			p = 1.0; \
  	} while (0)
  
+ typedef enum StatType {
+ 	STAT_VARIABLE,		/* presumably: stats tuple comes from pg_statistic */
+ 	STAT_EXPRESSION		/* presumably: stats tuple comes from pg_statistic3 */
+ } StatType;
  
  /* Return data from examine_variable and friends */
  typedef struct VariableStatData
  {
  	Node	   *var;			/* the Var or expression tree */
  	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
! 	StatType	stats_type;
! 	HeapTuple	statsTuple;		/* pg_statistic or pg_statistic3 tuple depending on stats_type
! 						 * or NULL if none */
! 	/* NB: if statsTuple!=NULL, it must be freed when caller is done */
  	void		(*freefunc) (HeapTuple tuple);	/* how to free statsTuple */
  	Oid			vartype;		/* exposed type of expression */
  	Oid			atttype;		/* type to pass to get_attstatsslot */
diff -dcrpN postgresql.orig/src/include/utils/syscache.h postgresql/src/include/utils/syscache.h
*** postgresql.orig/src/include/utils/syscache.h	2011-02-10 10:36:32.352678334 +0100
--- postgresql/src/include/utils/syscache.h	2011-04-28 14:21:14.796172454 +0200
*************** enum SysCacheIdentifier
*** 73,78 ****
--- 73,79 ----
  	RELNAMENSP,
  	RELOID,
  	RULERELNAME,
+ 	STAT3RELEXPRINH,
  	STATRELATTINH,
  	TABLESPACEOID,
  	TSCONFIGMAP,
diff -dcrpN postgresql.orig/src/test/regress/expected/sanity_check.out postgresql/src/test/regress/expected/sanity_check.out
*** postgresql.orig/src/test/regress/expected/sanity_check.out	2011-02-10 10:36:32.374676822 +0100
--- postgresql/src/test/regress/expected/sanity_check.out	2011-04-28 14:21:14.797172386 +0200
*************** SELECT relname, relhasindex
*** 121,126 ****
--- 121,128 ----
   pg_shdepend             | t
   pg_shdescription        | t
   pg_statistic            | t
+  pg_statistic2           | t
+  pg_statistic3           | t
   pg_tablespace           | t
   pg_trigger              | t
   pg_ts_config            | t
*************** SELECT relname, relhasindex
*** 157,163 ****
   timetz_tbl              | f
   tinterval_tbl           | f
   varchar_tbl             | f
! (146 rows)
  
  --
  -- another sanity check: every system catalog that has OIDs should have
--- 159,165 ----
   timetz_tbl              | f
   tinterval_tbl           | f
   varchar_tbl             | f
! (148 rows)
  
  --
  -- another sanity check: every system catalog that has OIDs should have