diff -dcrpN postgresql.orig/src/backend/catalog/Makefile postgresql/src/backend/catalog/Makefile *** postgresql.orig/src/backend/catalog/Makefile 2011-02-22 18:51:42.675518441 +0100 --- postgresql/src/backend/catalog/Makefile 2011-04-28 14:21:14.694179328 +0200 *************** POSTGRES_BKI_SRCS = $(addprefix $(top_sr *** 31,38 **** pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \ pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \ pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \ ! pg_statistic.h pg_rewrite.h pg_trigger.h pg_description.h \ ! pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \ pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \ pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \ --- 31,38 ---- pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \ pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \ pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \ ! pg_statistic.h pg_statistic2.h pg_statistic3.h pg_rewrite.h pg_trigger.h \ ! 
pg_description.h pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \ pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \ pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \ diff -dcrpN postgresql.orig/src/backend/commands/indexcmds.c postgresql/src/backend/commands/indexcmds.c *** postgresql.orig/src/backend/commands/indexcmds.c 2011-04-26 09:54:04.012362009 +0200 --- postgresql/src/backend/commands/indexcmds.c 2011-04-28 14:21:14.697179127 +0200 *************** *** 26,32 **** --- 26,35 ---- #include "catalog/indexing.h" #include "catalog/pg_opclass.h" #include "catalog/pg_opfamily.h" + #include "catalog/pg_statistic2.h" + #include "catalog/pg_statistic3.h" #include "catalog/pg_tablespace.h" + #include "catalog/pg_type.h" #include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/tablecmds.h" *************** *** 36,41 **** --- 39,45 ---- #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/planner.h" + #include "optimizer/var.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" #include "parser/parse_oper.h" *************** ReindexDatabase(const char *databaseName *** 1693,1695 **** --- 1697,1961 ---- MemoryContextDelete(private_context); } + + /* + * DoCrossColStat + * Add (stmt->create) or remove the pg_statistic2 row for stmt->relation and the sorted attribute numbers of stmt->columns + */ + static void + DoCrossColStat(ExtraStatStmt *stmt) + { + Oid relId; + Relation rel; + ListCell *l; + int len, i, j; + bool differ = false; + AttrNumber *attnums; + AttrNumber *sorted_attnums; + int16 typlen; + bool typbyval; + char typalign; + Datum *datum_attnums; + ArrayType *arr_attnums; + ScanKeyData scanKey[2]; + SysScanDesc scan; + HeapTuple tuple; + TupleDesc tupDesc; + Datum values[Natts_pg_statistic2]; + bool nulls[Natts_pg_statistic2]; + + relId = RangeVarGetRelid(stmt->relation, false); + + len = list_length(stmt->columns); + if (len < 2) + elog(ERROR, "cross column statistics need at least 
two columns"); + + attnums = (int2 *)palloc(len * sizeof(AttrNumber)); + sorted_attnums = (int2 *)palloc(len * sizeof(AttrNumber)); + datum_attnums = (Datum *)palloc(len * sizeof(Datum)); + + i = 0; + foreach(l, stmt->columns) + { + Node *node = (Node *) lfirst(l); + Var *var; + + if (!IsA(node, Var)) + elog(ERROR, "not a column reference"); + + var = (Var *) node; + + if (var->varattno == 0) + elog(ERROR, "row expansion via \"*\" is not supported here"); + + attnums[i++] = var->varattno; + } + + for (i = 0; i < len; i++) + sorted_attnums[i] = attnums[i]; + for (i = 0; i < len - 1; i++) + for (j = i+1; j < len; j++) + if (sorted_attnums[i] > sorted_attnums[j]) + { + AttrNumber tmp = sorted_attnums[i]; + + sorted_attnums[i] = sorted_attnums[j]; + sorted_attnums[j] = tmp; + } + + for (i = 0; i < len; i++) + { + if (!differ && attnums[i] != sorted_attnums[i]) + differ = true; + + if ((i < len - 1) && sorted_attnums[i] == sorted_attnums[i+1]) + elog(ERROR, "column list must contain every column exactly once"); + + datum_attnums[i] = Int16GetDatum(sorted_attnums[i]); + } + + if (differ) + elog(WARNING, "the column list was reordered in the order of table attributes"); + + get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign); + arr_attnums = construct_array(datum_attnums, len, + INT2OID, typlen, typbyval, typalign); + + rel = heap_open(Statistic2RelationId, RowExclusiveLock); + + /* + * There's no syscache for pg_statistic2, + * arrays aren't supported there as search keys. + * We need to do it the hard way, with an index scan. 
+ */ + ScanKeyInit(&scanKey[0], + Anum_pg_statistic2_sta2relid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relId)); + ScanKeyInit(&scanKey[1], + Anum_pg_statistic2_sta2attnums, + BTEqualStrategyNumber, F_ARRAY_EQ, + PointerGetDatum(arr_attnums)); + + scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true, + SnapshotNow, 2, scanKey); + + tuple = systable_getnext(scan); + + if (stmt->create) + { + if (HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + elog(ERROR, "pg_statistic2 entry already exists for this table and set of columns"); + } + systable_endscan(scan); + + for (i = 0; i < Natts_pg_statistic2; i++) + nulls[i] = TRUE; + + values[Anum_pg_statistic2_sta2relid - 1] = ObjectIdGetDatum(relId); + nulls[Anum_pg_statistic2_sta2relid - 1] = FALSE; + + values[Anum_pg_statistic2_sta2attnums - 1] = PointerGetDatum(arr_attnums); + nulls[Anum_pg_statistic2_sta2attnums - 1] = FALSE; + + values[Anum_pg_statistic2_sta2inherit - 1] = BoolGetDatum(FALSE); + nulls[Anum_pg_statistic2_sta2inherit - 1] = FALSE; + + tupDesc = RelationGetDescr(rel); + + tuple = heap_form_tuple(tupDesc, values, nulls); + + simple_heap_insert(rel, tuple); + + CatalogUpdateIndexes(rel, tuple); + } + else + { + if (!HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + elog(ERROR, "pg_statistic2 entry doesn't exist for this table and set of columns"); + } + + simple_heap_delete(rel, &tuple->t_self); + + systable_endscan(scan); + } + + relation_close(rel, NoLock); + } + + /* + * DoExprStat + * Add (stmt->create) or remove the pg_statistic3 row for stmt->relation and the nodeToString() text of stmt->expr + */ + static void + DoExprStat(ExtraStatStmt *stmt) + { + Oid relId; + Relation rel; + int i; + char *exprbin; + Datum exprbindatum; + ScanKeyData scanKey[2]; + SysScanDesc scan; + HeapTuple tuple; + TupleDesc tupDesc; + Datum values[Natts_pg_statistic3]; + bool nulls[Natts_pg_statistic3]; + + if (IsA(stmt->expr, Var) || IsA(stmt->expr, ColumnRef)) + elog(ERROR, "single column are covered by basic statistics"); + + relId = 
RangeVarGetRelid(stmt->relation, false); + + rel = heap_open(Statistic3RelationId, RowExclusiveLock); + + exprbin = nodeToString(stmt->expr); + exprbindatum = CStringGetTextDatum(exprbin); + + /* + * There's no syscache for pg_statistic3 either. + * The expression is matched by its nodeToString() text. + * We need to do it the hard way, with an index scan. + */ + ScanKeyInit(&scanKey[0], + Anum_pg_statistic3_sta3relid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relId)); + ScanKeyInit(&scanKey[1], + Anum_pg_statistic3_sta3expr, + BTEqualStrategyNumber, F_TEXTEQ, + exprbindatum); + + scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true, + SnapshotNow, 2, scanKey); + + tuple = systable_getnext(scan); + + if (stmt->create) + { + if (HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + elog(ERROR, "pg_statistic3 entry already exists for this table and expression"); + } + systable_endscan(scan); + + if (!contain_var_clause(stmt->expr)) + elog(ERROR, "constant expressions are not interesting"); + + for (i = 0; i < Natts_pg_statistic3; i++) + nulls[i] = TRUE; + + values[Anum_pg_statistic3_sta3relid - 1] = ObjectIdGetDatum(relId); + nulls[Anum_pg_statistic3_sta3relid - 1] = FALSE; + + values[Anum_pg_statistic3_sta3expr - 1] = exprbindatum; + nulls[Anum_pg_statistic3_sta3expr - 1] = FALSE; + + values[Anum_pg_statistic3_sta3inherit - 1] = BoolGetDatum(FALSE); + nulls[Anum_pg_statistic3_sta3inherit - 1] = FALSE; + + tupDesc = RelationGetDescr(rel); + + tuple = heap_form_tuple(tupDesc, values, nulls); + + simple_heap_insert(rel, tuple); + + CatalogUpdateIndexes(rel, tuple); + } + else + { + if (!HeapTupleIsValid(tuple)) + { + systable_endscan(scan); + elog(ERROR, "pg_statistic3 entry doesn't exist for this table and expression"); + } + + simple_heap_delete(rel, &tuple->t_self); + + systable_endscan(scan); + } + + pfree(exprbin); + pfree(DatumGetPointer(exprbindatum)); + + relation_close(rel, NoLock); + } + + /* + * ExtraStatistics + * Add or remove one entry in pg_statistic2 or 
pg_statistic3 + */ + void ExtraStatistics(ExtraStatStmt *stmt) + { + if (list_length(stmt->columns) > 0) + DoCrossColStat(stmt); + else if (stmt->expr != NULL) + DoExprStat(stmt); + else + elog(ERROR, "internal error in ExtraStatistics"); + } diff -dcrpN postgresql.orig/src/backend/executor/nodeHash.c postgresql/src/backend/executor/nodeHash.c *** postgresql.orig/src/backend/executor/nodeHash.c 2011-04-11 15:36:27.096816773 +0200 --- postgresql/src/backend/executor/nodeHash.c 2011-04-28 14:21:14.700178924 +0200 *************** ExecHashBuildSkewHash(HashJoinTable hash *** 1144,1150 **** if (!HeapTupleIsValid(statsTuple)) return; ! if (get_attstatsslot(statsTuple, node->skewColType, node->skewColTypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, &values, &nvalues, --- 1144,1150 ---- if (!HeapTupleIsValid(statsTuple)) return; ! if (get_attstatsslot(statsTuple, STAT_VARIABLE, node->skewColType, node->skewColTypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, &values, &nvalues, diff -dcrpN postgresql.orig/src/backend/nodes/copyfuncs.c postgresql/src/backend/nodes/copyfuncs.c *** postgresql.orig/src/backend/nodes/copyfuncs.c 2011-04-19 09:37:54.828715621 +0200 --- postgresql/src/backend/nodes/copyfuncs.c 2011-04-28 14:21:14.704178653 +0200 *************** _copyCreateForeignTableStmt(CreateForeig *** 3458,3463 **** --- 3458,3476 ---- return newnode; } + static ExtraStatStmt * + _copyExtraStatStmt(ExtraStatStmt *from) + { + ExtraStatStmt *newnode = makeNode(ExtraStatStmt); + + COPY_SCALAR_FIELD(create); + newnode->relation = _copyRangeVar(from->relation); + COPY_NODE_FIELD(columns); + COPY_NODE_FIELD(expr); + + return newnode; + } + static CreateTrigStmt * _copyCreateTrigStmt(CreateTrigStmt *from) { *************** copyObject(void *from) *** 4377,4382 **** --- 4390,4398 ---- case T_CreateForeignTableStmt: retval = _copyCreateForeignTableStmt(from); break; + case T_ExtraStatStmt: + retval = _copyExtraStatStmt(from); + break; case T_CreateTrigStmt: retval = _copyCreateTrigStmt(from); 
break; diff -dcrpN postgresql.orig/src/backend/nodes/equalfuncs.c postgresql/src/backend/nodes/equalfuncs.c *** postgresql.orig/src/backend/nodes/equalfuncs.c 2011-04-19 09:37:54.829715550 +0200 --- postgresql/src/backend/nodes/equalfuncs.c 2011-04-28 14:21:14.707178453 +0200 *************** _equalCreateForeignTableStmt(CreateForei *** 1795,1800 **** --- 1795,1812 ---- } static bool + _equalExtraStatStmt(ExtraStatStmt *a, ExtraStatStmt *b) + { + COMPARE_SCALAR_FIELD(create); + if (!_equalRangeVar(a->relation, b->relation)) + return FALSE; + COMPARE_NODE_FIELD(columns); + COMPARE_NODE_FIELD(expr); + + return true; + } + + static bool _equalCreateTrigStmt(CreateTrigStmt *a, CreateTrigStmt *b) { COMPARE_STRING_FIELD(trigname); *************** equal(void *a, void *b) *** 2930,2935 **** --- 2942,2950 ---- case T_CreateForeignTableStmt: retval = _equalCreateForeignTableStmt(a, b); break; + case T_ExtraStatStmt: + retval = _equalExtraStatStmt(a, b); + break; case T_CreateTrigStmt: retval = _equalCreateTrigStmt(a, b); break; diff -dcrpN postgresql.orig/src/backend/optimizer/path/clausesel.c postgresql/src/backend/optimizer/path/clausesel.c *** postgresql.orig/src/backend/optimizer/path/clausesel.c 2011-01-04 15:13:15.940560845 +0100 --- postgresql/src/backend/optimizer/path/clausesel.c 2011-04-28 14:21:14.720177575 +0200 *************** *** 13,29 **** --- 13,40 ---- *------------------------------------------------------------------------- */ #include "postgres.h" + #include "postgres_ext.h" + #include "access/skey.h" + #include "access/relscan.h" + #include "catalog/indexing.h" #include "catalog/pg_operator.h" + #include "catalog/pg_statistic2.h" + #include "catalog/pg_statistic3.h" + #include "catalog/pg_type.h" #include "nodes/makefuncs.h" + #include "nodes/pg_list.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/plancat.h" + #include "optimizer/var.h" #include "parser/parsetree.h" + #include 
"utils/array.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" + #include "utils/tqual.h" /* *************** typedef struct RangeQueryClause *** 34,39 **** --- 45,51 ---- { struct RangeQueryClause *next; /* next in linked list */ Node *var; /* The common variable of the clauses */ + AttrNumber varattno; /* for finding cross-column statistics */ bool have_lobound; /* found a low-bound clause yet? */ bool have_hibound; /* found a high-bound clause yet? */ Selectivity lobound; /* Selectivity of a var > something clause */ *************** typedef struct RangeQueryClause *** 43,48 **** --- 55,75 ---- static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2); + typedef struct CrossColumnClause + { + struct CrossColumnClause *next; /* next in linked list */ + Node *var; /* the variable side of the clause */ + AttrNumber varattno; /* var's attribute number, 0 if var is not a plain Var */ + Node *expr; /* the other operand of the clause */ + Selectivity sel; /* selectivity passed to addXCClause for this clause */ + } CrossColumnClause; + + static void addXCClause(CrossColumnClause **xclist, Node *clause, + bool varonleft, Selectivity s2); + + static bool crosscolumn_selectivity(Oid relId, + CrossColumnClause **xcnext, RangeQueryClause **rqlist, + Selectivity *result_sel); /**************************************************************************** * ROUTINES TO COMPUTE SELECTIVITIES *************** clauselist_selectivity(PlannerInfo *root *** 99,106 **** --- 126,138 ---- { Selectivity s1 = 1.0; RangeQueryClause *rqlist = NULL; + CrossColumnClause *xclist = NULL; + Oid relId = InvalidOid; + bool onerel = false; ListCell *l; + // elog(NOTICE, "clauselist_selectivity varRelid %d, list length %d", varRelid, list_length(clauses)); + /* * If there's exactly one clause, then no use in trying to match up pairs, * so just go directly to clause_selectivity(). 
*************** clauselist_selectivity(PlannerInfo *root *** 162,167 **** --- 194,215 ---- (varonleft = false, is_pseudo_constant_clause_relids(linitial(expr->args), rinfo->left_relids))); + if (ok) + { + int relid; + Oid tmprelId; + + relid = bms_singleton_member(rinfo->clause_relids); + tmprelId = root->simple_rte_array[relid]->relid; + + if (!OidIsValid(relId)) + { + onerel = true; + relId = tmprelId; + } + else if (relId != tmprelId) + onerel = false; + } } else { *************** clauselist_selectivity(PlannerInfo *root *** 169,174 **** --- 217,241 ---- (is_pseudo_constant_clause(lsecond(expr->args)) || (varonleft = false, is_pseudo_constant_clause(linitial(expr->args)))); + if (ok) + { + Relids relids; + int relid; + Oid tmprelId; + + relids = pull_varnos(clause); + relid = bms_singleton_member(relids); + tmprelId = root->simple_rte_array[relid]->relid; + bms_free(relids); + + if (!OidIsValid(relId)) + { + onerel = true; + relId = tmprelId; + } + else if (relId != tmprelId) + onerel = false; + } } if (ok) *************** clauselist_selectivity(PlannerInfo *root *** 188,193 **** --- 255,264 ---- addRangeClause(&rqlist, clause, varonleft, false, s2); break; + case F_EQSEL: + addXCClause(&xclist, clause, + varonleft, s2); + break; default: /* Just merge the selectivity in generically */ s1 = s1 * s2; *************** clauselist_selectivity(PlannerInfo *root *** 202,207 **** --- 273,299 ---- } /* + * Scan xclist and rqlist recursively and filter out + * all possible cross-column selectivities. + */ + if (onerel) + crosscolumn_selectivity(relId, &xclist, &rqlist, &s1); + + /* + * Free the cross-column clauses + */ + while (xclist != NULL) + { + CrossColumnClause *xcnext; + + s1 = s1 * xclist->sel; + + xcnext = xclist->next; + pfree(xclist); + xclist = xcnext; + } + + /* * Now scan the rangequery pair list. 
*/ while (rqlist != NULL) *************** clauselist_selectivity(PlannerInfo *root *** 279,284 **** --- 371,392 ---- return s1; } + static AttrNumber + var_get_attno(Node *clause) + { + Var *var; + + if (IsA(clause, Var)) + { + var = (Var *)clause; + // elog(NOTICE, "var_get_attno varattno %d", var->varattno); + return var->varattno; + } + + // elog(NOTICE, "var_get_attno default 0"); + return 0; + } + /* * addRangeClause --- add a new range clause for clauselist_selectivity * *************** addRangeClause(RangeQueryClause **rqlist *** 358,363 **** --- 466,473 ---- /* No matching var found, so make a new clause-pair data structure */ rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); rqelem->var = var; + rqelem->varattno = var_get_attno(var); + if (is_lobound) { rqelem->have_lobound = true; *************** addRangeClause(RangeQueryClause **rqlist *** 375,380 **** --- 485,522 ---- } /* + * addXCClause - add a new clause to the list of clauses for cross-column stats inspection + * The new element is pushed onto the head of *xclist; varonleft tells which operand of the clause is the variable, and s is stored as the clause's own selectivity. + */ + static void + addXCClause(CrossColumnClause **xclist, Node *clause, + bool varonleft, Selectivity s) + { + CrossColumnClause *xcelem; + Node *var; + Node *expr; + + if (varonleft) + { + var = get_leftop((Expr *) clause); + expr = get_rightop((Expr *) clause); + } + else + { + var = get_rightop((Expr *) clause); + expr = get_leftop((Expr *) clause); + } + + xcelem = (CrossColumnClause *) palloc(sizeof(CrossColumnClause)); + xcelem->var = var; + xcelem->varattno = var_get_attno(var); + xcelem->expr = expr; + xcelem->sel = s; + xcelem->next = *xclist; + *xclist = xcelem; + } + + /* * bms_is_subset_singleton * * Same result as bms_is_subset(s, bms_make_singleton(x)), *************** clause_selectivity(PlannerInfo *root, *** 499,504 **** --- 641,648 ---- { rinfo = (RestrictInfo *) clause; + // elog(NOTICE, "RestrictInfo, %s", nodeToString(rinfo->clause)); + /* * If the clause is marked pseudoconstant, then it will be used as a * gating qual and should not affect selectivity 
estimates; hence
*************** clause_selectivity(PlannerInfo *root,
*** 779,781 ****
--- 923,1258 ----
  
  	return s1;
  }
+ 
+ /*
+  * has_xcol_selectivity
+  *		Look for a pg_statistic2 row for relId whose sta2attnums equals
+  *		attnums[0..natts-1].
+  *
+  * Returns true and sets *result_sel if such a row exists, otherwise returns
+  * false and leaves *result_sel alone.  No statistics are read from the row
+  * yet; the selectivity handed back is a fixed placeholder.
+  */
+ static bool
+ has_xcol_selectivity(Oid relId, int natts, AttrNumber *attnums, Selectivity *result_sel)
+ {
+ 	Relation	rel;
+ 	Datum	   *datums = (Datum *)palloc(natts * sizeof(Datum));
+ 	ArrayType  *arr_attnums;
+ 	int		i;
+ 	int16		typlen;
+ 	bool		typbyval;
+ 	char		typalign;
+ 	ScanKeyData	scanKey[2];
+ 	SysScanDesc	scan;
+ 	HeapTuple	tuple;
+ 	bool		result;
+ 	Selectivity	sel = 1e-5; /* fixed selectivity for now */
+ 
+ 	for (i = 0; i < natts; i++)
+ 		datums[i] = Int16GetDatum(attnums[i]);
+ 
+ 	get_typlenbyvalalign(INT2OID, &typlen, &typbyval, &typalign);
+ 	arr_attnums = construct_array(datums, natts,
+ 					INT2OID, typlen, typbyval, typalign);
+ 
+ 	rel = heap_open(Statistic2RelationId, AccessShareLock);
+ 
+ 	ScanKeyInit(&scanKey[0],
+ 				Anum_pg_statistic2_sta2relid,
+ 				BTEqualStrategyNumber, F_OIDEQ,
+ 				ObjectIdGetDatum(relId));
+ 	ScanKeyInit(&scanKey[1],
+ 				Anum_pg_statistic2_sta2attnums,
+ 				BTEqualStrategyNumber, F_ARRAY_EQ,
+ 				PointerGetDatum(arr_attnums));
+ 
+ 	scan = systable_beginscan(rel, Statistic2RelidAttnumsInhIndexId, true,
+ 						SnapshotNow, 2, scanKey);
+ 
+ 	tuple = systable_getnext(scan);
+ 
+ 	result = HeapTupleIsValid(tuple);
+ 
+ 	systable_endscan(scan);
+ 
+ 	heap_close(rel, NoLock);
+ 
+ 	/* the scan is finished, so the key array can go away as well */
+ 	pfree(arr_attnums);
+ 	pfree(datums);
+ 
+ 	if (result)
+ 		*result_sel = sel;
+ 
+ 	return result;
+ }
+ 
+ /* one slot of a column combination; exactly one of xc and rq is set */
+ typedef struct {
+ 	CrossColumnClause *xc;
+ 	RangeQueryClause *rq;
+ } reclist;
+ 
+ /* a column combination: its slots and their sorted attribute numbers */
+ typedef struct {
+ 	int		len;
+ 	reclist	   *rclist;
+ 	AttrNumber *attnums;
+ } reclist2;
+ 
+ 
+ /*
+  * add_reclist
+  *		Remember the combination rclist[0..len-1] in *results, unless one
+  *		with the same sorted attribute numbers is already there.
+  */
+ static void
+ add_reclist(int len, reclist *rclist, List **results)
+ {
+ 	ListCell   *lc;
+ 	int		i, j;
+ 	reclist2   *rclist2;
+ 	AttrNumber *attnums = (AttrNumber *) palloc(len * sizeof(AttrNumber));
+ 
+ 	/* collect the varattnos from the Vars */
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		if (rclist[i].xc)
+ 			attnums[i] = rclist[i].xc->varattno;
+ 		else
+ 			attnums[i] = rclist[i].rq->varattno;
+ 	}
+ 
+ 	/* sort them ascending, the way DoCrossColStat() stores sta2attnums */
+ 	for (i = 0; i < len - 1; i++)
+ 		for (j = i + 1; j < len; j++)
+ 			if (attnums[i] > attnums[j])
+ 			{
+ 				AttrNumber tmp = attnums[i];
+ 
+ 				attnums[i] = attnums[j];
+ 				attnums[j] = tmp;
+ 			}
+ 
+ 	/* match this ordered attnum list against the current list of attnum arrays */
+ 	foreach(lc, *results)
+ 	{
+ 		reclist2 *rc2 = (reclist2 *) lfirst(lc);
+ 
+ 		if (len != rc2->len)
+ 			continue;
+ 
+ 		for (i = 0; i < len; i++)
+ 			if (attnums[i] != rc2->attnums[i])
+ 				break;
+ 		if (i < len)
+ 			continue;
+ 
+ 		/* found, so the freshly built array is not needed */
+ 		pfree(attnums);
+ 		return;
+ 	}
+ 
+ 	/* not found, add it to the list */
+ 	rclist2 = (reclist2 *) palloc(sizeof(reclist2));
+ 	rclist2->len = len;
+ 	rclist2->rclist = (reclist *) palloc(len * sizeof(reclist));
+ 	for (i = 0; i < len; i++)
+ 	{
+ 		rclist2->rclist[i].xc = rclist[i].xc;
+ 		rclist2->rclist[i].rq = rclist[i].rq;
+ 	}
+ 	rclist2->attnums = attnums;
+ 
+ 	*results = lappend(*results, rclist2);
+ }
+ 
+ /*
+  * compare_reclist2
+  *		Compare two combinations by length first, then element-wise by
+  *		attribute number.  Returns -1, 0 or 1.
+  */
+ static int
+ compare_reclist2(reclist2 *a, reclist2 *b)
+ {
+ 	int		i;
+ 
+ 	if (a->len < b->len)
+ 		return -1;
+ 	else if (a->len > b->len)
+ 		return 1;
+ 
+ 	for (i = 0; i < a->len; i++)
+ 	{
+ 		if (a->attnums[i] < b->attnums[i])
+ 			return -1;
+ 		else if (a->attnums[i] > b->attnums[i])
+ 			return 1;
+ 	}
+ 
+ 	return 0;
+ }
+ 
+ /*
+  * add_reclist2
+  *		Insert rclist2 into p_reclist2[0..*len-1], which is kept in
+  *		descending compare_reclist2() order, and bump *len.
+  *
+  * Returns false without storing anything if an equal entry is already in
+  * the array.  The caller must provide room for one more element.
+  */
+ static bool
+ add_reclist2(int *len, reclist2 **p_reclist2, reclist2 *rclist2)
+ {
+ 	int		curr_len = *len;
+ 	int		pos, j;
+ 
+ 	/* find the first entry that sorts below the new one */
+ 	for (pos = 0; pos < curr_len; pos++)
+ 	{
+ 		int		cmp = compare_reclist2(rclist2, p_reclist2[pos]);
+ 
+ 		if (cmp == 0)
+ 			return false;
+ 		if (cmp > 0)
+ 			break;
+ 	}
+ 
+ 	/* open a gap at pos; nothing moves when appending at the end */
+ 	for (j = curr_len; j > pos; j--)
+ 		p_reclist2[j] = p_reclist2[j - 1];
+ 	p_reclist2[pos] = rclist2;
+ 	*len = curr_len + 1;
+ 
+ 	return true;
+ }
+ 
+ /*
+  * collect_xcol_lists
+  *		Recursively enumerate combinations of the clauses in xclist and
+  *		rqlist, using rclist[curr_depth] as the slot for this level, and
+  *		hand each one to add_reclist().  Clauses whose varattno is 0 are
+  *		skipped.
+  */
+ static void
+ collect_xcol_lists(int curr_depth, CrossColumnClause *xclist, RangeQueryClause *rqlist, reclist *rclist, List **results)
+ {
+ 	CrossColumnClause *xc_tmp;
+ 	RangeQueryClause *rq_tmp;
+ 
+ 	for (xc_tmp = xclist; xc_tmp; xc_tmp = xc_tmp->next)
+ 	{
+ 		if (xc_tmp->varattno == 0)
+ 			continue;
+ 
+ 		rclist[curr_depth].xc = xc_tmp;
+ 		collect_xcol_lists(curr_depth + 1, xc_tmp->next, rqlist, rclist, results);
+ 		add_reclist(curr_depth + 1, rclist, results);
+ 		rclist[curr_depth].xc = NULL;
+ 	}
+ 
+ 	for (rq_tmp = rqlist; rq_tmp; rq_tmp = rq_tmp->next)
+ 	{
+ 		if (rq_tmp->varattno == 0)
+ 			continue;
+ 
+ 		rclist[curr_depth].rq = rq_tmp;
+ 		collect_xcol_lists(curr_depth + 1, (xclist ? xclist->next : xclist), rq_tmp->next, rclist, results);
+ 		add_reclist(curr_depth + 1, rclist, results);
+ 		rclist[curr_depth].rq = NULL;
+ 	}
+ }
+ 
+ /*
+  * crosscolumn_selectivity
+  *		Multiply *result_sel by the cross-column selectivity of every
+  *		column combination from *xclist and *rqlist that has a
+  *		pg_statistic2 entry for relId.
+  *
+  * Returns true if at least one such entry was found.  Removing the matched
+  * clauses from the two lists is still TODO, so the lists are unchanged.
+  */
+ static bool
+ crosscolumn_selectivity(Oid relId, CrossColumnClause **xclist, RangeQueryClause **rqlist, Selectivity *result_sel)
+ {
+ 	CrossColumnClause *xc;
+ 	RangeQueryClause *rq;
+ 	List	   *resultlist = NIL;
+ 	ListCell   *lc;
+ 	reclist	   *rclist;
+ 	reclist2  **p_rclist2;
+ 	int		max_len, i;
+ 	Selectivity	sel = 1.0;
+ 	bool		found_xc_sel = false;
+ 
+ 	max_len = 0;
+ 	for (rq = *rqlist; rq; max_len++, rq = rq->next)
+ 		;
+ 	for (xc = *xclist; xc; max_len++, xc = xc->next)
+ 		;
+ 
+ 	// elog(NOTICE, "crosscolumn_selectivity max length of array %d", max_len);
+ 
+ 	rclist = (reclist *) palloc(max_len * sizeof(reclist));
+ 	for (i = 0; i < max_len; i++)
+ 	{
+ 		rclist[i].xc = NULL;
+ 		rclist[i].rq = NULL;
+ 	}
+ 
+ 	collect_xcol_lists(0, *xclist, *rqlist, rclist, &resultlist);
+ 
+ 	pfree(rclist);
+ 
+ 	max_len = list_length(resultlist);
+ 	// elog(NOTICE, "crosscolumn_selectivity list length of arrays %d", max_len);
+ 	p_rclist2 = (reclist2 **) palloc(max_len * sizeof(reclist2 *));
+ 
+ 	max_len = 0;
+ 	foreach (lc, resultlist)
+ 	{
+ 		reclist2 *rclist2 = (reclist2 *) lfirst(lc);
+ 
+ 		if (!add_reclist2(&max_len, p_rclist2, rclist2))
+ 		{
+ 			pfree(rclist2->rclist);
+ 			pfree(rclist2->attnums);
+ 			pfree(rclist2);
+ 		}
+ 	}
+ 	// elog(NOTICE, "crosscolumn_selectivity length of ordered/unique array of previous list %d", max_len);
+ 
+ 	list_free(resultlist);
+ 
+ 	for (i = 0; i < max_len; i++)
+ 	{
+ 		if (p_rclist2[i] == NULL)
+ 			continue;
+ 
+ 		if (has_xcol_selectivity(relId, p_rclist2[i]->len, p_rclist2[i]->attnums, &sel))
+ 		{
+ 			int		j;
+ 
+ 			/* remove the xclist and rqlist members found in p_rclist2[i] */
+ 			for (j = 0; j < p_rclist2[i]->len; j++)
+ 			{
+ 				/* TODO ... */
+ 			}
+ 
+ 			/* also, remove later elements in p_rclist2 that has any of the removed elements */
+ 			/* TODO ... */
+ 
+ 			// elog(NOTICE, "crosscolumn_selectivity found xc selectivity %lf", sel);
+ 			found_xc_sel = true;
+ 			*result_sel *= sel;
+ 		}
+ 
+ 		pfree(p_rclist2[i]->rclist);
+ 		pfree(p_rclist2[i]->attnums);
+ 		pfree(p_rclist2[i]);
+ 	}
+ 	pfree(p_rclist2);
+ 
+ 	return found_xc_sel;
+ }
diff -dcrpN postgresql.orig/src/backend/parser/gram.y postgresql/src/backend/parser/gram.y *** postgresql.orig/src/backend/parser/gram.y 2011-04-26 09:54:04.055359065 +0200 --- postgresql/src/backend/parser/gram.y 2011-04-28 14:21:14.739176296 +0200 *************** static void SplitColQualList(List *qualL *** 199,209 **** CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateAssertStmt CreateTrigStmt ! CreateUserStmt CreateUserMappingStmt CreateRoleStmt ! CreatedbStmt DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt ! DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropRoleStmt ! DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt LockStmt NotifyStmt ExplainableStmt PreparableStmt --- 199,210 ---- CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateAssertStmt CreateTrigStmt ! CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatedbStmt ! CreateCCStmt CreateESStmt ! DeclareCursorStmt DefineStmt DeleteStmt DiscardStmt DoStmt DropGroupStmt DropOpClassStmt DropOpFamilyStmt DropPLangStmt DropStmt ! 
DropAssertStmt DropTrigStmt DropRuleStmt DropCastStmt DropCCStmt DropESStmt ! DropRoleStmt DropUserStmt DropdbStmt DropTableSpaceStmt DropFdwStmt DropForeignServerStmt DropUserMappingStmt ExplainStmt FetchStmt GrantStmt GrantRoleStmt IndexStmt InsertStmt ListenStmt LoadStmt LockStmt NotifyStmt ExplainableStmt PreparableStmt *************** static void SplitColQualList(List *qualL *** 315,320 **** --- 316,323 ---- %type <list> opt_fdw_options fdw_options %type <defelt> fdw_option + %type <list> cc_column_list + %type <range> OptTempTableName %type <into> into_clause create_as_target *************** static void SplitColQualList(List *qualL *** 499,505 **** DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT ! EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTENSION EXTERNAL EXTRACT FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD --- 502,508 ---- DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EXCEPT ! 
EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXPRESSION EXTENSION EXTERNAL EXTRACT FALSE_P FAMILY FETCH FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD *************** stmt : *** 700,707 **** --- 703,712 ---- | CreateAsStmt | CreateAssertStmt | CreateCastStmt + | CreateCCStmt | CreateConversionStmt | CreateDomainStmt + | CreateESStmt | CreateExtensionStmt | CreateFdwStmt | CreateForeignServerStmt *************** stmt : *** 729,734 **** --- 734,741 ---- | DoStmt | DropAssertStmt | DropCastStmt + | DropCCStmt + | DropESStmt | DropFdwStmt | DropForeignServerStmt | DropGroupStmt *************** schema_stmt: *** 1190,1195 **** --- 1197,1267 ---- /***************************************************************************** * + * Create or drop cross column / expression statistics + * + *****************************************************************************/ + + CreateCCStmt: + CREATE CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')' + { + ExtraStatStmt *n = makeNode(ExtraStatStmt); + n->create = TRUE; + n->relation = $6; + n->columns = $8; + n->expr = NULL; + $$ = (Node *)n; + } + ; + + DropCCStmt: + DROP CROSS COLUMN STATISTICS ON qualified_name '(' cc_column_list ')' + { + ExtraStatStmt *n = makeNode(ExtraStatStmt); + n->create = FALSE; + n->relation = $6; + n->columns = $8; + n->expr = NULL; + $$ = (Node *)n; + } + ; + + cc_column_list: + columnref + { + $$ = list_make1($1); + } + | cc_column_list ',' columnref + { + $$ = lappend($1, $3); + } + ; + + CreateESStmt: + CREATE EXPRESSION STATISTICS ON qualified_name '(' a_expr ')' + { + ExtraStatStmt *n = makeNode(ExtraStatStmt); + n->create = TRUE; + n->relation = $5; + n->columns = NIL; + n->expr = $7; + $$ = (Node *)n; + } + ; + + DropESStmt: + DROP EXPRESSION STATISTICS ON qualified_name '(' a_expr ')' + { + ExtraStatStmt *n = makeNode(ExtraStatStmt); + n->create = FALSE; + n->relation = $5; + n->columns = NIL; + n->expr = $7; + $$ = (Node *)n; + } + ; + + 
/***************************************************************************** + * * Set PG internal variable * SET name TO 'var_value' * Include SQL92 syntax (thomas 1997-10-22): *************** unreserved_keyword: *** 11898,11903 **** --- 11970,11976 ---- | EXCLUSIVE | EXECUTE | EXPLAIN + | EXPRESSION | EXTENSION | EXTERNAL | FAMILY diff -dcrpN postgresql.orig/src/backend/parser/parse_utilcmd.c postgresql/src/backend/parser/parse_utilcmd.c *** postgresql.orig/src/backend/parser/parse_utilcmd.c 2011-04-26 09:54:04.062358585 +0200 --- postgresql/src/backend/parser/parse_utilcmd.c 2011-04-28 14:21:14.745175892 +0200 *************** setSchemaName(char *context_schema, char *** 2700,2702 **** --- 2700,2878 ---- "different from the one being created (%s)", *stmt_schema_name, context_schema))); } + + /* + * set_location_unknown_walker: tree walker that resets the location field of every node type listed below to -1, then descends via expression_tree_walker() + */ + bool + set_location_unknown_walker(Node *node, void *dummy) + { + if (node == NULL) + return false; + + switch (node->type) + { + case T_TypeName: + { + TypeName *n = (TypeName *) node; + n->location = -1; + } + break; + case T_ColumnRef: + { + ColumnRef *n = (ColumnRef *)node; + n->location = -1; + } + break; + case T_ParamRef: + { + ParamRef *n = (ParamRef *) node; + n->location = -1; + } + break; + case T_A_Expr: + { + A_Expr *n = (A_Expr *) node; + n->location = -1; + } + break; + case T_A_Const: + { + A_Const *n = (A_Const *) node; + n->location = -1; + } + break; + case T_TypeCast: + { + TypeCast *n = (TypeCast *) node; + n->location = -1; + } + break; + case T_FuncCall: + { + FuncCall *n = (FuncCall *) node; + n->location = -1; + } + break; + case T_A_ArrayExpr: + { + A_ArrayExpr *n = (A_ArrayExpr *) node; + n->location = -1; + } + break; + case T_Var: + { + Var *n = (Var *) node; + n->location = -1; + } + break; + case T_Const: + { + Const *n = (Const *) node; + n->location = -1; + } + break; + case T_FuncExpr: + { + FuncExpr *n = (FuncExpr *) node; + n->location = -1; + } + break; + case T_OpExpr: + { + OpExpr *n = (OpExpr *) node; + n->location = -1; 
+ } + break; + case T_DistinctExpr: + { + DistinctExpr *n = (DistinctExpr *) node; + n->location = -1; + } + break; + case T_ScalarArrayOpExpr: + { + ScalarArrayOpExpr *n = (ScalarArrayOpExpr *) node; + n->location = -1; + } + break; + case T_BoolExpr: + { + BoolExpr *n = (BoolExpr *) node; + n->location = -1; + } + break; + case T_CaseExpr: + { + CaseExpr *n = (CaseExpr *) node; + n->location = -1; + } + break; + case T_CaseWhen: + { + CaseWhen *n = (CaseWhen *) node; + n->location = -1; + } + break; + case T_ArrayExpr: + { + ArrayExpr *n = (ArrayExpr *) node; + n->location = -1; + } + break; + case T_CoalesceExpr: + { + CoalesceExpr *n = (CoalesceExpr *) node; + n->location = -1; + } + break; + case T_CoerceToDomain: + { + CoerceToDomain *n = (CoerceToDomain *) node; + n->location = -1; + } + break; + default: + break; + } + + return expression_tree_walker(node, set_location_unknown_walker, NULL); + } + + /* + * transformExtraStatistics + * Run the column list and the expression through transformExpr() and + * return them in a new ExtraStatStmt, with expression locations reset to -1. 
+ */ + ExtraStatStmt * + transformExtraStatistics(ExtraStatStmt *stmt, const char *queryString) + { + ParseState *pstate; + RangeTblEntry *rte; + ExtraStatStmt *newstmt; + List *columns = NIL; + ListCell *cell; + + pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + rte = addRangeTableEntry(pstate, stmt->relation, NULL, false, true); + addRTEtoQuery(pstate, rte, true, true, true); + + newstmt = makeNode(ExtraStatStmt); + newstmt->create = stmt->create; + newstmt->relation = copyObject(stmt->relation); + + foreach(cell, stmt->columns) + { + Node *col = lfirst(cell); + + columns = lappend(columns, transformExpr(pstate, col)); + } + + newstmt->columns = columns; + newstmt->expr = transformExpr(pstate, stmt->expr); + query_or_expression_tree_walker(newstmt->expr, set_location_unknown_walker, NULL, 0); + + return newstmt; + } diff -dcrpN postgresql.orig/src/backend/tcop/utility.c postgresql/src/backend/tcop/utility.c *** postgresql.orig/src/backend/tcop/utility.c 2011-04-26 09:54:04.075357697 +0200 --- postgresql/src/backend/tcop/utility.c 2011-04-28 14:21:14.748175689 +0200 *************** check_xact_readonly(Node *parsetree) *** 229,234 **** --- 229,235 ---- case T_AlterTableSpaceOptionsStmt: case T_CreateForeignTableStmt: case T_SecLabelStmt: + case T_ExtraStatStmt: PreventCommandIfReadOnly(CreateCommandTag(parsetree)); break; default: *************** standard_ProcessUtility(Node *parsetree, *** 573,578 **** --- 574,587 ---- } break; + case T_ExtraStatStmt: + { + ExtraStatStmt *newstmt = transformExtraStatistics((ExtraStatStmt *)parsetree, queryString); + + ExtraStatistics(newstmt); + } + break; + case T_CreateTableSpaceStmt: PreventTransactionChain(isTopLevel, "CREATE TABLESPACE"); CreateTableSpace((CreateTableSpaceStmt *) parsetree); *************** CreateCommandTag(Node *parsetree) *** 1734,1739 **** --- 1743,1771 ---- tag = "CREATE FOREIGN TABLE"; break; + case T_ExtraStatStmt: + { + ExtraStatStmt *stmt = (ExtraStatStmt *)parsetree; + + if 
(list_length(stmt->columns) > 0) + { + if (stmt->create) + tag = "CREATE CROSS COLUMN STATISTICS"; + else + tag = "DROP CROSS COLUMN STATISTICS"; + } + else if (stmt->expr != NULL) + { + if (stmt->create) + tag = "CREATE EXPRESSION STATISTICS"; + else + tag = "DROP EXPRESSION STATISTICS"; + } + else + tag = "???"; + } + break; + case T_DropStmt: switch (((DropStmt *) parsetree)->removeType) { diff -dcrpN postgresql.orig/src/backend/tsearch/ts_selfuncs.c postgresql/src/backend/tsearch/ts_selfuncs.c *** postgresql.orig/src/backend/tsearch/ts_selfuncs.c 2011-04-11 15:36:27.150812982 +0200 --- postgresql/src/backend/tsearch/ts_selfuncs.c 2011-04-28 14:21:14.749175621 +0200 *************** tsquerysel(VariableStatData *vardata, Da *** 169,175 **** stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); /* MCELEM will be an array of TEXT elements for a tsvector column */ ! if (get_attstatsslot(vardata->statsTuple, TEXTOID, -1, STATISTIC_KIND_MCELEM, InvalidOid, NULL, --- 169,175 ---- stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); /* MCELEM will be an array of TEXT elements for a tsvector column */ ! 
if (get_attstatsslot(vardata->statsTuple, STAT_VARIABLE, TEXTOID, -1, STATISTIC_KIND_MCELEM, InvalidOid, NULL, diff -dcrpN postgresql.orig/src/backend/utils/adt/selfuncs.c postgresql/src/backend/utils/adt/selfuncs.c *** postgresql.orig/src/backend/utils/adt/selfuncs.c 2011-04-26 09:54:04.094356395 +0200 --- postgresql/src/backend/utils/adt/selfuncs.c 2011-04-28 15:53:46.195302017 +0200 *************** *** 94,102 **** --- 94,104 ---- #include "access/gin.h" #include "access/sysattr.h" #include "catalog/index.h" + #include "catalog/indexing.h" #include "catalog/pg_collation.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" + #include "catalog/pg_statistic3.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "mb/pg_wchar.h" *************** *** 111,116 **** --- 113,119 ---- #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "parser/parse_coerce.h" + #include "parser/parse_utilcmd.h" #include "parser/parsetree.h" #include "utils/builtins.h" #include "utils/bytea.h" *************** var_eq_const(VariableStatData *vardata, *** 275,281 **** * don't like this, maybe you shouldn't be using eqsel for your * operator...) */ ! if (get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, --- 278,284 ---- * don't like this, maybe you shouldn't be using eqsel for your * operator...) */ ! if (get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, *************** var_eq_non_const(VariableStatData *varda *** 417,423 **** * Cross-check: selectivity should never be estimated as more than the * most common value's. */ ! if (get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, --- 420,426 ---- * Cross-check: selectivity should never be estimated as more than the * most common value's. */ ! 
if (get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, *************** mcv_selectivity(VariableStatData *vardat *** 588,594 **** sumcommon = 0.0; if (HeapTupleIsValid(vardata->statsTuple) && ! get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, --- 591,597 ---- sumcommon = 0.0; if (HeapTupleIsValid(vardata->statsTuple) && ! get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, *************** histogram_selectivity(VariableStatData * *** 664,670 **** Assert(min_hist_size > 2 * n_skip); if (HeapTupleIsValid(vardata->statsTuple) && ! get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, --- 667,673 ---- Assert(min_hist_size > 2 * n_skip); if (HeapTupleIsValid(vardata->statsTuple) && ! get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, *************** ineq_histogram_selectivity(PlannerInfo * *** 741,747 **** * the reverse way if isgt is TRUE. */ if (HeapTupleIsValid(vardata->statsTuple) && ! get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, &hist_op, --- 744,750 ---- * the reverse way if isgt is TRUE. */ if (HeapTupleIsValid(vardata->statsTuple) && ! get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, &hist_op, *************** booltestsel(PlannerInfo *root, BoolTestT *** 1434,1440 **** stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; ! 
if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, --- 1437,1443 ---- stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; ! if (get_attstatsslot(vardata.statsTuple, vardata.stats_type, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, *************** eqjoinsel_inner(Oid operator, *** 2074,2080 **** if (HeapTupleIsValid(vardata1->statsTuple)) { stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); ! have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->atttype, vardata1->atttypmod, STATISTIC_KIND_MCV, --- 2077,2083 ---- if (HeapTupleIsValid(vardata1->statsTuple)) { stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); ! have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->stats_type, vardata1->atttype, vardata1->atttypmod, STATISTIC_KIND_MCV, *************** eqjoinsel_inner(Oid operator, *** 2087,2093 **** if (HeapTupleIsValid(vardata2->statsTuple)) { stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); ! have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->atttype, vardata2->atttypmod, STATISTIC_KIND_MCV, --- 2090,2096 ---- if (HeapTupleIsValid(vardata2->statsTuple)) { stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple); ! have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->stats_type, vardata2->atttype, vardata2->atttypmod, STATISTIC_KIND_MCV, *************** eqjoinsel_semi(Oid operator, *** 2309,2315 **** if (HeapTupleIsValid(vardata1->statsTuple)) { stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); ! have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->atttype, vardata1->atttypmod, STATISTIC_KIND_MCV, --- 2312,2318 ---- if (HeapTupleIsValid(vardata1->statsTuple)) { stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple); ! 
have_mcvs1 = get_attstatsslot(vardata1->statsTuple, vardata1->stats_type, vardata1->atttype, vardata1->atttypmod, STATISTIC_KIND_MCV, *************** eqjoinsel_semi(Oid operator, *** 2321,2327 **** if (HeapTupleIsValid(vardata2->statsTuple)) { ! have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->atttype, vardata2->atttypmod, STATISTIC_KIND_MCV, --- 2324,2330 ---- if (HeapTupleIsValid(vardata2->statsTuple)) { ! have_mcvs2 = get_attstatsslot(vardata2->statsTuple, vardata2->stats_type, vardata2->atttype, vardata2->atttypmod, STATISTIC_KIND_MCV, *************** estimate_hash_bucketsize(PlannerInfo *ro *** 3322,3328 **** if (HeapTupleIsValid(vardata.statsTuple)) { ! if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, --- 3325,3331 ---- if (HeapTupleIsValid(vardata.statsTuple)) { ! if (get_attstatsslot(vardata.statsTuple, vardata.stats_type, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, *************** examine_variable(PlannerInfo *root, Node *** 4103,4108 **** --- 4106,4112 ---- { Node *basenode; Relids varnos; + int onerelid = 0; RelOptInfo *onerel; /* Make sure we don't return dangling pointers in vardata */ *************** examine_variable(PlannerInfo *root, Node *** 4147,4152 **** --- 4151,4157 ---- } else if (rte->rtekind == RTE_RELATION) { + vardata->stats_type = STAT_VARIABLE; vardata->statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(rte->relid), Int16GetDatum(var->varattno), *************** examine_variable(PlannerInfo *root, Node *** 4185,4192 **** case BMS_SINGLETON: if (varRelid == 0 || bms_is_member(varRelid, varnos)) { ! onerel = find_base_rel(root, ! (varRelid ? varRelid : bms_singleton_member(varnos))); vardata->rel = onerel; node = basenode; /* strip any relabeling */ } --- 4190,4197 ---- case BMS_SINGLETON: if (varRelid == 0 || bms_is_member(varRelid, varnos)) { ! onerelid = (varRelid ? varRelid : bms_singleton_member(varnos)); ! 
onerel = find_base_rel(root, onerelid); vardata->rel = onerel; node = basenode; /* strip any relabeling */ } *************** examine_variable(PlannerInfo *root, Node *** 4220,4233 **** { /* * We have an expression in vars of a single relation. Try to match ! * it to expressional index columns, in hopes of finding some ! * statistics. * * XXX it's conceivable that there are multiple matches with different * index opfamilies; if so, we need to pick one that matches the * operator we are estimating for. FIXME later. */ ListCell *ilist; foreach(ilist, onerel->indexlist) { --- 4225,4301 ---- { /* * We have an expression in vars of a single relation. Try to match ! * it to expression statistics first then to expressional index columns, ! * in hopes of finding some statistics. * * XXX it's conceivable that there are multiple matches with different * index opfamilies; if so, we need to pick one that matches the * operator we are estimating for. FIXME later. */ ListCell *ilist; + Node *expr = copyObject(node); + char *exprbin; + Datum exprbindatum; + + #define USE_SYSCACHE_FOR_SEARCH 0 + #if !USE_SYSCACHE_FOR_SEARCH + Relation rel; + ScanKeyData scanKey[2]; + SysScanDesc scan; + HeapTuple tuple; + #endif + + query_or_expression_tree_walker(expr, set_location_unknown_walker, NULL, 0); + exprbin = nodeToString(expr); + exprbindatum = CStringGetTextDatum(exprbin); + + #if USE_SYSCACHE_FOR_SEARCH + vardata->statsTuple = SearchSysCache3(STAT3RELEXPRINH, + ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid), + exprbindatum, + BoolGetDatum(false)); + if (HeapTupleIsValid(vardata->statsTuple)) + { + vardata->stats_type = STAT_EXPRESSION; + vardata->freefunc = ReleaseSysCache; + return; + } + + #else + + rel = heap_open(Statistic3RelationId, RowShareLock); + + ScanKeyInit(&scanKey[0], + Anum_pg_statistic3_sta3relid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(root->simple_rte_array[onerelid]->relid)); + ScanKeyInit(&scanKey[1], + Anum_pg_statistic3_sta3expr, + 
BTEqualStrategyNumber, F_TEXTEQ, + exprbindatum); + + scan = systable_beginscan(rel, Statistic3RelidExprInhIndexId, true, + SnapshotNow, 2, scanKey); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + { + // elog(NOTICE, "examine_variable expression found"); + vardata->stats_type = STAT_EXPRESSION; + vardata->statsTuple = heap_copytuple(tuple); + vardata->freefunc = heap_freetuple; + } + + systable_endscan(scan); + + pfree(exprbin); + pfree(DatumGetPointer(exprbindatum)); + + relation_close(rel, RowShareLock); + + if (vardata->statsTuple) + return; + #endif foreach(ilist, onerel->indexlist) { *************** examine_variable(PlannerInfo *root, Node *** 4286,4291 **** --- 4354,4360 ---- } else if (index->indpred == NIL) { + vardata->stats_type = STAT_VARIABLE; vardata->statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(index->indexoid), *************** get_variable_numdistinct(VariableStatDat *** 4327,4337 **** */ if (HeapTupleIsValid(vardata->statsTuple)) { ! /* Use the pg_statistic entry */ ! Form_pg_statistic stats; ! stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); ! stadistinct = stats->stadistinct; } else if (vardata->vartype == BOOLOID) { --- 4396,4425 ---- */ if (HeapTupleIsValid(vardata->statsTuple)) { ! switch (vardata->stats_type) ! { ! case STAT_VARIABLE: ! { ! /* Use the pg_statistic entry */ ! Form_pg_statistic stats; ! stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); ! stadistinct = stats->stadistinct; ! break; ! } ! case STAT_EXPRESSION: ! { ! /* Use the pg_statistic3 entry */ ! Form_pg_statistic3 stats3; ! ! stats3 = (Form_pg_statistic3) GETSTRUCT(vardata->statsTuple); ! stadistinct = stats3->sta3distinct; ! break; ! } ! default: ! elog(ERROR, "internal error"); ! return 0.0; ! } } else if (vardata->vartype == BOOLOID) { *************** get_variable_range(PlannerInfo *root, Va *** 4462,4468 **** * the one we want, fail --- this suggests that there is data we can't * use. */ ! 
if (get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, sortop, NULL, --- 4550,4556 ---- * the one we want, fail --- this suggests that there is data we can't * use. */ ! if (get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, sortop, NULL, *************** get_variable_range(PlannerInfo *root, Va *** 4477,4483 **** } free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } ! else if (get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, --- 4565,4571 ---- } free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } ! else if (get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, *************** get_variable_range(PlannerInfo *root, Va *** 4494,4500 **** * the MCVs. However, usually the MCVs will not be the extreme values, so * avoid unnecessary data copying. */ ! if (get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, --- 4582,4588 ---- * the MCVs. However, usually the MCVs will not be the extreme values, so * avoid unnecessary data copying. */ ! 
if (get_attstatsslot(vardata->statsTuple, vardata->stats_type, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, *************** btcostestimate(PG_FUNCTION_ARGS) *** 6255,6260 **** --- 6343,6349 ---- } else { + vardata.stats_type = STAT_VARIABLE; vardata.statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(colnum), *************** btcostestimate(PG_FUNCTION_ARGS) *** 6281,6286 **** --- 6370,6376 ---- } else { + vardata.stats_type = STAT_VARIABLE; vardata.statsTuple = SearchSysCache3(STATRELATTINH, ObjectIdGetDatum(relid), Int16GetDatum(colnum), *************** btcostestimate(PG_FUNCTION_ARGS) *** 6300,6306 **** index->opcintype[0], BTLessStrategyNumber); if (OidIsValid(sortop) && ! get_attstatsslot(vardata.statsTuple, InvalidOid, 0, STATISTIC_KIND_CORRELATION, sortop, NULL, --- 6390,6396 ---- index->opcintype[0], BTLessStrategyNumber); if (OidIsValid(sortop) && ! get_attstatsslot(vardata.statsTuple, vardata.stats_type, InvalidOid, 0, STATISTIC_KIND_CORRELATION, sortop, NULL, diff -dcrpN postgresql.orig/src/backend/utils/cache/catcache.c postgresql/src/backend/utils/cache/catcache.c *** postgresql.orig/src/backend/utils/cache/catcache.c 2011-04-13 10:11:05.021216766 +0200 --- postgresql/src/backend/utils/cache/catcache.c 2011-04-28 14:21:14.766174476 +0200 *************** GetCCHashEqFuncs(Oid keytype, PGFunction *** 135,140 **** --- 135,141 ---- *eqfunc = F_INT4EQ; break; case TEXTOID: + case PGNODETREEOID: *hashfunc = hashtext; *eqfunc = F_TEXTEQ; diff -dcrpN postgresql.orig/src/backend/utils/cache/lsyscache.c postgresql/src/backend/utils/cache/lsyscache.c *** postgresql.orig/src/backend/utils/cache/lsyscache.c 2011-04-11 15:36:27.175811226 +0200 --- postgresql/src/backend/utils/cache/lsyscache.c 2011-04-28 14:21:14.769174273 +0200 *************** *** 27,32 **** --- 27,33 ---- #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_statistic.h" + #include "catalog/pg_statistic3.h" 
#include "catalog/pg_type.h" #include "miscadmin.h" #include "nodes/makefuncs.h" *************** get_attavgwidth(Oid relid, AttrNumber at *** 2667,2680 **** * type ID to pass to free_attstatsslot later. */ bool ! get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers) { ! Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple); int i, j; Datum val; --- 2668,2682 ---- * type ID to pass to free_attstatsslot later. */ bool ! get_attstatsslot(HeapTuple statstuple, StatType stat_type, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, Datum **values, int *nvalues, float4 **numbers, int *nnumbers) { ! Form_pg_statistic stats; ! Form_pg_statistic3 stats3; int i, j; Datum val; *************** get_attstatsslot(HeapTuple statstuple, *** 2685,2707 **** HeapTuple typeTuple; Form_pg_type typeForm; ! for (i = 0; i < STATISTIC_NUM_SLOTS; i++) { ! if ((&stats->stakind1)[i] == reqkind && ! (reqop == InvalidOid || (&stats->staop1)[i] == reqop)) break; } if (i >= STATISTIC_NUM_SLOTS) return false; /* not there */ if (actualop) ! *actualop = (&stats->staop1)[i]; if (values) { ! val = SysCacheGetAttr(STATRELATTINH, statstuple, Anum_pg_statistic_stavalues1 + i, &isnull); if (isnull) elog(ERROR, "stavalues is null"); statarray = DatumGetArrayTypeP(val); --- 2687,2755 ---- HeapTuple typeTuple; Form_pg_type typeForm; ! switch (stat_type) { ! case STAT_VARIABLE: ! stats = (Form_pg_statistic) GETSTRUCT(statstuple); ! stats3 = NULL; ! ! for (i = 0; i < STATISTIC_NUM_SLOTS; i++) ! { ! if ((&stats->stakind1)[i] == reqkind && ! (reqop == InvalidOid || (&stats->staop1)[i] == reqop)) ! break; ! 
} break; + case STAT_EXPRESSION: + stats = NULL; + stats3 = (Form_pg_statistic3) GETSTRUCT(statstuple); + + for (i = 0; i < STATISTIC_NUM_SLOTS; i++) + { + if ((&stats3->sta3kind1)[i] == reqkind && + (reqop == InvalidOid || (&stats3->sta3op1)[i] == reqop)) + break; + } + break; + default: + elog(ERROR, "internal error"); + return false; /* make compiler quiet */ } + if (i >= STATISTIC_NUM_SLOTS) return false; /* not there */ if (actualop) ! { ! switch (stat_type) ! { ! case STAT_VARIABLE: ! *actualop = (&stats->staop1)[i]; ! break; ! case STAT_EXPRESSION: ! *actualop = (&stats3->sta3op1)[i]; ! break; ! } ! } if (values) { ! switch (stat_type) ! { ! case STAT_VARIABLE: ! val = SysCacheGetAttr(STATRELATTINH, statstuple, Anum_pg_statistic_stavalues1 + i, &isnull); + break; + case STAT_EXPRESSION: + val = SysCacheGetAttr(STAT3RELEXPRINH, statstuple, + Anum_pg_statistic3_sta3values1 + i, + &isnull); + break; + default: + elog(ERROR, "internal error"); + return false; /* silence compiler */ + } if (isnull) elog(ERROR, "stavalues is null"); statarray = DatumGetArrayTypeP(val); *************** get_attstatsslot(HeapTuple statstuple, *** 2753,2761 **** if (numbers) { ! val = SysCacheGetAttr(STATRELATTINH, statstuple, Anum_pg_statistic_stanumbers1 + i, &isnull); if (isnull) elog(ERROR, "stanumbers is null"); statarray = DatumGetArrayTypeP(val); --- 2801,2821 ---- if (numbers) { ! switch (stat_type) ! { ! case STAT_VARIABLE: ! 
val = SysCacheGetAttr(STATRELATTINH, statstuple, Anum_pg_statistic_stanumbers1 + i, &isnull); + break; + case STAT_EXPRESSION: + val = SysCacheGetAttr(STAT3RELEXPRINH, statstuple, + Anum_pg_statistic3_sta3numbers1 + i, + &isnull); + break; + default: + return false; /* silence compiler */ + } if (isnull) elog(ERROR, "stanumbers is null"); statarray = DatumGetArrayTypeP(val); diff -dcrpN postgresql.orig/src/backend/utils/cache/syscache.c postgresql/src/backend/utils/cache/syscache.c *** postgresql.orig/src/backend/utils/cache/syscache.c 2011-04-26 09:54:04.095356326 +0200 --- postgresql/src/backend/utils/cache/syscache.c 2011-04-28 14:21:14.775173869 +0200 *************** *** 45,50 **** --- 45,51 ---- #include "catalog/pg_proc.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_statistic.h" + #include "catalog/pg_statistic3.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_config_map.h" *************** static const struct cachedesc cacheinfo[ *** 587,592 **** --- 588,604 ---- }, 1024 }, + {Statistic3RelationId, /* STAT3RELEXPRINH */ + Statistic3RelidExprInhIndexId, + 3, + { + Anum_pg_statistic3_sta3relid, + Anum_pg_statistic3_sta3expr, + Anum_pg_statistic3_sta3inherit, + 0 + }, + 1024 + }, {StatisticRelationId, /* STATRELATTINH */ StatisticRelidAttnumInhIndexId, 3, diff -dcrpN postgresql.orig/src/include/catalog/indexing.h postgresql/src/include/catalog/indexing.h *** postgresql.orig/src/include/catalog/indexing.h 2011-02-10 10:36:32.320680534 +0100 --- postgresql/src/include/catalog/indexing.h 2011-04-28 14:21:14.777173734 +0200 *************** DECLARE_UNIQUE_INDEX(pg_extension_oid_in *** 300,305 **** --- 300,312 ---- DECLARE_UNIQUE_INDEX(pg_extension_name_index, 3081, on pg_extension using btree(extname name_ops)); #define ExtensionNameIndexId 3081 + DECLARE_UNIQUE_INDEX(pg_statistic2_relid_att_inh_index, 3072, on pg_statistic2 using btree(sta2relid oid_ops, sta2attnums array_ops, sta2inherit bool_ops)); + #define 
Statistic2RelidAttnumsInhIndexId 3072 + + DECLARE_UNIQUE_INDEX(pg_statistic3_relid_expr_inh_index, 3074, on pg_statistic3 using btree(sta3relid oid_ops, sta3expr text_ops, sta3inherit bool_ops)); + #define Statistic3RelidExprInhIndexId 3074 + + /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic2.h postgresql/src/include/catalog/pg_statistic2.h *** postgresql.orig/src/include/catalog/pg_statistic2.h 1970-01-01 01:00:00.000000000 +0100 --- postgresql/src/include/catalog/pg_statistic2.h 2011-04-28 14:21:14.779173600 +0200 *************** *** 0 **** --- 1,265 ---- + /*------------------------------------------------------------------------- + * + * pg_statistic2.h + * definition of the system "cross-column statistic" relation (pg_statistic2) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_statistic2.h + * + * NOTES + * the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ + #ifndef PG_STATISTIC2_H + #define PG_STATISTIC2_H + + #include "catalog/genbki.h" + + /* + * The CATALOG definition has to refer to the type of stavaluesN as + * "anyarray" so that bootstrap mode recognizes it. There is no real + * typedef for that, however. Since the fields are potentially-null and + * therefore can't be accessed directly from C code, there is no particular + * need for the C struct definition to show a valid field type --- instead + * we just make it int. + */ + #define anyarray int + + /* ---------------- + * pg_statistic2 definition. 
cpp turns this into + * typedef struct FormData_pg_statistic2 + * ---------------- + */ + #define Statistic2RelationId 3071 + + CATALOG(pg_statistic2,3071) BKI_WITHOUT_OIDS + { + /* These fields form the unique key for the entry: */ + Oid sta2relid; /* relation containing the attributes */ + int2 sta2attnums[1]; /* attributes (columns) stats are for */ + bool sta2inherit; /* true if inheritance children are included */ + + /* the fraction of the column's entries that are NULL: */ + float4 sta2nullfrac; + + /* + * sta2width is the average width in bytes of non-null entries. For + * fixed-width datatypes this is of course the same as the typlen, but for + * var-width types it is more useful. Note that this is the average width + * of the data as actually stored, post-TOASTing (eg, for a + * moved-out-of-line value, only the size of the pointer object is + * counted). This is the appropriate definition for the primary use of + * the statistic, which is to estimate sizes of in-memory hash tables of + * tuples. + */ + int4 sta2width; + + /* ---------------- + * sta2distinct indicates the (approximate) number of distinct non-null + * data values in the column. The interpretation is: + * 0 unknown or not computed + * > 0 actual number of distinct values + * < 0 negative of multiplier for number of rows + * The special negative case allows us to cope with columns that are + * unique (sta2distinct = -1) or nearly so (for example, a column in + * which values appear about twice on the average could be represented + * by sta2distinct = -0.5). Because the number-of-rows statistic in + * pg_class may be updated more frequently than pg_statistic2 is, it's + * important to be able to describe such situations as a multiple of + * the number of rows, rather than a fixed number of distinct values. + * But in other cases a fixed number is correct (eg, a boolean column). 
+ * ---------------- + */ + float4 sta2distinct; + + /* ---------------- + * To allow keeping statistics on different kinds of datatypes, + * we do not hard-wire any particular meaning for the remaining + * statistical fields. Instead, we provide several "slots" in which + * statistical data can be placed. Each slot includes: + * kind integer code identifying kind of data + * op OID of associated operator, if needed + * numbers float4 array (for statistical values) + * values anyarray (for representations of data values) + * The ID and operator fields are never NULL; they are zeroes in an + * unused slot. The numbers and values fields are NULL in an unused + * slot, and might also be NULL in a used slot if the slot kind has + * no need for one or the other. + * ---------------- + */ + + int2 sta2kind1; + int2 sta2kind2; + int2 sta2kind3; + int2 sta2kind4; + + Oid sta2op1; + Oid sta2op2; + Oid sta2op3; + Oid sta2op4; + + /* + * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent + * (NULL). They cannot be accessed as C struct entries; you have to use + * the full field access machinery (heap_getattr) for them. We declare + * them here for the catalog machinery. + */ + + float4 sta2numbers1[1]; + float4 sta2numbers2[1]; + float4 sta2numbers3[1]; + float4 sta2numbers4[1]; + + /* + * Values in these arrays are values of the column's data type. We + * presently have to cheat quite a bit to allow polymorphic arrays of this + * kind, but perhaps someday it'll be a less bogus facility. + */ + anyarray sta2values1; + anyarray sta2values2; + anyarray sta2values3; + anyarray sta2values4; + } FormData_pg_statistic2; + + #define STATISTIC_NUM_SLOTS 4 + + #undef anyarray + + + /* ---------------- + * Form_pg_statistic2 corresponds to a pointer to a tuple with + * the format of pg_statistic2 relation. 
+ * ---------------- + */ + typedef FormData_pg_statistic2 *Form_pg_statistic2; + + /* ---------------- + * compiler constants for pg_statistic2 + * ---------------- + */ + #define Natts_pg_statistic2 22 + #define Anum_pg_statistic2_sta2relid 1 + #define Anum_pg_statistic2_sta2attnums 2 + #define Anum_pg_statistic2_sta2inherit 3 + #define Anum_pg_statistic2_sta2nullfrac 4 + #define Anum_pg_statistic2_sta2width 5 + #define Anum_pg_statistic2_sta2distinct 6 + #define Anum_pg_statistic2_sta2kind1 7 + #define Anum_pg_statistic2_sta2kind2 8 + #define Anum_pg_statistic2_sta2kind3 9 + #define Anum_pg_statistic2_sta2kind4 10 + #define Anum_pg_statistic2_sta2op1 11 + #define Anum_pg_statistic2_sta2op2 12 + #define Anum_pg_statistic2_sta2op3 13 + #define Anum_pg_statistic2_sta2op4 14 + #define Anum_pg_statistic2_sta2numbers1 15 + #define Anum_pg_statistic2_sta2numbers2 16 + #define Anum_pg_statistic2_sta2numbers3 17 + #define Anum_pg_statistic2_sta2numbers4 18 + #define Anum_pg_statistic2_sta2values1 19 + #define Anum_pg_statistic2_sta2values2 20 + #define Anum_pg_statistic2_sta2values3 21 + #define Anum_pg_statistic2_sta2values4 22 + + #if 0 + + /* + * Currently, three statistical slot "kinds" are defined: most common values, + * histogram, and correlation. Additional "kinds" will probably appear in + * future to help cope with non-scalar datatypes. Also, custom data types + * can define their own "kind" codes by mutual agreement between a custom + * typanalyze routine and the selectivity estimation functions of the type's + * operators. + * + * Code reading the pg_statistic2 relation should not assume that a particular + * data "kind" will appear in any particular slot. Instead, search the + * stakind fields to see if the desired data is available. (The standard + * function get_attstatsslot() may be used for this.) 
+ */ + + /* + * The present allocation of "kind" codes is: + * + * 1-99: reserved for assignment by the core PostgreSQL project + * (values in this range will be documented in this file) + * 100-199: reserved for assignment by the PostGIS project + * (values to be documented in PostGIS documentation) + * 200-299: reserved for assignment by the ESRI ST_Geometry project + * (values to be documented in ESRI ST_Geometry documentation) + * 300-9999: reserved for future public assignments + * + * For private use you may choose a "kind" code at random in the range + * 10000-30000. However, for code that is to be widely disseminated it is + * better to obtain a publicly defined "kind" code by request from the + * PostgreSQL Global Development Group. + */ + + /* + * In a "most common values" slot, staop is the OID of the "=" operator + * used to decide whether values are the same or not. stavalues contains + * the K most common non-null values appearing in the column, and stanumbers + * contains their frequencies (fractions of total row count). The values + * shall be ordered in decreasing frequency. Note that since the arrays are + * variable-size, K may be chosen by the statistics collector. Values should + * not appear in MCV unless they have been observed to occur more than once; + * a unique column will have no MCV slot. + */ + #define STATISTIC_KIND_MCV 1 + + /* + * A "histogram" slot describes the distribution of scalar data. staop is + * the OID of the "<" operator that describes the sort ordering. (In theory, + * more than one histogram could appear, if a datatype has more than one + * useful sort operator.) stavalues contains M (>=2) non-null values that + * divide the non-null column data values into M-1 bins of approximately equal + * population. The first stavalues item is the MIN and the last is the MAX. + * stanumbers is not used and should be NULL. 
IMPORTANT POINT: if an MCV + * slot is also provided, then the histogram describes the data distribution + * *after removing the values listed in MCV* (thus, it's a "compressed + * histogram" in the technical parlance). This allows a more accurate + * representation of the distribution of a column with some very-common + * values. In a column with only a few distinct values, it's possible that + * the MCV list describes the entire data population; in this case the + * histogram reduces to empty and should be omitted. + */ + #define STATISTIC_KIND_HISTOGRAM 2 + + /* + * A "correlation" slot describes the correlation between the physical order + * of table tuples and the ordering of data values of this column, as seen + * by the "<" operator identified by staop. (As with the histogram, more + * than one entry could theoretically appear.) stavalues is not used and + * should be NULL. stanumbers contains a single entry, the correlation + * coefficient between the sequence of data values and the sequence of + * their actual tuple positions. The coefficient ranges from +1 to -1. + */ + #define STATISTIC_KIND_CORRELATION 3 + + /* + * A "most common elements" slot is similar to a "most common values" slot, + * except that it stores the most common non-null *elements* of the column + * values. This is useful when the column datatype is an array or some other + * type with identifiable elements (for instance, tsvector). staop contains + * the equality operator appropriate to the element type. stavalues contains + * the most common element values, and stanumbers their frequencies. Unlike + * MCV slots, the values are sorted into order (to support binary search + * for a particular value). Since this puts the minimum and maximum + * frequencies at unpredictable spots in stanumbers, there are two extra + * members of stanumbers, holding copies of the minimum and maximum + * frequencies. 
+ * + * Note: in current usage for tsvector columns, the stavalues elements are of + * type text, even though their representation within tsvector is not + * exactly text. + */ + #define STATISTIC_KIND_MCELEM 4 + + #endif + + #endif /* PG_STATISTIC2_H */ diff -dcrpN postgresql.orig/src/include/catalog/pg_statistic3.h postgresql/src/include/catalog/pg_statistic3.h *** postgresql.orig/src/include/catalog/pg_statistic3.h 1970-01-01 01:00:00.000000000 +0100 --- postgresql/src/include/catalog/pg_statistic3.h 2011-04-28 14:21:14.780173533 +0200 *************** *** 0 **** --- 1,265 ---- + /*------------------------------------------------------------------------- + * + * pg_statistic3.h + * definition of the system "expression statistic" relation (pg_statistic3) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_statistic3.h + * + * NOTES + * the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ + #ifndef PG_STATISTIC3_H + #define PG_STATISTIC3_H + + #include "catalog/genbki.h" + + /* + * The CATALOG definition has to refer to the type of stavaluesN as + * "anyarray" so that bootstrap mode recognizes it. There is no real + * typedef for that, however. Since the fields are potentially-null and + * therefore can't be accessed directly from C code, there is no particular + * need for the C struct definition to show a valid field type --- instead + * we just make it int. + */ + #define anyarray int + + /* ---------------- + * pg_statistic3 definition. 
cpp turns this into + * typedef struct FormData_pg_statistic3 + * ---------------- + */ + #define Statistic3RelationId 3073 + + CATALOG(pg_statistic3,3073) BKI_WITHOUT_OIDS + { + /* These fields form the unique key for the entry: */ + Oid sta3relid; /* relation the expression statistics are for */ + pg_node_tree sta3expr; /* expression stat is for; NOTE(review): presumably variable-length, in which case the fixed-width fields declared after it cannot be read through the C struct - confirm */ + bool sta3inherit; /* true if inheritance children are included */ + + /* the fraction of the column's entries that are NULL: */ + float4 sta3nullfrac; + + /* + * sta3width is the average width in bytes of non-null entries. For + * fixed-width datatypes this is of course the same as the typlen, but for + * var-width types it is more useful. Note that this is the average width + * of the data as actually stored, post-TOASTing (eg, for a + * moved-out-of-line value, only the size of the pointer object is + * counted). This is the appropriate definition for the primary use of + * the statistic, which is to estimate sizes of in-memory hash tables of + * tuples. + */ + int4 sta3width; + + /* ---------------- + * sta3distinct indicates the (approximate) number of distinct non-null + * data values in the column. The interpretation is: + * 0 unknown or not computed + * > 0 actual number of distinct values + * < 0 negative of multiplier for number of rows + * The special negative case allows us to cope with columns that are + * unique (sta3distinct = -1) or nearly so (for example, a column in + * which values appear about twice on the average could be represented + * by sta3distinct = -0.5). Because the number-of-rows statistic in + * pg_class may be updated more frequently than pg_statistic3 is, it's + * important to be able to describe such situations as a multiple of + * the number of rows, rather than a fixed number of distinct values. + * But in other cases a fixed number is correct (eg, a boolean column).
+ * ---------------- + */ + float4 sta3distinct; + + /* ---------------- + * To allow keeping statistics on different kinds of datatypes, + * we do not hard-wire any particular meaning for the remaining + * statistical fields. Instead, we provide several "slots" in which + * statistical data can be placed. Each slot includes: + * kind integer code identifying kind of data + * op OID of associated operator, if needed + * numbers float4 array (for statistical values) + * values anyarray (for representations of data values) + * The kind and operator fields are never NULL; they are zeroes in an + * unused slot. The numbers and values fields are NULL in an unused + * slot, and might also be NULL in a used slot if the slot kind has + * no need for one or the other. + * ---------------- + */ + + int2 sta3kind1; + int2 sta3kind2; + int2 sta3kind3; + int2 sta3kind4; + + Oid sta3op1; + Oid sta3op2; + Oid sta3op3; + Oid sta3op4; + + /* + * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent + * (NULL). They cannot be accessed as C struct entries; you have to use + * the full field access machinery (heap_getattr) for them. We declare + * them here for the catalog machinery. + */ + + float4 sta3numbers1[1]; + float4 sta3numbers2[1]; + float4 sta3numbers3[1]; + float4 sta3numbers4[1]; + + /* + * Values in these arrays are values of the column's data type. We + * presently have to cheat quite a bit to allow polymorphic arrays of this + * kind, but perhaps someday it'll be a less bogus facility. + */ + anyarray sta3values1; + anyarray sta3values2; + anyarray sta3values3; + anyarray sta3values4; + } FormData_pg_statistic3; + + #define STATISTIC_NUM_SLOTS 4 + + #undef anyarray + + + /* ---------------- + * Form_pg_statistic3 corresponds to a pointer to a tuple with + * the format of pg_statistic3 relation.
+ * ---------------- + */ + typedef FormData_pg_statistic3 *Form_pg_statistic3; + + /* ---------------- + * compiler constants for pg_statistic3 + * ---------------- + */ + #define Natts_pg_statistic3 22 + #define Anum_pg_statistic3_sta3relid 1 + #define Anum_pg_statistic3_sta3expr 2 + #define Anum_pg_statistic3_sta3inherit 3 + #define Anum_pg_statistic3_sta3nullfrac 4 + #define Anum_pg_statistic3_sta3width 5 + #define Anum_pg_statistic3_sta3distinct 6 + #define Anum_pg_statistic3_sta3kind1 7 + #define Anum_pg_statistic3_sta3kind2 8 + #define Anum_pg_statistic3_sta3kind3 9 + #define Anum_pg_statistic3_sta3kind4 10 + #define Anum_pg_statistic3_sta3op1 11 + #define Anum_pg_statistic3_sta3op2 12 + #define Anum_pg_statistic3_sta3op3 13 + #define Anum_pg_statistic3_sta3op4 14 + #define Anum_pg_statistic3_sta3numbers1 15 + #define Anum_pg_statistic3_sta3numbers2 16 + #define Anum_pg_statistic3_sta3numbers3 17 + #define Anum_pg_statistic3_sta3numbers4 18 + #define Anum_pg_statistic3_sta3values1 19 + #define Anum_pg_statistic3_sta3values2 20 + #define Anum_pg_statistic3_sta3values3 21 + #define Anum_pg_statistic3_sta3values4 22 + + #if 0 + + /* + * Currently, four statistical slot "kinds" are defined: most common values, + * histogram, correlation, and most common elements. Additional "kinds" will + * probably appear in future to help cope with non-scalar datatypes. Also, + * custom data types can define their own "kind" codes by mutual agreement + * between a custom typanalyze routine and the selectivity estimation + * functions of the type's operators. + * + * Code reading the pg_statistic3 relation should not assume that a particular + * data "kind" will appear in any particular slot. Instead, search the + * sta3kind fields to see if the desired data is available. (The standard + * function get_attstatsslot() may be used for this.)
+ */ + + /* + * The present allocation of "kind" codes is: + * + * 1-99: reserved for assignment by the core PostgreSQL project + * (values in this range will be documented in this file) + * 100-199: reserved for assignment by the PostGIS project + * (values to be documented in PostGIS documentation) + * 200-299: reserved for assignment by the ESRI ST_Geometry project + * (values to be documented in ESRI ST_Geometry documentation) + * 300-9999: reserved for future public assignments + * + * For private use you may choose a "kind" code at random in the range + * 10000-30000. However, for code that is to be widely disseminated it is + * better to obtain a publicly defined "kind" code by request from the + * PostgreSQL Global Development Group. + */ + + /* + * In a "most common values" slot, staop is the OID of the "=" operator + * used to decide whether values are the same or not. stavalues contains + * the K most common non-null values appearing in the column, and stanumbers + * contains their frequencies (fractions of total row count). The values + * shall be ordered in decreasing frequency. Note that since the arrays are + * variable-size, K may be chosen by the statistics collector. Values should + * not appear in MCV unless they have been observed to occur more than once; + * a unique column will have no MCV slot. + */ + #define STATISTIC_KIND_MCV 1 + + /* + * A "histogram" slot describes the distribution of scalar data. staop is + * the OID of the "<" operator that describes the sort ordering. (In theory, + * more than one histogram could appear, if a datatype has more than one + * useful sort operator.) stavalues contains M (>=2) non-null values that + * divide the non-null column data values into M-1 bins of approximately equal + * population. The first stavalues item is the MIN and the last is the MAX. + * stanumbers is not used and should be NULL. 
IMPORTANT POINT: if an MCV + * slot is also provided, then the histogram describes the data distribution + * *after removing the values listed in MCV* (thus, it's a "compressed + * histogram" in the technical parlance). This allows a more accurate + * representation of the distribution of a column with some very-common + * values. In a column with only a few distinct values, it's possible that + * the MCV list describes the entire data population; in this case the + * histogram reduces to empty and should be omitted. + */ + #define STATISTIC_KIND_HISTOGRAM 2 + + /* + * A "correlation" slot describes the correlation between the physical order + * of table tuples and the ordering of data values of this column, as seen + * by the "<" operator identified by staop. (As with the histogram, more + * than one entry could theoretically appear.) stavalues is not used and + * should be NULL. stanumbers contains a single entry, the correlation + * coefficient between the sequence of data values and the sequence of + * their actual tuple positions. The coefficient ranges from +1 to -1. + */ + #define STATISTIC_KIND_CORRELATION 3 + + /* + * A "most common elements" slot is similar to a "most common values" slot, + * except that it stores the most common non-null *elements* of the column + * values. This is useful when the column datatype is an array or some other + * type with identifiable elements (for instance, tsvector). staop contains + * the equality operator appropriate to the element type. stavalues contains + * the most common element values, and stanumbers their frequencies. Unlike + * MCV slots, the values are sorted into order (to support binary search + * for a particular value). Since this puts the minimum and maximum + * frequencies at unpredictable spots in stanumbers, there are two extra + * members of stanumbers, holding copies of the minimum and maximum + * frequencies. 
+ * + * Note: in current usage for tsvector columns, the stavalues elements are of + * type text, even though their representation within tsvector is not + * exactly text. + */ + #define STATISTIC_KIND_MCELEM 4 + + #endif + + #endif /* PG_STATISTIC3_H */ diff -dcrpN postgresql.orig/src/include/commands/defrem.h postgresql/src/include/commands/defrem.h *** postgresql.orig/src/include/commands/defrem.h 2011-04-11 15:36:27.243806451 +0200 --- postgresql/src/include/commands/defrem.h 2011-04-28 14:21:14.782173399 +0200 *************** extern char *ChooseIndexName(const char *** 50,55 **** --- 50,56 ---- bool primary, bool isconstraint); extern List *ChooseIndexColumnNames(List *indexElems); extern Oid GetDefaultOpClass(Oid type_id, Oid am_id); + extern void ExtraStatistics(ExtraStatStmt *stmt); /* commands/functioncmds.c */ extern void CreateFunction(CreateFunctionStmt *stmt, const char *queryString); diff -dcrpN postgresql.orig/src/include/nodes/nodes.h postgresql/src/include/nodes/nodes.h *** postgresql.orig/src/include/nodes/nodes.h 2011-03-22 17:53:48.045903422 +0100 --- postgresql/src/include/nodes/nodes.h 2011-04-28 14:21:14.784173265 +0200 *************** typedef enum NodeTag *** 362,367 **** --- 362,368 ---- T_CreateExtensionStmt, T_AlterExtensionStmt, T_AlterExtensionContentsStmt, + T_ExtraStatStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff -dcrpN postgresql.orig/src/include/nodes/parsenodes.h postgresql/src/include/nodes/parsenodes.h *** postgresql.orig/src/include/nodes/parsenodes.h 2011-04-26 09:54:04.106355573 +0200 --- postgresql/src/include/nodes/parsenodes.h 2011-04-28 14:21:14.789172925 +0200 *************** typedef enum DropBehavior *** 1160,1165 **** --- 1160,1178 ---- } DropBehavior; /* ---------------------- + * Create Cross Column Statistics + * ---------------------- + */ + typedef struct ExtraStatStmt + { + NodeTag type; + bool create; /* presumably true = add the statistics entry, false = remove it; confirm in ExtraStatistics() */ + RangeVar *relation; /* relation the statistics are defined on */ + List *columns; /* columns for cross column statistics */ + Node *expr; /* presumably the expression for expression statistics; confirm */ + } ExtraStatStmt; + + /* ----------------------
* Alter Table * ---------------------- */ diff -dcrpN postgresql.orig/src/include/parser/kwlist.h postgresql/src/include/parser/kwlist.h *** postgresql.orig/src/include/parser/kwlist.h 2011-03-18 13:11:36.826637445 +0100 --- postgresql/src/include/parser/kwlist.h 2011-04-28 14:21:14.790172858 +0200 *************** PG_KEYWORD("exclusive", EXCLUSIVE, UNRES *** 148,153 **** --- 148,154 ---- PG_KEYWORD("execute", EXECUTE, UNRESERVED_KEYWORD) PG_KEYWORD("exists", EXISTS, COL_NAME_KEYWORD) PG_KEYWORD("explain", EXPLAIN, UNRESERVED_KEYWORD) + PG_KEYWORD("expression", EXPRESSION, UNRESERVED_KEYWORD) PG_KEYWORD("extension", EXTENSION, UNRESERVED_KEYWORD) PG_KEYWORD("external", EXTERNAL, UNRESERVED_KEYWORD) PG_KEYWORD("extract", EXTRACT, COL_NAME_KEYWORD) diff -dcrpN postgresql.orig/src/include/parser/parse_utilcmd.h postgresql/src/include/parser/parse_utilcmd.h *** postgresql.orig/src/include/parser/parse_utilcmd.h 2011-01-04 15:13:16.163549374 +0100 --- postgresql/src/include/parser/parse_utilcmd.h 2011-04-28 14:21:14.792172725 +0200 *************** extern void transformRuleStmt(RuleStmt * *** 25,28 **** --- 25,33 ---- List **actions, Node **whereClause); extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt); + extern ExtraStatStmt *transformExtraStatistics(ExtraStatStmt *stmt, + const char *queryString); + + extern bool set_location_unknown_walker(Node *node, void *context); + #endif /* PARSE_UTILCMD_H */ diff -dcrpN postgresql.orig/src/include/utils/lsyscache.h postgresql/src/include/utils/lsyscache.h *** postgresql.orig/src/include/utils/lsyscache.h 2011-04-11 15:36:27.256805539 +0200 --- postgresql/src/include/utils/lsyscache.h 2011-04-28 14:21:14.793172658 +0200 *************** *** 16,21 **** --- 16,22 ---- #include "access/attnum.h" #include "access/htup.h" #include "nodes/pg_list.h" + #include "utils/selfuncs.h" /* I/O function selector for get_type_io_data */ typedef enum IOFuncSelector *************** extern Oid getBaseType(Oid typid); *** 131,137 **** 
extern Oid getBaseTypeAndTypmod(Oid typid, int32 *typmod); extern int32 get_typavgwidth(Oid typid, int32 typmod); extern int32 get_attavgwidth(Oid relid, AttrNumber attnum); ! extern bool get_attstatsslot(HeapTuple statstuple, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, --- 132,139 ---- extern Oid getBaseTypeAndTypmod(Oid typid, int32 *typmod); extern int32 get_typavgwidth(Oid typid, int32 typmod); extern int32 get_attavgwidth(Oid relid, AttrNumber attnum); ! ! extern bool get_attstatsslot(HeapTuple statstuple, StatType stat_type, Oid atttype, int32 atttypmod, int reqkind, Oid reqop, Oid *actualop, diff -dcrpN postgresql.orig/src/include/utils/selfuncs.h postgresql/src/include/utils/selfuncs.h *** postgresql.orig/src/include/utils/selfuncs.h 2011-04-13 10:11:05.060214051 +0200 --- postgresql/src/include/utils/selfuncs.h 2011-04-28 14:21:14.795172522 +0200 *************** *** 62,75 **** p = 1.0; \ } while (0) /* Return data from examine_variable and friends */ typedef struct VariableStatData { Node *var; /* the Var or expression tree */ RelOptInfo *rel; /* Relation, or NULL if not identifiable */ ! HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */ ! /* NB: if statsTuple!=NULL, it must be freed when caller is done */ void (*freefunc) (HeapTuple tuple); /* how to free statsTuple */ Oid vartype; /* exposed type of expression */ Oid atttype; /* type to pass to get_attstatsslot */ --- 62,81 ---- p = 1.0; \ } while (0) + typedef enum StatType { + STAT_VARIABLE, + STAT_EXPRESSION + } StatType; /* Return data from examine_variable and friends */ typedef struct VariableStatData { Node *var; /* the Var or expression tree */ RelOptInfo *rel; /* Relation, or NULL if not identifiable */ ! StatType stats_type; ! HeapTuple statsTuple; /* pg_statistic or pg_statistic3 tuple depending on stats_type ! * or NULL if none */ ! 
/* NB: if statsTuple!=NULL, it must be freed when caller is done */ void (*freefunc) (HeapTuple tuple); /* how to free statsTuple */ Oid vartype; /* exposed type of expression */ Oid atttype; /* type to pass to get_attstatsslot */ diff -dcrpN postgresql.orig/src/include/utils/syscache.h postgresql/src/include/utils/syscache.h *** postgresql.orig/src/include/utils/syscache.h 2011-02-10 10:36:32.352678334 +0100 --- postgresql/src/include/utils/syscache.h 2011-04-28 14:21:14.796172454 +0200 *************** enum SysCacheIdentifier *** 73,78 **** --- 73,79 ---- RELNAMENSP, RELOID, RULERELNAME, + STAT3RELEXPRINH, STATRELATTINH, TABLESPACEOID, TSCONFIGMAP, diff -dcrpN postgresql.orig/src/test/regress/expected/sanity_check.out postgresql/src/test/regress/expected/sanity_check.out *** postgresql.orig/src/test/regress/expected/sanity_check.out 2011-02-10 10:36:32.374676822 +0100 --- postgresql/src/test/regress/expected/sanity_check.out 2011-04-28 14:21:14.797172386 +0200 *************** SELECT relname, relhasindex *** 121,126 **** --- 121,128 ---- pg_shdepend | t pg_shdescription | t pg_statistic | t + pg_statistic2 | t + pg_statistic3 | t pg_tablespace | t pg_trigger | t pg_ts_config | t *************** SELECT relname, relhasindex *** 157,163 **** timetz_tbl | f tinterval_tbl | f varchar_tbl | f ! (146 rows) -- -- another sanity check: every system catalog that has OIDs should have --- 159,165 ---- timetz_tbl | f tinterval_tbl | f varchar_tbl | f ! (148 rows) -- -- another sanity check: every system catalog that has OIDs should have