From 2591a15ccf957fba91cc7ef00220802a0bd86788 Mon Sep 17 00:00:00 2001 From: Alexandra Wang Date: Mon, 8 Jun 2026 12:00:00 -0700 Subject: [PATCH v8 1/2] Remove stxkeys from pg_statistic_ext, unify into stxexprs Remove the stxkeys int2vector column from pg_statistic_ext. All target columns are now represented as Var nodes in stxexprs alongside complex expressions. This simplifies the catalog representation and preserves the user-written column order, which was previously lost because columns were sorted by attnum. The pg_stats_ext view is updated accordingly: the old attnames column is removed, and the exprs column (now text[]) shows all targets via a revised pg_get_statisticsobjdef_columns() function. The documentation in catalogs.sgml, perform.sgml, and system-views.sgml is updated to match. At planning, ANALYZE, and stats-import time, the unified stxexprs list is split back into a keys Bitmapset (plain Var nodes) and an exprs list (everything else). Virtual generated columns are an exception: although stored as Vars, they are classified as expressions because they receive per-expression statistics. This is a prerequisite refactor for the join statistics patch. 
Suggested-by: Tom Lane Discussion: https://postgr.es/m/711247.1779913876@sss.pgh.pa.us --- doc/src/sgml/catalogs.sgml | 18 +- doc/src/sgml/perform.sgml | 108 +++---- doc/src/sgml/system-views.sgml | 13 +- src/backend/catalog/system_views.sql | 7 +- src/backend/commands/statscmds.c | 187 ++++-------- src/backend/optimizer/util/plancat.c | 72 ++--- src/backend/parser/parse_utilcmd.c | 65 ++-- src/backend/statistics/dependencies.c | 33 +- src/backend/statistics/extended_stats.c | 107 ++++--- src/backend/statistics/extended_stats_funcs.c | 88 ++---- src/backend/statistics/mvdistinct.c | 29 +- src/backend/utils/adt/ruleutils.c | 283 ++++++++++-------- src/bin/psql/describe.c | 20 +- src/include/catalog/pg_proc.dat | 4 +- src/include/catalog/pg_statistic_ext.h | 14 +- .../statistics/extended_stats_internal.h | 7 +- src/include/statistics/statistics.h | 2 + .../regress/expected/create_table_like.out | 9 +- src/test/regress/expected/oidjoins.out | 1 - src/test/regress/expected/rules.out | 5 +- src/test/regress/expected/stats_ext.out | 39 ++- src/test/regress/expected/stats_import.out | 72 ++++- src/test/regress/sql/create_table_like.sql | 9 +- src/test/regress/sql/stats_ext.sql | 16 +- src/test/regress/sql/stats_import.sql | 60 ++++ 25 files changed, 652 insertions(+), 616 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 4b474c13917..f6f376d5281 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8323,18 +8323,6 @@ SCRAM-SHA-256$<iteration count>:&l - - - stxkeys int2vector - (references pg_attribute.attnum) - - - An array of attribute numbers, indicating which table columns are - covered by this statistics object; - for example a value of 1 3 would - mean that the first and the third table columns are covered - - @@ -8373,9 +8361,9 @@ SCRAM-SHA-256$<iteration count>:&l Expression trees (in nodeToString() - representation) for statistics object attributes that are not simple - column references. 
This is a list with one element per expression. - Null if all statistics object attributes are simple references. + representation) for all statistics object targets. Both column + references and expressions are stored here, with one element per + column or expression, in user-written order. diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 604e8578a8d..c1410a9e1e3 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1541,27 +1541,28 @@ CREATE STATISTICS stts (dependencies) ON city, zip FROM zipcodes; ANALYZE zipcodes; -SELECT stxkeys AS k, jsonb_pretty(stxddependencies::text::jsonb) AS dep +SELECT pg_get_statisticsobjdef_columns(oid) AS cols, + jsonb_pretty(stxddependencies::text::jsonb) AS dep FROM pg_statistic_ext JOIN pg_statistic_ext_data ON (oid = stxoid) WHERE stxname = 'stts'; --[ RECORD 1 ]-------------------- -k | 1 5 -dep | [ + - | { + - | "degree": 1.000000,+ - | "attributes": [ + - | 1 + - | ], + - | "dependency": 5 + - | }, + - | { + - | "degree": 0.423130,+ - | "attributes": [ + - | 5 + - | ], + - | "dependency": 1 + - | } + - | ] +-[ RECORD 1 ]--------------------- +cols | {city,zip} +dep | [ + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 1 + + | ], + + | "dependency": 5 + + | }, + + | { + + | "degree": 0.423130, + + | "attributes": [ + + | 5 + + | ], + + | "dependency": 1 + + | } + + | ] (1 row) Here it can be seen that column 1 (zip code) fully determines column @@ -1647,42 +1648,43 @@ CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes; ANALYZE zipcodes; -SELECT stxkeys AS k, jsonb_pretty(stxdndistinct::text::jsonb) AS nd +SELECT pg_get_statisticsobjdef_columns(oid) AS cols, + jsonb_pretty(stxdndistinct::text::jsonb) AS nd FROM pg_statistic_ext JOIN pg_statistic_ext_data on (oid = stxoid) WHERE stxname = 'stts2'; --[ RECORD 1 ]------------------- -k | 1 2 5 -nd | [ + - | { + - | "ndistinct": 33178,+ - | "attributes": [ + - | 1, + - | 2 + - | ] + - | }, + - | { + - | 
"ndistinct": 33178,+ - | "attributes": [ + - | 1, + - | 5 + - | ] + - | }, + - | { + - | "ndistinct": 27435,+ - | "attributes": [ + - | 2, + - | 5 + - | ] + - | }, + - | { + - | "ndistinct": 33178,+ - | "attributes": [ + - | 1, + - | 2, + - | 5 + - | ] + - | } + - | ] +-[ RECORD 1 ]-------------------- +cols | {city,state,zip} +nd | [ + + | { + + | "ndistinct": 33178,+ + | "attributes": [ + + | 1, + + | 2 + + | ] + + | }, + + | { + + | "ndistinct": 33178,+ + | "attributes": [ + + | 1, + + | 5 + + | ] + + | }, + + | { + + | "ndistinct": 27435,+ + | "attributes": [ + + | 2, + + | 5 + + | ] + + | }, + + | { + + | "ndistinct": 33178,+ + | "attributes": [ + + | 1, + + | 2, + + | 5 + + | ] + + | } + + | ] (1 row) This indicates that there are three combinations of columns that diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index 2ebec6928d5..98e4cab396a 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -4736,22 +4736,13 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx - - - attnames name[] - (references pg_attribute.attname) - - - Names of the columns included in the extended statistics object - - - exprs text[] - Expressions included in the extended statistics object + All target columns and expressions included in the extended + statistics object, in user-written order diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 8f129baec90..233e902f2b9 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -285,12 +285,7 @@ CREATE VIEW pg_stats_ext WITH (security_barrier) AS s.stxname AS statistics_name, s.oid AS statistics_id, pg_get_userbyid(s.stxowner) AS statistics_owner, - ( SELECT array_agg(a.attname ORDER BY a.attnum) - FROM unnest(s.stxkeys) k - JOIN pg_attribute a - ON (a.attrelid = s.stxrelid AND a.attnum = k) - ) AS attnames, - pg_get_statisticsobjdef_expressions(s.oid) as exprs, + 
pg_get_statisticsobjdef_columns(s.oid) AS exprs, s.stxkind AS kinds, sd.stxdinherit AS inherited, sd.stxdndistinct AS n_distinct, diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index b354723be44..30b69be1f06 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -45,26 +45,12 @@ static char *ChooseExtendedStatisticName(const char *name1, const char *name2, const char *label, Oid namespaceid); static char *ChooseExtendedStatisticNameAddition(List *exprs); - -/* qsort comparator for the attnums in CreateStatistics */ -static int -compare_int16(const void *a, const void *b) -{ - int av = *(const int16 *) a; - int bv = *(const int16 *) b; - - /* this can't overflow if int is wider than int16 */ - return (av - bv); -} - /* * CREATE STATISTICS */ ObjectAddress CreateStatistics(CreateStatsStmt *stmt, bool check_rights) { - int16 attnums[STATS_MAX_DIMENSIONS]; - int nattnums = 0; int numcols; char *namestr; NameData stxname; @@ -74,8 +60,8 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) HeapTuple htup; Datum values[Natts_pg_statistic_ext]; bool nulls[Natts_pg_statistic_ext]; - int2vector *stxkeys; List *stxexprs = NIL; + char *exprsString; Datum exprsDatum; Relation statrel; Relation rel = NULL; @@ -89,8 +75,8 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) bool build_dependencies; bool build_mcv; bool build_expressions; + bool have_vars = false; bool requested_type = false; - int i; ListCell *cell; ListCell *cell2; @@ -230,8 +216,8 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) STATS_MAX_DIMENSIONS))); /* - * Convert the expression list to a simple array of attnums, but also keep - * a list of more complex expressions. While at it, enforce some + * Convert the expression list to a list of expression trees. Simple + * column references are stored as Var nodes. 
While at it, enforce some * constraints - we don't allow extended statistics on system attributes, * and we require the data type to have a less-than operator, if we're * building multivariate statistics. @@ -286,24 +272,14 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) format_type_be(attForm->atttypid)))); } - /* Treat virtual generated columns as expressions */ - if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) - { - Node *expr; - - expr = (Node *) makeVar(1, - attForm->attnum, - attForm->atttypid, - attForm->atttypmod, - attForm->attcollation, - 0); - stxexprs = lappend(stxexprs, expr); - } - else - { - attnums[nattnums] = attForm->attnum; - nattnums++; - } + stxexprs = lappend(stxexprs, + (Node *) makeVar(1, + attForm->attnum, + attForm->atttypid, + attForm->atttypmod, + attForm->attcollation, + 0)); + have_vars = true; ReleaseSysCache(atttuple); } else if (IsA(selem->expr, Var)) /* column reference in parens */ @@ -333,16 +309,8 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) format_type_be(var->vartype)))); } - /* Treat virtual generated columns as expressions */ - if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL) - { - stxexprs = lappend(stxexprs, (Node *) var); - } - else - { - attnums[nattnums] = var->varattno; - nattnums++; - } + stxexprs = lappend(stxexprs, (Node *) var); + have_vars = true; } else /* expression */ { @@ -393,20 +361,23 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) * that we're building statistics on a single expression (or virtual * generated column). 
*/ - if (numcols < 2 && list_length(stxexprs) != 1) - ereport(ERROR, - errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("cannot create extended statistics on a single non-virtual column"), - errdetail("Univariate statistics are already built for each individual non-virtual table column.")); + if (numcols == 1) + { + Node *single = (Node *) linitial(stxexprs); - /* - * Parse the statistics kinds (not allowed when building univariate - * statistics). - */ - if (numcols == 1 && stmt->stat_types != NIL) - ereport(ERROR, - errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot specify statistics kinds when building univariate statistics")); + if (IsA(single, Var) && + get_attgenerated(relid, ((Var *) single)->varattno) != ATTRIBUTE_GENERATED_VIRTUAL) + ereport(ERROR, + errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("cannot create extended statistics on a single non-virtual column"), + errdetail("Univariate statistics are already built for each individual non-virtual table column.")); + + /* statistics kinds are not allowed with univariate statistics */ + if (stmt->stat_types != NIL) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify statistics kinds when building univariate statistics")); + } build_ndistinct = false; build_dependencies = false; @@ -449,43 +420,29 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) } /* - * When there are non-trivial expressions, build the expression stats - * automatically. This allows calculating good estimates for stats that - * consider per-clause estimates (e.g. functional dependencies). - */ - build_expressions = (stxexprs != NIL); - - /* - * Sort the attnums, which makes detecting duplicates somewhat easier, and - * it does not hurt (it does not matter for the contents, unlike for - * indexes, for example). - */ - qsort(attnums, nattnums, sizeof(int16), compare_int16); - - /* - * Check for duplicates in the list of columns. 
The attnums are sorted so - * just check consecutive elements. + * When there are non-trivial expressions, or virtual generated columns + * (which are also stored as Var nodes but treated as expressions for + * statistics purposes), build the expression stats automatically. This + * allows calculating good estimates for stats that consider per-clause + * estimates (e.g. functional dependencies). */ - for (i = 1; i < nattnums; i++) + build_expressions = false; + foreach(cell, stxexprs) { - if (attnums[i] == attnums[i - 1]) - ereport(ERROR, - (errcode(ERRCODE_DUPLICATE_COLUMN), - errmsg("duplicate column name in statistics definition"))); + Node *expr = (Node *) lfirst(cell); + + if (!IsA(expr, Var) || + get_attgenerated(relid, ((Var *) expr)->varattno) == ATTRIBUTE_GENERATED_VIRTUAL) + { + build_expressions = true; + break; + } } /* - * Check for duplicate expressions. We do two loops, counting the - * occurrences of each expression. This is O(N^2) but we only allow small - * number of expressions and it's not executed often. - * - * XXX We don't cross-check attributes and expressions, because it does - * not seem worth it. In principle we could check that expressions don't - * contain trivial attribute references like "(a)", but the reasoning is - * similar to why we don't bother with extracting columns from - * expressions. It's either expensive or very easy to defeat for - * determined user, and there's no risk if we allow such statistics (the - * statistics is useless, but harmless). + * Check for duplicates. We use equal() to compare all entries (both + * simple Var references and complex expressions). This is O(N^2) but we + * only allow a small number of entries and it's not executed often. 
*/ foreach(cell, stxexprs) { @@ -509,9 +466,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) errmsg("duplicate expression in statistics definition"))); } - /* Form an int2vector representation of the sorted column list */ - stxkeys = buildint2vector(attnums, nattnums); - /* construct the char array of enabled statistic types */ ntypes = 0; if (build_ndistinct) @@ -525,17 +479,10 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) Assert(ntypes > 0 && ntypes <= lengthof(types)); stxkind = construct_array_builtin(types, ntypes, CHAROID); - /* convert the expressions (if any) to a text datum */ - if (stxexprs != NIL) - { - char *exprsString; - - exprsString = nodeToString(stxexprs); - exprsDatum = CStringGetTextDatum(exprsString); - pfree(exprsString); - } - else - exprsDatum = (Datum) 0; + /* convert the expression list to a text datum */ + exprsString = nodeToString(stxexprs); + exprsDatum = CStringGetTextDatum(exprsString); + pfree(exprsString); statrel = table_open(StatisticExtRelationId, RowExclusiveLock); @@ -552,13 +499,9 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname); values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId); values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner); - values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys); nulls[Anum_pg_statistic_ext_stxstattarget - 1] = true; values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind); - values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum; - if (exprsDatum == (Datum) 0) - nulls[Anum_pg_statistic_ext_stxexprs - 1] = true; /* insert it into pg_statistic_ext */ htup = heap_form_tuple(statrel->rd_att, values, nulls); @@ -588,25 +531,18 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) */ ObjectAddressSet(myself, StatisticExtRelationId, statoid); - /* add dependencies for plain column references */ - for (i = 0; i < 
nattnums; i++) - { - ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]); - recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO); - } - /* * If there are no dependencies on a column, give the statistics object an * auto dependency on the whole table. In most cases, this will be - * redundant, but it might not be if the statistics expressions contain no - * Vars (which might seem strange but possible). This is consistent with - * what we do for indexes in index_create. + * redundant, but it might not be if the expressions contain no Vars or + * only whole-row Vars. This is consistent with what we do for indexes in + * index_create. * * XXX We intentionally don't consider the expressions before adding this * dependency, because recordDependencyOnSingleRelExpr may not create any * dependencies for whole-row Vars. */ - if (!nattnums) + if (!have_vars) { ObjectAddressSet(parentobject, RelationRelationId, relid); recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO); @@ -616,12 +552,11 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights) * Store dependencies on anything mentioned in statistics expressions, * just like we do for index expressions. */ - if (stxexprs) - recordDependencyOnSingleRelExpr(&myself, - (Node *) stxexprs, - relid, - DEPENDENCY_NORMAL, - DEPENDENCY_AUTO, false); + recordDependencyOnSingleRelExpr(&myself, + (Node *) stxexprs, + relid, + DEPENDENCY_NORMAL, + DEPENDENCY_AUTO, false); /* * Also add dependencies on namespace and owner. 
These are required diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 7c4be174869..8eb5e4af69a 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1748,28 +1748,18 @@ get_relation_statistics(PlannerInfo *root, RelOptInfo *rel, foreach(l, statoidlist) { Oid statOid = lfirst_oid(l); - Form_pg_statistic_ext staForm; HeapTuple htup; Bitmapset *keys = NULL; List *exprs = NIL; - int i; htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); if (!HeapTupleIsValid(htup)) elog(ERROR, "cache lookup failed for statistics object %u", statOid); - staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); /* - * First, build the array of columns covered. This is ultimately - * wasted if no stats within the object have actually been built, but - * it doesn't seem worth troubling over that case. - */ - for (i = 0; i < staForm->stxkeys.dim1; i++) - keys = bms_add_member(keys, staForm->stxkeys.values[i]); - - /* - * Preprocess expressions (if any). We read the expressions, fix the - * varnos, and run them through eval_const_expressions. + * Preprocess stxexprs. We read all entries, separate simple column + * references (into the keys Bitmapset) from complex expressions, fix + * the varnos, and run expressions through eval_const_expressions. * * XXX We don't know yet if there are any data for this stats object, * with either stxdinherit value. But it's reasonable to assume there @@ -1777,44 +1767,30 @@ get_relation_statistics(PlannerInfo *root, RelOptInfo *rel, * keys and expressions here. */ { - bool isnull; - Datum datum; + statext_decode_stxexprs(htup, relation, &keys, &exprs); - /* decode expression (if any) */ - datum = SysCacheGetAttr(STATEXTOID, htup, - Anum_pg_statistic_ext_stxexprs, &isnull); + /* + * Modify the copies we obtain from the relcache to have the + * correct varno for the parent relation, so that they match up + * correctly against qual clauses. 
+ * + * This must be done before const-simplification because + * eval_const_expressions reduces NullTest for Vars based on + * varno. + */ + if (exprs != NIL && varno != 1) + ChangeVarNodes((Node *) exprs, 1, varno, 0); - if (!isnull) + /* + * Run the expressions through eval_const_expressions. This is not + * just an optimization, but is necessary, because the planner + * will be comparing them to similarly-processed qual clauses, and + * may fail to detect valid matches without this. We must not use + * canonicalize_qual, however, since these aren't qual + * expressions. + */ + if (exprs != NIL) { - char *exprsString; - - exprsString = TextDatumGetCString(datum); - exprs = (List *) stringToNode(exprsString); - pfree(exprsString); - - /* Expand virtual generated columns in the expressions */ - exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, relation, 1); - - /* - * Modify the copies we obtain from the relcache to have the - * correct varno for the parent relation, so that they match - * up correctly against qual clauses. - * - * This must be done before const-simplification because - * eval_const_expressions reduces NullTest for Vars based on - * varno. - */ - if (varno != 1) - ChangeVarNodes((Node *) exprs, 1, varno, 0); - - /* - * Run the expressions through eval_const_expressions. This is - * not just an optimization, but is necessary, because the - * planner will be comparing them to similarly-processed qual - * clauses, and may fail to detect valid matches without this. - * We must not use canonicalize_qual, however, since these - * aren't qual expressions. 
- */ exprs = (List *) eval_const_expressions(root, (Node *) exprs); /* May as well fix opfuncids too */ diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index a049cc67ed6..d6ce6005004 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -2042,9 +2042,9 @@ generateClonedIndexStmt(RangeVar *heapRel, Relation source_idx, * extended statistic "source_statsid", for the rel identified by heapRel and * heapRelid. * - * stxkeys in the source statistic holds attribute numbers from the parent + * stxexprs in the source statistic holds Var nodes referencing the parent * relation. Those attnums, along with the attribute numbers referenced by - * Vars inside the expression tree, are remapped to the new relation's + * Vars inside complex expressions, are remapped to the new relation's * numbering according to attmap. */ static CreateStatsStmt * @@ -2052,15 +2052,16 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid, Oid source_statsid, const AttrMap *attmap) { HeapTuple ht_stats; - Form_pg_statistic_ext statsrec; CreateStatsStmt *stats; List *stat_types = NIL; List *def_names = NIL; - bool isnull; Datum datum; ArrayType *arr; char *enabled; int i; + ListCell *lc; + List *exprs = NIL; + char *exprsString; Assert(OidIsValid(heapRelid)); Assert(heapRel != NULL); @@ -2071,7 +2072,6 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid, ht_stats = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(source_statsid)); if (!HeapTupleIsValid(ht_stats)) elog(ERROR, "cache lookup failed for statistics object %u", source_statsid); - statsrec = (Form_pg_statistic_ext) GETSTRUCT(ht_stats); /* Determine which statistics types exist */ datum = SysCacheGetAttrNotNull(STATEXTOID, ht_stats, @@ -2097,44 +2097,31 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid, elog(ERROR, "unrecognized statistics kind %c", enabled[i]); } - /* Determine which columns the statistics are on */ - for (i = 0; i < 
statsrec->stxkeys.dim1; i++) - { - StatsElem *selem = makeNode(StatsElem); - AttrNumber attnum = statsrec->stxkeys.values[i]; - - selem->name = - get_attname(heapRelid, attmap->attnums[attnum - 1], false); - selem->expr = NULL; - - def_names = lappend(def_names, selem); - } - /* - * Now handle expressions, if there are any. The order (with respect to - * regular attributes) does not really matter for extended stats, so we - * simply append them after simple column references. - * - * XXX Some places during build/estimation treat expressions as if they - * are before attributes, but for the CREATE command that's entirely - * irrelevant. + * Decode stxexprs to reconstruct the column/expression list. Simple Var + * nodes represent plain column references; other nodes are complex + * expressions. */ - datum = SysCacheGetAttr(STATEXTOID, ht_stats, - Anum_pg_statistic_ext_stxexprs, &isnull); + datum = SysCacheGetAttrNotNull(STATEXTOID, ht_stats, + Anum_pg_statistic_ext_stxexprs); + exprsString = TextDatumGetCString(datum); + exprs = (List *) stringToNode(exprsString); - if (!isnull) + foreach(lc, exprs) { - ListCell *lc; - List *exprs = NIL; - char *exprsString; + Node *expr = (Node *) lfirst(lc); + StatsElem *selem = makeNode(StatsElem); - exprsString = TextDatumGetCString(datum); - exprs = (List *) stringToNode(exprsString); + if (IsA(expr, Var) && ((Var *) expr)->varattno > 0) + { + AttrNumber attnum = ((Var *) expr)->varattno; - foreach(lc, exprs) + selem->name = + get_attname(heapRelid, attmap->attnums[attnum - 1], false); + selem->expr = NULL; + } + else { - Node *expr = (Node *) lfirst(lc); - StatsElem *selem = makeNode(StatsElem); bool found_whole_row; /* Adjust Vars to match new table's column numbering */ @@ -2146,13 +2133,13 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid heapRelid, selem->name = NULL; selem->expr = expr; - - def_names = lappend(def_names, selem); } - pfree(exprsString); + def_names = lappend(def_names, selem); } + pfree(exprsString); + 
/* finally, build the output node */ stats = makeNode(CreateStatsStmt); stats->defnames = NULL; diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 95dcc218978..59ef192c2f6 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -393,7 +393,7 @@ statext_dependencies_build(StatsBuildData *data) d = (MVDependency *) palloc0(offsetof(MVDependency, attributes) + k * sizeof(AttrNumber)); - /* copy the dependency (and keep the indexes into stxkeys) */ + /* copy the dependency */ d->degree = degree; d->nattributes = k; for (i = 0; i < k; i++) @@ -598,17 +598,15 @@ statext_dependencies_free(MVDependencies *dependencies) * attributes list correspond to attnums/expressions defined by the * extended statistics object. * - * Positive attnums are attributes which must be found in the stxkeys, while - * negative attnums correspond to an expression number, no attribute number - * can be below (0 - numexprs). + * Positive attnums correspond to table columns (excluding virtual generated + * columns), while negative attnums correspond to expressions. No attribute + * number can be below (0 - numexprs). 
*/ bool statext_dependencies_validate(const MVDependencies *dependencies, - const int2vector *stxkeys, + Bitmapset *keys, int numexprs, int elevel) { - int attnum_expr_lowbound = 0 - numexprs; - /* Scan through each dependency entry */ for (int i = 0; i < dependencies->ndeps; i++) { @@ -621,27 +619,8 @@ statext_dependencies_validate(const MVDependencies *dependencies, for (int j = 0; j < dep->nattributes; j++) { AttrNumber attnum = dep->attributes[j]; - bool ok = false; - - if (attnum > 0) - { - /* attribute number in stxkeys */ - for (int k = 0; k < stxkeys->dim1; k++) - { - if (attnum == stxkeys->values[k]) - { - ok = true; - break; - } - } - } - else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) - { - /* attribute number for an expression */ - ok = true; - } - if (!ok) + if (!statext_is_valid_attnum(attnum, keys, numexprs)) { ereport(elevel, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 2b83355d26e..b2ea8d8c11a 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -447,6 +447,77 @@ statext_is_kind_built(HeapTuple htup, char type) return !heap_attisnull(htup, attnum, NULL); } +/* + * statext_decode_stxexprs + * Decode the stxexprs field of a pg_statistic_ext tuple. + * + * Deserializes the expression list, expands virtual generated columns, + * separates simple Var references (into keys Bitmapset) from complex + * expressions (into exprs list), and const-folds the expressions (as + * RelationGetIndexExpressions does). 
+ */ +void +statext_decode_stxexprs(HeapTuple htup, Relation rel, + Bitmapset **keys, List **exprs) +{ + Datum datum; + char *exprsString; + List *allexprs; + ListCell *lc; + + datum = SysCacheGetAttrNotNull(STATEXTOID, htup, + Anum_pg_statistic_ext_stxexprs); + exprsString = TextDatumGetCString(datum); + allexprs = (List *) stringToNode(exprsString); + pfree(exprsString); + + /* Expand virtual generated columns in the expressions */ + allexprs = (List *) expand_generated_columns_in_expr((Node *) allexprs, rel, 1); + + foreach(lc, allexprs) + { + Node *expr = (Node *) lfirst(lc); + + if (IsA(expr, Var) && ((Var *) expr)->varattno > 0) + *keys = bms_add_member(*keys, ((Var *) expr)->varattno); + else + *exprs = lappend(*exprs, expr); + } + + if (*exprs != NIL) + { + /* + * Run the expressions through eval_const_expressions. This is not + * just an optimization, but is necessary, because the planner will be + * comparing them to similarly-processed qual clauses, and may fail to + * detect valid matches without this. We must not use + * canonicalize_qual, however, since these aren't qual expressions. + */ + *exprs = (List *) eval_const_expressions(NULL, (Node *) *exprs); + + /* May as well fix opfuncids too */ + fix_opfuncids((Node *) *exprs); + } +} + +/* + * statext_is_valid_attnum + * Is attnum a valid reference within an extended statistics object? + * + * A positive attnum is a simple column and must be a member of the object's + * keys; a negative attnum references an expression and must be within the + * range [-numexprs, -1]. Used to validate imported stats against the object + * definition. + */ +bool +statext_is_valid_attnum(AttrNumber attnum, Bitmapset *keys, int numexprs) +{ + if (attnum > 0) + return bms_is_member(attnum, keys); + + return (attnum < 0) && (attnum >= 0 - numexprs); +} + /* * Return a list (of StatExtEntry) of statistics objects for the given relation. 
*/ @@ -487,11 +558,6 @@ fetch_statentries_for_relation(Relation pg_statext, Relation rel) entry->statOid = staForm->oid; entry->schema = get_namespace_name(staForm->stxnamespace); entry->name = pstrdup(NameStr(staForm->stxname)); - for (i = 0; i < staForm->stxkeys.dim1; i++) - { - entry->columns = bms_add_member(entry->columns, - staForm->stxkeys.values[i]); - } datum = SysCacheGetAttr(STATEXTOID, htup, Anum_pg_statistic_ext_stxstattarget, &isnull); entry->stattarget = isnull ? -1 : DatumGetInt16(datum); @@ -514,35 +580,8 @@ fetch_statentries_for_relation(Relation pg_statext, Relation rel) entry->types = lappend_int(entry->types, (int) enabled[i]); } - /* decode expression (if any) */ - datum = SysCacheGetAttr(STATEXTOID, htup, - Anum_pg_statistic_ext_stxexprs, &isnull); - - if (!isnull) - { - char *exprsString; - - exprsString = TextDatumGetCString(datum); - exprs = (List *) stringToNode(exprsString); - - pfree(exprsString); - - /* Expand virtual generated columns in the expressions */ - exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, rel, 1); - - /* - * Run the expressions through eval_const_expressions. This is not - * just an optimization, but is necessary, because the planner - * will be comparing them to similarly-processed qual clauses, and - * may fail to detect valid matches without this. We must not use - * canonicalize_qual, however, since these aren't qual - * expressions. 
- */ - exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); - - /* May as well fix opfuncids too */ - fix_opfuncids((Node *) exprs); - } + /* Decode stxexprs into entry->columns and exprs (const-folded) */ + statext_decode_stxexprs(htup, rel, &entry->columns, &exprs); entry->exprs = exprs; diff --git a/src/backend/statistics/extended_stats_funcs.c b/src/backend/statistics/extended_stats_funcs.c index 2cb3942056f..5eb09c1fa91 100644 --- a/src/backend/statistics/extended_stats_funcs.c +++ b/src/backend/statistics/extended_stats_funcs.c @@ -17,6 +17,7 @@ #include "postgres.h" #include "access/heapam.h" +#include "access/table.h" #include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_collation_d.h" @@ -331,9 +332,9 @@ extended_statistics_update(FunctionCallInfo fcinfo) bool nulls[Natts_pg_statistic_ext_data] = {0}; bool replaces[Natts_pg_statistic_ext_data] = {0}; bool success = true; - Datum exprdatum; - bool isnull; + Relation rel; List *exprs = NIL; + Bitmapset *keys = NULL; int numattnums = 0; int numexprs = 0; int numattrs = 0; @@ -439,38 +440,15 @@ extended_statistics_update(FunctionCallInfo fcinfo) /* Find out what extended statistics kinds we should expect. */ expand_stxkind(tup, &enabled); - numattnums = stxform->stxkeys.dim1; - - /* decode expression (if any) */ - exprdatum = SysCacheGetAttr(STATEXTOID, - tup, - Anum_pg_statistic_ext_stxexprs, - &isnull); - if (!isnull) - { - char *s; - s = TextDatumGetCString(exprdatum); - exprs = (List *) stringToNode(s); - pfree(s); - - /* - * Run the expressions through eval_const_expressions(). This is not - * just an optimization, but is necessary, because the planner will be - * comparing them to similarly-processed qual clauses, and may fail to - * detect valid matches without this. - * - * We must not use canonicalize_qual(), however, since these are not - * qual expressions. 
- */ - exprs = (List *) eval_const_expressions(NULL, (Node *) exprs); + /* Decode stxexprs into keys and exprs (const-folded) */ + rel = table_open(relid, NoLock); + statext_decode_stxexprs(tup, rel, &keys, &exprs); + table_close(rel, NoLock); - /* May as well fix opfuncids too */ - fix_opfuncids((Node *) exprs); + numattnums = bms_num_members(keys); - /* Compute the number of expression, for input validation. */ - numexprs = list_length(exprs); - } + numexprs = list_length(exprs); numattrs = numattnums + numexprs; @@ -580,54 +558,54 @@ extended_statistics_update(FunctionCallInfo fcinfo) */ if (has.mcv || has.expressions) { + int idx; + int k; + atttypids = palloc0_array(Oid, numattrs); atttypmods = palloc0_array(int32, numattrs); atttypcolls = palloc0_array(Oid, numattrs); /* - * The leading stxkeys are attribute numbers up through numattnums. - * These keys must be in ascending AttrNumber order, but we do not - * rely on that. + * Get type info for plain column attributes (from the keys + * Bitmapset). 
*/ - for (int i = 0; i < numattnums; i++) + idx = 0; + k = -1; + while ((k = bms_next_member(keys, k)) >= 0) { - AttrNumber attnum = stxform->stxkeys.values[i]; + AttrNumber attnum = (AttrNumber) k; HeapTuple atup = SearchSysCache2(ATTNUM, ObjectIdGetDatum(relid), Int16GetDatum(attnum)); - Form_pg_attribute attr; /* Attribute not found */ if (!HeapTupleIsValid(atup)) - elog(ERROR, "stxkeys references nonexistent attnum %d", attnum); + elog(ERROR, "stxexprs references nonexistent attnum %d", attnum); attr = (Form_pg_attribute) GETSTRUCT(atup); if (attr->attisdropped) - elog(ERROR, "stxkeys references dropped attnum %d", attnum); + elog(ERROR, "stxexprs references dropped attnum %d", attnum); - atttypids[i] = attr->atttypid; - atttypmods[i] = attr->atttypmod; - atttypcolls[i] = attr->attcollation; + atttypids[idx] = attr->atttypid; + atttypmods[idx] = attr->atttypmod; + atttypcolls[idx] = attr->attcollation; ReleaseSysCache(atup); + idx++; } /* - * After all the positive number attnums in stxkeys come the negative - * numbers (if any) which represent expressions in the order that they - * appear in stxdexpr. Because the expressions are always - * monotonically decreasing from -1, there is no point in looking at - * the values in stxkeys, it's enough to know how many of them there - * are. + * Get type info for expressions. 
*/ - for (int i = numattnums; i < numattrs; i++) + for (int i = 0; i < numexprs; i++) { - Node *expr = list_nth(exprs, i - numattnums); + Node *expr = list_nth(exprs, i); - atttypids[i] = exprType(expr); - atttypmods[i] = exprTypmod(expr); - atttypcolls[i] = exprCollation(expr); + atttypids[idx] = exprType(expr); + atttypmods[idx] = exprTypmod(expr); + atttypcolls[idx] = exprCollation(expr); + idx++; } } @@ -659,7 +637,7 @@ extended_statistics_update(FunctionCallInfo fcinfo) bytea *data = DatumGetByteaPP(ndistinct_datum); MVNDistinct *ndistinct = statext_ndistinct_deserialize(data); - if (statext_ndistinct_validate(ndistinct, &stxform->stxkeys, + if (statext_ndistinct_validate(ndistinct, keys, numexprs, WARNING)) { values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = ndistinct_datum; @@ -678,7 +656,7 @@ extended_statistics_update(FunctionCallInfo fcinfo) bytea *data = DatumGetByteaPP(dependencies_datum); MVDependencies *dependencies = statext_dependencies_deserialize(data); - if (statext_dependencies_validate(dependencies, &stxform->stxkeys, + if (statext_dependencies_validate(dependencies, keys, numexprs, WARNING)) { values[Anum_pg_statistic_ext_data_stxddependencies - 1] = dependencies_datum; diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 4f8f578a22f..431d7440b9e 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -344,17 +344,15 @@ statext_ndistinct_free(MVNDistinct *ndistinct) * attributes list correspond to attnums/expressions defined by the extended * statistics object. * - * Positive attnums are attributes which must be found in the stxkeys, - * while negative attnums correspond to an expression number, no attribute + * Positive attnums correspond to table columns (excluding virtual generated + * columns), while negative attnums correspond to expressions. No attribute * number can be below (0 - numexprs). 
*/ bool statext_ndistinct_validate(const MVNDistinct *ndistinct, - const int2vector *stxkeys, + Bitmapset *keys, int numexprs, int elevel) { - int attnum_expr_lowbound = 0 - numexprs; - /* Scan through each MVNDistinct entry */ for (int i = 0; i < ndistinct->nitems; i++) { @@ -367,27 +365,8 @@ statext_ndistinct_validate(const MVNDistinct *ndistinct, for (int j = 0; j < item.nattributes; j++) { AttrNumber attnum = item.attributes[j]; - bool ok = false; - - if (attnum > 0) - { - /* attribute number in stxkeys */ - for (int k = 0; k < stxkeys->dim1; k++) - { - if (attnum == stxkeys->values[k]) - { - ok = true; - break; - } - } - } - else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) - { - /* attribute number for an expression */ - ok = true; - } - if (!ok) + if (!statext_is_valid_attnum(attnum, keys, numexprs)) { ereport(elevel, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 88de5c0481c..e889e848763 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -369,7 +369,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno, static void make_propgraphdef_elements(StringInfo buf, Oid pgrelid, char pgekind); static void make_propgraphdef_labels(StringInfo buf, Oid elid, const char *elalias, Oid elrelid); static void make_propgraphdef_properties(StringInfo buf, Oid ellabelid, Oid elrelid); -static char *pg_get_statisticsobj_worker(Oid statextid, bool columns_only, +static char *pg_get_statisticsobj_worker(Oid statextid, bool missing_ok); static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags, bool attrsOnly, bool missing_ok); @@ -1969,7 +1969,7 @@ pg_get_statisticsobjdef(PG_FUNCTION_ARGS) Oid statextid = PG_GETARG_OID(0); char *res; - res = pg_get_statisticsobj_worker(statextid, false, true); + res = pg_get_statisticsobj_worker(statextid, true); if (res == NULL) PG_RETURN_NULL(); @@ -1984,32 +1984,91 @@ 
pg_get_statisticsobjdef(PG_FUNCTION_ARGS) char * pg_get_statisticsobjdef_string(Oid statextid) { - return pg_get_statisticsobj_worker(statextid, false, false); + return pg_get_statisticsobj_worker(statextid, false); +} + +/* + * Deparse a single statistics target (column or expression) into a string. + * Plain column Vars are emitted as quoted identifiers; complex expressions + * are deparsed and wrapped in parentheses unless they look like function calls. + */ +static char * +deparse_stat_target(Node *expr, Oid relid, List *context) +{ + char *str; + + if (IsA(expr, Var) && ((Var *) expr)->varattno > 0) + return pstrdup(quote_identifier( + get_attname(relid, ((Var *) expr)->varattno, false))); + + str = deparse_expression_pretty(expr, context, false, false, + PRETTYFLAG_PAREN, 0); + if (looks_like_function(expr)) + return str; + return psprintf("(%s)", str); } /* * pg_get_statisticsobjdef_columns - * Get columns and expressions for an extended statistics object + * Get all target columns and expressions for an extended statistics + * object as a text array. Plain columns are emitted as column names, + * complex expressions are deparsed. 
*/ Datum pg_get_statisticsobjdef_columns(PG_FUNCTION_ARGS) { Oid statextid = PG_GETARG_OID(0); - char *res; + Form_pg_statistic_ext statextrec; + HeapTuple statexttup; + Datum datum; + List *allexprs = NIL; + char *tmp; + List *context; + ListCell *lc; + ArrayBuildState *astate = NULL; - res = pg_get_statisticsobj_worker(statextid, true, true); + statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); - if (res == NULL) + if (!HeapTupleIsValid(statexttup)) PG_RETURN_NULL(); - PG_RETURN_TEXT_P(string_to_text(res)); + statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); + + datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, + Anum_pg_statistic_ext_stxexprs); + tmp = TextDatumGetCString(datum); + allexprs = (List *) stringToNode(tmp); + pfree(tmp); + + context = deparse_context_for(get_relation_name(statextrec->stxrelid), + statextrec->stxrelid); + + foreach(lc, allexprs) + { + Node *expr = (Node *) lfirst(lc); + char *str; + + str = deparse_stat_target(expr, statextrec->stxrelid, context); + astate = accumArrayResult(astate, + PointerGetDatum(cstring_to_text(str)), + false, + TEXTOID, + CurrentMemoryContext); + } + + ReleaseSysCache(statexttup); + + if (astate == NULL) + PG_RETURN_NULL(); + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); } /* * Internal workhorse to decompile an extended statistics object. 
*/ static char * -pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok) +pg_get_statisticsobj_worker(Oid statextid, bool missing_ok) { Form_pg_statistic_ext statextrec; HeapTuple statexttup; @@ -2019,6 +2078,7 @@ pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok) ArrayType *arr; char *enabled; Datum datum; + char *exprsString; bool ndistinct_enabled; bool dependencies_enabled; bool mcv_enabled; @@ -2026,7 +2086,6 @@ pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok) List *context; ListCell *lc; List *exprs = NIL; - bool has_exprs; int ncolumns; statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); @@ -2038,151 +2097,111 @@ pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok) elog(ERROR, "cache lookup failed for statistics object %u", statextid); } - /* has the statistics expressions? */ - has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL); - statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); /* - * Get the statistics expressions, if any. (NOTE: we do not use the - * relcache versions of the expressions, because we want to display - * non-const-folded expressions.) + * Get all statistics expressions. (NOTE: we do not use the relcache + * versions, because we want to display non-const-folded expressions.) 
*/ - if (has_exprs) - { - Datum exprsDatum; - char *exprsString; - - exprsDatum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, - Anum_pg_statistic_ext_stxexprs); - exprsString = TextDatumGetCString(exprsDatum); - exprs = (List *) stringToNode(exprsString); - pfree(exprsString); - } - else - exprs = NIL; + datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, + Anum_pg_statistic_ext_stxexprs); + exprsString = TextDatumGetCString(datum); + exprs = (List *) stringToNode(exprsString); + pfree(exprsString); - /* count the number of columns (attributes and expressions) */ - ncolumns = statextrec->stxkeys.dim1 + list_length(exprs); + ncolumns = list_length(exprs); initStringInfo(&buf); - if (!columns_only) - { - nsp = get_namespace_name_or_temp(statextrec->stxnamespace); - appendStringInfo(&buf, "CREATE STATISTICS %s", - quote_qualified_identifier(nsp, - NameStr(statextrec->stxname))); - - /* - * Decode the stxkind column so that we know which stats types to - * print. - */ - datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, - Anum_pg_statistic_ext_stxkind); - arr = DatumGetArrayTypeP(datum); - if (ARR_NDIM(arr) != 1 || - ARR_HASNULL(arr) || - ARR_ELEMTYPE(arr) != CHAROID) - elog(ERROR, "stxkind is not a 1-D char array"); - enabled = (char *) ARR_DATA_PTR(arr); + nsp = get_namespace_name_or_temp(statextrec->stxnamespace); + appendStringInfo(&buf, "CREATE STATISTICS %s", + quote_qualified_identifier(nsp, + NameStr(statextrec->stxname))); - ndistinct_enabled = false; - dependencies_enabled = false; - mcv_enabled = false; - - for (i = 0; i < ARR_DIMS(arr)[0]; i++) - { - if (enabled[i] == STATS_EXT_NDISTINCT) - ndistinct_enabled = true; - else if (enabled[i] == STATS_EXT_DEPENDENCIES) - dependencies_enabled = true; - else if (enabled[i] == STATS_EXT_MCV) - mcv_enabled = true; + /* + * Decode the stxkind column so that we know which stats types to print. 
+ */ + datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, + Anum_pg_statistic_ext_stxkind); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "stxkind is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); - /* ignore STATS_EXT_EXPRESSIONS (it's built automatically) */ - } + ndistinct_enabled = false; + dependencies_enabled = false; + mcv_enabled = false; - /* - * If any option is disabled, then we'll need to append the types - * clause to show which options are enabled. We omit the types clause - * on purpose when all options are enabled, so a pg_dump/pg_restore - * will create all statistics types on a newer postgres version, if - * the statistics had all options enabled on the original version. - * - * But if the statistics is defined on just a single column, it has to - * be an expression statistics. In that case we don't need to specify - * kinds. - */ - if ((!ndistinct_enabled || !dependencies_enabled || !mcv_enabled) && - (ncolumns > 1)) - { - bool gotone = false; - - appendStringInfoString(&buf, " ("); + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + if (enabled[i] == STATS_EXT_NDISTINCT) + ndistinct_enabled = true; + else if (enabled[i] == STATS_EXT_DEPENDENCIES) + dependencies_enabled = true; + else if (enabled[i] == STATS_EXT_MCV) + mcv_enabled = true; - if (ndistinct_enabled) - { - appendStringInfoString(&buf, "ndistinct"); - gotone = true; - } + /* ignore STATS_EXT_EXPRESSIONS (it's built automatically) */ + } - if (dependencies_enabled) - { - appendStringInfo(&buf, "%sdependencies", gotone ? ", " : ""); - gotone = true; - } + /* + * If any option is disabled, then we'll need to append the types clause + * to show which options are enabled. 
We omit the types clause on purpose + * when all options are enabled, so a pg_dump/pg_restore will create all + * statistics types on a newer postgres version, if the statistics had all + * options enabled on the original version. + * + * But if the statistics is defined on just a single column, it has to be + * an expression statistics. In that case we don't need to specify kinds. + */ + if ((!ndistinct_enabled || !dependencies_enabled || !mcv_enabled) && + (ncolumns > 1)) + { + bool gotone = false; - if (mcv_enabled) - appendStringInfo(&buf, "%smcv", gotone ? ", " : ""); + appendStringInfoString(&buf, " ("); - appendStringInfoChar(&buf, ')'); + if (ndistinct_enabled) + { + appendStringInfoString(&buf, "ndistinct"); + gotone = true; } - appendStringInfoString(&buf, " ON "); - } - - /* decode simple column references */ - for (colno = 0; colno < statextrec->stxkeys.dim1; colno++) - { - AttrNumber attnum = statextrec->stxkeys.values[colno]; - char *attname; - - if (colno > 0) - appendStringInfoString(&buf, ", "); + if (dependencies_enabled) + { + appendStringInfo(&buf, "%sdependencies", gotone ? ", " : ""); + gotone = true; + } - attname = get_attname(statextrec->stxrelid, attnum, false); + if (mcv_enabled) + appendStringInfo(&buf, "%smcv", gotone ? 
", " : ""); - appendStringInfoString(&buf, quote_identifier(attname)); + appendStringInfoChar(&buf, ')'); } + appendStringInfoString(&buf, " ON "); + context = deparse_context_for(get_relation_name(statextrec->stxrelid), statextrec->stxrelid); + colno = 0; foreach(lc, exprs) { Node *expr = (Node *) lfirst(lc); - char *str; - int prettyFlags = PRETTYFLAG_PAREN; - - str = deparse_expression_pretty(expr, context, false, false, - prettyFlags, 0); if (colno > 0) appendStringInfoString(&buf, ", "); - /* Need parens if it's not a bare function call */ - if (looks_like_function(expr)) - appendStringInfoString(&buf, str); - else - appendStringInfo(&buf, "(%s)", str); - + appendStringInfoString(&buf, + deparse_stat_target(expr, statextrec->stxrelid, + context)); colno++; } - if (!columns_only) - appendStringInfo(&buf, " FROM %s", - generate_relation_name(statextrec->stxrelid, NIL)); + appendStringInfo(&buf, " FROM %s", + generate_relation_name(statextrec->stxrelid, NIL)); ReleaseSysCache(statexttup); @@ -2202,7 +2221,6 @@ pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS) List *context; ListCell *lc; List *exprs = NIL; - bool has_exprs; char *tmp; ArrayBuildState *astate = NULL; @@ -2211,20 +2229,11 @@ pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS) if (!HeapTupleIsValid(statexttup)) PG_RETURN_NULL(); - /* Does the stats object have expressions? */ - has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL); - - /* no expressions? we're done */ - if (!has_exprs) - { - ReleaseSysCache(statexttup); - PG_RETURN_NULL(); - } - statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); /* - * Get the statistics expressions, and deparse them into text values. + * Get all statistics expressions, and deparse the non-trivial ones (skip + * simple Var references which represent plain columns). 
*/ datum = SysCacheGetAttrNotNull(STATEXTOID, statexttup, Anum_pg_statistic_ext_stxexprs); @@ -2241,6 +2250,11 @@ pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS) char *str; int prettyFlags = PRETTYFLAG_INDENT; + /* Skip plain column references (but not virtual generated columns) */ + if (IsA(expr, Var) && ((Var *) expr)->varattno > 0 && + get_attgenerated(statextrec->stxrelid, ((Var *) expr)->varattno) != ATTRIBUTE_GENERATED_VIRTUAL) + continue; + str = deparse_expression_pretty(expr, context, false, false, prettyFlags, 0); @@ -2253,6 +2267,9 @@ pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS) ReleaseSysCache(statexttup); + if (astate == NULL) + PG_RETURN_NULL(); + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); } diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index af3935b0078..1c55c52c101 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2915,8 +2915,15 @@ describeOneTableDetails(const char *schemaname, "SELECT oid, " "stxrelid::pg_catalog.regclass, " "stxnamespace::pg_catalog.regnamespace::pg_catalog.text AS nsp, " - "stxname,\n" - "pg_catalog.pg_get_statisticsobjdef_columns(oid) AS columns,\n" + "stxname,\n"); + /* TODO: update threshold to 200000 when PG20 version is assigned */ + if (pset.sversion >= 190000) + appendPQExpBufferStr(&buf, + "pg_catalog.array_to_string(pg_catalog.pg_get_statisticsobjdef_columns(oid), ', ') AS columns,\n"); + else + appendPQExpBufferStr(&buf, + "pg_catalog.pg_get_statisticsobjdef_columns(oid) AS columns,\n"); + appendPQExpBuffer(&buf, " " CppAsString2(STATS_EXT_NDISTINCT) " = any(stxkind) AS ndist_enabled,\n" " " CppAsString2(STATS_EXT_DEPENDENCIES) " = any(stxkind) AS deps_enabled,\n" " " CppAsString2(STATS_EXT_MCV) " = any(stxkind) AS mcv_enabled,\n" @@ -5133,7 +5140,14 @@ listExtendedStats(const char *pattern, bool verbose) gettext_noop("Schema"), gettext_noop("Name")); - if (pset.sversion >= 140000) + /* TODO: update threshold to 200000 when PG20 version is 
assigned */ + if (pset.sversion >= 190000) + appendPQExpBuffer(&buf, + "pg_catalog.format('%%s FROM %%s', \n" + " pg_catalog.array_to_string(pg_catalog.pg_get_statisticsobjdef_columns(es.oid), ', '), \n" + " es.stxrelid::pg_catalog.regclass) AS \"%s\"", + gettext_noop("Definition")); + else if (pset.sversion >= 140000) appendPQExpBuffer(&buf, "pg_catalog.format('%%s FROM %%s', \n" " pg_catalog.pg_get_statisticsobjdef_columns(es.oid), \n" diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index be157a5fbe9..7b6821ff16a 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -4001,9 +4001,9 @@ proname => 'pg_get_statisticsobjdef', provolatile => 's', prorettype => 'text', proargtypes => 'oid', prosrc => 'pg_get_statisticsobjdef' }, -{ oid => '6174', descr => 'extended statistics columns', +{ oid => '6174', descr => 'extended statistics columns and expressions', proname => 'pg_get_statisticsobjdef_columns', provolatile => 's', - prorettype => 'text', proargtypes => 'oid', + prorettype => '_text', proargtypes => 'oid', prosrc => 'pg_get_statisticsobjdef_columns' }, { oid => '6173', descr => 'extended statistics expressions', proname => 'pg_get_statisticsobjdef_expressions', provolatile => 's', diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h index e4a0cb4d41c..e8757bc6d76 100644 --- a/src/include/catalog/pg_statistic_ext.h +++ b/src/include/catalog/pg_statistic_ext.h @@ -46,19 +46,14 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId) Oid stxowner BKI_LOOKUP(pg_authid); /* statistics object's owner */ - /* - * variable-length/nullable fields start here, but we allow direct access - * to stxkeys - */ - int2vector stxkeys BKI_FORCE_NOT_NULL; /* array of column keys */ - #ifdef CATALOG_VARLEN int16 stxstattarget BKI_DEFAULT(_null_) BKI_FORCE_NULL; /* statistics target */ char stxkind[1] BKI_FORCE_NOT_NULL; /* statistics kinds requested * to build */ - pg_node_tree 
stxexprs; /* A list of expression trees for stats - * attributes that are not simple column - * references. */ + pg_node_tree stxexprs BKI_FORCE_NOT_NULL; /* A list of expression trees + * for all stats attributes, + * including simple column + * references as Var nodes */ #endif } FormData_pg_statistic_ext; @@ -81,7 +76,6 @@ DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, StatisticExtRelidIndexId, pg_s MAKE_SYSCACHE(STATEXTOID, pg_statistic_ext_oid_index, 4); MAKE_SYSCACHE(STATEXTNAMENSP, pg_statistic_ext_name_index, 4); -DECLARE_ARRAY_FOREIGN_KEY((stxrelid, stxkeys), pg_attribute, (attrelid, attnum)); #ifdef EXPOSE_TO_CLIENT_CODE diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index c775442f2ee..d92f735cec1 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -73,7 +73,7 @@ extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *da extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct); extern MVNDistinct *statext_ndistinct_deserialize(bytea *data); extern bool statext_ndistinct_validate(const MVNDistinct *ndistinct, - const int2vector *stxkeys, + Bitmapset *keys, int numexprs, int elevel); extern void statext_ndistinct_free(MVNDistinct *ndistinct); @@ -81,10 +81,13 @@ extern MVDependencies *statext_dependencies_build(StatsBuildData *data); extern bytea *statext_dependencies_serialize(MVDependencies *dependencies); extern MVDependencies *statext_dependencies_deserialize(bytea *data); extern bool statext_dependencies_validate(const MVDependencies *dependencies, - const int2vector *stxkeys, + Bitmapset *keys, int numexprs, int elevel); extern void statext_dependencies_free(MVDependencies *dependencies); +extern bool statext_is_valid_attnum(AttrNumber attnum, Bitmapset *keys, + int numexprs); + extern MCVList *statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget); extern bytea 
*statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats); diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 8f9b9d237fd..a6c5a07dca6 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -127,5 +127,7 @@ extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, List **clause_exprs, int nclauses); extern HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx); +extern void statext_decode_stxexprs(HeapTuple htup, Relation rel, + Bitmapset **keys, List **exprs); #endif /* STATISTICS_H */ diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out index 76069bde756..057fcf91469 100644 --- a/src/test/regress/expected/create_table_like.out +++ b/src/test/regress/expected/create_table_like.out @@ -699,7 +699,7 @@ SELECT attname, attcompression FROM pg_attribute (5 rows) -- LIKE ... INCLUDING STATISTICS with dropped columns in the parent, --- so stxkeys attnums are not contiguous. +-- so column attnums are not contiguous. 
CREATE TABLE ctl_stats3_parent (a int, b int, c int); ALTER TABLE ctl_stats3_parent DROP COLUMN b; CREATE STATISTICS ctl_stats3_stat ON a, c FROM ctl_stats3_parent; @@ -709,15 +709,10 @@ ALTER TABLE ctl_stats4_parent DROP COLUMN b; CREATE STATISTICS ctl_stats4_stat ON a, c FROM ctl_stats4_parent; CREATE TABLE ctl_stats4_child (LIKE ctl_stats4_parent INCLUDING STATISTICS); SELECT s.stxrelid::regclass AS relation, - array_agg(a.attname ORDER BY u.ord) AS stats_columns + pg_get_statisticsobjdef_columns(s.oid) AS stats_columns FROM pg_statistic_ext s -CROSS JOIN LATERAL - unnest(s.stxkeys::int2[]) WITH ORDINALITY AS u(attnum, ord) -JOIN pg_attribute a - ON a.attrelid = s.stxrelid AND a.attnum = u.attnum WHERE s.stxrelid IN ('ctl_stats3_child'::regclass, 'ctl_stats4_child'::regclass) -GROUP BY s.stxrelid ORDER BY s.stxrelid::regclass::text; relation | stats_columns ------------------+--------------- diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out index d64169b7bf0..0e6bce84a60 100644 --- a/src/test/regress/expected/oidjoins.out +++ b/src/test/regress/expected/oidjoins.out @@ -166,7 +166,6 @@ NOTICE: checking pg_statistic {starelid,staattnum} => pg_attribute {attrelid,at NOTICE: checking pg_statistic_ext {stxrelid} => pg_class {oid} NOTICE: checking pg_statistic_ext {stxnamespace} => pg_namespace {oid} NOTICE: checking pg_statistic_ext {stxowner} => pg_authid {oid} -NOTICE: checking pg_statistic_ext {stxrelid,stxkeys} => pg_attribute {attrelid,attnum} NOTICE: checking pg_statistic_ext_data {stxoid} => pg_statistic_ext {oid} NOTICE: checking pg_rewrite {ev_class} => pg_class {oid} NOTICE: checking pg_trigger {tgrelid} => pg_class {oid} diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index a65a5bf0c4f..e0746360263 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2704,10 +2704,7 @@ pg_stats_ext| SELECT cn.nspname AS schemaname, s.stxname AS 
statistics_name, s.oid AS statistics_id, pg_get_userbyid(s.stxowner) AS statistics_owner, - ( SELECT array_agg(a.attname ORDER BY a.attnum) AS array_agg - FROM (unnest(s.stxkeys) k(k) - JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))) AS attnames, - pg_get_statisticsobjdef_expressions(s.oid) AS exprs, + pg_get_statisticsobjdef_columns(s.oid) AS exprs, s.stxkind AS kinds, sd.stxdinherit AS inherited, sd.stxdndistinct AS n_distinct, diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 37070c1a896..3f8d69446ff 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -43,7 +43,7 @@ ERROR: relation "nonexistent" does not exist CREATE STATISTICS tst ON a, b FROM ext_stats_test; ERROR: column "a" does not exist CREATE STATISTICS tst ON x, x, y FROM ext_stats_test; -ERROR: duplicate column name in statistics definition +ERROR: duplicate expression in statistics definition CREATE STATISTICS tst ON x, x, y, x, x, y, x, x, y FROM ext_stats_test; ERROR: cannot have more than 8 columns in statistics CREATE STATISTICS tst ON x, x, y, x, x, (x || 'x'), (y + 1), (x || 'x'), (x || 'x'), (y + 1) FROM ext_stats_test; @@ -3177,6 +3177,23 @@ SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE d = 0 10 | 10 (1 row) +SELECT expr FROM pg_stats_ext_exprs + WHERE statistics_name = 'virtual_gen_stats_1' AND NOT inherited; + expr +--------------- + c + (3 * b) + d + (d - (2 * a)) +(4 rows) + +SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext + WHERE stxname = 'virtual_gen_stats_1'; + pg_get_statisticsobjdef +--------------------------------------------------------------------------------------------------------- + CREATE STATISTICS public.virtual_gen_stats_1 (mcv) ON c, (3 * b), d, (d - 2 * a) FROM virtual_gen_stats +(1 row) + -- univariate statistics on individual virtual generated columns DROP STATISTICS virtual_gen_stats_1; SELECT * FROM 
check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE c = 0'); @@ -3692,12 +3709,12 @@ CREATE STATISTICS stats_ext_range (mcv) ON irange, (irange + '[4,10)'::int4range) FROM stats_ext_tbl_range; ANALYZE stats_ext_tbl_range; -SELECT attnames, most_common_vals +SELECT exprs, most_common_vals FROM pg_stats_ext WHERE statistics_name = 'stats_ext_range'; - attnames | most_common_vals -----------+------------------------------------------------------------ - {irange} | {{"[1,7)","[1,10)"},{"[2,9)","[2,10)"},{"[3,9)","[3,10)"}} + exprs | most_common_vals +-------------------------------------------+------------------------------------------------------------ + {irange,"(irange + '[4,10)'::int4range)"} | {{"[1,7)","[1,10)"},{"[2,9)","[2,10)"},{"[3,9)","[3,10)"}} (1 row) SELECT range_length_histogram, range_empty_frac, range_bounds_histogram @@ -3709,3 +3726,15 @@ SELECT range_length_histogram, range_empty_frac, range_bounds_histogram (1 row) DROP TABLE stats_ext_tbl_range; +-- Verify user-written ordering of columns and expressions is preserved. +-- Before PG20, columns were sorted by attnum with expressions at the end. +-- Leave for pg_upgrade testing. +CREATE STATISTICS tenk1_mcv (mcv) ON ten, (ten + four), four FROM tenk1; +ANALYZE tenk1; +SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext + WHERE stxname = 'tenk1_mcv'; + pg_get_statisticsobjdef +-------------------------------------------------------------------------------- + CREATE STATISTICS public.tenk1_mcv (mcv) ON ten, (ten + four), four FROM tenk1 +(1 row) + diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out index dabf9ba1cd8..900b6bbf995 100644 --- a/src/test/regress/expected/stats_import.out +++ b/src/test/regress/expected/stats_import.out @@ -78,7 +78,7 @@ SELECT COUNT(*) FROM pg_attribute attnum > 0; count ------- - 17 + 16 (1 row) -- Create a view that is used purely for the type based on pg_stats_ext. 
@@ -3450,6 +3450,72 @@ FROM stats_import.pg_stats_ext_exprs_get_difference('test_mr_stat', 'test_mr_sta \gx (0 rows) +-- Test import of extended statistics on virtual generated columns. +-- Virtual gen cols are stored as Var nodes in stxexprs but treated as +-- expressions during ANALYZE (they receive per-expression stats in stxdexpr). +CREATE TABLE stats_import.test_vgencol( + a int, + b int, + c int GENERATED ALWAYS AS (a + b) VIRTUAL +); +INSERT INTO stats_import.test_vgencol(a, b) + SELECT mod(i, 10), mod(i, 7) FROM generate_series(1, 1000) s(i); +CREATE STATISTICS stats_import.stat_vgencol ON a, c + FROM stats_import.test_vgencol; +ANALYZE stats_import.test_vgencol; +CREATE TABLE stats_import.test_vgencol_clone( + a int, + b int, + c int GENERATED ALWAYS AS (a + b) VIRTUAL +); +CREATE STATISTICS stats_import.stat_vgencol_clone ON a, c + FROM stats_import.test_vgencol_clone; +-- Import stats from stat_vgencol to stat_vgencol_clone +SELECT e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'schemaname', e.statistics_schemaname::text, + 'relname', 'test_vgencol_clone', + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'stat_vgencol_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT jsonb_agg(jsonb_strip_nulls(jsonb_build_object( + 'null_frac', ee.null_frac::text, + 'avg_width', ee.avg_width::text, + 'n_distinct', ee.n_distinct::text, + 'most_common_vals', ee.most_common_vals::text, + 'most_common_freqs', ee.most_common_freqs::text, + 'histogram_bounds', ee.histogram_bounds::text, + 'correlation', ee.correlation::text))) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname AND + ee.statistics_name = e.statistics_name AND + 
ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'stat_vgencol'; + statistics_name | pg_restore_extended_stats +-----------------+--------------------------- + stat_vgencol | t +(1 row) + +SELECT statname, (stats).* +FROM stats_import.pg_stats_ext_get_difference('stat_vgencol', 'stat_vgencol_clone') +\gx +(0 rows) + +SELECT statname, (stats).* +FROM stats_import.pg_stats_ext_exprs_get_difference('stat_vgencol', 'stat_vgencol_clone') +\gx +(0 rows) + -- range_length_histogram, range_empty_frac, and range_bounds_histogram -- have been added to pg_stats_ext_exprs in PostgreSQL 19. When dumping -- expression statistics in a cluster with an older version, these fields @@ -3536,7 +3602,7 @@ SELECT COUNT(*) FROM stats_import.test_range_expr_null (1 row) DROP SCHEMA stats_import CASCADE; -NOTICE: drop cascades to 19 other objects +NOTICE: drop cascades to 21 other objects DETAIL: drop cascades to view stats_import.pg_stats_stable drop cascades to view stats_import.pg_statistic_flat_t drop cascades to function stats_import.pg_statistic_flat(text) @@ -3555,4 +3621,6 @@ drop cascades to sequence stats_import.testseq drop cascades to view stats_import.testview drop cascades to table stats_import.test_clone drop cascades to table stats_import.test_mr_clone +drop cascades to table stats_import.test_vgencol +drop cascades to table stats_import.test_vgencol_clone drop cascades to table stats_import.test_range_expr_null diff --git a/src/test/regress/sql/create_table_like.sql b/src/test/regress/sql/create_table_like.sql index d52a93ef131..864f54d0e6f 100644 --- a/src/test/regress/sql/create_table_like.sql +++ b/src/test/regress/sql/create_table_like.sql @@ -277,7 +277,7 @@ SELECT attname, attcompression FROM pg_attribute WHERE attrelid = 'ctl_foreign_table2'::regclass and attnum > 0 ORDER BY attnum; -- LIKE ... INCLUDING STATISTICS with dropped columns in the parent, --- so stxkeys attnums are not contiguous. 
+-- so column attnums are not contiguous. CREATE TABLE ctl_stats3_parent (a int, b int, c int); ALTER TABLE ctl_stats3_parent DROP COLUMN b; CREATE STATISTICS ctl_stats3_stat ON a, c FROM ctl_stats3_parent; @@ -287,15 +287,10 @@ ALTER TABLE ctl_stats4_parent DROP COLUMN b; CREATE STATISTICS ctl_stats4_stat ON a, c FROM ctl_stats4_parent; CREATE TABLE ctl_stats4_child (LIKE ctl_stats4_parent INCLUDING STATISTICS); SELECT s.stxrelid::regclass AS relation, - array_agg(a.attname ORDER BY u.ord) AS stats_columns + pg_get_statisticsobjdef_columns(s.oid) AS stats_columns FROM pg_statistic_ext s -CROSS JOIN LATERAL - unnest(s.stxkeys::int2[]) WITH ORDINALITY AS u(attnum, ord) -JOIN pg_attribute a - ON a.attrelid = s.stxrelid AND a.attnum = u.attnum WHERE s.stxrelid IN ('ctl_stats3_child'::regclass, 'ctl_stats4_child'::regclass) -GROUP BY s.stxrelid ORDER BY s.stxrelid::regclass::text; DROP TABLE ctl_stats3_parent; DROP TABLE ctl_stats3_child; diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 3cc6012b822..372d4a37bd1 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1593,6 +1593,12 @@ ANALYZE virtual_gen_stats; SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE c = 0 AND (3*b) = 0'); SELECT * FROM check_estimated_rows('SELECT * FROM virtual_gen_stats WHERE d = 0 AND (d-2*a) = 0'); +SELECT expr FROM pg_stats_ext_exprs + WHERE statistics_name = 'virtual_gen_stats_1' AND NOT inherited; + +SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext + WHERE stxname = 'virtual_gen_stats_1'; + -- univariate statistics on individual virtual generated columns DROP STATISTICS virtual_gen_stats_1; @@ -1901,10 +1907,18 @@ CREATE STATISTICS stats_ext_range (mcv) ON irange, (irange + '[4,10)'::int4range) FROM stats_ext_tbl_range; ANALYZE stats_ext_tbl_range; -SELECT attnames, most_common_vals +SELECT exprs, most_common_vals FROM pg_stats_ext WHERE statistics_name = 'stats_ext_range'; 
SELECT range_length_histogram, range_empty_frac, range_bounds_histogram FROM pg_stats_ext_exprs WHERE statistics_name = 'stats_ext_range'; DROP TABLE stats_ext_tbl_range; + +-- Verify user-written ordering of columns and expressions is preserved. +-- Before PG20, columns were sorted by attnum with expressions at the end. +-- Leave for pg_upgrade testing. +CREATE STATISTICS tenk1_mcv (mcv) ON ten, (ten + four), four FROM tenk1; +ANALYZE tenk1; +SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext + WHERE stxname = 'tenk1_mcv'; diff --git a/src/test/regress/sql/stats_import.sql b/src/test/regress/sql/stats_import.sql index 58140315efb..070e9759f17 100644 --- a/src/test/regress/sql/stats_import.sql +++ b/src/test/regress/sql/stats_import.sql @@ -2400,6 +2400,66 @@ SELECT statname, (stats).* FROM stats_import.pg_stats_ext_exprs_get_difference('test_mr_stat', 'test_mr_stat_clone') \gx +-- Test import of extended statistics on virtual generated columns. +-- Virtual gen cols are stored as Var nodes in stxexprs but treated as +-- expressions during ANALYZE (they receive per-expression stats in stxdexpr). 
+CREATE TABLE stats_import.test_vgencol( + a int, + b int, + c int GENERATED ALWAYS AS (a + b) VIRTUAL +); +INSERT INTO stats_import.test_vgencol(a, b) + SELECT mod(i, 10), mod(i, 7) FROM generate_series(1, 1000) s(i); +CREATE STATISTICS stats_import.stat_vgencol ON a, c + FROM stats_import.test_vgencol; +ANALYZE stats_import.test_vgencol; +CREATE TABLE stats_import.test_vgencol_clone( + a int, + b int, + c int GENERATED ALWAYS AS (a + b) VIRTUAL +); +CREATE STATISTICS stats_import.stat_vgencol_clone ON a, c + FROM stats_import.test_vgencol_clone; +-- Import stats from stat_vgencol to stat_vgencol_clone +SELECT e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'schemaname', e.statistics_schemaname::text, + 'relname', 'test_vgencol_clone', + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'stat_vgencol_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT jsonb_agg(jsonb_strip_nulls(jsonb_build_object( + 'null_frac', ee.null_frac::text, + 'avg_width', ee.avg_width::text, + 'n_distinct', ee.n_distinct::text, + 'most_common_vals', ee.most_common_vals::text, + 'most_common_freqs', ee.most_common_freqs::text, + 'histogram_bounds', ee.histogram_bounds::text, + 'correlation', ee.correlation::text))) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname AND + ee.statistics_name = e.statistics_name AND + ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'stat_vgencol'; + +SELECT statname, (stats).* +FROM stats_import.pg_stats_ext_get_difference('stat_vgencol', 'stat_vgencol_clone') +\gx + +SELECT statname, (stats).* +FROM 
stats_import.pg_stats_ext_exprs_get_difference('stat_vgencol', 'stat_vgencol_clone') +\gx + -- range_length_histogram, range_empty_frac, and range_bounds_histogram -- have been added to pg_stats_ext_exprs in PostgreSQL 19. When dumping -- expression statistics in a cluster with an older version, these fields -- 2.50.1 (Apple Git-155)