diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index df0435c..54940ce 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -296,6 +296,11 @@ + pg_statistic_ext + extended planner statistics + + + pg_subscription logical replication subscriptions @@ -4223,6 +4228,98 @@ + + <structname>pg_statistic_ext</structname> + + + pg_statistic_ext + + + + The catalog pg_statistic_ext + holds extended planner statistics. + + + + <structname>pg_statistic_ext</> Columns + + + + + Name + Type + References + Description + + + + + + + starelid + oid + pg_class.oid + The table that the described columns belongs to + + + + staname + name + + Name of the statistic. + + + + stanamespace + oid + pg_namespace.oid + + The OID of the namespace that contains this statistic + + + + + staowner + oid + pg_authid.oid + Owner of the statistic + + + + staenabled + char[] + + + An array with the modes of the enabled statistic types, encoded as + d for ndistinct coefficients. + + + + + stakeys + int2vector + pg_attribute.attnum + + This is an array of values that indicate which table columns this + statistic covers. For example a value of 1 3 would + mean that the first and the third table columns make up the statistic key. + + + + + standistinct + pg_ndistinct + + + N-distinct coefficients, serialized as pg_ndistinct type. + + + + + +
+
+ <structname>pg_namespace</structname> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9408a25..d2b50eb 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16721,6 +16721,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); + pg_get_statisticsextdef + + + pg_get_triggerdef @@ -16890,6 +16894,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); uses + pg_get_statisticsextdef(statext_oid) + text + get CREATE STATISTICS command for extended statistics objects + + pg_get_triggerdef(trigger_oid) text get CREATE [ CONSTRAINT ] TRIGGER command for trigger @@ -17034,19 +17043,20 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); pg_get_constraintdef, pg_get_indexdef, pg_get_ruledef, - and pg_get_triggerdef, respectively reconstruct the - creating command for a constraint, index, rule, or trigger. (Note that this - is a decompiled reconstruction, not the original text of the command.) - pg_get_expr decompiles the internal form of an - individual expression, such as the default value for a column. It can be - useful when examining the contents of system catalogs. If the expression - might contain Vars, specify the OID of the relation they refer to as the - second parameter; if no Vars are expected, zero is sufficient. - pg_get_viewdef reconstructs the SELECT - query that defines a view. Most of these functions come in two variants, - one of which can optionally pretty-print the result. The - pretty-printed format is more readable, but the default format is more - likely to be interpreted the same way by future versions of + pg_get_statisticsextdef, and + pg_get_triggerdef, respectively reconstruct the + creating command for a constraint, index, rule, extended statistics object, + or trigger. (Note that this is a decompiled reconstruction, not the + original text of the command.) pg_get_expr decompiles + the internal form of an individual expression, such as the default value + for a column. It can be useful when examining the contents of system + catalogs. If the expression might contain Vars, specify the OID of the + relation they refer to as the second parameter; if no Vars are expected, + zero is sufficient. pg_get_viewdef reconstructs the + SELECT query that defines a view. Most of these functions come + in two variants, one of which can optionally pretty-print the + result. The pretty-printed format is more readable, but the default format + is more likely to be interpreted the same way by future versions of PostgreSQL; avoid using pretty-printed output for dump purposes. Passing false for the pretty-print parameter yields the same result as the variant that does not have the parameter at all. diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml index b73c66b..618dbcc 100644 --- a/doc/src/sgml/planstats.sgml +++ b/doc/src/sgml/planstats.sgml @@ -448,4 +448,149 @@ rows = (outer_cardinality * inner_cardinality) * selectivity + + Extended Statistics + + + statistics + of the planner + + + + The examples presented in used + statistics about individual columns to compute selectivity estimates. + When estimating the selectivity of conditions over multiple columns, + the planner normally assumes each condition is independent of other + conditions, + and simply multiplies the selectivity estimates of each condition together + to produce a final selectivity estimation for all conditions. + This method can often lead to inaccurate row estimations + when the conditions have dependencies on one another. + Such misestimations can result in poor plan choices being made. + + + + The examples presented below demonstrate such estimation errors on simple + data sets, and also how to resolve them by creating extended statistics + using the CREATE STATISTICS command. + + + + Let's start with a very simple data set — a table with two columns, + containing exactly the same values: + + +CREATE TABLE t (a INT, b INT); +INSERT INTO t SELECT i % 100, i % 100 FROM generate_series(1, 10000) s(i); +ANALYZE t; + + + As explained in , + the planner can determine the cardinality of t + using the number of pages and rows + obtained from pg_class: + + +SELECT relpages, reltuples FROM pg_class WHERE relname = 't'; + + relpages | reltuples +----------+----------- + 45 | 10000 + + + The data distribution is very simple — there are only 100 distinct values + in each column, uniformly distributed. + + + + The following example shows the result of estimating a WHERE + condition on column a: + + +EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..170.00 rows=100 width=8) (actual time=0.031..2.870 rows=100 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 9900 + Planning time: 0.092 ms + Execution time: 3.103 ms +(5 rows) + + + The planner examines the condition and computes the estimate using + eqsel (the selectivity function for =), and + statistics stored in the pg_stats table. In this case, + the planner estimates the condition matches 1% of rows, and by comparing + the estimated and actual number of rows, we see that the estimate is + very accurate (exact, in fact, as the table is very small). + + + + Adding a condition on the second column results in the following plan: + + +EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=1 width=8) (actual time=0.033..3.006 rows=100 loops=1) + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 9900 + Planning time: 0.121 ms + Execution time: 3.220 ms +(5 rows) + + + The planner estimates the selectivity for each condition individually, + arriving to the 1% estimates as above, and then multiplies them, getting + the final 0.01% estimate. The plan however shows that this results in + a significant underestimate, as the actual number of rows matching the + conditions is two orders of magnitude higher than estimated. + + + + Overestimates, i.e., errors in the opposite direction, are also possible. + Consider for example the following combination of range conditions: + + +EXPLAIN ANALYZE SELECT * FROM t WHERE a <= 49 AND b > 49; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Seq Scan on t (cost=0.00..195.00 rows=2500 width=8) (actual time=1.607..1.607 rows=0 loops=1) + Filter: ((a <= 49) AND (b > 49)) + Rows Removed by Filter: 10000 + Planning time: 0.050 ms + Execution time: 1.623 ms +(5 rows) + + + The planner examines both WHERE clauses and estimates them + using the scalarltsel and scalargtsel functions, + specified as the selectivity functions matching the <= and + > operators. Both conditions match 50% of the + table, and assuming independence the planner multiplies them to compute + the total estimate of 25%. However as the explain output shows, the actual + number of rows is 0, because the columns are correlated and the conditions + contradict each other. + + + + Both under- and over-estimation errors are caused because the independence + assumption does not hold, as the two columns contain exactly the same values, + and are therefore perfectly correlated. + Providing additional information about correlation between columns + is the purpose of extended statistics, + and the rest of this section explains in more detail how the planner + leverages them to improve estimates. + + + + For additional details about extended statistics, see + src/backend/statistics/README. There are additional + READMEs for each type of statistics, mentioned in the following + sections. + + + + diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 2bc4d9f..255e800 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory. + @@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory. + @@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory. + diff --git a/doc/src/sgml/ref/alter_statistics.sgml b/doc/src/sgml/ref/alter_statistics.sgml new file mode 100644 index 0000000..3e4d286 --- /dev/null +++ b/doc/src/sgml/ref/alter_statistics.sgml @@ -0,0 +1,115 @@ + + + + + ALTER STATISTICS + + + + ALTER STATISTICS + 7 + SQL - Language Statements + + + + ALTER STATISTICS + + change the definition of a extended statistics + + + + + +ALTER STATISTICS name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } +ALTER STATISTICS name RENAME TO new_name +ALTER STATISTICS name SET SCHEMA new_schema + + + + + Description + + + ALTER STATISTICS changes the parameters of an existing + extended statistics. Any parameters not specifically set in the + ALTER STATISTICS command retain their prior settings. + + + + You must own the statistics to use ALTER STATISTICS. + To change a statistics' schema, you must also have CREATE + privilege on the new schema. + To alter the owner, you must also be a direct or indirect member of the new + owning role, and that role must have CREATE privilege on + the statistics' schema. (These restrictions enforce that altering the owner + doesn't do anything you couldn't do by dropping and recreating the statistics. + However, a superuser can alter ownership of any statistics anyway.) + + + + + Parameters + + + + + name + + + The name (optionally schema-qualified) of the statistics to be altered. + + + + + + new_owner + + + The user name of the new owner of the statistics. + + + + + + new_name + + + The new name for the statistics. + + + + + + new_schema + + + The new schema for the statistics. + + + + + + + + + + Compatibility + + + There's no ALTER STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 767ea32..75de226 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] name This form drops a column from a table. Indexes and table constraints involving the column will be automatically - dropped as well. You will need to say CASCADE if - anything outside the table depends on the column, for example, - foreign key references or views. + dropped as well. + Multivariate statistics referencing the dropped column will also be + removed if the removal of the column would cause the statistics to + contain data for only a single column. + You will need to say CASCADE if anything outside the table + depends on the column, for example, foreign key references or views. If IF EXISTS is specified and the column does not exist, no error is thrown. In this case a notice is issued instead. diff --git a/doc/src/sgml/ref/comment.sgml b/doc/src/sgml/ref/comment.sgml index 7483c8c..8fe17a5 100644 --- a/doc/src/sgml/ref/comment.sgml +++ b/doc/src/sgml/ref/comment.sgml @@ -51,6 +51,7 @@ COMMENT ON SCHEMA object_name | SEQUENCE object_name | SERVER object_name | + STATISTICS object_name | TABLE object_name | TABLESPACE object_name | TEXT SEARCH CONFIGURATION object_name | @@ -125,8 +126,8 @@ COMMENT ON The name of the object to be commented. Names of tables, aggregates, collations, conversions, domains, foreign tables, functions, indexes, operators, operator classes, operator families, sequences, - text search objects, types, and views can be schema-qualified. - When commenting on a column, + statistics, text search objects, types, and views can be + schema-qualified. When commenting on a column, relation_name must refer to a table, view, composite type, or foreign table. @@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records'; COMMENT ON SCHEMA my_schema IS 'Departmental data'; COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys'; COMMENT ON SERVER myserver IS 'my foreign server'; +COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations'; COMMENT ON TABLE my_schema.my_table IS 'Employee Information'; COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes'; COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering'; diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml new file mode 100644 index 0000000..60184a3 --- /dev/null +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -0,0 +1,155 @@ + + + + + CREATE STATISTICS + + + + CREATE STATISTICS + 7 + SQL - Language Statements + + + + CREATE STATISTICS + define extended statistics + + + + +CREATE STATISTICS [ IF NOT EXISTS ] statistics_name ON ( + column_name, column_name [, ...]) + FROM table_name + + + + + + Description + + + CREATE STATISTICS will create a new extended statistics + object on the specified table. + The statistics will be created in the current database and + will be owned by the user issuing the command. + + + + If a schema name is given (for example, CREATE STATISTICS + myschema.mystat ...) then the statistics is created in the specified + schema. Otherwise it is created in the current schema. The name of + the statistics must be distinct from the name of any other statistics in the + same schema. + + + + + Parameters + + + + + IF NOT EXISTS + + + Do not throw an error if a statistics with the same name already exists. + A notice is issued in this case. Note that only the name of the + statistics object is considered here. The definition of the statistics is + not considered. + + + + + + statistics_name + + + The name (optionally schema-qualified) of the statistics to be created. + + + + + + column_name + + + The name of a column to be included in the statistics. + + + + + + table_name + + + The name (optionally schema-qualified) of the table the statistics should + be created on. + + + + + + + + + + Notes + + + You must be the owner of a table to create or change statistics on it. + + + + + Examples + + + Create table t1 with two functionally dependent columns, i.e. + knowledge of a value in the first column is sufficient for determining the + value in the other column. Then functional dependencies are built on those + columns: + + +CREATE TABLE t1 ( + a int, + b int +); + +INSERT INTO t1 SELECT i/100, i/500 + FROM generate_series(1,1000000) s(i); + +CREATE STATISTICS s1 ON (a, b) FROM t1; + +ANALYZE t1; + +-- valid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); + +-- invalid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1); + + + + + + + Compatibility + + + There's no CREATE STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/drop_statistics.sgml b/doc/src/sgml/ref/drop_statistics.sgml new file mode 100644 index 0000000..98c3381 --- /dev/null +++ b/doc/src/sgml/ref/drop_statistics.sgml @@ -0,0 +1,98 @@ + + + + + DROP STATISTICS + + + + DROP STATISTICS + 7 + SQL - Language Statements + + + + DROP STATISTICS + remove extended statistics + + + + +DROP STATISTICS [ IF EXISTS ] name [, ...] + + + + + Description + + + DROP STATISTICS removes statistics from the database. + Only the statistics owner, the schema owner, and superuser can drop a + statistics. + + + + + + Parameters + + + + IF EXISTS + + + Do not throw an error if the statistics do not exist. A notice is + issued in this case. + + + + + + name + + + The name (optionally schema-qualified) of the statistics to drop. + + + + + + + + + Examples + + + To destroy two statistics objects on different schemas, without failing + if they don't exist: + + +DROP STATISTICS IF EXISTS + accounting.users_uid_creation, + public.grants_user_role; + + + + + + + Compatibility + + + There's no DROP STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index c8191de..aa8a157 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -60,6 +60,7 @@ &alterSchema; &alterSequence; &alterServer; + &alterStatistics; &alterSubscription; &alterSystem; &alterTable; @@ -108,6 +109,7 @@ &createSchema; &createSequence; &createServer; + &createStatistics; &createSubscription; &createTable; &createTableAs; @@ -154,6 +156,7 @@ &dropSchema; &dropSequence; &dropServer; + &dropStatistics; &dropSubscription; &dropTable; &dropTableSpace; diff --git a/src/backend/Makefile b/src/backend/Makefile index 7a0bbb2..426ef4f 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \ main nodes optimizer port postmaster regex replication rewrite \ - storage tcop tsearch utils $(top_builddir)/src/timezone + statistics storage tcop tsearch utils $(top_builddir)/src/timezone include $(srcdir)/common.mk diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 3136858..ff7cc79 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \ pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \ pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \ + pg_statistic_ext.h \ pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \ pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \ diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index be86d76..d01930f 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -48,6 +48,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] = gettext_noop("permission denied for collation %s"), /* ACL_KIND_CONVERSION */ gettext_noop("permission denied for conversion %s"), + /* ACL_KIND_STATISTICS */ + gettext_noop("permission denied for statistics %s"), /* ACL_KIND_TABLESPACE */ gettext_noop("permission denied for tablespace %s"), /* ACL_KIND_TSDICTIONARY */ @@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] = gettext_noop("must be owner of collation %s"), /* ACL_KIND_CONVERSION */ gettext_noop("must be owner of conversion %s"), + /* ACL_KIND_STATISTICS */ + gettext_noop("must be owner of statistics %s"), /* ACL_KIND_TABLESPACE */ gettext_noop("must be owner of tablespace %s"), /* ACL_KIND_TSDICTIONARY */ @@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid, mask, how, NULL); case ACL_KIND_NAMESPACE: return pg_namespace_aclmask(table_oid, roleid, mask, how); + case ACL_KIND_STATISTICS: + elog(ERROR, "grantable rights not supported for statistics"); + /* not reached, but keep compiler quiet */ + return ACL_NO_RIGHTS; case ACL_KIND_TABLESPACE: return pg_tablespace_aclmask(table_oid, roleid, mask, how); case ACL_KIND_FDW: @@ -5104,6 +5113,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid) } /* + * Ownership check for a extended statistics (specified by OID). + */ +bool +pg_statistics_ownercheck(Oid stat_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("statistics with OID %u do not exist", stat_oid))); + + ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + +/* * Check whether specified role has CREATEROLE privilege (or is a superuser) * * Note: roles do not have owners per se; instead we use this test in diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index fc088b2..ee27cae 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -51,6 +51,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -154,6 +155,7 @@ static const Oid object_classes[] = { RewriteRelationId, /* OCLASS_REWRITE */ TriggerRelationId, /* OCLASS_TRIGGER */ NamespaceRelationId, /* OCLASS_SCHEMA */ + StatisticExtRelationId, /* OCLASS_STATISTIC_EXT */ TSParserRelationId, /* OCLASS_TSPARSER */ TSDictionaryRelationId, /* OCLASS_TSDICT */ TSTemplateRelationId, /* OCLASS_TSTEMPLATE */ @@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags) DropTransformById(object->objectId); break; + case OCLASS_STATISTIC_EXT: + RemoveStatisticsById(object->objectId); + break; + default: elog(ERROR, "unrecognized object class: %u", object->classId); @@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object) case NamespaceRelationId: return OCLASS_SCHEMA; + case StatisticExtRelationId: + return OCLASS_STATISTIC_EXT; + case TSParserRelationId: return OCLASS_TSPARSER; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 41c0056..8a91849 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -52,6 +52,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "catalog/pg_type_fn.h" @@ -1608,7 +1609,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum) heap_close(attr_rel, RowExclusiveLock); if (attnum > 0) + { RemoveStatistics(relid, attnum); + RemoveStatisticsExt(relid, attnum); + } relation_close(rel, NoLock); } @@ -1854,6 +1858,7 @@ heap_drop_with_catalog(Oid relid) * delete statistics */ RemoveStatistics(relid, 0); + RemoveStatisticsExt(relid, 0); /* * delete attribute tuples @@ -2766,6 +2771,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum) /* + * RemoveStatisticsExt --- remove entries in pg_statistic_ext for a relation + * + * If attnum is zero, remove all entries for rel; else remove only the + * one(s) involving that column. + */ +void +RemoveStatisticsExt(Oid relid, AttrNumber attnum) +{ + Relation pgstatisticext; + SysScanDesc scan; + ScanKeyData key; + HeapTuple tuple; + + /* + * Scan pg_statistic_ext to delete relevant tuples + */ + pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock); + + ScanKeyInit(&key, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pgstatisticext, + StatisticExtRelidIndexId, + true, NULL, 1, &key); + + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + bool delete = false; + + if (attnum == 0) + delete = true; + else if (attnum != 0) + { + Form_pg_statistic_ext staForm; + int i; + + /* + * Decode the stakeys array and delete any stats that involve the + * specified column. + */ + staForm = (Form_pg_statistic_ext) GETSTRUCT(tuple); + for (i = 0; i < staForm->stakeys.dim1; i++) + { + if (staForm->stakeys.values[i] == attnum) + { + delete = true; + break; + } + } + } + + if (delete) + { + CatalogTupleDelete(pgstatisticext, &tuple->t_self); + deleteDependencyRecordsFor(StatisticExtRelationId, + HeapTupleGetOid(tuple), + false); + } + } + + systable_endscan(scan); + + heap_close(pgstatisticext, RowExclusiveLock); +} + + +/* * RelationTruncateIndexes - truncate all indexes associated * with the heap relation to zero tuples. * diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index a38da30..e521bd9 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -2086,6 +2086,62 @@ ConversionIsVisible(Oid conid) } /* + * get_statistics_oid - find a statistics by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error + */ +Oid +get_statistics_oid(List *names, bool missing_ok) +{ + char *schemaname; + char *stats_name; + Oid namespaceId; + Oid stats_oid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &stats_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (missing_ok && !OidIsValid(namespaceId)) + stats_oid = InvalidOid; + else + stats_oid = GetSysCacheOid2(STATEXTNAMENSP, + PointerGetDatum(stats_name), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + stats_oid = GetSysCacheOid2(STATEXTNAMENSP, + PointerGetDatum(stats_name), + ObjectIdGetDatum(namespaceId)); + if (OidIsValid(stats_oid)) + break; + } + } + + if (!OidIsValid(stats_oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("statistics \"%s\" do not exist", + NameListToString(names)))); + + return stats_oid; +} + +/* * get_ts_parser_oid - find a TS parser by possibly qualified name * * If not found, returns InvalidOid if missing_ok, else throws error diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 61a831b..2948d64 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -48,6 +48,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] = InvalidAttrNumber, ACL_KIND_SUBSCRIPTION, true + }, + { + StatisticExtRelationId, + StatisticExtOidIndexId, + STATEXTOID, + STATEXTNAMENSP, + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + Anum_pg_statistic_ext_staowner, + InvalidAttrNumber, /* no ACL (same as relation) */ + ACL_KIND_STATISTICS, + true } }; @@ -696,6 +709,10 @@ static const struct object_type_map /* OCLASS_TRANSFORM */ { "transform", OBJECT_TRANSFORM + }, + /* OBJECT_STATISTIC_EXT */ + { + "statistics", OBJECT_STATISTIC_EXT } }; @@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object, address = get_object_address_defacl(castNode(List, object), missing_ok); break; + case OBJECT_STATISTIC_EXT: + address.classId = StatisticExtRelationId; + address.objectId = get_statistics_oid(castNode(List, object), + missing_ok); + address.objectSubId = 0; + break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); /* placate compiler, in case it thinks elog might return */ @@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS) case OBJECT_ATTRIBUTE: case OBJECT_COLLATION: case OBJECT_CONVERSION: + case OBJECT_STATISTIC_EXT: case OBJECT_TSPARSER: case OBJECT_TSDICTIONARY: case OBJECT_TSTEMPLATE: @@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser"))); break; + case OBJECT_STATISTIC_EXT: + if (!pg_statistics_ownercheck(address.objectId, roleid)) + aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId); + break; default: elog(ERROR, "unrecognized object type: %d", (int) objtype); @@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object) appendStringInfoString(&buffer, "subscription"); break; + case OCLASS_STATISTIC_EXT: + appendStringInfoString(&buffer, "statistics"); + break; + default: appendStringInfo(&buffer, "unrecognized %u", object->classId); break; @@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object, break; } + case OCLASS_STATISTIC_EXT: + { + HeapTuple tup; + Form_pg_statistic_ext formStatistic; + char *schema; + + tup = SearchSysCache1(STATEXTOID, + ObjectIdGetDatum(object->objectId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for statistics %u", + object->objectId); + formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup); + schema = get_namespace_name_or_temp(formStatistic->stanamespace); + appendStringInfoString(&buffer, + quote_qualified_identifier(schema, + NameStr(formStatistic->staname))); + if (objname) + *objname = list_make2(schema, + pstrdup(NameStr(formStatistic->staname))); + ReleaseSysCache(tup); + } + break; + default: appendStringInfo(&buffer, "unrecognized object %u %u %d", object->classId, diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 8d946ff..d28a8af 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -39,6 +39,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_shdepend.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" @@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole) case OperatorFamilyRelationId: case OperatorClassRelationId: case ExtensionRelationId: + case StatisticExtRelationId: case TableSpaceRelationId: case DatabaseRelationId: case TSConfigRelationId: diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b6552da..5839187 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS WHERE NOT pg_is_other_temp_schema(N.oid) AND relkind = 'S'; +CREATE VIEW pg_stats_ext AS + SELECT + N.nspname AS schemaname, + C.relname AS tablename, + S.staname AS staname, + S.stakeys AS attnums, + length(s.standistinct) AS ndistbytes + FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid)) + LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace); + CREATE VIEW pg_stats WITH (security_barrier) AS SELECT nspname AS schemaname, diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index e0fab38..4a6c99e 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \ event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \ indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \ policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \ - schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \ - tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \ - vacuumlazy.o variable.o view.o + schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \ + tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \ + vacuum.o vacuumlazy.o variable.o view.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index cf1391c..2c6435b 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -33,6 +33,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_ts_parser.h" @@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid) Assert(OidIsValid(nspOid)); msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\""); break; + case StatisticExtRelationId: + Assert(OidIsValid(nspOid)); + msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\""); + break; case TSParserRelationId: Assert(OidIsValid(nspOid)); msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\""); @@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt) case OBJECT_OPCLASS: case OBJECT_OPFAMILY: case OBJECT_LANGUAGE: + case OBJECT_STATISTIC_EXT: case OBJECT_TSCONFIGURATION: case OBJECT_TSDICTIONARY: case OBJECT_TSPARSER: @@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt, case OBJECT_OPERATOR: case OBJECT_OPCLASS: case OBJECT_OPFAMILY: + case OBJECT_STATISTIC_EXT: case OBJECT_TSCONFIGURATION: case OBJECT_TSDICTIONARY: case OBJECT_TSPARSER: @@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt) case OBJECT_OPERATOR: case OBJECT_OPCLASS: case OBJECT_OPFAMILY: + case OBJECT_STATISTIC_EXT: case OBJECT_TABLESPACE: case OBJECT_TSDICTIONARY: case OBJECT_TSCONFIGURATION: diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 055338f..8be2df3 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -17,6 +17,7 @@ #include #include "access/multixact.h" +#include "access/sysattr.h" #include "access/transam.h" #include "access/tupconvert.h" #include "access/tuptoaster.h" @@ -28,6 +29,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" #include "commands/dbcommands.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" @@ -39,13 +41,17 @@ #include "parser/parse_relation.h" #include "pgstat.h" #include "postmaster/autovacuum.h" +#include "statistics/stat_ext_internal.h" +#include "statistics/stats.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/acl.h" #include "utils/attoptcache.h" +#include "utils/builtins.h" #include "utils/datum.h" +#include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, update_attstats(RelationGetRelid(Irel[ind]), false, thisdata->attr_cnt, thisdata->vacattrstats); } + + /* Build extended statistics (if there are any). */ + BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt, + vacattrstats); } /* @@ -1683,19 +1693,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) */ typedef struct { - Oid eqopr; /* '=' operator for datatype, if any */ - Oid eqfunc; /* and associated function */ - Oid ltopr; /* '<' operator for datatype, if any */ -} StdAnalyzeData; - -typedef struct -{ - Datum value; /* a data value */ - int tupno; /* position index for tuple it came from */ -} ScalarItem; - -typedef struct -{ int count; /* # of duplicates */ int first; /* values[] index of first occurrence */ } ScalarMCVItem; diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c index ab73fbf..cb948f0 100644 --- a/src/backend/commands/dropcmds.c +++ b/src/backend/commands/dropcmds.c @@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object) msg = gettext_noop("schema \"%s\" does not exist, skipping"); name = strVal((Value *) object); break; + case OBJECT_STATISTIC_EXT: + if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name)) + { + msg = gettext_noop("extended statistics \"%s\" do not exist, skipping"); + name = NameListToString(castNode(List, object)); + } + break; case OBJECT_TSPARSER: if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name)) { diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 346b347..7366fc7 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = { {"SCHEMA", true}, {"SEQUENCE", true}, {"SERVER", true}, + {"STATISTICS", true}, {"SUBSCRIPTION", true}, {"TABLE", true}, {"TABLESPACE", false}, @@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype) case OBJECT_SCHEMA: case OBJECT_SEQUENCE: case OBJECT_SUBSCRIPTION: + case OBJECT_STATISTIC_EXT: case OBJECT_TABCONSTRAINT: case OBJECT_TABLE: case OBJECT_TRANSFORM: @@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass) case OCLASS_PUBLICATION: case OCLASS_PUBLICATION_REL: case OCLASS_SUBSCRIPTION: + case OCLASS_STATISTIC_EXT: return true; } diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c new file mode 100644 index 0000000..6b7c009 --- /dev/null +++ b/src/backend/commands/statscmds.c @@ -0,0 +1,296 @@ +/*------------------------------------------------------------------------- + * + * statscmds.c + * Commands for creating and altering extended statistics + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/statscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "statistics/stats.h" +#include "utils/builtins.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" +#include "utils/typcache.h" + + +/* used for sorting the attnums in CreateStatistics */ +static int +compare_int16(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(int16)); +} + +/* + * CREATE STATISTICS + */ +ObjectAddress +CreateStatistics(CreateStatsStmt *stmt) +{ + int i; + ListCell *l; + int16 attnums[STATS_MAX_DIMENSIONS]; + int numcols = 0; + ObjectAddress address = InvalidObjectAddress; + char *namestr; + NameData staname; + Oid statoid; + Oid namespaceId; + HeapTuple htup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + int2vector *stakeys; + Relation statrel; + Relation rel; + Oid relid; + ObjectAddress parentobject, + childobject; + Datum types[1]; /* only ndistinct defined now */ + int ntypes; + ArrayType *staenabled; + bool build_ndistinct; + bool requested_type = false; + + Assert(IsA(stmt, CreateStatsStmt)); + + /* resolve the pieces of the name (namespace etc.) */ + namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr); + namestrcpy(&staname, namestr); + + /* + * If if_not_exists was given and the statistics already exists, bail out. + */ + if (SearchSysCacheExists2(STATEXTNAMENSP, + PointerGetDatum(&staname), + ObjectIdGetDatum(namespaceId))) + { + if (stmt->if_not_exists) + { + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics \"%s\" already exist, skipping", + namestr))); + return InvalidObjectAddress; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics \"%s\" already exist", namestr))); + } + + rel = heap_openrv(stmt->relation, AccessExclusiveLock); + relid = RelationGetRelid(rel); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + + /* + * Transform column names to array of attnums. While at it, enforce some + * constraints. + */ + foreach(l, stmt->keys) + { + char *attname = strVal(lfirst(l)); + HeapTuple atttuple; + Form_pg_attribute attForm; + TypeCacheEntry *type; + + atttuple = SearchSysCacheAttName(relid, attname); + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" referenced in statistics does not exist", + attname))); + attForm = (Form_pg_attribute) GETSTRUCT(atttuple); + + /* Disallow use of system attributes in extended stats */ + if (attForm->attnum < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("statistic creation on system columns is not supported"))); + + /* Disallow data types without a less-than operator */ + type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only scalar types can be used in extended statistics"))); + + /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */ + if (numcols >= STATS_MAX_DIMENSIONS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d keys in statistics", + STATS_MAX_DIMENSIONS))); + + attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum; + ReleaseSysCache(atttuple); + numcols++; + } + + /* + * Check that at least two columns were specified in the statement. The + * upper bound was already checked in the loop above. + */ + if (numcols < 2) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("statistics require at least 2 columns"))); + + /* + * Sort the attnums, which makes detecting duplicies somewhat easier, and + * it does not hurt (it does not affect the efficiency, unlike for + * indexes, for example). + */ + qsort(attnums, numcols, sizeof(int16), compare_int16); + + /* + * Look for duplicities in the list of columns. The attnums are sorted so + * just check consecutive elements. + */ + for (i = 1; i < numcols; i++) + if (attnums[i] == attnums[i - 1]) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("duplicate column name in statistics definition"))); + + stakeys = buildint2vector(attnums, numcols); + + /* + * Parse the statistics options. Currently only statistics types are + * recognized. + */ + build_ndistinct = false; + foreach(l, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(l); + + if (strcmp(opt->defname, "ndistinct") == 0) + { + build_ndistinct = defGetBoolean(opt); + requested_type = true; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized STATISTICS option \"%s\"", + opt->defname))); + } + /* If no statistic type was specified, build them all. */ + if (!requested_type) + build_ndistinct = true; + + /* construct the char array of enabled statistic types */ + ntypes = 0; + if (build_ndistinct) + types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); + Assert(ntypes > 0); + staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c'); + + /* + * Everything seems fine, so let's build the pg_statistic_ext tuple. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname); + values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId); + values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys); + values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled); + + /* no statistics build yet */ + nulls[Anum_pg_statistic_ext_standistinct - 1] = true; + + /* insert it into pg_statistic_ext */ + statrel = heap_open(StatisticExtRelationId, RowExclusiveLock); + htup = heap_form_tuple(statrel->rd_att, values, nulls); + CatalogTupleInsert(statrel, htup); + statoid = HeapTupleGetOid(htup); + heap_freetuple(htup); + heap_close(statrel, RowExclusiveLock); + relation_close(rel, NoLock); + + /* + * Add a dependency on a table, so that stats get dropped on DROP TABLE. + */ + ObjectAddressSet(parentobject, RelationRelationId, relid); + ObjectAddressSet(childobject, StatisticExtRelationId, statoid); + recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); + + /* + * Also add dependency on the schema. This is required to ensure that we + * drop the statistics on DROP SCHEMA. This is not handled automatically + * by DROP TABLE because the statistics are not an object in the table's + * schema. + */ + ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId); + recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); + + ObjectAddressSet(address, StatisticExtRelationId, statoid); + + /* + * Invalidate relcache so that others see the new statistics. + */ + CacheInvalidateRelcache(rel); + + return address; +} + +/* + * Guts of statistics deletion. + */ +void +RemoveStatisticsById(Oid statsOid) +{ + Relation relation; + Oid relid; + Relation rel; + HeapTuple tup; + Form_pg_statistic_ext statext; + + /* + * Delete the pg_statistic_ext tuple. + */ + relation = heap_open(StatisticExtRelationId, RowExclusiveLock); + + tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for statistics %u", statsOid); + + statext = (Form_pg_statistic_ext) GETSTRUCT(tup); + relid = statext->starelid; + + rel = heap_open(relid, AccessExclusiveLock); + + simple_heap_delete(relation, &tup->t_self); + + CacheInvalidateRelcache(rel); + + ReleaseSysCache(tup); + + heap_close(relation, RowExclusiveLock); + heap_close(rel, NoLock); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 67c7de6..45757d9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -3327,6 +3327,20 @@ _copyIndexStmt(const IndexStmt *from) return newnode; } +static CreateStatsStmt * +_copyCreateStatsStmt(const CreateStatsStmt *from) +{ + CreateStatsStmt *newnode = makeNode(CreateStatsStmt); + + COPY_NODE_FIELD(defnames); + COPY_NODE_FIELD(relation); + COPY_NODE_FIELD(keys); + COPY_NODE_FIELD(options); + COPY_SCALAR_FIELD(if_not_exists); + + return newnode; +} + static CreateFunctionStmt * _copyCreateFunctionStmt(const CreateFunctionStmt *from) { @@ -5034,6 +5048,9 @@ copyObject(const void *from) case T_IndexStmt: retval = _copyIndexStmt(from); break; + case T_CreateStatsStmt: + retval = _copyCreateStatsStmt(from); + break; case T_CreateFunctionStmt: retval = _copyCreateFunctionStmt(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 4d8e7fe..53152c0 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -1327,6 +1327,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b) } static bool +_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b) +{ + COMPARE_NODE_FIELD(defnames); + COMPARE_NODE_FIELD(relation); + COMPARE_NODE_FIELD(keys); + COMPARE_NODE_FIELD(options); + COMPARE_SCALAR_FIELD(if_not_exists); + + return true; +} + +static bool _equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b) { COMPARE_SCALAR_FIELD(replace); @@ -3222,6 +3234,9 @@ equal(const void *a, const void *b) case T_IndexStmt: retval = _equalIndexStmt(a, b); break; + case T_CreateStatsStmt: + retval = _equalCreateStatsStmt(a, b); + break; case T_CreateFunctionStmt: retval = _equalCreateFunctionStmt(a, b); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 1b9005f..541af02 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_NODE_FIELD(lateral_vars); WRITE_BITMAPSET_FIELD(lateral_referencers); WRITE_NODE_FIELD(indexlist); + WRITE_NODE_FIELD(statlist); WRITE_UINT_FIELD(pages); WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_FLOAT_FIELD(allvisfrac, "%.6f"); @@ -2275,6 +2276,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) } static void +_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node) +{ + WRITE_NODE_TYPE("STATISTICEXTINFO"); + + /* NB: this isn't a complete set of fields */ + WRITE_OID_FIELD(statOid); + /* don't write rel, leads to infinite recursion in plan tree dump */ + WRITE_CHAR_FIELD(kind); + WRITE_BITMAPSET_FIELD(keys); +} + +static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { /* @@ -2578,6 +2591,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) } static void +_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node) +{ + WRITE_NODE_TYPE("CREATESTATSSTMT"); + + WRITE_NODE_FIELD(defnames); + WRITE_NODE_FIELD(relation); + WRITE_NODE_FIELD(keys); + WRITE_NODE_FIELD(options); + WRITE_BOOL_FIELD(if_not_exists); +} + +static void _outNotifyStmt(StringInfo str, const NotifyStmt *node) { WRITE_NODE_TYPE("NOTIFY"); @@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj) case T_PlannerParamItem: _outPlannerParamItem(str, obj); break; + case T_StatisticExtInfo: + _outStatisticExtInfo(str, obj); + break; case T_ExtensibleNode: _outExtensibleNode(str, obj); @@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj) case T_IndexStmt: _outIndexStmt(str, obj); break; + case T_CreateStatsStmt: + _outCreateStatsStmt(str, obj); + break; case T_NotifyStmt: _outNotifyStmt(str, obj); break; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 463f806..ca628a2 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -29,6 +29,7 @@ #include "catalog/heap.h" #include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_statistic_ext.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -40,8 +41,11 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "statistics/stats.h" #include "storage/bufmgr.h" +#include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root, bool include_notnull); static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); - +static List *get_relation_statistics(RelOptInfo *rel, Relation relation); /* * get_relation_info - @@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, rel->indexlist = indexinfos; + rel->statlist = get_relation_statistics(rel, relation); + /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { @@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root, return result; } +/* + * get_relation_statistics + * Retrieve extended statistics defined on the table. + * + * Returns a List (possibly empty) of StatisticExtInfo objects describing + * the statistics. Note that this doesn't load the actual statistics data, + * just the identifying metadata. Only stats actually built are considered. + */ +static List * +get_relation_statistics(RelOptInfo *rel, Relation relation) +{ + List *statoidlist; + List *stainfos = NIL; + ListCell *l; + + statoidlist = RelationGetStatExtList(relation); + + foreach(l, statoidlist) + { + Oid statOid = lfirst_oid(l); + Form_pg_statistic_ext staForm; + HeapTuple htup; + Bitmapset *keys = NULL; + int i; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics %u", statOid); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + + /* + * First, build the array of columns covered. This is ultimately + * wasted if no stats are actually built, but it doesn't seem worth + * troubling over that case. + */ + for (i = 0; i < staForm->stakeys.dim1; i++) + keys = bms_add_member(keys, staForm->stakeys.values[i]); + + /* add one StatisticExtInfo for each kind built */ + if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->rel = rel; + info->kind = STATS_EXT_NDISTINCT; + info->keys = bms_copy(keys); + + stainfos = lcons(info, stainfos); + } + + ReleaseSysCache(htup); + bms_free(keys); + } + + list_free(statoidlist); + + return stainfos; +} /* * relation_excluded_by_constraints diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index d0d45a5..66fa13a 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt - CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt + CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt @@ -873,6 +873,7 @@ stmt : | CreateSeqStmt | CreateStmt | CreateSubscriptionStmt + | CreateStatsStmt | CreateTableSpaceStmt | CreateTransformStmt | CreateTrigStmt @@ -3746,6 +3747,35 @@ OptConsTableSpace: USING INDEX TABLESPACE name { $$ = $4; } ExistingIndex: USING INDEX index_name { $$ = $3; } ; +/***************************************************************************** + * + * QUERY : + * CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname + * + *****************************************************************************/ + + +CreateStatsStmt: CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name + { + CreateStatsStmt *n = makeNode(CreateStatsStmt); + n->defnames = $3; + n->relation = $10; + n->keys = $7; + n->options = $4; + n->if_not_exists = false; + $$ = (Node *)n; + } + | CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name + { + CreateStatsStmt *n = makeNode(CreateStatsStmt); + n->defnames = $6; + n->relation = $13; + n->keys = $10; + n->options = $7; + n->if_not_exists = true; + $$ = (Node *)n; + } + ; /***************************************************************************** * @@ -6041,6 +6071,7 @@ drop_type_any_name: | FOREIGN TABLE { $$ = OBJECT_FOREIGN_TABLE; } | COLLATION { $$ = OBJECT_COLLATION; } | CONVERSION_P { $$ = OBJECT_CONVERSION; } + | STATISTICS { $$ = OBJECT_STATISTIC_EXT; } | TEXT_P SEARCH PARSER { $$ = OBJECT_TSPARSER; } | TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; } | TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; } @@ -6118,7 +6149,7 @@ opt_restart_seqs: * EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER | * FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE | * MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE | - * SERVER | TABLE | TABLESPACE | + * SERVER | STATISTICS | TABLE | TABLESPACE | * TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY | * TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE | * VIEW] | @@ -6287,6 +6318,7 @@ comment_type_any_name: COLUMN { $$ = OBJECT_COLUMN; } | INDEX { $$ = OBJECT_INDEX; } | SEQUENCE { $$ = OBJECT_SEQUENCE; } + | STATISTICS { $$ = OBJECT_STATISTIC_EXT; } | TABLE { $$ = OBJECT_TABLE; } | VIEW { $$ = OBJECT_VIEW; } | MATERIALIZED VIEW { $$ = OBJECT_MATVIEW; } @@ -8427,6 +8459,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name n->missing_ok = false; $$ = (Node *)n; } + | ALTER STATISTICS any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newname = $6; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name RENAME TO name { RenameStmt *n = makeNode(RenameStmt); @@ -8642,6 +8683,15 @@ AlterObjectSchemaStmt: n->missing_ok = true; $$ = (Node *)n; } + | ALTER STATISTICS any_name SET SCHEMA name + { + AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); + n->objectType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newschema = $6; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name { AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); @@ -8905,6 +8955,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec n->newowner = $6; $$ = (Node *)n; } + | ALTER STATISTICS any_name OWNER TO RoleSpec + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newowner = $6; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec { AlterOwnerStmt *n = makeNode(AlterOwnerStmt); diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile new file mode 100644 index 0000000..c437eca --- /dev/null +++ b/src/backend/statistics/Makefile @@ -0,0 +1,17 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for statistics +# +# IDENTIFICATION +# src/backend/statistics/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/statistics +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = stat_ext.o mvdist.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/statistics/README b/src/backend/statistics/README new file mode 100644 index 0000000..beb7c24 --- /dev/null +++ b/src/backend/statistics/README @@ -0,0 +1,34 @@ +Extended statistics +=================== + +When estimating various quantities (e.g. condition selectivities) the default +approach relies on the assumption of independence. In practice that's often +not true, resulting in estimation errors. + +Extended statistics track different types of dependencies between the columns, +hopefully improving the estimates and producing better plans. + +Currently we only have one type of extended statistics - ndistinct +coefficients, and we use it to improve estimates of grouping queries. See +README.ndistinct for details. + + +Size of sample in ANALYZE +------------------------- +When performing ANALYZE, the number of rows to sample is determined as + + (300 * statistics_target) + +That works reasonably well for statistics on individual columns, but perhaps +it's not enough for extended statistics. Papers analyzing estimation errors +all use samples proportional to the table (usually finding that 1-3% of the +table is enough to build accurate stats). + +The requested accuracy (number of MCV items or histogram bins) should also +be considered when determining the sample size, and in extended statistics +those are not necessarily limited by statistics_target. + +This however merits further discussion, because collecting the sample is quite +expensive and increasing it further would make ANALYZE even more painful. +Judging by the experiments with the current implementation, the fixed size +seems to work reasonably well for now, so we leave this as a future work. diff --git a/src/backend/statistics/mvdist.c b/src/backend/statistics/mvdist.c new file mode 100644 index 0000000..c2970e2 --- /dev/null +++ b/src/backend/statistics/mvdist.c @@ -0,0 +1,675 @@ +/*------------------------------------------------------------------------- + * + * mvdist.c + * POSTGRES multivariate ndistinct coefficients + * + * Estimating number of groups in a combination of columns (e.g. for GROUP BY) + * is tricky, and the estimation error is often significant. + + * The ndistinct coefficients address this by storing ndistinct estimates not + * only for individual columns, but also for (all) combinations of columns. + * So for example given three columns (a,b,c) the statistics will estimate + * ndistinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column estimates are + * already available in pg_statistic. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/mvdist.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/htup_details.h" +#include "catalog/pg_statistic_ext.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "lib/stringinfo.h" +#include "utils/syscache.h" +#include "utils/typcache.h" +#include "statistics/stat_ext_internal.h" +#include "statistics/stats.h" + + +static double ndistinct_for_combination(double totalrows, int numrows, + HeapTuple *rows, int2vector *attrs, VacAttrStats **stats, + int k, int16 *combination); +static double estimate_ndistinct(double totalrows, int numrows, int d, int f1); +static int n_choose_k(int n, int k); +static int num_combinations(int n); + +/* Combination generator API */ + +/* internal state for generator of k-combinations of n elements */ +typedef struct CombinationGenerator +{ + int k; /* size of the combination */ + int n; /* total number of elements */ + int current; /* index of the next combination to return */ + int ncombinations; /* number of combinations (size of array) */ + int16 *combinations; /* array of pre-built combinations */ +} CombinationGenerator; + +static CombinationGenerator *generator_init(int n, int k); +static void generator_free(CombinationGenerator *state); +static int16 *generator_next(CombinationGenerator *state, int2vector *attrs); +static void generate_combinations(CombinationGenerator *state); + + +/* + * statext_ndistinct_build + * Compute ndistinct coefficient for the combination of attributes. + * + * This computes the ndistinct estimate using the same estimator used + * in analyze.c and then computes the coefficient. + */ +MVNDistinct * +statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, + int2vector *attrs, VacAttrStats **stats) +{ + MVNDistinct *result; + int k; + int itemcnt; + int numattrs = attrs->dim1; + int numcombs = num_combinations(numattrs); + + result = palloc(offsetof(MVNDistinct, items) + + numcombs * sizeof(MVNDistinctItem)); + result->magic = STATS_NDISTINCT_MAGIC; + result->type = STATS_NDISTINCT_TYPE_BASIC; + result->nitems = numcombs; + + itemcnt = 0; + for (k = 2; k <= numattrs; k++) + { + int16 *combination; + CombinationGenerator *generator; + + /* generate combinations of K out of N elements */ + generator = generator_init(numattrs, k); + + while ((combination = generator_next(generator, attrs))) + { + MVNDistinctItem *item = &result->items[itemcnt]; + int j; + + item->attrs = NULL; + for (j = 0; j < k; j++) + item->attrs = bms_add_member(item->attrs, + stats[combination[j]]->attr->attnum); + item->ndistinct = + ndistinct_for_combination(totalrows, numrows, rows, + attrs, stats, k, combination); + + itemcnt++; + Assert(itemcnt <= result->nitems); + } + + generator_free(generator); + } + + /* must consume exactly the whole output array */ + Assert(itemcnt == result->nitems); + + return result; +} + +/* + * statext_ndistinct_load + * Load the ndistinct value for the indicated pg_statistic_ext tuple + */ +MVNDistinct * +statext_ndistinct_load(Oid mvoid) +{ + bool isnull = false; + Datum ndist; + HeapTuple htup; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics %u", mvoid); + + ndist = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_standistinct, &isnull); + if (isnull) + elog(ERROR, + "requested statistic kind %c not yet built for statistics %u", + STATS_EXT_NDISTINCT, mvoid); + + ReleaseSysCache(htup); + + return statext_ndistinct_deserialize(DatumGetByteaP(ndist)); +} + +/* + * statext_ndistinct_serialize + * serialize ndistinct to the on-disk bytea format + */ +bytea * +statext_ndistinct_serialize(MVNDistinct *ndistinct) +{ + int i; + bytea *output; + char *tmp; + Size len; + + Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC); + Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC); + + /* + * Base size is base struct size, plus one base struct for each items, + * including number of items for each. + */ + len = VARHDRSZ + offsetof(MVNDistinct, items) + + ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int)); + + /* and also include space for the actual attribute numbers */ + for (i = 0; i < ndistinct->nitems; i++) + { + int nmembers; + + nmembers = bms_num_members(ndistinct->items[i].attrs); + Assert(nmembers >= 2); + len += sizeof(AttrNumber) * nmembers; + } + + output = (bytea *) palloc(len); + SET_VARSIZE(output, len); + + tmp = VARDATA(output); + + /* Store the base struct values */ + memcpy(tmp, ndistinct, offsetof(MVNDistinct, items)); + tmp += offsetof(MVNDistinct, items); + + /* + * store number of attributes and attribute numbers for each ndistinct + * entry + */ + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + int nmembers = bms_num_members(item.attrs); + int x; + + memcpy(tmp, &item.ndistinct, sizeof(double)); + tmp += sizeof(double); + memcpy(tmp, &nmembers, sizeof(int)); + tmp += sizeof(int); + + x = -1; + while ((x = bms_next_member(item.attrs, x)) >= 0) + { + AttrNumber value = (AttrNumber) x; + + memcpy(tmp, &value, sizeof(AttrNumber)); + tmp += sizeof(AttrNumber); + } + + Assert(tmp <= ((char *) output + len)); + } + + return output; +} + +/* + * statext_ndistinct_deserialize + * Read an on-disk bytea format MVNDistinct to in-memory format + */ +MVNDistinct * +statext_ndistinct_deserialize(bytea *data) +{ + int i; + Size expected_size; + MVNDistinct *ndistinct; + char *tmp; + + if (data == NULL) + return NULL; + + if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items)) + elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items)); + + /* read the MVNDistinct header */ + ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct)); + + /* initialize pointer to the data part (skip the varlena header) */ + tmp = VARDATA_ANY(data); + + /* get the header and perform basic sanity checks */ + memcpy(ndistinct, tmp, offsetof(MVNDistinct, items)); + tmp += offsetof(MVNDistinct, items); + + if (ndistinct->magic != STATS_NDISTINCT_MAGIC) + elog(ERROR, "invalid ndistinct magic %d (expected %d)", + ndistinct->magic, STATS_NDISTINCT_MAGIC); + + if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC) + elog(ERROR, "invalid ndistinct type %d (expected %d)", + ndistinct->type, STATS_NDISTINCT_TYPE_BASIC); + + Assert(ndistinct->nitems > 0); + + /* what minimum bytea size do we expect for those parameters */ + expected_size = offsetof(MVNDistinct, items) + + ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + + sizeof(AttrNumber) * 2); + + if (VARSIZE_ANY_EXHDR(data) < expected_size) + elog(ERROR, "invalid dependencies size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* allocate space for the ndistinct items */ + ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) + + (ndistinct->nitems * sizeof(MVNDistinctItem))); + + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem *item = &ndistinct->items[i]; + int nelems; + + item->attrs = NULL; + + /* ndistinct value */ + memcpy(&item->ndistinct, tmp, sizeof(double)); + tmp += sizeof(double); + + /* number of attributes */ + memcpy(&nelems, tmp, sizeof(int)); + tmp += sizeof(int); + Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS)); + + while (nelems-- > 0) + { + AttrNumber attno; + + memcpy(&attno, tmp, sizeof(AttrNumber)); + tmp += sizeof(AttrNumber); + item->attrs = bms_add_member(item->attrs, attno); + } + + /* still within the bytea */ + Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); + } + + /* we should have consumed the whole bytea exactly */ + Assert(tmp == ((char *) data + VARSIZE_ANY(data))); + + return ndistinct; +} + +/* + * pg_ndistinct_in + * input routine for type pg_ndistinct + * + * pg_ndistinct is real enough to be a table column, but it has no + * operations of its own, and disallows input (jus like pg_node_tree). + */ +Datum +pg_ndistinct_in(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct + * output routine for type pg_ndistinct + * + * Produces a human-readable representation of the value. + */ +Datum +pg_ndistinct_out(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_PP(0); + MVNDistinct *ndist = statext_ndistinct_deserialize(data); + int i; + StringInfoData str; + + initStringInfo(&str); + appendStringInfoChar(&str, '['); + + for (i = 0; i < ndist->nitems; i++) + { + MVNDistinctItem item = ndist->items[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + appendStringInfoChar(&str, '{'); + outBitmapset(&str, item.attrs); + appendStringInfo(&str, ", %f}", item.ndistinct); + } + + appendStringInfoChar(&str, ']'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * pg_ndistinct_recv + * binary input routine for type pg_ndistinct + */ +Datum +pg_ndistinct_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct_send + * binary output routine for type pg_ndistinct + * + * n-distinct is serialized into a bytea value, so let's send that. + */ +Datum +pg_ndistinct_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * ndistinct_for_combination + * Estimates number of distinct values in a combination of columns. + * + * This uses the same ndistinct estimator as compute_scalar_stats() in + * ANALYZE, i.e., + * n*d / (n - f1 + f1*n/N) + * + * except that instead of values in a single column we are dealing with + * combination of multiple columns. + */ +static double +ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, + int2vector *attrs, VacAttrStats **stats, + int k, int16 *combination) +{ + int i, + j; + int f1, + cnt, + d; + bool *isnull; + Datum *values; + SortItem *items; + MultiSortSupport mss; + + AssertArg((k >= 2) && (k <= attrs->dim1)); + + mss = multi_sort_init(k); + + /* + * In order to determine the number of distinct elements, create separate + * values[]/isnull[] arrays with all the data we have, then sort them + * using the specified column combination as dimensions. We could try to + * sort in place, but it'd probably be more complex and bug-prone. + */ + items = (SortItem *) palloc(numrows * sizeof(SortItem)); + values = (Datum *) palloc0(sizeof(Datum) * numrows * k); + isnull = (bool *) palloc0(sizeof(bool) * numrows * k); + + for (i = 0; i < numrows; i++) + { + items[i].values = &values[i * k]; + items[i].isnull = &isnull[i * k]; + } + + /* + * For each dimension, set up sort-support and fill in the values from + * the sample data. + */ + for (i = 0; i < k; i++) + { + VacAttrStats *colst = stats[combination[i]]; + TypeCacheEntry *type; + + type = lookup_type_cache(colst->attrtypid, TYPECACHE_LT_OPR); + if (type->lt_opr == InvalidOid) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for ordering operator for type %u", + colst->attrtypid); + + /* prepare the sort function for this dimension */ + multi_sort_add_dimension(mss, i, type->lt_opr); + + /* accumulate all the data into the array */ + for (j = 0; j < numrows; j++) + { + items[j].values[i] = + heap_getattr(rows[j], attrs->values[combination[i]], + stats[combination[i]]->tupDesc, + &items[j].isnull[i]); + } + } + + /* We can sort the array now ... */ + qsort_arg((void *) items, numrows, sizeof(SortItem), + multi_sort_compare, mss); + + /* ... and count the number of distinct combinations */ + + f1 = 0; + cnt = 1; + d = 1; + for (i = 1; i < numrows; i++) + { + if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) + { + if (cnt == 1) + f1 += 1; + + d++; + cnt = 0; + } + + cnt += 1; + } + + if (cnt == 1) + f1 += 1; + + return estimate_ndistinct(totalrows, numrows, d, f1); +} + +/* The Duj1 estimator (already used in analyze.c). */ +static double +estimate_ndistinct(double totalrows, int numrows, int d, int f1) +{ + double numer, + denom, + ndistinct; + + numer = (double) numrows * (double) d; + + denom = (double) (numrows - f1) + + (double) f1 *(double) numrows / totalrows; + + ndistinct = numer / denom; + + /* Clamp to sane range in case of roundoff error */ + if (ndistinct < (double) d) + ndistinct = (double) d; + + if (ndistinct > totalrows) + ndistinct = totalrows; + + return floor(ndistinct + 0.5); +} + +/* + * n_choose_k + * computes binomial coefficients using an algorithm that is both + * efficient and prevents overflows + */ +static int +n_choose_k(int n, int k) +{ + int d, + r; + + Assert((k > 0) && (n >= k)); + + /* use symmetry of the binomial coefficients */ + k = Min(k, n - k); + + r = 1; + for (d = 1; d <= k; ++d) + { + r *= n--; + r /= d; + } + + return r; +} + +/* + * num_combinations + * number of combinations, excluding single-value combinations + */ +static int +num_combinations(int n) +{ + int k; + int ncombs = 1; + + for (k = 1; k <= n; k++) + ncombs *= 2; + + ncombs -= (n + 1); + + return ncombs; +} + +/* + * generator_init + * initialize the generator of combinations + * + * The generator produces combinations of K elements from the attribute + * numbers. We do prebuild all the combinations in this method, as it + * seems somewhat simpler than generating them on the fly. + */ +static CombinationGenerator * +generator_init(int n, int k) +{ + CombinationGenerator *state; + + Assert((n >= k) && (k > 0)); + + /* allocate the generator state as a single chunk of memory */ + state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator)); + + state->ncombinations = n_choose_k(n, k); + + /* pre-allocate space for all combinations*/ + state->combinations + = (int16 *) palloc(sizeof(int16) * k * state->ncombinations); + + state->current = 0; + state->k = k; + state->n = n; + + /* now actually pre-generate all the combinations of K elements */ + generate_combinations(state); + + /* make sure we got the expected number of combinations */ + Assert(state->current == state->ncombinations); + + /* reset the number, so we start with the first one */ + state->current = 0; + + return state; +} + +/* + * generator_next + * returns the next combination from the prebuilt list + * + * Returns a combination of K attribute numbers (as specified in the + * call to generator_init), or NULL when there are no more combination. + */ +static int16 * +generator_next(CombinationGenerator *state, int2vector *attrs) +{ + if (state->current == state->ncombinations) + return NULL; + + return &state->combinations[state->k * state->current++]; +} + +/* + * genrator_free + * free the internal state of the generator + * + * Releases the generator internal state (pre-built combinations). + */ +static void +generator_free(CombinationGenerator *state) +{ + pfree(state->combinations); + pfree(state); +} + +/* + * generate_combinations_recurse + * given a prefix, generate all possible combinations + * + * Given a prefix (first few elements of the combination), generate + * following elements recursively. We generate the combinations in + * lexicographic order, which eliminates permutations of the same + * combination. + */ +static void +generate_combinations_recurse(CombinationGenerator *state, + int index, int start, int16 *current) +{ + /* If we haven't filled all the elements, simply recurse. */ + if (index < state->k) + { + int16 i; + + /* + * The values have to be in ascending order, so make sure we start + * with the value passed by parameter. + */ + + for (i = start; i < state->n; i++) + { + current[index] = i; + generate_combinations_recurse(state, (index + 1), (i + 1), current); + } + + return; + } + else + { + /* we got a valid combination, add it to the array */ + memcpy(&state->combinations[(state->k * state->current)], + current, state->k * sizeof(int16)); + state->current++; + } +} + +/* + * generate_combinations + * generate all k-combinations of N elements + */ +static void +generate_combinations(CombinationGenerator *state) +{ + int16 *current = (int16 *) palloc0(sizeof(int16) * state->k); + + generate_combinations_recurse(state, 0, 0, current); + + pfree(current); +} diff --git a/src/backend/statistics/stat_ext.c b/src/backend/statistics/stat_ext.c new file mode 100644 index 0000000..cac4650 --- /dev/null +++ b/src/backend/statistics/stat_ext.c @@ -0,0 +1,361 @@ +/*------------------------------------------------------------------------- + * + * stat_ext.c + * POSTGRES extended statistics + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/stat_ext.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_statistic_ext.h" +#include "nodes/relation.h" +#include "statistics/stat_ext_internal.h" +#include "statistics/stats.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +typedef struct StatExtEntry +{ + Oid statOid; /* OID of pg_statistic_ext entry */ + int2vector *columns; /* columns */ + List *types; /* enabled types */ +} StatExtEntry; + + +static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); +static VacAttrStats **lookup_var_attr_stats(int2vector *attrs, + int natts, VacAttrStats **vacattrstats); +static void statext_store(Relation pg_stext, Oid relid, + MVNDistinct *ndistinct, + VacAttrStats **stats); + + +/* + * Compute requested extended stats, using the rows sampled for the plain + * (single-column) stats. + * + * This fetches a list of stats from pg_statistic_ext, computes the stats + * and serializes them back into the catalog (as bytea values). + */ +void +BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats) +{ + Relation pg_stext; + ListCell *lc; + List *stats; + + pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock); + stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + + foreach(lc, stats) + { + StatExtEntry *stat = (StatExtEntry *) lfirst(lc); + MVNDistinct *ndistinct = NULL; + VacAttrStats **stats; + ListCell *lc2; + + /* filter only the interesting vacattrstats records */ + stats = lookup_var_attr_stats(stat->columns, natts, vacattrstats); + + /* check allowed number of dimensions */ + Assert((stat->columns->dim1 >= 2) && + (stat->columns->dim1 <= STATS_MAX_DIMENSIONS)); + + /* compute statistic of each type */ + foreach(lc2, stat->types) + { + char t = (char) lfirst_int(lc2); + + if (t == STATS_EXT_NDISTINCT) + ndistinct = statext_ndistinct_build(totalrows, numrows, rows, + stat->columns, stats); + } + + /* store the statistics in the catalog */ + statext_store(pg_stext, stat->statOid, ndistinct, stats); + } + + heap_close(pg_stext, RowExclusiveLock); +} + +/* + * statext_is_kind_built + * Is this stat kind built in the given pg_statistic_ext tuple? + */ +bool +statext_is_kind_built(HeapTuple htup, char type) +{ + AttrNumber attnum; + + switch (type) + { + case STATS_EXT_NDISTINCT: + attnum = Anum_pg_statistic_ext_standistinct; + break; + + default: + elog(ERROR, "unexpected statistics type requested: %d", type); + } + + return !heap_attisnull(htup, attnum); +} + +/* + * Return a list (of StatExtEntry) of statistics enabled for the given relation. + */ +static List * +fetch_statentries_for_relation(Relation pg_statext, Oid relid) +{ + SysScanDesc scan; + ScanKeyData skey; + HeapTuple htup; + List *result = NIL; + + /* + * Prepare to scan pg_statistic_ext for entries having indrelid = this + * rel. + */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(scan))) + { + StatExtEntry *entry; + Datum datum; + bool isnull; + int i; + ArrayType *arr; + char *enabled; + Form_pg_statistic_ext staForm; + + entry = palloc0(sizeof(StatExtEntry)); + entry->statOid = HeapTupleGetOid(htup); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + entry->columns = buildint2vector((int16 *) staForm->stakeys.values, + staForm->stakeys.dim1); + + /* decode the staenabled char array into a list of chars */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_staenabled, &isnull); + Assert(!isnull); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "staenabled is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + Assert(enabled[i] == STATS_EXT_NDISTINCT); + entry->types = lappend_int(entry->types, (int) enabled[i]); + } + + result = lappend(result, entry); + } + + systable_endscan(scan); + + return result; +} + +/* + * Lookup the VacAttrStats info for the selected columns, with indexes + * matching the attrs vector (to make it easy to work with when + * computing extended stats). + */ +static VacAttrStats ** +lookup_var_attr_stats(int2vector *attrs, int natts, VacAttrStats **vacattrstats) +{ + int i; + int numattrs; + VacAttrStats **stats; + + numattrs = attrs->dim1; + stats = (VacAttrStats **) palloc0(numattrs * sizeof(VacAttrStats *)); + + /* lookup VacAttrStats info for the requested columns (same attnum) */ + for (i = 0; i < numattrs; i++) + { + int j; + + stats[i] = NULL; + for (j = 0; j < natts; j++) + { + if (attrs->values[i] == vacattrstats[j]->tupattnum) + { + stats[i] = vacattrstats[j]; + break; + } + } + + /* + * Check that we found a non-dropped column and that the attnum + * matches. + */ + Assert(!stats[i]->attr->attisdropped); + Assert(stats[i]->tupattnum == attrs->values[i]); + } + + return stats; +} + +/* + * statext_store + * Serializes the statistics and stores them into the pg_statistic_ext tuple. + */ +static void +statext_store(Relation pg_stext, Oid statOid, + MVNDistinct *ndistinct, + VacAttrStats **stats) +{ + HeapTuple stup, + oldtup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + bool replaces[Natts_pg_statistic_ext]; + + memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool)); + memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool)); + memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum)); + + /* + * Construct a new pg_statistic_ext tuple, replacing the calculated stats. + */ + if (ndistinct != NULL) + { + bytea *data = statext_ndistinct_serialize(ndistinct); + + nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL); + values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data); + } + + /* always replace the value (either by bytea or NULL) */ + replaces[Anum_pg_statistic_ext_standistinct - 1] = true; + + /* there should already be a pg_statistic_ext tuple */ + oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for extended statistics %u", statOid); + + /* replace it */ + stup = heap_modify_tuple(oldtup, + RelationGetDescr(pg_stext), + values, + nulls, + replaces); + ReleaseSysCache(oldtup); + CatalogTupleUpdate(pg_stext, &stup->t_self, stup); + + heap_freetuple(stup); +} + +/* initialize multi-dimensional sort */ +MultiSortSupport +multi_sort_init(int ndims) +{ + MultiSortSupport mss; + + Assert(ndims >= 2); + + mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup) + +sizeof(SortSupportData) * ndims); + + mss->ndims = ndims; + + return mss; +} + +/* + * Prepare sort support info using the given sort operator + * at the position 'sortdim' + */ +void +multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper) +{ + SortSupport ssup = &mss->ssup[sortdim]; + + ssup->ssup_cxt = CurrentMemoryContext; + ssup->ssup_collation = DEFAULT_COLLATION_OID; + ssup->ssup_nulls_first = false; + ssup->ssup_cxt = CurrentMemoryContext; + + PrepareSortSupportFromOrderingOp(oper, ssup); +} + +/* compare all the dimensions in the selected order */ +int +multi_sort_compare(const void *a, const void *b, void *arg) +{ + int i; + SortItem *ia = (SortItem *) a; + SortItem *ib = (SortItem *) b; + + MultiSortSupport mss = (MultiSortSupport) arg; + + for (i = 0; i < mss->ndims; i++) + { + int compare; + + compare = ApplySortComparator(ia->values[i], ia->isnull[i], + ib->values[i], ib->isnull[i], + &mss->ssup[i]); + + if (compare != 0) + return compare; + } + + /* equal by default */ + return 0; +} + +/* compare selected dimension */ +int +multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + return ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); +} + +int +multi_sort_compare_dims(int start, int end, + const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + int dim; + + for (dim = start; dim <= end; dim++) + { + int r = ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); + + if (r != 0) + return r; + } + + return 0; +} diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 20b5273..1a559a5 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate, commandCollected = true; break; + case T_CreateStatsStmt: /* CREATE STATISTICS */ + address = CreateStatistics((CreateStatsStmt *) parsetree); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(parsetree)); @@ -1988,6 +1992,8 @@ AlterObjectTypeCommandTag(ObjectType objtype) break; case OBJECT_SUBSCRIPTION: tag = "ALTER SUBSCRIPTION"; + case OBJECT_STATISTIC_EXT: + tag = "ALTER STATISTICS"; break; default: tag = "???"; @@ -2282,6 +2288,8 @@ CreateCommandTag(Node *parsetree) break; case OBJECT_PUBLICATION: tag = "DROP PUBLICATION"; + case OBJECT_STATISTIC_EXT: + tag = "DROP STATISTICS"; break; default: tag = "???"; @@ -2681,6 +2689,10 @@ CreateCommandTag(Node *parsetree) tag = "EXECUTE"; break; + case T_CreateStatsStmt: + tag = "CREATE STATISTICS"; + break; + case T_DeallocateStmt: { DeallocateStmt *stmt = (DeallocateStmt *) parsetree; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5c82325..81c9103 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -35,6 +35,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" #include "commands/defrem.h" @@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno, const Oid *excludeOps, bool attrsOnly, bool showTblSpc, int prettyFlags, bool missing_ok); +static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok); static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags, bool attrsOnly); static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand, @@ -1422,6 +1424,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, } /* + * pg_get_statisticsextdef + * Get the definition of an extended statistics object + */ +Datum +pg_get_statisticsextdef(PG_FUNCTION_ARGS) +{ + Oid statextid = PG_GETARG_OID(0); + char *res; + + res = pg_get_statisticsext_worker(statextid, true); + + if (res == NULL) + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(string_to_text(res)); +} + +/* + * Internal workhorse to decompile an extended statistics object. + */ +static char * +pg_get_statisticsext_worker(Oid statextid, bool missing_ok) +{ + Form_pg_statistic_ext statextrec; + Form_pg_class pgclassrec; + HeapTuple statexttup; + HeapTuple pgclasstup; + StringInfoData buf; + int colno; + + statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid)); + + if (!HeapTupleIsValid(statexttup)) + { + if (missing_ok) + return NULL; + elog(ERROR, "cache lookup failed for extended statistics %u", statextid); + } + + statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup); + + pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid)); + + if (!HeapTupleIsValid(statexttup)) + { + ReleaseSysCache(statexttup); + elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid); + } + + pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup); + + initStringInfo(&buf); + + appendStringInfo(&buf, "CREATE STATISTICS %s ON (", + quote_identifier(NameStr(statextrec->staname))); + + for (colno = 0; colno < statextrec->stakeys.dim1; colno++) + { + AttrNumber attnum = statextrec->stakeys.values[colno]; + char *attname; + + if (colno > 0) + appendStringInfoString(&buf, ", "); + + attname = get_relid_attribute_name(statextrec->starelid, attnum); + + appendStringInfoString(&buf, quote_identifier(attname)); + } + + appendStringInfo(&buf, ") FROM %s", + quote_identifier(NameStr(pgclassrec->relname))); + + ReleaseSysCache(statexttup); + ReleaseSysCache(pgclasstup); + + return buf.data; +} + +/* * pg_get_partkeydef * * Returns the partition key specification, ie, the following: diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index bb9a544..0b79497 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -110,6 +110,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "mb/pg_wchar.h" @@ -126,6 +127,7 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parsetree.h" +#include "statistics/stats.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/date.h" @@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator, static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel); +static bool estimate_multivariate_ndistinct(PlannerInfo *root, + RelOptInfo *rel, List **varinfos, double *ndistinct); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -208,7 +212,6 @@ static Const *string_to_const(const char *str, Oid datatype); static Const *string_to_bytea_const(const char *str, size_t str_len); static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals); - /* * eqsel - Selectivity of "=" for any data types. * @@ -3398,25 +3401,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, { GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos); RelOptInfo *rel = varinfo1->rel; - double reldistinct = varinfo1->ndistinct; + double reldistinct = 1; double relmaxndistinct = reldistinct; int relvarcount = 1; List *newvarinfos = NIL; + List *relvarinfos = NIL; /* - * Get the product of numdistinct estimates of the Vars for this rel. - * Also, construct new varinfos list of remaining Vars. + * Split the list of varinfos in two - one for the current rel, + * one for remaining Vars on other rels. */ + relvarinfos = lcons(varinfo1, relvarinfos); for_each_cell(l, lnext(list_head(varinfos))) { GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); if (varinfo2->rel == varinfo1->rel) { - reldistinct *= varinfo2->ndistinct; - if (relmaxndistinct < varinfo2->ndistinct) - relmaxndistinct = varinfo2->ndistinct; - relvarcount++; + /* varinfos on current rel */ + relvarinfos = lcons(varinfo2, relvarinfos); } else { @@ -3426,6 +3429,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, } /* + * Get the numdistinct estimate for the Vars of this rel. We + * iteratively search for multivariate n-distinct with maximum number + * of vars; assuming that each var group is independent of the others, + * we multiply them together. Any remaining relvarinfos after + * no more multivariate matches are found are assumed independent too, + * so their individual ndistinct estimates are multiplied also. + */ + while (relvarinfos) + { + double mvndistinct; + + if (estimate_multivariate_ndistinct(root, rel, &relvarinfos, + &mvndistinct)) + { + reldistinct *= mvndistinct; + if (relmaxndistinct < mvndistinct) + relmaxndistinct = mvndistinct; + relvarcount++; /* inaccurate, but doesn't matter */ + } + else + { + foreach (l, relvarinfos) + { + GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l); + + reldistinct *= varinfo2->ndistinct; + if (relmaxndistinct < varinfo2->ndistinct) + relmaxndistinct = varinfo2->ndistinct; + relvarcount++; + } + + /* we're done with this relation */ + relvarinfos = NIL; + } + } + + /* * Sanity check --- don't divide by zero if empty relation. */ Assert(rel->reloptkind == RELOPT_BASEREL); @@ -3437,12 +3477,17 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * don't know by how much. We should never clamp to less than the * largest ndistinct value for any of the Vars, though, since * there will surely be at least that many groups. + * + * However we don't need to do this if we have ndistinct stats on + * the columns - in that case we can simply use the coefficient to + * get the (probably way more accurate) estimate. */ double clamp = rel->tuples; if (relvarcount > 1) { clamp *= 0.1; + if (clamp < relmaxndistinct) { clamp = relmaxndistinct; @@ -3506,7 +3551,6 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, */ numdistinct *= reldistinct; } - varinfos = newvarinfos; } while (varinfos != NIL); @@ -3668,6 +3712,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) */ /* + * Find applicable ndistinct statistics for the given list of VarInfos (which + * must all belong to the given rel), and update *ndistinct to the estimate of + * the MVNDistinctItem that best matches. If a match it found, *varinfos is + * updated to remove the list of matched varinfos. + * + * Varinfos that aren't for simple Vars are ignored. + * + * Return TRUE if we're able to find a match, FALSE otherwise. + */ +static bool +estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, + List **varinfos, double *ndistinct) +{ + ListCell *lc; + Bitmapset *attnums = NULL; + int nmatches; + Oid statOid = InvalidOid; + MVNDistinct *stats; + Bitmapset *matched = NULL; + + /* bail out immediately if the table has no extended statistics */ + if (!rel->statlist) + return false; + + /* Determine the attnums we're looking for */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + + Assert(varinfo->rel == rel); + + if (IsA(varinfo->var, Var)) + { + attnums = bms_add_member(attnums, + ((Var *) varinfo->var)->varattno); + } + } + + /* look for the ndistinct statistics matching the most vars */ + nmatches = 1; /* we require at least two matches */ + foreach(lc, rel->statlist) + { + StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc); + Bitmapset *shared; + + /* skip statistics of other kinds */ + if (info->kind != STATS_EXT_NDISTINCT) + continue; + + /* compute attnums shared by the vars and the statistic */ + shared = bms_intersect(info->keys, attnums); + + /* + * Does this statistics matches more columns than the currently + * best statistic? If so, use this one instead. + * + * XXX This should break ties using name of the statistic, or + * something like that, to make the outcome stable. + */ + if (bms_num_members(shared) > nmatches) + { + statOid = info->statOid; + nmatches = bms_num_members(shared); + matched = shared; + } + } + + /* No match? */ + if (statOid == InvalidOid) + return false; + Assert(nmatches > 1 && matched != NULL); + + stats = statext_ndistinct_load(statOid); + + /* + * If we have a match, search it for the specific item that matches (there + * must be one), and construct the output values. + */ + if (stats) + { + int i; + List *newlist = NIL; + MVNDistinctItem *item = NULL; + + /* Find the specific item that exactly matches the combination */ + for (i = 0; i < stats->nitems; i++) + { + MVNDistinctItem *tmpitem = &stats->items[i]; + + if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL) + { + item = tmpitem; + break; + } + } + + /* make sure we found an item */ + if (!item) + elog(ERROR, "corrupt MVNDistinct entry"); + + /* Form the output varinfo list, keeping only unmatched ones */ + foreach(lc, *varinfos) + { + GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc); + AttrNumber attnum; + + if (!IsA(varinfo->var, Var)) + { + newlist = lappend(newlist, varinfo); + continue; + } + + attnum = ((Var *) varinfo->var)->varattno; + if (!bms_is_member(attnum, matched)) + newlist = lappend(newlist, varinfo); + } + + *varinfos = newlist; + *ndistinct = item->ndistinct; + return true; + } + + return false; +} + +/* * convert_to_scalar * Convert non-NULL values of the indicated types to the comparison * scale needed by scalarineqsel(). diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ce55fc5..a6b60c6 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -56,6 +56,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" @@ -4452,6 +4453,82 @@ RelationGetIndexList(Relation relation) } /* + * RelationGetStatExtList + * get a list of OIDs of extended statistics on this relation + * + * The statistics list is created only if someone requests it, in a way + * similar to RelationGetIndexList(). We scan pg_statistic_ext to find + * relevant statistics, and add the list to the relcache entry so that we + * won't have to compute it again. Note that shared cache inval of a + * relcache entry will delete the old list and set rd_statvalid to 0, + * so that we must recompute the statistics list on next request. This + * handles creation or deletion of a statistic. + * + * The returned list is guaranteed to be sorted in order by OID, although + * this is not currently needed. + * + * Since shared cache inval causes the relcache's copy of the list to go away, + * we return a copy of the list palloc'd in the caller's context. The caller + * may list_free() the returned list after scanning it. This is necessary + * since the caller will typically be doing syscache lookups on the relevant + * statistics, and syscache lookup could cause SI messages to be processed! + */ +List * +RelationGetStatExtList(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. */ + if (relation->rd_statvalid != 0) + return list_copy(relation->rd_statlist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = heap_open(StatisticExtRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + /* TODO maybe include only already built statistics? */ + result = insert_ordered_oid(result, HeapTupleGetOid(htup)); + + systable_endscan(indscan); + + heap_close(indrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_statlist; + relation->rd_statlist = list_copy(result); + + relation->rd_statvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering * @@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared) rel->rd_pkattr = NULL; rel->rd_idattr = NULL; rel->rd_pubactions = NULL; + rel->rd_statvalid = false; + rel->rd_statlist = NIL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index b1c0b4b..4a9cb76 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -61,6 +61,7 @@ #include "catalog/pg_shseclabel.h" #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -725,6 +726,28 @@ static const struct cachedesc cacheinfo[] = { }, 32 }, + {StatisticExtRelationId, /* STATEXTNAMENSP */ + StatisticExtNameIndexId, + 2, + { + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + 0, + 0 + }, + 4 + }, + {StatisticExtRelationId, /* STATEXTOID */ + StatisticExtOidIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 4 + }, {StatisticRelationId, /* STATRELATTINH */ StatisticRelidAttnumInhIndexId, 3, diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 89530a9..e2bc357 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -273,6 +273,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) getIndexes(fout, tblinfo, numTables); if (g_verbose) + write_msg(NULL, "reading extended statistics\n"); + getExtendedStatistics(fout, tblinfo, numTables); + + if (g_verbose) write_msg(NULL, "reading constraints\n"); getConstraints(fout, tblinfo, numTables); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index dd08925..f77581d 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass) strcmp(te->desc, "TRIGGER") == 0 || strcmp(te->desc, "ROW SECURITY") == 0 || strcmp(te->desc, "POLICY") == 0 || - strcmp(te->desc, "USER MAPPING") == 0) + strcmp(te->desc, "USER MAPPING") == 0 || + strcmp(te->desc, "STATISTICS") == 0) { /* these object types don't have separate owners */ } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 2b5a526..d7d6181 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo); static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, IndxInfo *indxinfo); +static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo); @@ -6584,6 +6585,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) } /* + * getExtendedStatistics + * get information about extended statistics on a dumpable table + * or materialized view. + * + * Note: extended statistics data is not returned directly to the caller, but + * it does get entered into the DumpableObject tables. + */ +void +getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables) +{ + int i, + j; + PQExpBuffer query; + PGresult *res; + StatsExtInfo *statsextinfo; + int ntups; + int i_tableoid; + int i_oid; + int i_staname; + int i_stadef; + + /* Extended statistics were new in v10 */ + if (fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + for (i = 0; i < numTables; i++) + { + TableInfo *tbinfo = &tblinfo[i]; + + /* Only plain tables and materialized views can have extended statistics. */ + if (tbinfo->relkind != RELKIND_RELATION && + tbinfo->relkind != RELKIND_MATVIEW) + continue; + + /* + * Ignore extended statistics of tables whose definitions are not to + * be dumped. + */ + if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) + continue; + + if (g_verbose) + write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n", + tbinfo->dobj.namespace->dobj.name, + tbinfo->dobj.name); + + /* Make sure we are in proper schema so stadef is right */ + selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name); + + resetPQExpBuffer(query); + + appendPQExpBuffer(query, + "SELECT " + "tableoid, " + "oid, " + "staname, " + "pg_catalog.pg_get_statisticsextdef(oid) AS stadef " + "FROM pg_statistic_ext " + "WHERE starelid = '%u' " + "ORDER BY staname", tbinfo->dobj.catId.oid); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_staname = PQfnumber(res, "staname"); + i_stadef = PQfnumber(res, "stadef"); + + statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo)); + + for (j = 0; j < ntups; j++) + { + statsextinfo[j].dobj.objType = DO_STATSEXT; + statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); + statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); + AssignDumpId(&statsextinfo[j].dobj); + statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname)); + statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace; + statsextinfo[j].statsexttable = tbinfo; + statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef)); + } + + PQclear(res); + } + + destroyPQExpBuffer(query); +} + +/* * getConstraints * * Get info about constraints on dumpable tables. @@ -9235,6 +9329,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_INDEX: dumpIndex(fout, (IndxInfo *) dobj); break; + case DO_STATSEXT: + dumpStatisticsExt(fout, (StatsExtInfo *) dobj); + break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (TableDataInfo *) dobj); break; @@ -15675,6 +15772,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo) } /* + * dumpStatisticsExt + * write out to fout an extended statistics object + */ +static void +dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + TableInfo *tbinfo = statsextinfo->statsexttable; + PQExpBuffer q; + PQExpBuffer delq; + PQExpBuffer labelq; + + if (dopt->dataOnly) + return; + + q = createPQExpBuffer(); + delq = createPQExpBuffer(); + labelq = createPQExpBuffer(); + + appendPQExpBuffer(labelq, "STATISTICS %s", + fmtId(statsextinfo->dobj.name)); + + appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef); + + appendPQExpBuffer(delq, "DROP STATISTICS %s.", + fmtId(tbinfo->dobj.namespace->dobj.name)); + appendPQExpBuffer(delq, "%s;\n", + fmtId(statsextinfo->dobj.name)); + + if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) + ArchiveEntry(fout, statsextinfo->dobj.catId, + statsextinfo->dobj.dumpId, + statsextinfo->dobj.name, + tbinfo->dobj.namespace->dobj.name, + NULL, + tbinfo->rolname, false, + "STATISTICS", SECTION_POST_DATA, + q->data, delq->data, NULL, + NULL, 0, + NULL, NULL); + + /* Dump Statistics Comments */ + if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT) + dumpComment(fout, labelq->data, + tbinfo->dobj.namespace->dobj.name, + tbinfo->rolname, + statsextinfo->dobj.catId, 0, + statsextinfo->dobj.dumpId); + + destroyPQExpBuffer(q); + destroyPQExpBuffer(delq); + destroyPQExpBuffer(labelq); +} + +/* * dumpConstraint * write out to fout a user-defined constraint */ @@ -17212,6 +17364,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, addObjectDependency(postDataBound, dobj->dumpId); break; case DO_INDEX: + case DO_STATSEXT: case DO_REFRESH_MATVIEW: case DO_TRIGGER: case DO_EVENT_TRIGGER: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index a466527..cb22f63 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -56,6 +56,7 @@ typedef enum DO_TABLE, DO_ATTRDEF, DO_INDEX, + DO_STATSEXT, DO_RULE, DO_TRIGGER, DO_CONSTRAINT, @@ -362,6 +363,13 @@ typedef struct _indxInfo int relpages; /* relpages of the underlying table */ } IndxInfo; +typedef struct _statsExtInfo +{ + DumpableObject dobj; + TableInfo *statsexttable; /* link to table the stats ext is for */ + char *statsextdef; +} StatsExtInfo; + typedef struct _ruleInfo { DumpableObject dobj; @@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables); extern InhInfo *getInherits(Archive *fout, int *numInherits); extern PartInfo *getPartitions(Archive *fout, int *numPartitions); extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables); +extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables); extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables); extern RuleInfo *getRules(Archive *fout, int *numRules); extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index e555de8..5c19b05 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] = 18, /* DO_TABLE */ 20, /* DO_ATTRDEF */ 28, /* DO_INDEX */ - 29, /* DO_RULE */ - 30, /* DO_TRIGGER */ + 29, /* DO_STATSEXT */ + 30, /* DO_RULE */ + 31, /* DO_TRIGGER */ 27, /* DO_CONSTRAINT */ - 31, /* DO_FK_CONSTRAINT */ + 32, /* DO_FK_CONSTRAINT */ 2, /* DO_PROCLANG */ 10, /* DO_CAST */ 23, /* DO_TABLE_DATA */ @@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "INDEX %s (ID %d OID %u)", obj->name, obj->dumpId, obj->catId.oid); return; + case DO_STATSEXT: + snprintf(buf, bufsize, + "STATISTICS %s (ID %d OID %u)", + obj->name, obj->dumpId, obj->catId.oid); + return; case DO_REFRESH_MATVIEW: snprintf(buf, bufsize, "REFRESH MATERIALIZED VIEW %s (ID %d OID %u)", diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 61a3e2a..d848f16 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2320,6 +2320,56 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } + /* print any extended statistics */ + if (pset.sversion >= 100000) + { + printfPQExpBuffer(&buf, + "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n" + " (SELECT string_agg(pg_catalog.quote_ident(attname::text),', ') \n" + " FROM ((SELECT unnest(stakeys) AS attnum) s\n" + " JOIN pg_attribute a ON (starelid = a.attrelid AND\n" + "a.attnum = s.attnum AND not attisdropped))) AS columns,\n" + " (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n" + "FROM pg_statistic_ext stat WHERE starelid = '%s' ORDER BY 1;", + oid); + + result = PSQLexec(buf.data); + if (!result) + goto error_return; + else + tuples = PQntuples(result); + + if (tuples > 0) + { + printTableAddFooter(&cont, _("Statistics:")); + + for (i = 0; i < tuples; i++) + { + int cnt = 0; + + printfPQExpBuffer(&buf, " "); + + /* statistics name (qualified with namespace) */ + appendPQExpBuffer(&buf, "\"%s.%s\" WITH (", + PQgetvalue(result, i, 1), + PQgetvalue(result, i, 2)); + + /* options */ + if (strcmp(PQgetvalue(result, i, 5), "t") == 0) + { + appendPQExpBufferStr(&buf, "ndistinct"); + cnt++; + } + + appendPQExpBuffer(&buf, ") ON (%s)", + PQgetvalue(result, i, 4)); + + printTableAddFooter(&cont, buf.data); + } + } + PQclear(result); + } + /* print rules */ if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW) { diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 10759c7..9effbce 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -147,6 +147,7 @@ typedef enum ObjectClass OCLASS_REWRITE, /* pg_rewrite */ OCLASS_TRIGGER, /* pg_trigger */ OCLASS_SCHEMA, /* pg_namespace */ + OCLASS_STATISTIC_EXT, /* pg_statistic_ext */ OCLASS_TSPARSER, /* pg_ts_parser */ OCLASS_TSDICT, /* pg_ts_dict */ OCLASS_TSTEMPLATE, /* pg_ts_template */ diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 1187797..473fe17 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum, DropBehavior behavior, bool complain, bool internal); extern void RemoveAttrDefaultById(Oid attrdefId); extern void RemoveStatistics(Oid relid, AttrNumber attnum); +extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum); extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno, bool relhasoids); diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 6bce732..8130581 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops)); #define LargeObjectMetadataOidIndexId 2996 +DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops)); +#define StatisticExtOidIndexId 3380 +DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops)); +#define StatisticExtNameIndexId 3997 +DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops)); +#define StatisticExtRelidIndexId 3379 + DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops)); #define NamespaceNameIndexId 2684 DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops)); diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h index dbeb25b..35e0e2b 100644 --- a/src/include/catalog/namespace.h +++ b/src/include/catalog/namespace.h @@ -141,6 +141,8 @@ extern Oid get_collation_oid(List *collname, bool missing_ok); extern Oid get_conversion_oid(List *conname, bool missing_ok); extern Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding); +extern Oid get_statistics_oid(List *names, bool missing_ok); + /* initialization & transaction cleanup code */ extern void InitializeSearchPath(void); extern void AtEOXact_Namespace(bool isCommit, bool parallel); diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index ce8dc59..bc5d28a 100644 --- a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -254,6 +254,10 @@ DATA(insert ( 23 18 78 e f )); /* pg_node_tree can be coerced to, but not from, text */ DATA(insert ( 194 25 0 i b )); +/* pg_ndistinct can be coerced to, but not from, bytea and text */ +DATA(insert ( 3361 17 0 i b )); +DATA(insert ( 3361 25 0 i i )); + /* * Datetime category */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 836d6ff..84d8475 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -1983,6 +1983,8 @@ DESCR("select statement of a view"); DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ )); DESCR("role name by OID (with fallback)"); DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ )); +DESCR("extended statistics description"); +DATA(insert OID = 3415 ( pg_get_statisticsextdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ )); DESCR("index description"); DATA(insert OID = 3352 ( pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ )); DESCR("partition key description"); @@ -2755,6 +2757,15 @@ DESCR("current user privilege on any column by rel name"); DATA(insert OID = 3029 ( has_any_column_privilege PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ )); DESCR("current user privilege on any column by rel oid"); +DATA(insert OID = 3355 ( pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3356 ( pg_ndistinct_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3357 ( pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 3358 ( pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ )); +DESCR("I/O"); + DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ )); DESCR("statistics: number of scans done for table/index"); DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h new file mode 100644 index 0000000..5f67fe7 --- /dev/null +++ b/src/include/catalog/pg_statistic_ext.h @@ -0,0 +1,75 @@ +/*------------------------------------------------------------------------- + * + * pg_statistic_ext.h + * definition of the system "extended statistic" relation (pg_statistic_ext) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_statistic_ext.h + * + * NOTES + * the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_STATISTIC_EXT_H +#define PG_STATISTIC_EXT_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_statistic_ext definition. cpp turns this into + * typedef struct FormData_pg_statistic_ext + * ---------------- + */ +#define StatisticExtRelationId 3381 + +CATALOG(pg_statistic_ext,3381) +{ + /* These fields form the unique key for the entry: */ + Oid starelid; /* relation containing attributes */ + NameData staname; /* statistics name */ + Oid stanamespace; /* OID of namespace containing this statistics */ + Oid staowner; /* statistics owner */ + + /* + * variable-length fields start here, but we allow direct access to + * stakeys + */ + int2vector stakeys; /* array of column keys */ + +#ifdef CATALOG_VARLEN + char staenabled[1] BKI_FORCE_NOT_NULL; /* statistic types + * requested to build */ + pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */ +#endif + +} FormData_pg_statistic_ext; + +/* ---------------- + * Form_pg_statistic_ext corresponds to a pointer to a tuple with + * the format of pg_statistic_ext relation. + * ---------------- + */ +typedef FormData_pg_statistic_ext *Form_pg_statistic_ext; + +/* ---------------- + * compiler constants for pg_statistic_ext + * ---------------- + */ +#define Natts_pg_statistic_ext 7 +#define Anum_pg_statistic_ext_starelid 1 +#define Anum_pg_statistic_ext_staname 2 +#define Anum_pg_statistic_ext_stanamespace 3 +#define Anum_pg_statistic_ext_staowner 4 +#define Anum_pg_statistic_ext_stakeys 5 +#define Anum_pg_statistic_ext_staenabled 6 +#define Anum_pg_statistic_ext_standistinct 7 + +#define STATS_EXT_NDISTINCT 'd' + +#endif /* PG_STATISTIC_EXT_H */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 9f61238..9ad6725 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node DESCR("string representing an internal node tree"); #define PGNODETREEOID 194 +DATA(insert OID = 3361 ( pg_ndistinct PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); +DESCR("multivariate ndistinct coefficients"); +#define PGNDISTINCTOID 3361 + DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("internal type for passing CollectedCommand"); #define PGDDLCOMMANDOID 32 diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h index db7f145..00d0a83 100644 --- a/src/include/catalog/toasting.h +++ b/src/include/catalog/toasting.h @@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837); DECLARE_TOAST(pg_rewrite, 2838, 2839); DECLARE_TOAST(pg_seclabel, 3598, 3599); DECLARE_TOAST(pg_statistic, 2840, 2841); +DECLARE_TOAST(pg_statistic_ext, 3439, 3440); DECLARE_TOAST(pg_trigger, 2336, 2337); /* shared catalogs */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 8740cee..c323e81 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters); extern void RemoveOperatorById(Oid operOid); extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt); +/* commands/statscmds.c */ +extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt); +extern void RemoveStatisticsById(Oid statsOid); + /* commands/aggregatecmds.c */ extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List *parameters); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 2cbd6d7..eae6200 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -279,6 +279,7 @@ typedef enum NodeTag T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, + T_StatisticExtInfo, /* * TAGS FOR MEMORY NODES (memnodes.h) @@ -424,6 +425,7 @@ typedef enum NodeTag T_CreateSubscriptionStmt, T_AlterSubscriptionStmt, T_DropSubscriptionStmt, + T_CreateStatsStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index a15df22..e912384 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1593,6 +1593,7 @@ typedef enum ObjectType OBJECT_SCHEMA, OBJECT_SEQUENCE, OBJECT_SUBSCRIPTION, + OBJECT_STATISTIC_EXT, OBJECT_TABCONSTRAINT, OBJECT_TABLE, OBJECT_TABLESPACE, @@ -2646,6 +2647,20 @@ typedef struct IndexStmt } IndexStmt; /* ---------------------- + * Create Statistics Statement + * ---------------------- + */ +typedef struct CreateStatsStmt +{ + NodeTag type; + List *defnames; /* qualified name (list of Value strings) */ + RangeVar *relation; /* relation to build statistics on */ + List *keys; /* String nodes naming referenced columns */ + List *options; /* list of DefElem */ + bool if_not_exists; /* do nothing if statistics already exists */ +} CreateStatsStmt; + +/* ---------------------- * Create Function Statement * ---------------------- */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 1c88a79..0a5187c 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -529,6 +529,7 @@ typedef struct RelOptInfo List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *statlist; /* list of StatisticExtInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -668,6 +669,24 @@ typedef struct ForeignKeyOptInfo List *rinfos[INDEX_MAX_KEYS]; } ForeignKeyOptInfo; +/* + * StatisticExtInfo + * Information about extended statistics for planning/optimization + * + * This contains information about which columns are covered by the + * statistics (stakeys), which options were requested while adding the + * statistics (*_enabled), and which kinds of statistics were actually + * built and are available for the optimizer (*_built). + */ +typedef struct StatisticExtInfo +{ + NodeTag type; + + Oid statOid; /* OID of the statistics row */ + RelOptInfo *rel; /* back-link to index's table */ + char kind; /* statistic kind of this entry */ + Bitmapset *keys; /* attnums of the columns covered */ +} StatisticExtInfo; /* * EquivalenceClasses diff --git a/src/include/statistics/stat_ext_internal.h b/src/include/statistics/stat_ext_internal.h new file mode 100644 index 0000000..1e6208a --- /dev/null +++ b/src/include/statistics/stat_ext_internal.h @@ -0,0 +1,64 @@ +/*------------------------------------------------------------------------- + * + * stat_ext_internal.h + * POSTGRES extended statistics internal declarations + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/statistics/stat_ext_internal.h + * + *------------------------------------------------------------------------- + */ +#ifndef STAT_EXT_INTERNAL_H +#define STAT_EXT_INTERNAL_H + +#include "utils/sortsupport.h" +#include "statistics/stats.h" + + +typedef struct +{ + Oid eqopr; /* '=' operator for datatype, if any */ + Oid eqfunc; /* and associated function */ + Oid ltopr; /* '<' operator for datatype, if any */ +} StdAnalyzeData; + +typedef struct +{ + Datum value; /* a data value */ + int tupno; /* position index for tuple it came from */ +} ScalarItem; + +/* multi-sort */ +typedef struct MultiSortSupportData +{ + int ndims; /* number of dimensions supported by the */ + SortSupportData ssup[1]; /* sort support data for each dimension */ +} MultiSortSupportData; + +typedef MultiSortSupportData *MultiSortSupport; + +typedef struct SortItem +{ + Datum *values; + bool *isnull; +} SortItem; + +extern MVNDistinct *statext_ndistinct_build(double totalrows, + int numrows, HeapTuple *rows, + int2vector *attrs, VacAttrStats **stats); +extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct); +extern MVNDistinct *statext_ndistinct_deserialize(bytea *data); + +extern MultiSortSupport multi_sort_init(int ndims); +extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, + Oid oper); +extern int multi_sort_compare(const void *a, const void *b, void *arg); +extern int multi_sort_compare_dim(int dim, const SortItem * a, + const SortItem * b, MultiSortSupport mss); +extern int multi_sort_compare_dims(int start, int end, const SortItem * a, + const SortItem * b, MultiSortSupport mss); + +#endif /* STAT_EXT_INTERNAL_H */ diff --git a/src/include/statistics/stats.h b/src/include/statistics/stats.h new file mode 100644 index 0000000..69c7dd1 --- /dev/null +++ b/src/include/statistics/stats.h @@ -0,0 +1,47 @@ +/*------------------------------------------------------------------------- + * + * stats.h + * Extended statistics and selectivity estimation functions. + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/statistics/stats.h + * + *------------------------------------------------------------------------- + */ +#ifndef STATS_H +#define STATS_H + +#include "commands/vacuum.h" + +#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */ + +/* Multivariate distinct coefficients */ +#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* struct identifier */ +#define STATS_NDISTINCT_TYPE_BASIC 1 /* struct version */ + +/* MVDistinctItem represents a single combination of columns */ +typedef struct MVNDistinctItem +{ + double ndistinct; /* ndistinct value for this combination */ + Bitmapset *attrs; /* attr numbers of items */ +} MVNDistinctItem; + +/* A MVNDistinct object, comprising all possible combinations of columns */ +typedef struct MVNDistinct +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of ndistinct (BASIC) */ + uint32 nitems; /* number of items in the statistic */ + MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]; +} MVNDistinct; + +extern MVNDistinct *statext_ndistinct_load(Oid mvoid); + +extern void BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats); +extern bool statext_is_kind_built(HeapTuple htup, char kind); + +#endif /* STATS_H */ diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 0d11852..c957d8e 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -192,6 +192,7 @@ typedef enum AclObjectKind ACL_KIND_OPFAMILY, /* pg_opfamily */ ACL_KIND_COLLATION, /* pg_collation */ ACL_KIND_CONVERSION, /* pg_conversion */ + ACL_KIND_STATISTICS, /* pg_statistic_ext */ ACL_KIND_TABLESPACE, /* pg_tablespace */ ACL_KIND_TSDICTIONARY, /* pg_ts_dict */ ACL_KIND_TSCONFIGURATION, /* pg_ts_config */ @@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid); extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid); extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid); extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid); +extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid); extern bool has_createrole_privilege(Oid roleid); extern bool has_bypassrls_privilege(Oid roleid); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index a617a7c..ab875bb 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -92,6 +92,7 @@ typedef struct RelationData bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 = * valid, 2 = temporarily forced */ + bool rd_statvalid; /* is rd_statlist valid? */ /* * rd_createSubid is the ID of the highest subtransaction the rel has @@ -136,6 +137,9 @@ typedef struct RelationData Oid rd_pkindex; /* OID of primary key, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ + /* data managed by RelationGetStatExtList: */ + List *rd_statlist; /* list of OIDs of extended stats */ + /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index da36b67..81af3ae 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,6 +39,7 @@ extern void RelationClose(Relation relation); */ extern List *RelationGetFKeyList(Relation relation); extern List *RelationGetIndexList(Relation relation); +extern List *RelationGetStatExtList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetPrimaryKeyIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 66f60d2..048541e 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -86,6 +86,8 @@ enum SysCacheIdentifier PUBLICATIONRELMAP, RULERELNAME, SEQRELID, + STATEXTNAMENSP, + STATEXTOID, STATRELATTINH, SUBSCRIPTIONOID, SUBSCRIPTIONNAME, diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index b01be59..ce581bb 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -497,6 +497,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18" DROP OPERATOR FAMILY alt_opf18 USING btree; -- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp1" +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- failed (name conflict) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user2" +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user3" +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp2" +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; + nspname | staname | rolname +----------+-----------+--------------------- + alt_nsp1 | alt_stat2 | regress_alter_user2 + alt_nsp1 | alt_stat3 | regress_alter_user1 + alt_nsp1 | alt_stat4 | regress_alter_user2 + alt_nsp2 | alt_stat2 | regress_alter_user3 +(4 rows) + +-- -- Text Search Dictionary -- SET SESSION AUTHORIZATION regress_alter_user1; @@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE; DROP LANGUAGE alt_lang4 CASCADE; ERROR: language "alt_lang4" does not exist DROP SCHEMA alt_nsp1 CASCADE; -NOTICE: drop cascades to 26 other objects +NOTICE: drop cascades to 27 other objects DETAIL: drop cascades to function alt_func3(integer) drop cascades to function alt_agg3(integer) drop cascades to function alt_func4(integer) @@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash drop cascades to operator family alt_opc2 for access method hash drop cascades to operator family alt_opf4 for access method hash drop cascades to operator family alt_opf2 for access method hash +drop cascades to table alt_regress_1 drop cascades to text search dictionary alt_ts_dict3 drop cascades to text search dictionary alt_ts_dict4 drop cascades to text search dictionary alt_ts_dict2 diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out index 90c4ba4..145a530 100644 --- a/src/test/regress/expected/object_address.out +++ b/src/test/regress/expected/object_address.out @@ -38,6 +38,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT); +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); ERROR: unrecognized object type "stone" @@ -408,7 +409,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address @@ -456,6 +458,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, trigger | | | t on addr_nsp.gentable | t operator family | pg_catalog | integer_ops | pg_catalog.integer_ops USING btree | t policy | | | genpol on addr_nsp.gentable | t + statistics | addr_nsp | gentable_stat | addr_nsp.gentable_stat | t collation | pg_catalog | "default" | pg_catalog."default" | t transform | | | for integer on language sql | t text search dictionary | addr_nsp | addr_ts_dict | addr_nsp.addr_ts_dict | t @@ -465,7 +468,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, subscription | | addr_sub | addr_sub | t publication | | addr_pub | addr_pub | t publication relation | | | gentable in publication addr_pub | t -(45 rows) +(46 rows) --- --- Cleanup resources diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 64d9dd6..262036a 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND text | character | 0 | i character varying | character | 0 | i pg_node_tree | text | 0 | i + pg_ndistinct | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(7 rows) +(8 rows) -- **************** pg_conversion **************** -- Look for illegal values in pg_conversion fields. diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index bd13ae6..d4b2158 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2160,6 +2160,14 @@ pg_stats| SELECT n.nspname AS schemaname, JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid)))); +pg_stats_ext| SELECT n.nspname AS schemaname, + c.relname AS tablename, + s.staname, + s.stakeys AS attnums, + length((s.standistinct)::text) AS ndistbytes + FROM ((pg_statistic_ext s + JOIN pg_class c ON ((c.oid = s.starelid))) + LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))); pg_tables| SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 88b4c97..ddc4b88 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -142,6 +142,7 @@ pg_shdepend|t pg_shdescription|t pg_shseclabel|t pg_statistic|t +pg_statistic_ext|t pg_subscription|t pg_tablespace|t pg_transform|t diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 0000000..b3e3010 --- /dev/null +++ b/src/test/regress/expected/stats_ext.out @@ -0,0 +1,142 @@ +-- Generic extended statistics support +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; +CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | + c | integer | | | +Statistics: + "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c) + +DROP TABLE ab1; +-- data type passed by value +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +); +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; +ERROR: column "unknown_column" referenced in statistics does not exist +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; +ERROR: statistics require at least 2 columns +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; +-- perfectly correlated groups +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words(i::money) + FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------------------ + {d} | [{(b 3 4), 101.000000}, {(b 3 6), 101.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 101.000000}] +(1 row) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c, d + -> Seq Scan on ndistinct +(3 rows) + +TRUNCATE TABLE ndistinct; +-- partially correlated groups +INSERT INTO ndistinct (a, b, c) + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------------------ + {d} | [{(b 3 4), 201.000000}, {(b 3 6), 201.000000}, {(b 4 6), 101.000000}, {(b 3 4 6), 201.000000}] +(1 row) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..232.01 rows=201 width=16) + Group Key: a, b + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..257.01 rows=201 width=20) + Group Key: a, b, c + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=280.00..290.00 rows=1000 width=24) + Group Key: a, b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..265.00 rows=1000 width=20) + Group Key: b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..240.00 rows=1000 width=16) + Group Key: a, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +DROP TABLE ndistinct; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 8d75bbf..84022f6 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1 WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' @@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); - oid | typname ------+-------------- - 194 | pg_node_tree - 210 | smgr -(2 rows) + oid | typname +------+-------------- + 194 | pg_node_tree + 3361 | pg_ndistinct + 210 | smgr +(3 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 38743d9..c283bdc 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview # ---------- # Another group of parallel tests # ---------- -test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan +test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext # rules cannot run concurrently with any test that creates a view test: rules psql_crosstab amutils diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index d9f64c2..3a0d536 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -130,6 +130,7 @@ test: misc_functions test: sysviews test: tsrf test: tidscan +test: stats_ext test: rules test: psql_crosstab test: select_parallel diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index c9ea479..f6fa8d8 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); DROP OPERATOR FAMILY alt_opf18 USING btree; +-- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- failed (name conflict) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK + +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) + +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; -- -- Text Search Dictionary diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql index 6b85fe2..9d3230b 100644 --- a/src/test/regress/sql/object_address.sql +++ b/src/test/regress/sql/object_address.sql @@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT); +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); @@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql new file mode 100644 index 0000000..e5adb31 --- /dev/null +++ b/src/test/regress/sql/stats_ext.sql @@ -0,0 +1,91 @@ +-- Generic extended statistics support +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); + +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; + +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; + +CREATE STATISTICS ab1_b_c_stats ON (b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP COLUMN a; +\d ab1 + +DROP TABLE ab1; + + +-- data type passed by value +CREATE TABLE ndistinct ( + filler1 TEXT, + filler2 NUMERIC, + a INT, + b INT, + filler3 DATE, + c INT, + d INT +); + +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; + +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; + +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; + +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; + +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; + +-- perfectly correlated groups +INSERT INTO ndistinct (a, b, c, filler1) + SELECT i/100, i/100, i/100, cash_words(i::money) + FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +TRUNCATE TABLE ndistinct; + +-- partially correlated groups +INSERT INTO ndistinct (a, b, c) + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + +DROP TABLE ndistinct; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index 0a31249..4c65814 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1