From f370eceec0cbb9b6bf76d3394e56a5df4280c906 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 23 Dec 2023 10:47:19 +0100 Subject: [PATCH v2 1/3] Make attstattarget nullable This changes the pg_attribute field attstattarget into a nullable field in the variable-length part of the row. If no value is set by the user for attstattarget, it is now null instead of the previous -1. This saves space in pg_attribute and tuple descriptors for most practical scenarios. (ATTRIBUTE_FIXED_PART_SIZE is reduced from 108 to 104.) Also, null is the semantically more correct value. The ANALYZE code internally continues to represent the default statistics target by -1, so that the ANALYZE code can avoid having to deal with null values. But that representation is now confined to the ANALYZE code. The DDL code deals with attstattarget possibly being null. For system columns, the field is now always null, but the effective value 0 (don't analyze) is assumed. To set a column's statistics target to the default value, the new command form ALTER TABLE ... SET STATISTICS DEFAULT can be used. (SET STATISTICS -1 still works.) Discussion: https://www.postgresql.org/message-id/flat/4da8d211-d54d-44b9-9847-f2a9f1184c76@eisentraut.org TODO: move get_attstattarget() into analyze.c? 
TODO: catversion --- doc/src/sgml/ref/alter_table.sgml | 4 +- src/backend/access/common/tupdesc.c | 4 -- src/backend/bootstrap/bootstrap.c | 1 - src/backend/catalog/genbki.pl | 1 - src/backend/catalog/heap.c | 14 +++---- src/backend/catalog/index.c | 21 ++++++++--- src/backend/commands/analyze.c | 7 +++- src/backend/commands/tablecmds.c | 44 +++++++++++++++++----- src/backend/parser/gram.y | 18 ++++++--- src/backend/utils/cache/lsyscache.c | 22 +++++++++-- src/bin/pg_dump/pg_dump.c | 7 +++- src/include/catalog/pg_attribute.h | 16 ++++---- src/include/commands/vacuum.h | 2 +- src/test/regress/expected/create_index.out | 4 +- 14 files changed, 109 insertions(+), 56 deletions(-) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index e1d207bc60..9d637157eb 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -50,7 +50,7 @@ ALTER [ COLUMN ] column_name ADD GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] ALTER [ COLUMN ] column_name { SET GENERATED { ALWAYS | BY DEFAULT } | SET sequence_option | RESTART [ [ WITH ] restart ] } [...] ALTER [ COLUMN ] column_name DROP IDENTITY [ IF EXISTS ] - ALTER [ COLUMN ] column_name SET STATISTICS integer + ALTER [ COLUMN ] column_name SET STATISTICS { integer | DEFAULT } ALTER [ COLUMN ] column_name SET ( attribute_option = value [, ... ] ) ALTER [ COLUMN ] column_name RESET ( attribute_option [, ... ] ) ALTER [ COLUMN ] column_name SET STORAGE { PLAIN | EXTERNAL | EXTENDED | MAIN | DEFAULT } @@ -317,7 +317,7 @@ Description sets the per-column statistics-gathering target for subsequent ANALYZE operations. The target can be set in the range 0 to 10000; alternatively, set it - to -1 to revert to using the system default statistics + to DEFAULT to revert to using the system default statistics target (). 
For more information on the use of statistics by the PostgreSQL query planner, refer to diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index 8826519e5e..054ccff1e2 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -453,8 +453,6 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2) return false; if (attr1->atttypid != attr2->atttypid) return false; - if (attr1->attstattarget != attr2->attstattarget) - return false; if (attr1->attlen != attr2->attlen) return false; if (attr1->attndims != attr2->attndims) @@ -639,7 +637,6 @@ TupleDescInitEntry(TupleDesc desc, else if (attributeName != NameStr(att->attname)) namestrcpy(&(att->attname), attributeName); - att->attstattarget = -1; att->attcacheoff = -1; att->atttypmod = typmod; @@ -702,7 +699,6 @@ TupleDescInitBuiltinEntry(TupleDesc desc, Assert(attributeName != NULL); namestrcpy(&(att->attname), attributeName); - att->attstattarget = -1; att->attcacheoff = -1; att->atttypmod = typmod; diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index e01dca9b7c..c7546da51e 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -552,7 +552,6 @@ DefineAttr(char *name, char *type, int attnum, int nullness) if (OidIsValid(attrtypes[attnum]->attcollation)) attrtypes[attnum]->attcollation = C_COLLATION_OID; - attrtypes[attnum]->attstattarget = -1; attrtypes[attnum]->attcacheoff = -1; attrtypes[attnum]->atttypmod = -1; attrtypes[attnum]->attislocal = true; diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 380bc23c82..13cd2fee14 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -840,7 +840,6 @@ sub gen_pg_attribute my %row; $row{attnum} = $attnum; $row{attrelid} = $table->{relation_oid}; - $row{attstattarget} = '0'; morph_row_for_pgattr(\%row, $schema, $attr, 1); print_bki_insert(\%row, $schema); diff --git 
a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index b93894889d..52b4485c4b 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -749,14 +749,16 @@ InsertPgAttributeTuples(Relation pg_attribute_rel, slot[slotCount]->tts_values[Anum_pg_attribute_attisdropped - 1] = BoolGetDatum(attrs->attisdropped); slot[slotCount]->tts_values[Anum_pg_attribute_attislocal - 1] = BoolGetDatum(attrs->attislocal); slot[slotCount]->tts_values[Anum_pg_attribute_attinhcount - 1] = Int16GetDatum(attrs->attinhcount); - slot[slotCount]->tts_values[Anum_pg_attribute_attstattarget - 1] = Int16GetDatum(attrs->attstattarget); slot[slotCount]->tts_values[Anum_pg_attribute_attcollation - 1] = ObjectIdGetDatum(attrs->attcollation); if (attoptions && attoptions[natts] != (Datum) 0) slot[slotCount]->tts_values[Anum_pg_attribute_attoptions - 1] = attoptions[natts]; else slot[slotCount]->tts_isnull[Anum_pg_attribute_attoptions - 1] = true; - /* start out with empty permissions and empty options */ + /* + * The remaining fields are not set for new columns. 
+ */ + slot[slotCount]->tts_isnull[Anum_pg_attribute_attstattarget - 1] = true; slot[slotCount]->tts_isnull[Anum_pg_attribute_attacl - 1] = true; slot[slotCount]->tts_isnull[Anum_pg_attribute_attfdwoptions - 1] = true; slot[slotCount]->tts_isnull[Anum_pg_attribute_attmissingval - 1] = true; @@ -818,9 +820,6 @@ AddNewAttributeTuples(Oid new_rel_oid, indstate = CatalogOpenIndexes(rel); - /* set stats detail level to a sane default */ - for (int i = 0; i < natts; i++) - tupdesc->attrs[i].attstattarget = -1; InsertPgAttributeTuples(rel, tupdesc, new_rel_oid, NULL, indstate); /* add dependencies on their datatypes and collations */ @@ -1685,9 +1684,6 @@ RemoveAttributeById(Oid relid, AttrNumber attnum) /* Remove any not-null constraint the column may have */ attStruct->attnotnull = false; - /* We don't want to keep stats for it anymore */ - attStruct->attstattarget = 0; - /* Unset this so no one tries to look up the generation expression */ attStruct->attgenerated = '\0'; @@ -1707,6 +1703,8 @@ RemoveAttributeById(Oid relid, AttrNumber attnum) * Clear the other variable-length fields. This saves some space in * pg_attribute and removes no longer useful information. */ + nullsAtt[Anum_pg_attribute_attstattarget - 1] = true; + replacesAtt[Anum_pg_attribute_attstattarget - 1] = true; nullsAtt[Anum_pg_attribute_attacl - 1] = true; replacesAtt[Anum_pg_attribute_attacl - 1] = true; nullsAtt[Anum_pg_attribute_attoptions - 1] = true; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 7b186c0220..b2759df311 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -325,7 +325,6 @@ ConstructTupleDescriptor(Relation heapRelation, MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE); to->attnum = i + 1; - to->attstattarget = -1; to->attcacheoff = -1; to->attislocal = true; to->attcollation = (i < numkeyatts) ? 
collationIds[i] : InvalidOid; @@ -1780,10 +1779,12 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) while (HeapTupleIsValid((attrTuple = systable_getnext(scan)))) { Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attrTuple); + HeapTuple tp; + Datum dat; + bool isnull; Datum repl_val[Natts_pg_attribute]; bool repl_null[Natts_pg_attribute]; bool repl_repl[Natts_pg_attribute]; - int attstattarget; HeapTuple newTuple; /* Ignore dropped columns */ @@ -1793,10 +1794,18 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) /* * Get attstattarget from the old index and refresh the new value. */ - attstattarget = get_attstattarget(oldIndexId, att->attnum); + tp = SearchSysCache2(ATTNUM, ObjectIdGetDatum(oldIndexId), Int16GetDatum(att->attnum)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + att->attnum, oldIndexId); + dat = SysCacheGetAttr(ATTNUM, tp, Anum_pg_attribute_attstattarget, &isnull); + ReleaseSysCache(tp); - /* no need for a refresh if both match */ - if (attstattarget == att->attstattarget) + /* + * No need for a refresh if old index value is null. (All new + * index values are null at this point.) 
+ */ + if (isnull) continue; memset(repl_val, 0, sizeof(repl_val)); @@ -1804,7 +1813,7 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) memset(repl_repl, false, sizeof(repl_repl)); repl_repl[Anum_pg_attribute_attstattarget - 1] = true; - repl_val[Anum_pg_attribute_attstattarget - 1] = Int16GetDatum(attstattarget); + repl_val[Anum_pg_attribute_attstattarget - 1] = dat; newTuple = heap_modify_tuple(attrTuple, RelationGetDescr(pg_attribute), diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 1f4a951681..fd2202cbb8 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1004,6 +1004,7 @@ static VacAttrStats * examine_attribute(Relation onerel, int attnum, Node *index_expr) { Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1); + int attstattarget; HeapTuple typtuple; VacAttrStats *stats; int i; @@ -1013,15 +1014,17 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr) if (attr->attisdropped) return NULL; + attstattarget = get_attstattarget(RelationGetRelid(onerel), attnum); + /* Don't analyze column if user has specified not to */ - if (attr->attstattarget == 0) + if (attstattarget == 0) return NULL; /* * Create the VacAttrStats struct. 
*/ stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); - stats->attstattarget = attr->attstattarget; + stats->attstattarget = attstattarget; /* * When analyzing an expression index, believe the expression tree's type diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6b0a20010e..77bb17c479 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -7124,7 +7124,6 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, attribute.attrelid = myrelid; namestrcpy(&(attribute.attname), colDef->colname); attribute.atttypid = typeOid; - attribute.attstattarget = -1; attribute.attlen = tform->typlen; attribute.attnum = newattnum; if (list_length(colDef->typeName->arrayBounds) > PG_INT16_MAX) @@ -8453,10 +8452,14 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa { int newtarget; Relation attrelation; - HeapTuple tuple; + HeapTuple tuple, + newtuple; Form_pg_attribute attrtuple; AttrNumber attnum; ObjectAddress address; + Datum repl_val[Natts_pg_attribute]; + bool repl_null[Natts_pg_attribute]; + bool repl_repl[Natts_pg_attribute]; /* * We allow referencing columns by numbers only for indexes, since table @@ -8469,8 +8472,18 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot refer to non-index column by number"))); - Assert(IsA(newValue, Integer)); - newtarget = intVal(newValue); + if (newValue) + { + Assert(IsA(newValue, Integer)); + newtarget = intVal(newValue); + } + else + { + /* + * -1 was used in previous versions to represent the default setting + */ + newtarget = -1; + } /* * Limit target to a sane range @@ -8495,7 +8508,7 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa if (colName) { - tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), colName); + tuple = SearchSysCacheAttName(RelationGetRelid(rel), colName); if 
(!HeapTupleIsValid(tuple)) ereport(ERROR, @@ -8505,7 +8518,7 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa } else { - tuple = SearchSysCacheCopyAttNum(RelationGetRelid(rel), colNum); + tuple = SearchSysCacheAttNum(RelationGetRelid(rel), colNum); if (!HeapTupleIsValid(tuple)) ereport(ERROR, @@ -8539,16 +8552,27 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa errhint("Alter statistics on table column instead."))); } - attrtuple->attstattarget = newtarget; - - CatalogTupleUpdate(attrelation, &tuple->t_self, tuple); + /* Build new tuple. */ + memset(repl_null, false, sizeof(repl_null)); + memset(repl_repl, false, sizeof(repl_repl)); + if (newtarget != -1) + repl_val[Anum_pg_attribute_attstattarget - 1] = newtarget; + else + repl_null[Anum_pg_attribute_attstattarget - 1] = true; + repl_repl[Anum_pg_attribute_attstattarget - 1] = true; + newtuple = heap_modify_tuple(tuple, RelationGetDescr(attrelation), + repl_val, repl_null, repl_repl); + CatalogTupleUpdate(attrelation, &tuple->t_self, newtuple); InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), attrtuple->attnum); ObjectAddressSubSet(address, RelationRelationId, RelationGetRelid(rel), attnum); - heap_freetuple(tuple); + + heap_freetuple(newtuple); + + ReleaseSysCache(tuple); table_close(attrelation, RowExclusiveLock); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 63f172e175..b6f23e26e7 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -337,6 +337,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type alter_table_cmds alter_type_cmds %type alter_identity_column_option_list %type alter_identity_column_option +%type set_statistics_value %type createdb_opt_list createdb_opt_items copy_opt_list transaction_mode_list @@ -2436,18 +2437,18 @@ alter_table_cmd: n->missing_ok = true; $$ = (Node *) n; } - /* ALTER TABLE ALTER [COLUMN] SET STATISTICS */ 
- | ALTER opt_column ColId SET STATISTICS SignedIconst + /* ALTER TABLE ALTER [COLUMN] SET STATISTICS */ + | ALTER opt_column ColId SET STATISTICS set_statistics_value { AlterTableCmd *n = makeNode(AlterTableCmd); n->subtype = AT_SetStatistics; n->name = $3; - n->def = (Node *) makeInteger($6); + n->def = $6; $$ = (Node *) n; } - /* ALTER TABLE ALTER [COLUMN] SET STATISTICS */ - | ALTER opt_column Iconst SET STATISTICS SignedIconst + /* ALTER TABLE ALTER [COLUMN] SET STATISTICS */ + | ALTER opt_column Iconst SET STATISTICS set_statistics_value { AlterTableCmd *n = makeNode(AlterTableCmd); @@ -2459,7 +2460,7 @@ alter_table_cmd: n->subtype = AT_SetStatistics; n->num = (int16) $3; - n->def = (Node *) makeInteger($6); + n->def = $6; $$ = (Node *) n; } /* ALTER TABLE ALTER [COLUMN] SET ( column_parameter = value [, ... ] ) */ @@ -3060,6 +3061,11 @@ alter_identity_column_option: } ; +set_statistics_value: + SignedIconst { $$ = (Node *) makeInteger($1); } + | DEFAULT { $$ = NULL; } + ; + PartitionBoundSpec: /* a HASH partition */ FOR VALUES WITH '(' hash_partbound ')' diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index fc6d267e44..41f877a305 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -878,23 +878,39 @@ get_attnum(Oid relid, const char *attname) * Given the relation id and the attribute number, * return the "attstattarget" field from the attribute relation. * + * Returns -1 if attstattarget is null, except 0 for dropped columns. + * + * Always returns 0 for system columns. + * * Errors if not found. 
*/ int get_attstattarget(Oid relid, AttrNumber attnum) { HeapTuple tp; - Form_pg_attribute att_tup; + Datum dat; + bool isnull; int result; + if (attnum < 0) + return 0; + tp = SearchSysCache2(ATTNUM, ObjectIdGetDatum(relid), Int16GetDatum(attnum)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for attribute %d of relation %u", attnum, relid); - att_tup = (Form_pg_attribute) GETSTRUCT(tp); - result = att_tup->attstattarget; + dat = SysCacheGetAttr(ATTNUM, tp, Anum_pg_attribute_attstattarget, &isnull); + if (isnull) + { + if (((Form_pg_attribute) GETSTRUCT(tp))->attisdropped) + result = 0; + else + result = -1; + } + else + result = DatumGetInt16(dat); ReleaseSysCache(tp); return result; } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 8c0b5486b9..bc3651a73a 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -8682,7 +8682,10 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) tbinfo->dobj.name); tbinfo->attnames[j] = pg_strdup(PQgetvalue(res, r, i_attname)); tbinfo->atttypnames[j] = pg_strdup(PQgetvalue(res, r, i_atttypname)); - tbinfo->attstattarget[j] = atoi(PQgetvalue(res, r, i_attstattarget)); + if (PQgetisnull(res, r, i_attstattarget)) + tbinfo->attstattarget[j] = -1; + else + tbinfo->attstattarget[j] = atoi(PQgetvalue(res, r, i_attstattarget)); tbinfo->attstorage[j] = *(PQgetvalue(res, r, i_attstorage)); tbinfo->typstorage[j] = *(PQgetvalue(res, r, i_typstorage)); tbinfo->attidentity[j] = *(PQgetvalue(res, r, i_attidentity)); @@ -16261,7 +16264,7 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) /* * Dump per-column statistics information. We only issue an ALTER * TABLE statement if the attstattarget entry for this column is - * non-negative (i.e. it's not the default value) + * not the default value. 
*/ if (tbinfo->attstattarget[j] >= 0) appendPQExpBuffer(q, "ALTER %sTABLE ONLY %s ALTER COLUMN %s SET STATISTICS %d;\n", diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h index 672a5a16ff..337a2d5bf9 100644 --- a/src/include/catalog/pg_attribute.h +++ b/src/include/catalog/pg_attribute.h @@ -158,22 +158,22 @@ CATALOG(pg_attribute,1249,AttributeRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(75, /* Number of times inherited from direct parent relation(s) */ int16 attinhcount BKI_DEFAULT(0); + /* attribute's collation, if any */ + Oid attcollation BKI_LOOKUP_OPT(pg_collation); + +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + /* NOTE: The following fields are not present in tuple descriptors. */ + /* * attstattarget is the target number of statistics datapoints to collect * during VACUUM ANALYZE of this column. A zero here indicates that we do - * not wish to collect any stats about this column. A "-1" here indicates + * not wish to collect any stats about this column. A NULL here indicates * that no value has been explicitly set for this column, so ANALYZE * should use the default setting. * * int16 is sufficient for the current max value (MAX_STATISTICS_TARGET). */ - int16 attstattarget BKI_DEFAULT(-1); - - /* attribute's collation, if any */ - Oid attcollation BKI_LOOKUP_OPT(pg_collation); - -#ifdef CATALOG_VARLEN /* variable-length fields start here */ - /* NOTE: The following fields are not present in tuple descriptors. */ + int16 attstattarget BKI_DEFAULT(_null_) BKI_FORCE_NULL; /* Column-level access permissions */ aclitem attacl[1] BKI_DEFAULT(_null_); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 4af02940c5..ea096ee8a8 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -121,7 +121,7 @@ typedef struct VacAttrStats * than the underlying column/expression. 
Therefore, use these fields for * information about the datatype being fed to the typanalyze function. */ - int attstattarget; + int attstattarget; /* -1 to use default */ Oid attrtypid; /* type of data being analyzed */ int32 attrtypmod; /* typmod of data being analyzed */ Form_pg_type attrtype; /* copy of pg_type row for attrtypid */ diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 446cfa678b..79fa117cb5 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -2707,8 +2707,8 @@ SELECT attrelid::regclass, attnum, attstattarget attrelid | attnum | attstattarget ---------------------------+--------+--------------- concur_exprs_index_expr | 1 | 100 - concur_exprs_index_pred | 1 | -1 - concur_exprs_index_pred_2 | 1 | -1 + concur_exprs_index_pred | 1 | + concur_exprs_index_pred_2 | 1 | (3 rows) DROP TABLE concur_exprs_tab; base-commit: 3e2e0d5ad7fcb89d18a71cbfc885ef184e1b6f2e -- 2.43.0