From bec0bdcd6d49b4ade505fb257a32b7aa7700dac0 Mon Sep 17 00:00:00 2001 From: Alberto Piai Date: Thu, 14 May 2026 14:51:33 -0700 Subject: [PATCH v4 2/2] Try to avoid a rewrite when adding a stored generated column This builds upon basic support for ... ALTER COLUMN ... ADD GENERATED ALWAYS AS (expr) STORED If we can find a constraint which proves that the given column is already always equal to the new generated column expression, skip the expensive rewrite of the table. The check constraint must use an equality operator which is mergejoinable, and the expression must match exactly the generated column's default expression. --- src/backend/catalog/pg_constraint.c | 73 +++++++++++ src/backend/commands/tablecmds.c | 40 +++--- src/include/catalog/pg_constraint.h | 2 + src/test/regress/expected/alter_table.out | 153 ++++++++++++++++++++++ src/test/regress/sql/alter_table.sql | 83 ++++++++++++ 5 files changed, 335 insertions(+), 16 deletions(-) diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index b12765ae691..37628f65dc1 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -29,6 +29,7 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "common/int.h" +#include "nodes/nodeFuncs.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -694,6 +695,78 @@ findDomainNotNullConstraint(Oid typid) return retval; } +/* + * Given a relation, an attnum and a (cooked) expression, this returns true if + * it finds a CHECK constraint which proves that the given column is equal to + * the expression. + * + * The constraint must use a mergejoinable operator for the type of the column, + * a concept used by the planner as well to infer equivalence classes on the + * terms in a query (see op_mergejoinable()). + * + * The expressions are compared structurally, so they must match exactly for + * this check to succeed. 
+ */ +bool +findStructuralCheckConstraintOnAttr(Oid relid, AttrNumber attnum, + const Node *target_expr) +{ + Relation pg_constraint; + HeapTuple conTup; + SysScanDesc scan; + ScanKeyData key; + bool found = false; /* result; stays false unless a matching constraint is seen */ + + pg_constraint = table_open(ConstraintRelationId, AccessShareLock); + ScanKeyInit(&key, + Anum_pg_constraint_conrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); /* restrict the scan to rows with conrelid = relid */ + scan = systable_beginscan(pg_constraint, ConstraintRelidTypidNameIndexId, + true, NULL, 1, &key); + + while (HeapTupleIsValid(conTup = systable_getnext(scan))) + { + Form_pg_constraint con = GETSTRUCT(conTup); + char *conbin; + Datum val; + Node *conexpr; + + if (con->contype != CONSTRAINT_CHECK) /* only CHECK constraints are considered */ + continue; + if (!con->convalidated) /* skip constraints not marked as validated */ + continue; + + val = SysCacheGetAttrNotNull(CONSTROID, conTup, + Anum_pg_constraint_conbin); + conbin = TextDatumGetCString(val); + conexpr = stringToNode(conbin); /* turn the stored conbin text back into a node tree */ + + if (IsA(conexpr, OpExpr)) /* the top-level node must be an operator call */ + { + OpExpr *op = (OpExpr *) conexpr; + + if (list_length(op->args) == 2 && IsA(linitial(op->args), Var)) /* only "Var op expr" matches: the Var must be the first argument */ + { + Var *var = linitial(op->args); + + if (var->varattno == attnum && + op_mergejoinable(op->opno, exprType((Node *) var)) && + equal(lsecond(op->args), target_expr)) /* second argument is compared with equal() on the node trees, not by value */ + { + found = true; /* NOTE(review): a CHECK constraint is also satisfied when its expression yields NULL, so "col = expr" alone does not exclude rows where only one side is NULL; confirm the caller may really skip the rewrite in that case. Likewise, operator equality need not mean identical stored values (e.g. numeric 1.0 vs 1.00) -- verify. */ + break; + } + } + } + } + + systable_endscan(scan); + table_close(pg_constraint, AccessShareLock); + + return found; +} + /* * Given a pg_constraint tuple for a not-null constraint, return the column * number it is for. 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 39faef0a114..4afa157ee9e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8939,6 +8939,7 @@ ATExecAddGeneratedAsExprStored(AlteredTableInfo *tab, Relation pg_attribute; List *newcons; CookedConstraint *cookedDef; + bool rewrite; Assert(def->raw_expr != NULL); Assert(def->cooked_expr == NULL); @@ -9013,25 +9014,32 @@ ATExecAddGeneratedAsExprStored(AlteredTableInfo *tab, cookedDef = linitial(newcons); - /* - * Clear all the missing values if we're rewriting the table, since this - * renders them pointless. - */ - RelationClearMissing(rel); + rewrite = !findStructuralCheckConstraintOnAttr(RelationGetRelid(rel), + attnum, + cookedDef->expr); - /* Make above changes visible */ - CommandCounterIncrement(); + if (rewrite) + { + /* + * Clear all the missing values if we're rewriting the table, since + * this renders them pointless. + */ + RelationClearMissing(rel); - /* Drop any pg_statistic entry for the column */ - RemoveStatistics(RelationGetRelid(rel), attnum); + /* Make above changes visible */ + CommandCounterIncrement(); - /* Schedule a rewrite */ - newval = palloc0_object(NewColumnValue); - newval->attnum = attnum; - newval->expr = (Expr *) cookedDef->expr; - newval->is_generated = true; - tab->newvals = lappend(tab->newvals, newval); - tab->rewrite |= AT_REWRITE_DEFAULT_VAL; + /* Drop any pg_statistic entry for the column */ + RemoveStatistics(RelationGetRelid(rel), attnum); + + /* Schedule a rewrite */ + newval = palloc0_object(NewColumnValue); + newval->attnum = attnum; + newval->expr = (Expr *) cookedDef->expr; + newval->is_generated = true; + tab->newvals = lappend(tab->newvals, newval); + tab->rewrite |= AT_REWRITE_DEFAULT_VAL; + } InvokeObjectPostAlterHook(RelationRelationId, RelationGetRelid(rel), attnum); diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h index 1b7fedf1750..7ac9e00c28b 100644 --- 
a/src/include/catalog/pg_constraint.h +++ b/src/include/catalog/pg_constraint.h @@ -266,6 +266,8 @@ extern char *ChooseConstraintName(const char *name1, const char *name2, extern HeapTuple findNotNullConstraintAttnum(Oid relid, AttrNumber attnum); extern HeapTuple findNotNullConstraint(Oid relid, const char *colname); extern HeapTuple findDomainNotNullConstraint(Oid typid); +extern bool findStructuralCheckConstraintOnAttr(Oid relid, AttrNumber attnum, + const Node *target_expr); extern AttrNumber extractNotNullColumn(HeapTuple constrTup); extern bool AdjustNotNullInheritance(Oid relid, AttrNumber attnum, const char *new_conname, bool is_local, bool is_no_inherit, bool is_notvalid); diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 2d0ce414d12..b269736ab1a 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -4956,6 +4956,159 @@ select :idx_filenode_before != :idx_filenode_after as did_rewrite_idx; (1 row) drop table testgen.t3; +-- turning a regular column into a stored generated column +-- without rewriting the table (when a check constraint proves it isn't needed) +create table testgen.t4 (a int, b int not null); +insert into testgen.t4 (a, b) select x, x * 2 from generate_series(0, 5) x; +alter table testgen.t4 add constraint chk_gen_clause check (b = a * 2); +select pg_relation_filenode('testgen.t4') as t4_filenode_before \gset +alter table testgen.t4 alter column b add generated always as (a * 2) stored; +select pg_relation_filenode('testgen.t4') as t4_filenode_after \gset +select :t4_filenode_before = :t4_filenode_after as did_skip_rewrite; + did_skip_rewrite +------------------ + t +(1 row) + +\d+ testgen.t4 + Table "testgen.t4" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------+---------+--------------+------------- + a | integer | | | 
| plain | | + b | integer | | not null | generated always as (a * 2) stored | plain | | +Check constraints: + "chk_gen_clause" CHECK (b = (a * 2)) +Not-null constraints: + "t4_b_not_null" NOT NULL "b" + +drop table testgen.t4; +-- turning a regular column into a stored generated column +-- same as the previous case, but a rewrite happens since the constraint is not +-- valid +create table testgen.t4 (a int, b int not null); +insert into testgen.t4 (a, b) select x, x * 2 from generate_series(0, 5) x; +alter table testgen.t4 add constraint chk_gen_clause check (b = a * 2) not valid; +select pg_relation_filenode('testgen.t4') as t4_filenode_before \gset +alter table testgen.t4 alter column b add generated always as (a * 2) stored; +select pg_relation_filenode('testgen.t4') as t4_filenode_after \gset +select :t4_filenode_before != :t4_filenode_after as did_rewrite; + did_rewrite +------------- + t +(1 row) + +\d+ testgen.t4 + Table "testgen.t4" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | not null | generated always as (a * 2) stored | plain | | +Check constraints: + "chk_gen_clause" CHECK (b = (a * 2)) NOT VALID +Not-null constraints: + "t4_b_not_null" NOT NULL "b" + +drop table testgen.t4; +-- turning a regular column into a stored generated column +-- same as the previous case, but a rewrite happens since the constraint +-- operator is not mergejoinable (the expression itself still matches) +create table testgen.t4 (a int, b int not null); +insert into testgen.t4 (a, b) select x, x * 2 from generate_series(0, 5) x; +alter table testgen.t4 add constraint chk_gen_clause check (b >= a * 2); +select pg_relation_filenode('testgen.t4') as t4_filenode_before \gset +alter table testgen.t4 alter column b add generated always as (a * 2) stored; +select pg_relation_filenode('testgen.t4') as 
t4_filenode_after \gset +select :t4_filenode_before != :t4_filenode_after as did_rewrite; + did_rewrite +------------- + t +(1 row) + +\d+ testgen.t4 + Table "testgen.t4" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | not null | generated always as (a * 2) stored | plain | | +Check constraints: + "chk_gen_clause" CHECK (b >= (a * 2)) +Not-null constraints: + "t4_b_not_null" NOT NULL "b" + +drop table testgen.t4; +-- test the whole process for adding a stored generated column without +-- long-lived exclusive locks +create table testgen.t5 (a int); +select pg_relation_filenode('testgen.t5') as t5_filenode_before \gset +insert into testgen.t5 select x from generate_series(1, 5) x; +alter table testgen.t5 add column b int; +-- take care of new and updated columns +create function testgen.gen () returns trigger language plpgsql as $$ +begin + new.b = new.a * 2; return new; +end +$$; +create trigger testgen_gen + before insert or update on testgen.t5 + for each row execute function testgen.gen(); +-- add the constraint as not valid: enforced only for new and updated rows +begin; +alter table testgen.t5 + add constraint chk_gen_clause check (b = a * 2) not valid; +select locktype, mode from pg_locks + where relation = 'testgen.t5'::regclass and granted; + locktype | mode +----------+--------------------- + relation | AccessExclusiveLock +(1 row) + +commit; +insert into testgen.t5 (a) values (100), (200), (300); +-- backfill existing rows at the appropriate pace +update testgen.t5 set b = a * 2 where b is null; +-- validate: this scans the table, but without an exclusive lock +begin; +alter table testgen.t5 validate constraint chk_gen_clause; +select locktype, mode from pg_locks + where relation = 'testgen.t5'::regclass and granted; + locktype | mode 
+----------+-------------------------- + relation | ShareUpdateExclusiveLock +(1 row) + +commit; +-- now the schema update, which skips the rewrite because of the check +begin; +drop trigger testgen_gen on testgen.t5; +alter table testgen.t5 alter column b + add generated always as (a * 2) stored; +select locktype, mode from pg_locks +where relation = 'testgen.t5'::regclass and granted; + locktype | mode +----------+--------------------- + relation | AccessShareLock + relation | AccessExclusiveLock +(2 rows) + +commit; +select pg_relation_filenode('testgen.t5') as t5_filenode_after \gset +select :t5_filenode_before = :t5_filenode_after as did_skip_rewrite; + did_skip_rewrite +------------------ + t +(1 row) + +\d+ testgen.t5 + Table "testgen.t5" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------+---------+--------------+------------- + a | integer | | | | plain | | + b | integer | | | generated always as (a * 2) stored | plain | | +Check constraints: + "chk_gen_clause" CHECK (b = (a * 2)) + +drop function testgen.gen; +drop table testgen.t5; -- test support for partitioned tables and inheritance create table testgen.tpart (a int, b int) partition by hash (a); create table testgen.tpart_p1 partition of testgen.tpart diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 51d818d4995..9851093fb58 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -3198,6 +3198,89 @@ select pg_relation_filenode('testgen.idx_b') as idx_filenode_after \gset select :idx_filenode_before != :idx_filenode_after as did_rewrite_idx; drop table testgen.t3; +-- turning a regular column into a stored generated column +-- without rewriting the table (when a check constraint proves it isn't needed) +create table testgen.t4 (a int, b int not null); +insert into testgen.t4 (a, b) select x, x * 2 from 
generate_series(0, 5) x; +alter table testgen.t4 add constraint chk_gen_clause check (b = a * 2); +select pg_relation_filenode('testgen.t4') as t4_filenode_before \gset +alter table testgen.t4 alter column b add generated always as (a * 2) stored; +select pg_relation_filenode('testgen.t4') as t4_filenode_after \gset +select :t4_filenode_before = :t4_filenode_after as did_skip_rewrite; +\d+ testgen.t4 +drop table testgen.t4; + +-- turning a regular column into a stored generated column +-- same as the previous case, but a rewrite happens since the constraint is not +-- valid +create table testgen.t4 (a int, b int not null); +insert into testgen.t4 (a, b) select x, x * 2 from generate_series(0, 5) x; +alter table testgen.t4 add constraint chk_gen_clause check (b = a * 2) not valid; +select pg_relation_filenode('testgen.t4') as t4_filenode_before \gset +alter table testgen.t4 alter column b add generated always as (a * 2) stored; +select pg_relation_filenode('testgen.t4') as t4_filenode_after \gset +select :t4_filenode_before != :t4_filenode_after as did_rewrite; +\d+ testgen.t4 +drop table testgen.t4; + +-- turning a regular column into a stored generated column +-- same as the previous case, but a rewrite happens since the constraint +-- operator is not mergejoinable (the expression itself still matches) +create table testgen.t4 (a int, b int not null); +insert into testgen.t4 (a, b) select x, x * 2 from generate_series(0, 5) x; +alter table testgen.t4 add constraint chk_gen_clause check (b >= a * 2); +select pg_relation_filenode('testgen.t4') as t4_filenode_before \gset +alter table testgen.t4 alter column b add generated always as (a * 2) stored; +select pg_relation_filenode('testgen.t4') as t4_filenode_after \gset +select :t4_filenode_before != :t4_filenode_after as did_rewrite; +\d+ testgen.t4 +drop table testgen.t4; + +-- test the whole process for adding a stored generated column without +-- long-lived exclusive locks +create table testgen.t5 (a int); +select pg_relation_filenode('testgen.t5') as 
t5_filenode_before \gset +insert into testgen.t5 select x from generate_series(1, 5) x; +alter table testgen.t5 add column b int; +-- take care of new and updated columns +create function testgen.gen () returns trigger language plpgsql as $$ +begin + new.b = new.a * 2; return new; +end +$$; +create trigger testgen_gen + before insert or update on testgen.t5 + for each row execute function testgen.gen(); +-- add the constraint as not valid: enforced only for new and updated rows +begin; +alter table testgen.t5 + add constraint chk_gen_clause check (b = a * 2) not valid; +select locktype, mode from pg_locks + where relation = 'testgen.t5'::regclass and granted; +commit; +insert into testgen.t5 (a) values (100), (200), (300); +-- backfill existing rows at the appropriate pace +update testgen.t5 set b = a * 2 where b is null; +-- validate: this scans the table, but without an exclusive lock +begin; +alter table testgen.t5 validate constraint chk_gen_clause; +select locktype, mode from pg_locks + where relation = 'testgen.t5'::regclass and granted; +commit; +-- now the schema update, which skips the rewrite because of the check +begin; +drop trigger testgen_gen on testgen.t5; +alter table testgen.t5 alter column b + add generated always as (a * 2) stored; +select locktype, mode from pg_locks +where relation = 'testgen.t5'::regclass and granted; +commit; +select pg_relation_filenode('testgen.t5') as t5_filenode_after \gset +select :t5_filenode_before = :t5_filenode_after as did_skip_rewrite; +\d+ testgen.t5 +drop function testgen.gen; +drop table testgen.t5; + -- test support for partitioned tables and inheritance create table testgen.tpart (a int, b int) partition by hash (a); create table testgen.tpart_p1 partition of testgen.tpart -- 2.47.0