From 478660db3b96e9bb12adc7131c35b26f52d26c78 Mon Sep 17 00:00:00 2001 From: jian he Date: Tue, 3 Sep 2024 12:49:33 +0800 Subject: [PATCH v5 1/1] Virtual generated wholerow var and virtual checking optimization 1. make checking a var is references to a virtual generated column faster 2. make virtual generated column expand correct with wholerow var The first is via Tuple constraint struct (TupleConstr) has_generated_virtual boolean flag bit. walk through the query tree, open each relation, check has_generated_virtual flag. If so, then collect it. the second, see expand_generated_columns_mutator case when "attnum == 0" --- src/backend/rewrite/rewriteHandler.c | 221 +++++++++++++++--- .../regress/expected/generated_virtual.out | 31 +++ src/test/regress/sql/generated_virtual.sql | 7 + 3 files changed, 227 insertions(+), 32 deletions(-) diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index f2713eaef2..7985c79f29 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -4380,6 +4380,12 @@ struct expand_generated_context /* incremented for every level where it's true */ int ancestor_has_virtual; + + /* list of relation oids that have virtual generated column */ + List *virtual_gen_rel; + + /* list of attnums that is virtual generated column */ + List *virtual_attnums; }; static Node * @@ -4393,6 +4399,9 @@ expand_generated_columns_mutator(Node *node, struct expand_generated_context *co Var *v = (Var *) node; Oid relid; AttrNumber attnum; + ListCell *lc, + *lc2; + Oid attcollid; List *rtable = list_nth_node(List, context->rtables, list_length(context->rtables) - v->varlevelsup - 1); @@ -4400,42 +4409,116 @@ expand_generated_columns_mutator(Node *node, struct expand_generated_context *co relid = rt_fetch(v->varno, rtable)->relid; attnum = v->varattno; - if (!relid || !attnum) + if (!OidIsValid(relid)) return node; - if (get_attgenerated(relid, attnum) == ATTRIBUTE_GENERATED_VIRTUAL) + /* wholerow case when some individual var is virtual generated */ + if (attnum == 0) { Relation rt_entry_relation = table_open(relid, NoLock); - Oid attcollid; + TupleDesc tupdesc = RelationGetDescr(rt_entry_relation); + Node *virtual_node = NULL; + RowExpr *rowexpr = NULL; + List *fields = NIL; + int i; - node = build_column_default(rt_entry_relation, attnum); - if (node == NULL) - elog(ERROR, "no generation expression found for column number %d of table \"%s\"", - attnum, RelationGetRelationName(rt_entry_relation)); + if (tupdesc->constr && !tupdesc->constr->has_generated_virtual) + { + table_close(rt_entry_relation, NoLock); + return node; + } - /* - * If the column definition has a collation and it is different - * from the collation of the generation expression, put a COLLATE - * clause around the expression. - */ - attcollid = GetSysCacheOid(ATTNUM, Anum_pg_attribute_attcollation, relid, attnum, 0, 0); - if (attcollid && attcollid != exprCollation(node)) + for (i = 0; i < tupdesc->natts; i++) { - CollateExpr *ce = makeNode(CollateExpr); + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + if (att->attisdropped) + continue; //TODO, i am not so sure. - ce->arg = (Expr *) node; - ce->collOid = attcollid; - ce->location = -1; + if (att->attgenerated == 'v') + { + virtual_node = build_column_default(rt_entry_relation, att->attnum); + if (virtual_node == NULL) + elog(ERROR, "no generation expression found for column number %d of table \"%s\"", + attnum, RelationGetRelationName(rt_entry_relation)); - node = (Node *) ce; - } + attcollid = GetSysCacheOid(ATTNUM, Anum_pg_attribute_attcollation, relid, att->attnum, 0, 0); + if (attcollid && attcollid != exprCollation(node)) + { + CollateExpr *ce = makeNode(CollateExpr); - IncrementVarSublevelsUp(node, v->varlevelsup, 0); - ChangeVarNodes(node, 1, v->varno, v->varlevelsup); + ce->arg = (Expr *) node; + ce->collOid = attcollid; + ce->location = -1; + virtual_node = (Node *) ce; + } + + IncrementVarSublevelsUp(virtual_node, v->varlevelsup, 0); + ChangeVarNodes(virtual_node, 1, v->varno, v->varlevelsup); + fields = lappend(fields, virtual_node); + } + else + { + Var *var = makeVar(v->varno, + att->attnum, + att->atttypid, + att->atttypmod, + att->attcollation, + v->varlevelsup); + fields = lappend(fields, var); + } + } + rowexpr = makeNode(RowExpr); + rowexpr->args = fields; + rowexpr->row_typeid = v->vartype;; + rowexpr->row_format = COERCE_IMPLICIT_CAST; + rowexpr->colnames = NIL; + rowexpr->location = -1; table_close(rt_entry_relation, NoLock); + node = (Node *) rowexpr; + return node; } + + forboth(lc, context->virtual_gen_rel, lc2, context->virtual_attnums) + { + Oid virtual_rel = lfirst_oid(lc); + AttrNumber attno = lfirst_int(lc2); + + if(attno == attnum && virtual_rel == relid) + { + Relation rt_entry_relation = table_open(relid, NoLock); + + Assert(get_attgenerated(relid, attnum) == ATTRIBUTE_GENERATED_VIRTUAL); + + node = build_column_default(rt_entry_relation, attnum); + if (node == NULL) + elog(ERROR, "no generation expression found for column number %d of table \"%s\"", + attnum, RelationGetRelationName(rt_entry_relation)); + + /* + * If the column definition has a collation and it is different + * from the collation of the generation expression, put a COLLATE + * clause around the expression. + */ + attcollid = GetSysCacheOid(ATTNUM, Anum_pg_attribute_attcollation, relid, attnum, 0, 0); + if (attcollid && attcollid != exprCollation(node)) + { + CollateExpr *ce = makeNode(CollateExpr); + + ce->arg = (Expr *) node; + ce->collOid = attcollid; + ce->location = -1; + + node = (Node *) ce; + } + + IncrementVarSublevelsUp(node, v->varlevelsup, 0); + ChangeVarNodes(node, 1, v->varno, v->varlevelsup); + + table_close(rt_entry_relation, NoLock); + } + } return node; } else if (IsA(node, Query)) @@ -4457,6 +4540,7 @@ expand_generated_columns_in_expr(Node *node, Relation rel) if (tupdesc->constr && tupdesc->constr->has_generated_virtual) { + int i; RangeTblEntry *rte; List *rtable; struct expand_generated_context context; @@ -4470,6 +4554,23 @@ expand_generated_columns_in_expr(Node *node, Relation rel) rte->relid = RelationGetRelid(rel); rtable = list_make2(rte, rte); context.rtables = list_make1(rtable); + context.virtual_gen_rel = NIL; + context.virtual_attnums = NIL; + + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + if (att->attisdropped) + continue; + + if (att->attgenerated == 'v') + { + context.virtual_gen_rel = lappend_oid(context.virtual_gen_rel, rte->relid); + context.virtual_attnums = lappend_int(context.virtual_attnums, att->attnum); + } + } + Assert(list_length(context.virtual_gen_rel) > 0); + Assert(list_length(context.virtual_gen_rel) == list_length(context.virtual_attnums)); return expression_tree_mutator(node, expand_generated_columns_mutator, &context); } @@ -4477,6 +4578,65 @@ expand_generated_columns_in_expr(Node *node, Relation rel) return node; } + +/* + * In a Query tree, if some vars are referencing virtual generated column we may + * need expand to its original default expression, however searching var is + * virtual generated via pg_attribute is expensive. so we preliminary collect + * the relation and virtual attnums that are virtual generated columns. later + * we ultile this in expand_generated_columns_mutator. +*/ +static bool +collect_all_virtual_reloid_walker(Node *node, struct expand_generated_context *context) +{ + if (node == NULL) + return false; + + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + Oid relid = rte->relid; + + if (OidIsValid(relid)) + { + int i; + Relation rt_entry_relation = table_open(relid, NoLock); + TupleDesc tupdesc = RelationGetDescr(rt_entry_relation); + + if (tupdesc->constr && tupdesc->constr->has_generated_virtual) + { + for (i = 0; i < tupdesc->natts; i++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, i); + if (att->attisdropped) + continue; + if (att->attgenerated == 'v') + { + context->virtual_gen_rel = lappend_oid(context->virtual_gen_rel, relid); + context->virtual_attnums = lappend_int(context->virtual_attnums, att->attnum); + } + } + Assert(list_length(context->virtual_gen_rel) > 0); + Assert(list_length(context->virtual_gen_rel) == list_length(context->virtual_attnums)); + } + table_close(rt_entry_relation, NoLock); + return false; + } + return false; /* allow range_table_walker to continue */ + } + + if (IsA(node, Query)) + { + return query_tree_walker((Query *) node, + collect_all_virtual_reloid_walker, + (void *) context, + QTW_EXAMINE_RTES_AFTER); + } + + return expression_tree_walker(node, collect_all_virtual_reloid_walker, + (void *) context); +} + /* * Expand virtual generated columns in a Query. We do some optimizations here * to avoid digging through the whole Query unless necessary. @@ -4505,22 +4665,14 @@ expand_generated_columns_in_query(Query *query, struct expand_generated_context */ else { - ListCell *lc; - - foreach(lc, query->rtable) + foreach_node(RangeTblEntry, rte, query->rtable) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); - if (rte->rtekind == RTE_SUBQUERY) rte->subquery = expand_generated_columns_in_query(rte->subquery, context); } - foreach(lc, query->cteList) - { - CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); - + foreach_node(CommonTableExpr, cte, query->cteList) cte->ctequery = (Node *) expand_generated_columns_in_query(castNode(Query, cte->ctequery), context); - } } if (query->hasGeneratedVirtual) @@ -4579,6 +4731,11 @@ QueryRewrite(Query *parsetree) query = fireRIRrules(query, NIL); + query_or_expression_tree_walker((Node *) query, + collect_all_virtual_reloid_walker, + (void *) &context, + QTW_EXAMINE_RTES_AFTER); + query = expand_generated_columns_in_query(query, &context); query->queryId = input_query_id; diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index 4cf4f8118f..67e9ffeec3 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -33,6 +33,37 @@ SELECT table_name, column_name, dependent_column FROM information_schema.column_ Indexes: "gtest1_pkey" PRIMARY KEY, btree (a) +--test with wholerow and ROW expr, some invidiual virtual generated should expanded. +insert into gtest1 values (11, default) returning (select (select gtest1)); + gtest1 +--------- + (11,22) +(1 row) + +select gtest1 from gtest1 except all select gtest1 from gtest1; + gtest1 +-------- +(0 rows) + +select row(gtest1.b) from gtest1; + row +------ + (22) +(1 row) + +select row(gtest1) from gtest1; + row +------------- + ("(11,22)") +(1 row) + +with cte as (select (select gtest1) from gtest1) select cte from cte; + cte +------------- + ("(11,22)") +(1 row) + +truncate gtest1; -- duplicate generated CREATE TABLE gtest_err_1 (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL GENERATED ALWAYS AS (a * 3) VIRTUAL); ERROR: multiple generation clauses specified for column "b" of table "gtest_err_1" diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql index 34b8070b3e..cd5a2eec3d 100644 --- a/src/test/regress/sql/generated_virtual.sql +++ b/src/test/regress/sql/generated_virtual.sql @@ -15,6 +15,13 @@ SELECT table_name, column_name, dependent_column FROM information_schema.column_ \d gtest1 +--test with wholerow and ROW expr, some invidiual virtual generated should expanded. +insert into gtest1 values (11, default) returning (select (select gtest1)); +select gtest1 from gtest1 except all select gtest1 from gtest1; +select row(gtest1.b) from gtest1; +select row(gtest1) from gtest1; +with cte as (select (select gtest1) from gtest1) select cte from cte; +truncate gtest1; -- duplicate generated CREATE TABLE gtest_err_1 (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL GENERATED ALWAYS AS (a * 3) VIRTUAL); -- 2.34.1